#!/usr/bin/perl -w

#  Who:  Javed A. Aslam
#  What: perceptron.pl
#  When: 8/19/11
#  Why:  Runs the perceptron training algorithm on input data set; outputs predictor
#
#  How:  perceptron.pl data
#
#        Assumes that data is tab-delimited with last column equal to {+1,-1} label

#
# Usage...
#

# Exactly one command-line argument (the data file) is required;
# anything else prints the usage message and aborts.
die "\nUsage: perceptron.pl dataFile\n\n" unless @ARGV == 1;

#
# Get args...
#

# At file scope a bare shift operates on @ARGV; name it explicitly.
$dataFile = shift @ARGV;

#
# Process data
#

# Read the tab-delimited data file into @data: one array ref of fields per
# record, in file order.  $records ends up as the number of records kept.
#
# Lines with no non-whitespace characters are skipped.  Such a line would
# otherwise become a record with no label, which the training loop counts
# as a mistake on every pass without ever changing the weights.
#
# Every record must have as many fields as the first one, because the
# position of the label column is derived from the first record only.
#
# Dies if the file cannot be opened or a record has the wrong field count.

open (my $fh, "<", $dataFile)
  or die "\nCan't open $dataFile: $!\n\n";

$records = 0;                              # number of records kept

while (my $line = <$fh>) {

  chomp $line;                             # remove newline
  $line =~ s/\r\z//;                       # and the CR of a CRLF line ending

  next unless $line =~ /\S/;               # skip blank lines

  my @fields = split(/\t/, $line);

  if ($records > 0 && @fields != @{$data[0]}) {
    die "\nLine $. of $dataFile has " . scalar(@fields)
      . " fields; expected " . scalar(@{$data[0]}) . "\n\n";
  }

  $data[$records++] = \@fields;

}

close ($fh);

# print "Num records = $records\nFirst record:\n";
# print @{$data[0]};

# Size the weight vector from the first record.  $dim is the number of
# fields in a record: the data dimensions plus one for the label column.
# That is also the number of parameters of the linear classifier, since
# the last element of @w is the weight of the offset parameter.
#
# @data is tested before $data[0] is dereferenced so that an empty data
# set does not autovivify an empty first record.  With no fields there is
# no label column to train on, so stop with a clear message.

$dim = @data ? scalar @{$data[0]} : 0;

die "\nNo usable data in $dataFile (file is empty or its first record has no fields)\n\n"
  if $dim == 0;

@w = (0) x $dim;                           # all weights start at zero

$mistakes = 0;                            # mistakes summed over every pass
$iter     = 0;                            # completed passes over @data

# Position $dim-1 holds the {+1,-1} label in each record and, in @w,
# the weight of the constant offset input.
my $labelAt = $dim - 1;

# Sweep the data repeatedly, stopping only after a sweep with no mistakes.
# NOTE(review): there is no cap on the number of sweeps, so this loop does
# not end while every sweep still contains a misclassified example.
PASS: while (1) {

  my $erred = 0;                          # set when this sweep sees a mistake

  for my $example (@data) {

    my $label = $example->[$labelAt];

    # Offset weight (times an implicit input of 1) plus the weighted sum
    # of the data columns 0 through $dim-2.
    my $score = $w[$labelAt];
    for my $col (0 .. $labelAt - 1) {
      $score += $w[$col] * $example->[$col];
    }

    # Multiplying by the label makes the score positive exactly when the
    # example lies on the side its label asks for.
    $score *= $label;

    next unless $score <= 0;              # classified correctly; no update

    # Mistake: add the label-signed example to the weight vector.
    $erred = 1;
    $mistakes++;

    $w[$labelAt] += $label;
    $w[$_] += $example->[$_] * $label for 0 .. $labelAt - 1;

  }

  $iter++;

  print STDERR "Iteration $iter, total mistakes $mistakes\n";

  last PASS unless $erred;

}

# Print the learned classifier on STDOUT in two forms:
#
#   1. the raw weights, data weights first and offset weight last;
#   2. the data weights divided by minus the offset weight, followed by
#      the literal threshold 1.
#
# The second form needs a non-zero offset weight.  Training can end with
# an offset weight of exactly 0: the records (1,+1) and (-1,-1) converge
# to weights (2, 0).  That case is reported as undefined instead of
# letting the division abort the script.

print "\nClassifier weights: ";
print "$_ " for @w[0 .. $dim-1];

print "\n\nNormalized with threshold: ";

my $offsetWeight = $w[$dim-1];

if (defined $offsetWeight && $offsetWeight != 0) {
  for my $weight (@w[0 .. $dim-2]) {
    my $norm = - $weight / $offsetWeight;
    print "$norm ";
  }
  print "1\n";
}
else {
  print "undefined (offset weight is 0)\n";
}
