#!/usr/bin/perl -w

#  Who:  Javed A. Aslam
#  What: createData.pl
#  When: 8/18/11
#  Why:  Creates linearly separable data for perceptron learning.
#
#  How:  createData.pl <pts> <dim> <vDelRatio2>
#
#        <pts> is number of data points to create
#        <dim> is number of dimensions
#        <vDelRatio2> is the ratio of |v|/del, squared,
#          where v is the separating hyperplane and del is the margin

#
# Usage...
#

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

if (@ARGV != 3) {
  die "\nUsage: createData.pl numPoints numDimensions vDelRatio2 > outfile\n\n";
}

#
# Get and validate args...
#

my $numPts     = shift;       # number of data points to emit
my $numDim     = shift;       # number of coordinates per point
my $vDelRatio2 = shift;       # (|v| / margin)^2

# The two counts must be whole numbers; anything else would be silently
# coerced by the loops below and produce surprising output.
for my $check (['numPoints', $numPts, 0], ['numDimensions', $numDim, 1]) {
  my ($name, $value, $min) = @$check;
  die "\n$name must be an integer >= $min (got '$value')\n\n"
    unless looks_like_number($value)
        && $value == int($value)
        && $value >= $min;
}

# The ratio is a divisor under a square root, so it must be strictly positive.
die "\nvDelRatio2 must be a number > 0 (got '$vDelRatio2')\n\n"
  unless looks_like_number($vDelRatio2) && $vDelRatio2 > 0;

# Force numeric values so inputs such as "1e3" behave as plain numbers.
$numPts += 0;
$numDim += 0;

# Create classifier: v_1*x_1 + v_2*x_2 + ... > 1.
#
# Coefficients v_i = 4*i / (numDim * (numDim+1)) are chosen to *sum* to 2.
# Since coordinates are chosen at random in range [0,1),
# they have an expected value of 1/2.  Thus, the expected
# dot product is 1, and as such, about half the points should be
# positive and half negative.

my @coeff;                    # coefficients of classifier
my $vMag2 = 1;                # squared magnitude of v, starting from the threshold 1

for my $i (1 .. $numDim) {
  my $v = 4*$i/($numDim+1)/$numDim;
  push @coeff, $v;
  $vMag2 += $v*$v;
}

# Minimum distance (in units of the raw score) a point must keep from the
# separating hyperplane in order to be emitted.
my $delta = sqrt($vMag2 / $vDelRatio2);

# Each point's score lies in [-1, 1): the coefficients sum to 2 and every
# coordinate is in [0,1).  A margin above 1 can therefore never be met and
# the generation loop below would spin forever, so refuse up front.
if ($numPts > 0 && $delta > 1) {
  die "\nvDelRatio2 is too small: required margin $delta exceeds 1, "
    . "so no point can pass the filter (need vDelRatio2 >= $vMag2)\n\n";
}

# Now create data points, filtering those points that are too close
# to the separating hyperplane.

my $filtered = 0;             # number of points filtered

while ($numPts > 0) {

  # Draw one candidate point uniformly from the unit hypercube.
  my @vec = map { rand() } 1 .. $numDim;

  # Signed score of the point: v . x - 1 (the -1 comes from the threshold).
  my $ptDel = -1;
  $ptDel += $coeff[$_] * $vec[$_] for 0 .. $#coeff;

  # Too close to the hyperplane: discard and try again.
  if (abs($ptDel) < $delta) {
    $filtered++;
    next;
  }

  # Emit tab-separated coordinates followed by the class label.
  my $label = $ptDel > 0 ? "1" : "-1";
  print join("\t", @vec, $label), "\n";
  $numPts--;
}

# Report the classifier and the rejection count on STDERR so that STDOUT
# carries only the data set.
print STDERR "\nCoefficients:";
print STDERR " $_" for @coeff;
print STDERR "\nNumber of points filtered: $filtered\n\n";

