# first, we need to define our subroutines (functions)
# which will be used in our program.
# these all begin with the keyword "sub"

# function to read in a file given its filename
sub readFile {
    my ($filename) = @_;
    my $seq = "";
    open(FILE, $filename);
    while (<FILE>) {
        chomp($_);
        if (substr($_, 0, 1) ne ">") {
            $seq .= $_;
        }
    }
    return $seq;
}

# function to return the maximum value of three numbers
# as well as an identifier indicating which value was the maximum.
# This corresponds to: upArrow = 1, leftArrow = 2, upper-leftArrow = 3
sub getMax {
    my ($s1, $s2, $s3) = @_;
    my $maxValue = $s1;
    my $maxNum = 1; # value of Arrow
    if ($maxValue < $s2) {
        $maxValue = $s2;
        $maxNum = 2;
    }
    if ($maxValue < $s3) {
        $maxValue = $s3;
        $maxNum = 3;
    }
    return ($maxValue, $maxNum);
}

# a utility function to print the score matrix
sub printScores {
    my ($v, $w, @scores) = @_;
    my $n = length $v;
    my $m = length $w;
    print "*$w\n";
    for ($i=1; $i<=$n; $i++) {
        print substr($v,$i-1,1);
        for ($j=1; $j<=$m; $j++) {
            print $scores[$i][$j];
        }
        print "\n";
    }
}

# the alignment function
sub LCS {
    my ($v, $w) = @_;
    #initialize the matrices
    my @scores = ();
    my @arrows = ();
    my $n = length $v;
    my $m = length $w;

    for ($i=0; $i <= $n; $i++) {
        my @newRow = ();
        for ($j=0; $j <= $m; $j++) {
            push(@newRow, 0);
        }
        push(@scores, [@newRow]);
    }
    for ($i=1; $i <= $n; $i++) {
        my $vi = substr($v, $i-1, 1);
        for ($j=1; $j <= $m; $j++) {
            my $wj = substr($w, $j-1, 1);
            my $s1 = $scores[$i-1][$j];
            my $s2 = $scores[$i][$j-1];
            if ($vi eq $wj) {
                $s3 = $scores[$i-1][$j-1] + 1;
            } else {
                $s3 = -1;
            } 
            ($scores[$i][$j], $arrows[$i][$j]) = getMax($s1, $s2, $s3);
        }
    }
    #printScores($v, $w, @scores);
    return ($scores[$n][$m], @arrows);
}

# a utility function to print the arrow matrix
sub printArrows {
    my ($v, $w, @arrows) = @_;
    my $n = length $v;
    my $m = length $w;
    for ($i=0; $i<=$n; $i++) {
        for ($j=0; $j<$m; $j++) {
            print $arrows[$i][$j];
        }
        print "\n";
    }
}

# a utility function to print the common subsequence
sub printLCS {
    my ($v, $i, $j, $seqnum, @arrows) = @_;
    if ($i == 0 or $j == 0) {
        return;
    }
    if ($arrows[$i][$j] == 3) {
        printLCS($v, $i-1, $j-1, $seqnum, @arrows);
        print substr($v, $i-1, 1);
    } else {
        if ($arrows[$i][$j] == 1) {
            printLCS($v, $i-1, $j, $seqnum, @arrows);
        } else {
            printLCS($v, $i, $j-1, $seqnum, @arrows);
        }
    }
}

# we are now finished with our subroutines.

# the main part of the program starts here!

# first, read in the two filenames from the runtime arguments
my $seq1 = readFile($ARGV[0]);
my $seq2 = readFile($ARGV[1]);

print "Read two sequences:\n";
print "$seq1\n";
print "$seq2\n";
print "\nTheir longest common subsequence is as follows:\n";

# calculate the longest common subsequence
($topScore, @arrows) = LCS($seq1, $seq2);

#printArrows($seq1, $seq2, @arrows);

my $n = length($seq1);
my $m = length($seq2);

# print the longest common subsequence
printLCS($seq1, $n, $m, 1, @arrows);


