# file:   PWMScore.pl
# input:  text file of 19-nucleotide sequence strings, one sequence per line
# output: appends one "<score>\t<sequence>" line per input line to
#         matrixscore.txt, where <score> is the matrix similarity score of the
#         sequence compared to the position weight matrix of bona fide dioxin
#         response elements
# usage:  perl PWMScore.pl <sequence_file>

use strict;
use warnings;

# Results are appended (never truncated) to this file in the current directory.
my $OUTPUT_FILE = 'matrixscore.txt';

# Divisor that turns the raw weighted sum into the similarity score.
# NOTE(review): this looks like the maximum attainable raw score, but the
# per-position maxima of the tables below sum to 72843, not 72858 -- confirm
# against the source publication before changing anything.
my $NORMALIZER = 72858;

# Ci vector: one weight per matrix position 1..19 (stored 0-based here).
my @CI = qw(34 33 15 15 24 33 55 100 100 100 100 100 57 38 37 25 37 23 21);

# Nucleotide distribution frequencies [A, C, G, T] for positions 1..19
# (stored 0-based here).
# NOTE(review): rows 5 and 7 sum to 94 and 82 while the other rows sum to
# 100 or 101; the values are kept exactly as found -- verify against the
# source data.
my @PWM = (
    [  7,  53,  33,   7 ],    # position  1
    [ 33,  40,  27,   0 ],    # position  2
    [ 27,  20,  33,  20 ],    # position  3
    [ 20,  27,  33,  20 ],    # position  4
    [  7,  40,  20,  27 ],    # position  5
    [  7,  13,  20,  60 ],    # position  6
    [  2,   7,   0,  73 ],    # position  7
    [  0,   0, 100,   0 ],    # position  8
    [  0, 100,   0,   0 ],    # position  9
    [  0,   0, 100,   0 ],    # position 10
    [  0,   0,   0, 100 ],    # position 11
    [  0,   0, 100,   0 ],    # position 12
    [ 47,  53,   0,   0 ],    # position 13
    [ 13,  40,  47,   0 ],    # position 14
    [ 60,  27,   7,   7 ],    # position 15
    [ 47,  20,  27,   7 ],    # position 16
    [ 20,  27,  53,   0 ],    # position 17
    [ 33,  40,  20,   7 ],    # position 18
    [  7,  27,  33,  33 ],    # position 19
);

# Column of each nucleotide inside a @PWM row.
my %COLUMN_OF = ( a => 0, c => 1, g => 2, t => 3 );

# matrix_similarity_score($sequence)
#
# Returns the matrix similarity score of $sequence: the sum over positions of
# (frequency of the observed nucleotide at that position) * (Ci weight of that
# position), divided by $NORMALIZER.
#
#   * Letter case is ignored.
#   * Only word characters (\w) count as positions; anything else (spaces,
#     punctuation) is skipped without consuming a position.
#   * A word character that is not a/c/g/t consumes its position but adds
#     nothing to the score.
#   * Characters beyond the last matrix position add nothing.
#   * An undefined or empty sequence scores 0.
sub matrix_similarity_score {
    my ($sequence) = @_;
    $sequence = '' unless defined $sequence;

    my $lowered = lc $sequence;
    my @bases   = $lowered =~ /\w/g;

    my $raw_score = 0;
    for my $pos ( 0 .. $#bases ) {
        last if $pos > $#PWM;    # nothing to score past the matrix

        my $column = $COLUMN_OF{ $bases[$pos] };
        next unless defined $column;    # unknown symbol: position used, no score

        $raw_score += $PWM[$pos][$column] * $CI[$pos];
    }

    return $raw_score / $NORMALIZER;
}

# main($input_file)
#
# Reads $input_file line by line, scores every line and appends
# "<score>\t<sequence>\n" to $OUTPUT_FILE. Dies with a message when the
# argument is missing or when a file cannot be opened, written or closed.
sub main {
    my ($input_file) = @_;

    die "usage: $0 <sequence_file>\n"
        unless defined $input_file && length $input_file;

    open my $in, '<', $input_file
        or die "cannot open $input_file: $!";
    open my $out, '>>', $OUTPUT_FILE
        or die "cannot open $OUTPUT_FILE for appending: $!";

    while ( my $line = <$in> ) {
        chomp $line;
        $line =~ s/\r\z//;    # tolerate CRLF line endings

        my $matrix_similarity_score = matrix_similarity_score($line);
        print {$out} "$matrix_similarity_score\t$line\n"
            or die "cannot write to $OUTPUT_FILE: $!";
    }

    close $in;
    # Buffered write errors only surface at close, so check it.
    close $out or die "cannot close $OUTPUT_FILE: $!";

    return;
}

# Run only when executed as a script, so the scoring routine can be loaded
# and exercised without touching any files.
main(@ARGV) unless caller;

1;