annotate external_tools/linux/lib/hh/scripts/Align.pm @ 6:2277dd59b9f9 draft

Uploaded
author hammock
date Wed, 01 Nov 2017 05:54:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
1 # Package Align.pm
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
2 # (c) Johannes Soeding, 2006
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
3 # Perl functions for Smith-Waterman and Needleman-Wunsch sequence alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
4
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
5 # HHsuite version 2.0
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
6 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
7 # Reference:
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
8 # Remmert M., Biegert A., Hauser A., and Soding J.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
9 # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
10 # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
11
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
12 # (C) Johannes Soeding and Michael Remmert, 2012
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
13
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
14 # This program is free software: you can redistribute it and/or modify
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
15 # it under the terms of the GNU General Public License as published by
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
16 # the Free Software Foundation, either version 3 of the License, or
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
17 # (at your option) any later version.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
18
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
19 # This program is distributed in the hope that it will be useful,
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
22 # GNU General Public License for more details.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
23
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
24 # You should have received a copy of the GNU General Public License
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
25 # along with this program. If not, see <http://www.gnu.org/licenses/>.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
26
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
27 # We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
28
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
29 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
30 # Subroutine AlignSW
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
31 # Smith-Waterman local alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
32 # usage:
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
33 # 1. Use global variables of package Align.pm:
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
34 # $score = &AlignSW();
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
35 # printf(" XSEQ: $Align::xseq\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
36 # printf(" MATCH: $Align::Sstr\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
37 # printf(" YSEQ: $Align::yseq\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
38 # etc.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
39 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
40 # 2. Use references and/or global variables
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
41 # $score = &AlignSW(\$xseq,\$yseq);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
42 # $score = &AlignSW(\$xseq,\$yseq,\@i,\@j,\$imin,\$imax,\$jmin,\$jmax,\$Sstr,\@S);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
43 # printf(" XSEQ: $xseq\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
44 # printf(" MATCH: $Sstr\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
45 # printf(" YSEQ: $yseq\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
46 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
47 # Input: $xseq, $yseq : sequences x and y as strings
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
48 # Param: $main::d : gap opening penalty
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
49 # $main::e : gap extension penalty
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
50 # Output: return value : bit score
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
51 # $xseq, $yseq : aligned residues of x and y (with - as gap)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
52 # @i : $i[$col],$j[$col] are aligned residues in column $col
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
53 # @j : (first is 1 (NOT 0!), 0 means gap)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
54 # $imin : first aligned residue of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
55 # $imax : last aligned residue of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
56 # $jmin : first aligned residue of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
57 # $jmax : last aligned residue of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
58 # $Sstr : string belonging to $xseq and $yseq showing quality of alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
59 # $S[$col] : match score for aligning positions $i[$col] and $j[$col]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
60 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
61
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
62 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
63 # Subroutine AlignNW
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
64 # Needleman-Wunsch global alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
65 # usage: $score = &AlignNW();
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
66 # $score = &AlignNW(\$xseq,\$yseq);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
67 # $score = &AlignNW(\$xseq,\$yseq,\@i,\@j);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
68 # $score = &AlignNW(\$xseq,\$yseq,\@i,\@j,\$imin,\$imax,\$jmin,\$jmax,\$Sstr,\@S);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
69 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
70 # Input: $xseq, $yseq : sequences x and y as strings
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
71 # Param: $main::d : gap opening penalty
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
72 # $main::e : gap extension penalty
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
73 # $main::g : end gap penalty
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
74 # Output: return value : bit score
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
75 # $xseq, $yseq : aligned residues of x and y (with - as gap)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
76 # @i : $i[$col],$j[$col] are aligned residues in column $col
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
77 # @j : (first is 1 (NOT 0!), 0 means gap)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
78 # $imin : first aligned residue of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
79 # $imax : last aligned residue of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
80 # $jmin : first aligned residue of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
81 # $jmax : last aligned residue of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
82 # $Sstr : string belonging to $xseq and $yseq showing quality of alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
83 # $S[$col] : match score for aligning positions $i[$col] and $j[$col]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
84 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
85
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
86 package Align;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
87
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
88 use strict;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
89 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
90 use Exporter;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
91 our @ISA = qw(Exporter);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
92 our @EXPORT = qw(&AlignSW &AlignNW $matrix);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
93
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
94 our $xseq; # first sequence
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
95 our $yseq; # second sequence
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
96 our $ri; # reference to input array: $i[$col] -> $ri->[$col]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
97 our $rj; # reference to input array: $j[$col] -> $rj->[$col]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
98 our $imin; # first aligned residue of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
99 our $imax; # last aligned residue of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
100 our $jmax; # last aligned residue of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
101 our $jmin; # first aligned residue of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
102 our $Sstr; # $Sstr annotates the match quality
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
103 our $rS; # reference $rS->[$col] -> $S[$col] = match score for aligning positions $i[$col] and $j[$col]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
104 our $matrix;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
105
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
106 my $firstcall=1;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
107 my @Sab; # Substitution matrix in bit
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
108 # A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
109 my @ch2i=( 0, 3, 4, 3, 6,13, 7, 8, 9,20,11,10,12, 2,20,14, 5, 1,15,16, 4,19,17,20,18, 6);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
110 my @Gonnet = (
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
111 # A R N D C Q E G H I L K M F P S T W Y V X
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
112 # The Gonnet matrix is in units of 10*log10()
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
113 [ 2.4,-0.6,-0.3,-0.3, 0.5,-0.2, 0.0, 0.5,-0.8,-0.8,-1.2,-0.4,-0.7,-2.3, 0.3, 1.1, 0.6,-3.6,-2.2, 0.1,-1.0,-9.9], # A
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
114 [-0.6, 4.7, 0.3,-0.3,-2.2, 1.5, 0.4,-1.0, 0.6,-2.4,-2.2, 2.7,-1.7,-3.2,-0.9,-0.2,-0.2,-1.6,-1.8,-2.0,-1.0,-9.9], # R
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
115 [-0.3, 0.3, 3.8, 2.2,-1.8, 0.7, 0.9, 0.4, 1.2,-2.8,-3.0, 0.8,-2.2,-3.1,-0.9, 0.9, 0.5,-3.6,-1.4,-2.2,-1.0,-9.9], # N
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
116 [-0.3,-0.3, 2.2, 4.7,-3.2, 0.9, 2.7, 0.1, 0.4,-3.8,-4.0, 0.5,-3.0,-4.5,-0.7, 0.5, 0.0,-5.2,-2.8,-2.9,-1.0,-9.9], # D
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
117 [ 0.5,-2.2,-1.8,-3.2,11.5,-2.4,-3.0,-2.0,-1.3,-1.1,-1.5,-2.8,-0.9,-0.8,-3.1, 0.1,-0.5,-1.0,-0.5, 0.0,-1.0,-9.9], # C
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
118 [-0.2, 1.5, 0.7, 0.9,-2.4, 2.7, 1.7,-1.0, 1.2,-1.9,-1.6, 1.5,-1.0,-2.6,-0.2, 0.2, 0.0,-2.7,-1.7,-1.5,-1.0,-9.9], # Q
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
119 [ 0.0, 0.4, 0.9, 2.7,-3.0, 1.7, 3.6,-0.8, 0.4,-2.7,-2.8, 1.2,-2.0,-3.9,-0.5, 0.2,-0.1,-4.3,-2.7,-1.9,-1.0,-9.9], # E
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
120 [ 0.5,-1.0, 0.4, 0.1,-2.0,-1.0,-0.8, 6.6,-1.4,-4.5,-4.4,-1.1,-3.5,-5.2,-1.6, 0.4,-1.1,-4.0,-4.0,-3.3,-1.0,-9.9], # G
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
121 [-0.8, 0.6, 1.2, 0.4,-1.3, 1.2, 0.4,-1.4, 6.0,-2.2,-1.9, 0.6,-1.3,-0.1,-1.1,-0.2,-0.3,-0.8,-2.2,-2.0,-1.0,-9.9], # H
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
122 [-0.8,-2.4,-2.8,-3.8,-1.1,-1.9,-2.7,-4.5,-2.2, 4.0, 2.8,-2.1, 2.5, 1.0,-2.6,-1.8,-0.6,-1.8,-0.7, 3.1,-1.0,-9.9], # I
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
123 [-1.2,-2.2,-3.0,-4.0,-1.5,-1.6,-2.8,-4.4,-1.9, 2.8, 4.0,-2.1, 2.8, 2.0,-2.3,-2.1,-1.3,-0.7, 0.0, 1.8,-1.0,-9.9], # L
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
124 [-0.4, 2.7, 0.8, 0.5,-2.8, 1.5, 1.2,-1.1, 0.6,-2.1,-2.1, 3.2,-1.4,-3.3,-0.6, 0.1, 0.1,-3.5,-2.1,-1.7,-1.0,-9.9], # K
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
125 [-0.7,-1.7,-2.2,-3.0,-0.9,-1.0,-2.0,-3.5,-1.3, 2.5, 2.8,-1.4, 4.3, 1.6,-2.4,-1.4,-0.6,-1.0,-0.2, 1.6,-1.0,-9.9], # M
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
126 [-2.3,-3.2,-3.1,-4.5,-0.8,-2.6,-3.9,-5.2,-0.1, 1.0, 2.0,-3.3, 1.6, 7.0,-3.8,-2.8,-2.2, 3.6, 5.1, 0.1,-1.0,-9.9], # F
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
127 [ 0.3,-0.9,-0.9,-0.7,-3.1,-0.2,-0.5,-1.6,-1.1,-2.6,-2.3,-0.6,-2.4,-3.8, 7.6, 0.4, 0.1,-5.0,-3.1,-1.8,-1.0,-9.9], # P
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
128 [ 1.1,-0.2, 0.9, 0.5, 0.1, 0.2, 0.2, 0.4,-0.2,-1.8,-2.1, 0.1,-1.4,-2.8, 0.4, 2.2, 1.5,-3.3,-1.9,-1.0,-1.0,-9.9], # S
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
129 [ 0.6,-0.2, 0.5, 0.0,-0.5, 0.0,-0.1,-1.1,-0.3,-0.6,-1.3, 0.1,-0.6,-2.2, 0.1, 1.5, 2.5,-3.5,-1.9, 0.0,-1.0,-9.9], # T
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
130 [-3.6,-1.6,-3.6,-5.2,-1.0,-2.7,-4.3,-4.0,-0.8,-1.8,-0.7,-3.5,-1.0, 3.6,-5.0,-3.3,-3.5,14.2, 4.1,-2.6,-1.0,-9.9], # W
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
131 [-2.2,-1.8,-1.4,-2.8,-0.5,-1.7,-2.7,-4.0,-2.2,-0.7, 0.0,-2.1,-0.2, 5.1,-3.1,-1.9,-1.9, 4.1, 7.8,-1.1,-1.0,-9.9], # Y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
132 [ 0.1,-2.0,-2.2,-2.9, 0.0,-1.5,-1.9,-3.3,-2.0, 3.1, 1.8,-1.7, 1.6, 0.1,-1.8,-1.0, 0.0,-2.6,-1.1, 3.4,-1.0,-9.9], # V
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
133 [-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,+1.0,-9.9], # X
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
134 [-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9,-9.9] # ~
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
135 );
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
136
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
137 # A R N D C Q E G H I L K M F P S T W Y V X
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
138 my @BLOSUM62 = (
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
139 [ 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0, 0,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
140 [-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
141 [-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
142 [-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
143 [ 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-2,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
144 [-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
145 [-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
146 [ 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
147 [-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
148 [-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
149 [-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
150 [-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
151 [-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
152 [-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
153 [-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-2,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
154 [ 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2, 0,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
155 [ 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0, 0,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
156 [-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-2,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
157 [-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
158 [ 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
159 [ 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,+1,-9],
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
160 [-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
161 );
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
162
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
163 # print("Substitution matrix:\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
164 # for ($a=0; $a<=20; $a++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
165 # for ($b=0; $b<=20; $b++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
166 # printf("%6.1f ",$Sab[$a][$b]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
167 # }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
168 # printf("\n");
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
169 # }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
170
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
171
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
172 # Set substitution matrix in bits (do only at first call of one of the alignment routines)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# SetSubstitutionMatrix()
# Fills the package-level score table @Sab from the matrix selected by $matrix:
#   "Gonnet"      - @Gonnet, rescaled to bits
#   "Blosum62"    - @BLOSUM62, copied as is (a notice is printed to STDOUT)
#   anything else - simple identity matrix (match +2, mismatch -1)
# The table is built on the first call only; later calls return immediately,
# so changing $matrix after the first call has no effect.
#
# Residue codes: 0..19 are the amino acids, 20 is X, and 21 is the code used
# for gaps and invalid symbols. All 22 rows and columns are filled, so that
# code 21 receives its tabulated penalty instead of being left undefined
# (an undefined entry would silently contribute a score of 0).
sub SetSubstitutionMatrix {
    return unless $firstcall;
    $firstcall = 0;

    if (defined($matrix) && $matrix eq "Gonnet") {
        # Gonnet is tabulated in units of 10*log10(); 0.3322 ~ log2(10)/10 converts to bits
        for my $row (0 .. $#Gonnet) {
            $Sab[$row] = [ map { $_ * 0.3322 } @{ $Gonnet[$row] } ];
        }
    } elsif (defined($matrix) && $matrix eq "Blosum62") {
        printf("Using Blosum62 matrix...\n");
        for my $row (0 .. $#BLOSUM62) {
            $Sab[$row] = [ @{ $BLOSUM62[$row] } ];    # copy, do not alias the table
        }
    } else {
        # Identity matrix over the 20 amino acids
        for my $row (0 .. 19) {
            for my $col (0 .. 19) {
                $Sab[$row][$col] = ($row == $col) ? 2 : -1;
            }
        }
        # X (20) is neutral against everything; code 21 is always penalised
        for my $k (0 .. 20) {
            $Sab[20][$k] = $Sab[$k][20] = 0;
            $Sab[21][$k] = $Sab[$k][21] = -10;
        }
        $Sab[20][20] = +1;     # if in doubt, match X with X
        $Sab[21][21] = -10;    # corner entry that the loop above does not reach
    }
    return;
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
206
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
207 # maxbt(val1,...,valx,\$bt) finds maximum of values and puts index of maximum into $bt
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# maxbt(val1,...,valN,\$bt)
# Returns the largest of val1..valN and stores its zero-based position in $bt.
# On ties the earliest position wins, because only a strictly larger value
# replaces the current best.
sub maxbt {
    my @vals   = @_;
    my $bt_ref = pop @vals;    # trailing argument is the reference to $bt
    my $best   = 0;
    for my $k (1 .. $#vals) {
        $best = $k if $vals[$k] > $vals[$best];
    }
    $$bt_ref = $best;
    return $vals[$best];
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
219
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
220 # max3bt(val1,val2,val3,\$bt) finds maximum of values and puts index of maximum into $bt
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# max3bt(val1,val2,val3,\$bt)
# Returns the largest of the three values and stores its zero-based position
# in $bt. Ties go to the later argument: val2 beats an equal val1, and val3
# beats an equal leader. Arguments are read straight from @_ (no copies).
sub max3bt {
    # Leader among the first two values; equality selects index 1
    my $lead = ($_[1] < $_[0]) ? 0 : 1;
    # The leader only survives if the third value is strictly smaller
    if ($_[2] < $_[$lead]) {
        ${$_[3]} = $lead;
        return $_[$lead];
    }
    ${$_[3]} = 2;
    return $_[2];
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
240
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
241 # max2bt(val1,val2,\$bt) finds maximum of values and puts index of maximum into $bt
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# max2bt(val1,val2,\$bt)
# Returns the larger of the two values and stores its zero-based position in
# $bt. When both are equal, val2 (position 1) is chosen.
sub max2bt {
    my $pick = ($_[1] < $_[0]) ? 0 : 1;
    ${$_[2]} = $pick;
    return $_[$pick];
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
251
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
252
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
253 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
254 # Subroutine AlignSW
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
255 # Smith-Waterman local alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
256 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
257 sub AlignSW {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
258 if (@_>=1) {$xseq=$_[0];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
259 if (@_>=2) {$yseq=$_[1];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
260 if (@_>=3) {$ri=$_[2];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
261 if (@_>=4) {$rj=$_[3];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
262 if (@_>=5) {$imin=$_[4];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
263 if (@_>=6) {$imax=$_[5];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
264 if (@_>=7) {$jmin=$_[6];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
265 if (@_>=8) {$jmax=$_[7];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
266 if (@_>=9) {$Sstr=$_[8];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
267 if (@_>=10) {$rS=$_[9];}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
268
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
269 if (length($$xseq)<1) {warn ("ERROR in Align.pm: sequence x is empty\n"); return 0;}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
270 if (length($$yseq)<1) {warn ("ERROR in Align.pm: sequence x is empty\n"); return 0;}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
271
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
272 my @xchr; # ASCII characters of $xseq
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
273 my @ychr; # ASCII characters of $yseq
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
274 my @xres; # internal integer representation of residues of x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
275 my @yres; # internal integer representation of residues of y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
276
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
277 $$xseq =~ s/\s//g;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
278 $$yseq =~ s/\s//g;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
279 @xchr = split(//,$$xseq);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
280 @ychr = split(//,$$yseq);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
281
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
282 my $Lx=@xchr; # length of sequence x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
283 my $Ly=@ychr; # length of sequence y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
284 my @M; # $M[a][b] = score of best alignment of x[1..a] and y[1..b] ending in match state
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
285 my @A; # $A[a][b] = score of best alignment of x[1..a] and y[1..b] ending in gap in x
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
286 my @B; # $B[a][b] = score of best alignment of x[1..a] and y[1..b] ending in gap in y
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
287 my @Mbt; # $Mbt[a][b] = 0:STOP 1:M 2:A 3:B
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
288 my @Abt; # $Abt[a][b] = 0:A 1:M
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
289 my @Bbt; # $Bbt[a][b] = 0:B 1:M
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
290 my $score; # bit score of alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
291 my $bt; # backtracing variable set by &maxbt: which argument was largest? (first=0)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
292 my $state; # STOP:0 M:1 A:2 B:3
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
293 my ($i, $j); # indices for sequence x and y, respectively
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
294
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
295 my $dx = $main::dx;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
296 my $dy = $main::dy;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
297 if (! defined $dx) {$dx = $main::d;}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
298 if (! defined $dy) {$dy = $main::d;}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
299
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
300 # Transform @xres and @yres to integer
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
301 for ($i=0; $i<@xchr; $i++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
302 my $a=ord(uc($xchr[$i]));
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
303 if ($a<65 || $a>90) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
304 if ($a!=ord(".") && $a!=ord("-") && $a!=ord("~")) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
305 printf(STDERR "\nWARNING: invalid symbol '%s' in pos $i of first sequence to be aligned\n",$xchr[$i]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
306 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
307 $xres[$i]=21;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
308 } else {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
309 $xres[$i]=$ch2i[$a-65];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
310 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
311 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
312 for ($j=0; $j<@ychr; $j++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
313 my $a=ord(uc($ychr[$j]));
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
314 if ($a<65 || $a>90) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
315 if ($a!=ord(".") && $a!=ord("-") && $a!=ord("~")) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
316 printf(STDERR "\nWARNING: invalid symbol '%s' in pos $j of second sequence to be aligned\n",$ychr[$j]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
317 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
318 $yres[$j]=21;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
319 } else {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
320 $yres[$j]=$ch2i[$a-65];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
321 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
322 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
323 unshift (@xres,21); unshift (@xchr," "); # insert dummy 0'th element
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
324 unshift (@yres,21); unshift (@ychr," "); # insert dummy 0'th element
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
325
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
326 &SetSubstitutionMatrix;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
327
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
328 # Initialization
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
329 for ($i=0; $i<=$Lx; $i++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
330 $M[$i][0]=-999; $A[$i][0]=-999; $B[$i][0]=-999;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
331 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
332 for ($j=1; $j<=$Ly; $j++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
333 $M[0][$j]=-999; $A[0][$j]=-999; $B[0][$j]=-999;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
334 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
335
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
336 # Iteration
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
337 for ($i=1; $i<=$Lx; ++$i) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
338 my $Mi =$M[$i];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
339 my $Mi1=$M[$i-1];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
340 my $Ai =$A[$i];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
341 my $Ai1=$A[$i-1];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
342 my $Bi =$B[$i];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
343 my $Bi1=$B[$i-1];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
344 my $Sabx=$Sab[$xres[$i]];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
345 my $j1=0;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
346 for ($j=1; $j<=$Ly; ++$j, ++$j1) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
347 ${$Mi}[$j] = max3bt(${$Mi1}[$j1], ${$Ai1}[$j1], ${$Bi1}[$j1], \$Mbt[$i][$j]) + ${$Sabx}[$yres[$j]];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
348 ${$Ai}[$j] = max2bt(${$Ai}[$j1]-$main::e, ${$Mi}[$j1]-$dx, \$Abt[$i][$j]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
349 ${$Bi}[$j] = max2bt(${$Bi1}[$j]-$main::e, ${$Mi1}[$j]-$dy, \$Bbt[$i][$j]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
350 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
351 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
352
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
353 # Finding maximum
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
354 $score = -1000;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
355 for ($i=1; $i<=$Lx; $i++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
356 my $Mi =$M[$i];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
357 for ($j=1; $j<=$Ly; $j++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
358 if (${$Mi}[$j]>$score) {$score=${$Mi}[$j]; $$imax=$i; $$jmax=$j;}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
359 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
360 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
361
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
362 # Backtracing
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
363 @$ri=();
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
364 @$rj=();
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
365 @$rS=();
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
366 $state=1; # last state is M
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
367 $i=$$imax; $j=$$jmax;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
368 $$xseq=""; $$yseq="";
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
369 while ($state) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
370 if ($state==1) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
371 # current state is M (match-match)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
372 unshift(@$ri,$i);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
373 unshift(@$rj,$j);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
374 $state = $Mbt[$i][$j];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
375 $$xseq=$xchr[$i].$$xseq;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
376 $$yseq=$ychr[$j].$$yseq;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
377 unshift(@$rS, $Sab[$xres[$i]][$yres[$j]]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
378 $$imin=$i; $$jmin=$j;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
379 $i--; $j--;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
380 } elsif ($state==2) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
381 # current state is A (gap in x)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
382 unshift(@$ri,0);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
383 unshift(@$rj,$j);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
384 $$xseq="-".$$xseq;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
385 $$yseq=$ychr[$j].$$yseq;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
386 $bt = $Abt[$i][$j--];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
387 if ($bt) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
388 # previous state was M
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
389 unshift(@$rS,-$dx);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
390 $state = 1;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
391 } else {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
392 # previous state was A
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
393 unshift(@$rS,-$main::e);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
394 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
395 } else {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
396 # current state is B (gap in y)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
397 unshift(@$ri,$i);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
398 unshift(@$rj,0);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
399 $$xseq=$xchr[$i].$$xseq;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
400 $$yseq="-".$$yseq;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
401 $bt = $Bbt[$i--][$j];
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
402 if ($bt) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
403 # previous state was M
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
404 unshift(@$rS,-$dy);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
405 $state = 1;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
406 } else {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
407 # previous state was B
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
408 unshift(@$rS,-$main::e);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
409 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
410 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
411 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
412
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
413 # Set annotation string representing match quality
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
414 $$Sstr="";
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
415 for (my $col=0; $col<@$ri; $col++) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
416 if ($xres[$ri->[$col]] eq $yres[$rj->[$col]]) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
417 $$Sstr.=uc($xchr[$ri->[$col]]);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
418 } elsif ($rS->[$col] > 0 ) {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
419 $$Sstr.="+";
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
420 } else {
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
421 $$Sstr.=".";
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
422 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
423 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
424 return $score;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
425 }
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
426
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
427
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
428 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
429 # Subroutine AlignNW
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
430 # Needleman-Wunsch global alignment
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
431 #############################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
sub AlignNW {
    # Global alignment of two sequences with three dynamic-programming
    # matrices (M: match, A: gap in x, B: gap in y).  Gaps inside the
    # alignment cost $dx / $dy to open and $main::e to extend; leading and
    # trailing (end) gaps cost $main::g per position.
    #
    # Arguments (all optional, all references; an argument that is not passed
    # leaves the corresponding variable of the enclosing scope unchanged):
    #   $_[0] \$xseq  in: first sequence;  out: first sequence with '-' for gaps
    #   $_[1] \$yseq  in: second sequence; out: second sequence with '-' for gaps
    #   $_[2] \@i     out: per alignment column, index in x (1-based), 0 = gap in x
    #   $_[3] \@j     out: per alignment column, index in y (1-based), 0 = gap in y
    #   $_[4] \$imin  out: x index of the left-most match column
    #   $_[5] \$imax  out: x index of the cell where the best score was found
    #   $_[6] \$jmin  out: y index of the left-most match column
    #   $_[7] \$jmax  out: y index of the cell where the best score was found
    #   $_[8] \$Sstr  out: annotation string (residue letter, '+' or '.')
    #   $_[9] \@S     out: per alignment column, substitution score or gap penalty
    #
    # Reads $main::dx, $main::dy (falling back to $main::d), $main::e, $main::g
    # and the package-level tables @ch2i and @Sab.
    #
    # Returns the alignment score, or 0 (after a warning) if either sequence
    # is empty.
    if (@_>=1)  {$xseq=$_[0];}
    if (@_>=2)  {$yseq=$_[1];}
    if (@_>=3)  {$ri=$_[2];}
    if (@_>=4)  {$rj=$_[3];}
    if (@_>=5)  {$imin=$_[4];}
    if (@_>=6)  {$imax=$_[5];}
    if (@_>=7)  {$jmin=$_[6];}
    if (@_>=8)  {$jmax=$_[7];}
    if (@_>=9)  {$Sstr=$_[8];}
    if (@_>=10) {$rS=$_[9];}

    if (length($$xseq)<1) {warn ("ERROR in Align.pm: sequence x is empty\n"); return 0;}
    if (length($$yseq)<1) {warn ("ERROR in Align.pm: sequence y is empty\n"); return 0;}

    my @xchr;                   # ASCII characters of $xseq
    my @ychr;                   # ASCII characters of $yseq
    my @xres;                   # internal integer representation of residues of x
    my @yres;                   # internal integer representation of residues of y

    $$xseq =~ s/\s//g;
    $$yseq =~ s/\s//g;

    # A sequence made of whitespace only is empty once stripped; without this
    # check the backtrace below would run with a zero-length sequence.
    if ($$xseq eq "") {warn ("ERROR in Align.pm: sequence x is empty\n"); return 0;}
    if ($$yseq eq "") {warn ("ERROR in Align.pm: sequence y is empty\n"); return 0;}

    @xchr = split(//,$$xseq);
    @ychr = split(//,$$yseq);

    my $Lx=@xchr;               # length of sequence x
    my $Ly=@ychr;               # length of sequence y
    my @M;                      # $M[a][b] = score of best alignment of x[1..a] and y[1..b] ending in match state
    my @A;                      # $A[a][b] = score of best alignment of x[1..a] and y[1..b] ending in gap in x
    my @B;                      # $B[a][b] = score of best alignment of x[1..a] and y[1..b] ending in gap in y
    my @Mbt;                    # $Mbt[a][b] = value stored by max3bt; used below as (previous state - 1)
    my @Abt;                    # $Abt[a][b] = 0:A  true:M
    my @Bbt;                    # $Bbt[a][b] = 0:B  true:M
    my $score;                  # score of alignment
    my $bt;                     # backtracing variable: which predecessor was chosen?
    my $state;                  # M:1 A:2 B:3
    my ($i, $j);                # indices for sequence x and y, respectively

    my $dx = $main::dx;
    my $dy = $main::dy;
    if (! defined $dx) {$dx = $main::d;}
    if (! defined $dy) {$dy = $main::d;}
    # Diagnostic output goes to STDERR (like the warnings below) so that it
    # cannot end up in data a caller writes to STDOUT.
    printf(STDERR "dx=%f dy=%f\n",$dx,$dy);

    # Transform @xchr and @ychr to integer codes.  Anything that is not a
    # letter A-Z is coded as 21; a warning is issued unless it is one of the
    # gap symbols '.', '-' or '~'.
    for ($i=0; $i<@xchr; $i++) {
        my $code=ord(uc($xchr[$i]));
        if ($code<65 || $code>90) {
            if ($code!=ord(".") && $code!=ord("-") && $code!=ord("~")) {
                printf(STDERR "\nWARNING: invalid symbol '%s' in pos $i of first sequence to be aligned\n",$xchr[$i]);
            }
            $xres[$i]=21;
        } else {
            $xres[$i]=$ch2i[$code-65];
        }
    }
    for ($j=0; $j<@ychr; $j++) {
        my $code=ord(uc($ychr[$j]));
        if ($code<65 || $code>90) {
            if ($code!=ord(".") && $code!=ord("-") && $code!=ord("~")) {
                printf(STDERR "\nWARNING: invalid symbol '%s' in pos $j of second sequence to be aligned\n",$ychr[$j]);
            }
            $yres[$j]=21;
        } else {
            $yres[$j]=$ch2i[$code-65];
        }
    }
    unshift (@xres,21); unshift (@xchr," "); # insert dummy 0'th element
    unshift (@yres,21); unshift (@ychr," "); # insert dummy 0'th element

    # Called in the '&name;' form on purpose: this passes the current @_ on
    # unchanged, exactly as before.
    &SetSubstitutionMatrix;

    # Initialization: first row/column can only be reached through end gaps
    $M[0][0]=$A[0][0]=$B[0][0]=0;
    for ($i=1; $i<=$Lx; $i++) {
        $M[$i][0] = -999;
        $A[$i][0] = -999;
        $B[$i][0] = -$i*$main::g;
        $Bbt[$i][0] = 0;        # previous state was B as well (gap in y)
    }
    for ($j=1; $j<=$Ly; $j++) {
        $M[0][$j] = -999;
        $A[0][$j] = -$j*$main::g;
        $B[0][$j] = -999;
        $Abt[0][$j] = 0;        # previous state was A as well (gap in x)
    }

    # Iteration.  Row references are cached to avoid repeated two-level
    # lookups in the inner loop.  max3bt/max2bt are defined elsewhere in this
    # file; presumably they return the largest of their value arguments and
    # record through the trailing reference which one it was (first=0).
    for ($i=1; $i<=$Lx; ++$i) {
        my $Mi =$M[$i];
        my $Mi1=$M[$i-1];
        my $Ai =$A[$i];
        my $Ai1=$A[$i-1];
        my $Bi =$B[$i];
        my $Bi1=$B[$i-1];
        my $Sabx=$Sab[$xres[$i]];
        my $j1=0;               # always $j-1
        for ($j=1; $j<=$Ly; ++$j, ++$j1) {
            $Mi->[$j] = max3bt($Mi1->[$j1], $Ai1->[$j1], $Bi1->[$j1], \$Mbt[$i][$j]) + $Sabx->[$yres[$j]];
            $Ai->[$j] = max2bt($Ai->[$j1]-$main::e, $Mi->[$j1]-$dx, \$Abt[$i][$j]);
            $Bi->[$j] = max2bt($Bi1->[$j]-$main::e, $Mi1->[$j]-$dy, \$Bbt[$i][$j]);
        }
    }

    # Finding maximum: scan the last column and the last row, charging the
    # end-gap penalty for the residues that remain unaligned.
    $score = -1000;
    for ($i=1; $i<=$Lx; $i++) {
        my $endgappenalty = ($Lx-$i)*$main::g;
        if ($M[$i][$Ly]-$endgappenalty > $score) {
            $score=$M[$i][$Ly]-$endgappenalty; $$imax=$i; $$jmax=$Ly; $state = 1;
        }
        if ($A[$i][$Ly]-$endgappenalty > $score) {
            $score=$A[$i][$Ly]-$endgappenalty; $$imax=$i; $$jmax=$Ly; $state = 2;
        }
        if ($B[$i][$Ly]-$endgappenalty > $score) {
            $score=$B[$i][$Ly]-$endgappenalty; $$imax=$i; $$jmax=$Ly; $state = 3;
        }
    }
    for ($j=1; $j<$Ly; $j++) {
        my $endgappenalty = ($Ly-$j)*$main::g;
        if ($M[$Lx][$j]-$endgappenalty > $score) {
            $score=$M[$Lx][$j]-$endgappenalty; $$imax=$Lx; $$jmax=$j; $state = 1;
        }
        if ($A[$Lx][$j]-$endgappenalty > $score) {
            $score=$A[$Lx][$j]-$endgappenalty; $$imax=$Lx; $$jmax=$j; $state = 2;
        }
        if ($B[$Lx][$j]-$endgappenalty > $score) {
            $score=$B[$Lx][$j]-$endgappenalty; $$imax=$Lx; $$jmax=$j; $state = 3;
        }
    }

    # Make sure the end gapped regions are also backtraced: overwrite the
    # backtrace entries between the best cell and the corner (Lx,Ly) so that
    # the backtrace, which always starts in the corner, walks through them.
    if ($$jmax<$Ly) {
        $Abt[$Lx][$$jmax+1] = $state;
        for ($j=$$jmax+2; $j<=$Ly; $j++) {$Abt[$Lx][$j] = 0;}
        $state = 2;
    } elsif ($$imax<$Lx) {
        $Bbt[$$imax+1][$Ly] = $state;
        for ($i=$$imax+2; $i<=$Lx; $i++) {$Bbt[$i][$Ly] = 0;}
        $state = 3;
    } else {
        # NOTE(review): this discards the state found above, so the backtrace
        # starts in M even if A or B scored best in the corner - confirm
        # that this is intended.
        $state = 1;
    }

    # Backtracing from the corner (Lx,Ly) until both indices reach 0
    @$ri=();
    @$rj=();
    @$rS=();
    $i=$Lx; $j=$Ly;
    $$xseq=""; $$yseq="";
    while ($i || $j) {
        if ($state==1) {
            # current state is M (match-match)
            unshift(@$ri,$i);
            unshift(@$rj,$j);
            $state = $Mbt[$i][$j]+1; # previous state
            $$xseq=$xchr[$i].$$xseq;
            $$yseq=$ychr[$j].$$yseq;
            unshift(@$rS, $Sab[$xres[$i]][$yres[$j]]);
            $$imin=$i; $$jmin=$j;
            $i--; $j--;
        } elsif ($state==2) {
            # current state is A (gap in x)
            unshift(@$ri,0);    # $ri->[$col]=0 for gap in x
            unshift(@$rj,$j);
            $$xseq="-".$$xseq;
            $$yseq=$ychr[$j].$$yseq;
            $bt = $Abt[$i][$j--];
            if ($bt) {
                # previous state was M
                if ($i==$Lx || $i==0) {
                    unshift(@$rS,-$main::g); # end gap
                } else {
                    unshift(@$rS,-$dx);      # gap opening
                }
                $state = 1;
            } else {
                # previous state was A
                if ($i==$Lx || $i==0) {
                    unshift(@$rS,-$main::g); # end gap
                } else {
                    unshift(@$rS,-$main::e); # gap extension
                }
            }
        } else {
            # current state is B (gap in y)
            unshift(@$ri,$i);
            unshift(@$rj,0);    # $rj->[$col]=0 for gap in y
            $$xseq=$xchr[$i].$$xseq;
            $$yseq="-".$$yseq;
            $bt = $Bbt[$i--][$j];
            if ($bt) {
                # previous state was M
                if ($j==$Ly || $j==0) {
                    unshift(@$rS,-$main::g); # end gap
                } else {
                    unshift(@$rS,-$dy);      # gap opening
                }
                $state = 1;
            } else {
                # previous state was B
                if ($j==$Ly || $j==0) {
                    unshift(@$rS,-$main::g); # end gap
                } else {
                    unshift(@$rS,-$main::e); # gap extension
                }
            }
        }
    }

    # Set annotation string representing match quality: the residue letter
    # for identical residue codes, '+' for a positive column score, '.'
    # otherwise.
    $$Sstr="";
    for (my $col=0; $col<@$ri; $col++) {
        if ($xres[$ri->[$col]] eq $yres[$rj->[$col]]) {
            $$Sstr.=uc($xchr[$ri->[$col]]);
        } elsif ($rS->[$col] > 0 ) {
            $$Sstr.="+";
        } else {
            $$Sstr.=".";
        }
    }
    return $score;
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
658
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
659 1;