annotate microsatellite_birthdeath.pl @ 0:4e31fad3f08e draft

Uploaded tool tarball.
author devteam
date Wed, 25 Sep 2013 11:26:02 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/perl -w
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2 use strict;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3 use warnings;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4 use Term::ANSIColor;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
5 use Pod::Checker;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
6 use File::Basename;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
7 use IO::Handle;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
8 use Cwd;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
9 use File::Path qw(make_path remove_tree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
10 use File::Temp qw/ tempfile tempdir /;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
11 my $tdir = tempdir( CLEANUP => 1 );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
12 chdir $tdir;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
13 my $dir = getcwd;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
14 # print "current dit=$dir\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
15 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
16 use vars qw (%treesToReject %template $printer $interr_poscord $interrcord $no_of_interruptionscord $stringfile @tags
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
17 $infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
18 $gapcord %thresholdhash $tree_decipherer @sp_ident %revHash %sameHash %treesToIgnore %alternate @exactspecies_orig @exactspecies @exacttags
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
19 $mono_flanksimplicityRepno $di_flanksimplicityRepno $prop_of_seq_allowedtoAT $prop_of_seq_allowedtoCG);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
20 use FileHandle;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
21 use IO::Handle; # 5.004 or higher
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
22
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
23
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
24
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
25 #my @ar = ("/Users/ydk/work/rhesus_microsat/results/galay/chr22_5sp.maf.txt", "/Users/ydk/work/rhesus_microsat/results/galay/dataset_11.dat",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
26 #"/Users/ydk/work/rhesus_microsat/results/galay/chr22_5spec.maf.summ","hg18,panTro2,ponAbe2,rheMac2,calJac1","((((hg18, panTro2), ponAbe2), rheMac2), calJac1)","9,10,12,12",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
27 #"10","0.8");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
28 my @ar = @ARGV;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
29 my ($maf, $orth, $summout, $species_set, $tree_definition, $thresholds, $FLANK_SUPPORT, $SIMILARITY_THRESH) = @ar;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
30 $SIMILARITY_THRESH=$SIMILARITY_THRESH/100;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
31 #########################
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
32 $SIMILARITY_THRESH = $SIMILARITY_THRESH/100;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
33 my $EDGE_DISTANCE = 10;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
34 my $COMPLEXITY_SUPPORT = 20;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
35 load_thresholds("9_10_12_12");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
36 my $FLANKINDEL_MAXTHRESH = 0.3;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
37
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
38 my $mono_flanksimplicityRepno=6;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
39 my $di_flanksimplicityRepno=10;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
40 my $prop_of_seq_allowedtoAT=0.5;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
41 my $prop_of_seq_allowedtoCG=0.66;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
42
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
43 #########################
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
44 my $tspecies_set = $species_set;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
45
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
46 my %speciesReplacement = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
47 my %speciesReplacementTag = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
48 my %replacementArr= ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
49 my %replacementArrTag= ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
50 my %backReplacementArr= ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
51 my %backReplacementArrTag= ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
52 $tree_definition=~s/\s+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
53
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
54 my $tree_definition_split = $tree_definition;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
55 $tree_definition_split =~ s/[\(\)]//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
56 my @gotSpecies = ($tree_definition_split =~ /(,)/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
57 # print "gotSpecies = @gotSpecies\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
58
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
59 if (scalar(@gotSpecies)+1 ==5){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
60
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
61 $speciesReplacement{1}="calJac1";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
62 $speciesReplacement{2}="rheMac2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
63 $speciesReplacement{3}="ponAbe2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
64 $speciesReplacement{4}="panTro2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
65 $speciesReplacement{5}="hg18";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
66
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
67 $speciesReplacementTag{1}="M";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
68 $speciesReplacementTag{2}="R";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
69 $speciesReplacementTag{3}="O";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
70 $speciesReplacementTag{4}="C";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
71 $speciesReplacementTag{5}="H";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
72 $species_set="hg18,panTro2,ponAbe2,rheMac2,calJac1";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
73
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
74 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
75 if (scalar(@gotSpecies)+1 ==4){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
76
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
77 $speciesReplacement{1}="rheMac2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
78 $speciesReplacement{2}="ponAbe2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
79 $speciesReplacement{3}="panTro2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
80 $speciesReplacement{4}="hg18";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
81
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
82 $speciesReplacementTag{1}="R";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
83 $speciesReplacementTag{2}="O";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
84 $speciesReplacementTag{3}="C";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
85 $speciesReplacementTag{4}="H";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
86 $species_set="hg18,panTro2,ponAbe2,rheMac2";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
87
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
88 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
89
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
90 # $tree_definition = "((((hg18,panTro2),ponAbe2),rheMac2),calJac1)";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
91 my $tree_definition_copy = $tree_definition;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
92 my $tree_definition_orig = $tree_definition;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
93 my $brackets = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
94
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
95 while (1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
96 #last if $tree_definition_copy !~ /\(/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
97 $brackets++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
98 # print "brackets = $brackets\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
99 last if $brackets > 6;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
100 $tree_definition_copy =~ s/^\(//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
101 $tree_definition_copy =~ s/\)$//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
102 # print "tree_definition_copy = $tree_definition_copy\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
103 my @arr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
104
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
105 if ($tree_definition_copy =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)\)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
106 @arr = $tree_definition_copy =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)$/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
107 # print "arr = @arr\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
108 $tree_definition_copy = $2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
109 $replacementArr{$1} = $speciesReplacement{$brackets};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
110 $backReplacementArr{$speciesReplacement{$brackets}}=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
111
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
112 $replacementArrTag{$1} = $speciesReplacementTag{$brackets};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
113 $backReplacementArrTag{$speciesReplacementTag{$brackets}}=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
114 # print "replacing $1 with $replacementArr{$1}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
115
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
116 $sp_ident[$brackets-1] = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
117
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
118 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
119 elsif ($tree_definition_copy =~ /^\(([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
120 @arr = $tree_definition_copy =~ /^([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
121 # print "arr = @arr\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
122 $tree_definition_copy = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
123 $replacementArr{$2} = $speciesReplacement{$brackets};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
124 $backReplacementArr{$speciesReplacement{$brackets}}=$2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
125
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
126 $replacementArrTag{$2} = $speciesReplacementTag{$brackets};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
127 $backReplacementArrTag{$speciesReplacementTag{$brackets}}=$2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
128 # print "replacing $2 with $replacementArr{$2}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
129
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
130 $sp_ident[$brackets-1] = $2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
131 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
132 elsif ($tree_definition_copy =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_]+)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
133 @arr = $tree_definition_copy =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_]+)$/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
134 # print "arr = @arr .. TERMINAL\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
135 $tree_definition_copy = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
136 $replacementArr{$2} = $speciesReplacement{$brackets};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
137 $replacementArr{$1} = $speciesReplacement{$brackets+1};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
138 $backReplacementArr{$speciesReplacement{$brackets}}=$2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
139 $backReplacementArr{$speciesReplacement{$brackets+1}}=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
140
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
141 $replacementArrTag{$1} = $speciesReplacementTag{$brackets+1};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
142 $backReplacementArrTag{$speciesReplacementTag{$brackets+1}}=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
143
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
144 $replacementArrTag{$2} = $speciesReplacementTag{$brackets};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
145 $backReplacementArrTag{$speciesReplacementTag{$brackets}}=$2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
146 # print "replacing $1 with $replacementArr{$1}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
147 # print "replacing $2 with $replacementArr{$2}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
148 # print "replacing $1 with $replacementArrTag{$1}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
149 # print "replacing $2 with $replacementArrTag{$2}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
150
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
151 $sp_ident[$brackets-1] = $2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
152 $sp_ident[$brackets] = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
153
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
154
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
155 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
156
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
157 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
158 elsif ($tree_definition_copy =~ /^\(([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_\(\),]+)\)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
159 $tree_definition_copy =~ s/^\(//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
160 $tree_definition_copy =~ s/\)$//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
161 $brackets--;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
162 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
163 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
164
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
165 foreach my $key (keys %replacementArr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
166 my $replacement = $replacementArr{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
167 $tree_definition =~ s/$key/$replacement/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
168 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
169 @sp_ident = reverse(@sp_ident);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
170 # print "modified tree_definition = $tree_definition\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
171 # print "done .. tree_definition = $tree_definition\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
172 # print "sp_ident = @sp_ident\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
173 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
174
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
175
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
176 my $complexity=int($COMPLEXITY_SUPPORT * (1/40));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
177
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
178 #print "complexity=$complexity\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
179 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
180
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
181 $printer = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
182
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
183 my $rando = int(rand(1000));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
184 my $localdate = `date`;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
185 $localdate =~ /([0-9]+):([0-9]+):([0-9]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
186 my $info = $rando.$1.$2.$3;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
187
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
188 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
189 # GETTING INPUT INFORMATION AND OPENING INPUT AND OUTPUT FILES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
190
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
191
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
192 my @thresharr = (0, split(/,/,$thresholds));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
193 my $randno=int(rand(100000));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
194 my $megamatch = $randno.".megamatch.net.axt"; #"/gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr1.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
195 my $megamatchlck = $megamatch.".lck";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
196 unlink $megamatchlck;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
197
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
198 my $selected= $orth;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
199 #my $eventfile = $orth;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
200 $selected = $selected."_SELECTED";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
201 #$selected = $selected."_".$SIMILARITY_THRESH;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
202 #my $runtime = $selected.".runtime";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
203
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
204 my $inputtags = "H:C:O:R:M";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
205 $inputtags = $ARGV[3] if exists $ARGV[3] && $ARGV[3] =~ /[A-Z]:[A-Z]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
206
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
207 my @all_tags = split(/:/, $inputtags);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
208 my $inputsp = "hg18:panTro2:ponAbe2:rheMac2:calJac1";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
209 $inputsp = $ARGV[4] if exists $ARGV[4] && $ARGV[3] =~ /[0-9]+:/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
210 #@sp_ident = split(/:/,$inputsp);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
211 my $junkfile = $orth."_junk";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
212
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
213 my $sh = load_sameHash(1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
214 my $rh = load_revHash(1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
215
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
216 #print "inputs are : \n"; foreach(@ARGV){print $_,"\n";}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
217 #open (SELECT, ">$selected") or die "Cannot open selected file: $selected: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
218 open (SUMMARY, ">$summout") or die "Cannot open summout file: $summout: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
219 #open (RUN, ">$runtime") or die "Cannot open orth file: $runtime: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
220 #my $ctlfile = "baseml\.ctl"; #$ARGV[4];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
221 #my $treefile = "/gpfs/home/ydk104/work/rhesus_microsat/codes/lib/"; #1 THIS IS THE THE TREE UNDER CONSIDERATION, IN NEWICK
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
222 my %registeredTrees = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
223 my @removalReasons =
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
224 ("microsatellite is compound",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
225 "complex structure",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
226 "if no. if micros is more than no. of species",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
227 "if more than one micro per species ",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
228 "if microsat contains N",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
229 "different motif than required ",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
230 "more than zero interruptions",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
231 "microsat could not form key ",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
232 "orthologous microsats of different motif size ",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
233 "orthologous microsats of different motifs ",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
234 "microsats belong to different alignment blocks altogether",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
235 "microsat near edge",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
236 "microsat in low complexity region",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
237 "microsat flanks dont align well",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
238 "phylogeny not informative");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
239 my %allowedhash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
240 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
241 # WORKING ON MAKING THE MEGAMATCH FILE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
242 my $chromt=int(rand(10000));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
243 my $p_chr=$chromt;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
244
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
245 my $tree_definition_orig_copy = $tree_definition_orig;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
246
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
247 $tree_definition=~s/,/, /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
248 $tree_definition =~ s/, +/, /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
249 $tree_definition_orig=~s/,/, /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
250 $tree_definition_orig =~ s/, +/, /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
251 my @exactspeciesset_unarranged = split(/,/,$species_set);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
252 my @exactspeciesset_unarranged_orig = split(/,/,$tspecies_set);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
253 my $largesttree = "$tree_definition;";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
254 my $largesttree_orig = "$tree_definition_orig;";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
255 # print "largesttree = $largesttree\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
256 $tree_definition =~ s/\(//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
257 $tree_definition =~ s/\)//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
258 $tree_definition=~s/[\)\(, ]/\t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
259 $tree_definition =~ s/\t+/\t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
260
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
261 $tree_definition_orig =~ s/\(//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
262 $tree_definition_orig =~ s/\)//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
263 $tree_definition_orig =~s/[\)\(, ]/\t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
264 $tree_definition_orig =~ s/\t+/\t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
265 # print "tree_definition = $tree_definition tree_definition_orig = $tree_definition_orig\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
266
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
267 my @treespecies=split(/\t+/,$tree_definition);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
268 my @treespecies_orig=split(/\t+/,$tree_definition_orig);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
269 # print "tree_definition = $tree_definition .. treespecies=@treespecies ... treespecies_orig=@treespecies_orig\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
270 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
271
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
272 foreach my $spec (@treespecies){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
273 foreach my $espec (@exactspeciesset_unarranged){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
274 # print "spec=$spec and espec=$espec\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
275 push @exactspecies, $spec if $spec eq $espec;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
276 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
277 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
278
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
279 foreach my $spec (@treespecies_orig){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
280 foreach my $espec (@exactspeciesset_unarranged_orig){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
281 # print "spec=$spec and espec=$espec\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
282 push @exactspecies_orig, $spec if $spec eq $espec;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
283 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
284 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
285
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
286 my $focalspec = $exactspecies[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
287 my $focalspec_orig = $exactspecies_orig[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
288 # print "exactspecies=@exactspecies ... focalspec=$focalspec\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
289 # print "texactspecies=@exactspecies_orig ... focalspec_orig=$focalspec_orig\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
290 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
291 my $arranged_species_set = join(".",@exactspecies);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
292 my $arranged_species_set_orig = join(".",@exactspecies_orig);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
293
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
294 @exacttags=@exactspecies;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
295 my @exacttags_orig=@exactspecies_orig;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
296
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
297 foreach my $extag (@exacttags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
298 $extag =~ s/hg18/H/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
299 $extag =~ s/panTro2/C/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
300 $extag =~ s/ponAbe2/O/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
301 $extag =~ s/rheMac2/R/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
302 $extag =~ s/calJac1/M/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
303 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
304
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
305 foreach my $extag (@exacttags_orig){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
306 $extag =~ s/hg18/H/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
307 $extag =~ s/panTro2/C/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
308 $extag =~ s/ponAbe2/O/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
309 $extag =~ s/rheMac2/R/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
310 $extag =~ s/calJac1/M/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
311 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
312
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
313 my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
314 #print "sending to maftoAxt_multispecies: $maf, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
315
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
316 maftoAxt_multispecies($maf, $tree_definition_orig_copy, $chr_name, $tspecies_set);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
317 #print "made files\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
318 my @filterseqfiles= ($chr_name);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
319 $largesttree =~ s/hg18/H/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
320 $largesttree =~ s/panTro2/C/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
321 $largesttree =~ s/ponAbe2/O/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
322 $largesttree =~ s/rheMac2/R/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
323 $largesttree =~ s/calJac1/M/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
324 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
325 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
326
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
327 my ($lagestnodes, $largestbranches) = get_nodes($largesttree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
328 shift (@$lagestnodes);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
329 my @extendedtitle=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
330
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
331 my $title = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
332 my $parttitle = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
333 my @titlearr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
334 my @firsttitle=($focalspec_orig."chrom", $focalspec_orig."start", $focalspec_orig."end", $focalspec_orig."motif", $focalspec_orig."motifsize", $focalspec_orig."threshold");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
335
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
336 my @finames= qw(chr start end motif motifsize microsat mutation mutation.position mutation.from mutation.to insertion.details deletion.details);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
337
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
338 my @fititle=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
339
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
340 foreach my $spec (split(",",$tspecies_set)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
341 push @fititle, $spec;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
342 foreach my $name (@finames){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
343 push @fititle, $spec.".".$name;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
344 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
345 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
346
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
347
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
348 my @othertitle=qw(somechr somestart somened event source);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
349
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
350 my @fnames = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
351 push @fnames, qw(insertions_num deletions_num motinsertions_num motinsertionsf_num motdeletions_num motdeletionsf_num noninsertions_num nondeletions_num) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
352 push @fnames, qw(binsertions_num bdeletions_num bmotinsertions_num bmotinsertionsf_num bmotdeletions_num bmotdeletionsf_num bnoninsertions_num bnondeletions_num) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
353 push @fnames, qw(dinsertions_num ddeletions_num dmotinsertions_num dmotinsertionsf_num dmotdeletions_num dmotdeletionsf_num dnoninsertions_num dnondeletions_num) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
354 push @fnames, qw(ninsertions_num ndeletions_num nmotinsertions_num nmotinsertionsf_num nmotdeletions_num nmotdeletionsf_num nnoninsertions_num nnondeletions_num) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
355 push @fnames, qw(substitutions_num bsubstitutions_num dsubstitutions_num nsubstitutions_num indels_num subs_num);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
356
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
357 my @fullnames = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
358 # print "revising\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
359 # print "H = $backReplacementArrTag{H}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
360 # print "C = $backReplacementArrTag{C}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
361 # print "O = $backReplacementArrTag{O}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
362 # print "R = $backReplacementArrTag{R}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
363 # print "M = $backReplacementArrTag{M}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
364
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
365 foreach my $lnode (@$lagestnodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
366 my @pair = @$lnode;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
367 my @nodemutarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
368 for my $p (@pair){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
369 # print "p = $p\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
370 $p =~ s/[\(\), ]+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
371 $p =~ s/([A-Z])/$1./g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
372 $p =~ s/\.$//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
373
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
374 $p =~ s/H/$backReplacementArrTag{H}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
375 $p =~ s/C/$backReplacementArrTag{C}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
376 $p =~ s/O/$backReplacementArrTag{O}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
377 $p =~ s/R/$backReplacementArrTag{R}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
378 $p =~ s/M/$backReplacementArrTag{M}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
379 foreach my $n (@fnames) { push @fullnames, $p.".".$n;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
380 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
381 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
382
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
383 #print SUMMARY "#",join("\t", @firsttitle, @fititle, @othertitle);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
384
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
385 #print SUMMARY "\t",join("\t", @fullnames);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
386 my $header = join("\t",@firsttitle, @fititle, @othertitle, @fullnames, @fnames, "tree", "cleancase");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
387 # print "header= $header\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
388 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
389
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
390 #print SUMMARY "\t",join("\t", @fnames);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
391 #$title= $title."\t".join("\t", @fnames);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
392
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
393 #print SUMMARY "\t","tree","\t", "cleancase", "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
394 #$title= $title."\t"."tree"."\t"."cleancase". "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
395
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
396 #print $title; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
397
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
398 #print "all_tags = @all_tags\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
399
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
400 for my $no (3 ... $#all_tags+1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
401 # print "no=$no\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
402 @tags = @all_tags[0 ... $no-1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
403 # print "all_tags=>@all_tags< , tags = >@tags<\n" if $printer == 1; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
404 %template=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
405 my @nextcounter = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
406 #next if scalar(@tags) < 4;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
407 # print "now doing tags = @tags, no = $no\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
408 open (ORTH, "<$orth") or die "Cannot open orth file: $orth: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
409
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
410 # print SUMMARY join "\t", qw (species chr start end branch motif microsat mutation position from to insertion deletion);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
411
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
412
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
413 ##################### T E M P O R A R Y #####################
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
414 my @finaltitle=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
415 my @singletitle = qw (species chr start end motif motifsize microsat strand microsatsize col10 col11 col12 col13);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
416 my $endtitle = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
417 foreach my $tag (@tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
418 my @tempsingle = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
419
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
420 foreach my $single (@singletitle){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
421 push @tempsingle, $tag.$single;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
422 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
423 @finaltitle = (@finaltitle, @tempsingle);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
424 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
425
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
426 # print SUMMARY join("\t",@finaltitle),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
427
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
428 #############################################################
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
429
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
430 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
431 # GET THE TREE FROM TREE FILE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
432 my $tree = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
433 $tree = "((H, C), O)" if $no == 3;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
434 $tree = "(((H, C), O), R)" if $no == 4;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
435 $tree = "((((H, C), O), R), M)" if $no == 5;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
436 # $tree=~s/;$//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
437 # print "our tree = $tree\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
438 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
439 # LOADING HASH CONTAINING ALL POSSIBLE TREES:
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
440 $tree_decipherer = "/gpfs/home/ydk104/work/rhesus_microsat/codes/lib/tree_analysis_".join("",@tags).".txt";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
441 %template=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
442 %alternate=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
443 load_allPossibleTrees($tree_decipherer, \%template, \%alternate);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
444
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
445 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
446 # LOADING THE TREES TO REJECT FOR BIRTH ANALYSIS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
447 %treesToReject=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
448 %treesToIgnore=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
449 load_treesToReject(@tags);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
450 load_treesToIgnore(@tags);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
451 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
452 # LOADING INPUT DATA INTO HASHES AND ARRAYS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
453
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
454
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
455 #1 THIS IS THE POINT WHERE WE CAN FILTER OUT LARGE MICROSAT CLUSTERS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
456 #2 AS WELL AS MULTIPLE-ALIGNMENT-BLOCKS-SPANNING MICROSATS (KIND OF
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
457 #3 IMPLICIT IN THE FIRST PART OF THE SENTENCE ITSELF IN MOST CASES).
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
458
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
459 my %orths=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
460 my $counterm = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
461 my $loaded = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
462 my %seen = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
463 my @allowedchrs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
464 # print "no = $no\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
465
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
466 while (my $line = <ORTH>){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
467 # print "line=$line\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
468 my $register1 = $line =~ s/>$exactspecies_orig[0]/>$replacementArrTag{$exactspecies_orig[0]}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
469 my $register2 = $line =~ s/>$exactspecies_orig[1]/>$replacementArrTag{$exactspecies_orig[1]}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
470 my $register3 = $line =~ s/>$exactspecies_orig[2]/>$replacementArrTag{$exactspecies_orig[2]}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
471 my $register4 = $line =~ s/>$exactspecies_orig[3]/>$replacementArrTag{$exactspecies_orig[3]}/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
472 my $register5 = $line =~ s/>$exactspecies_orig[4]/>$replacementArrTag{$exactspecies_orig[4]}/g if exists $exactspecies_orig[4];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
473
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
474 # print "line = $line\n"; <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
475
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
476
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
477 # next if $register1 + $register2 + $register3 + $register4 + $register5 > scalar(@tags);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
478 my @micros = split(/>/,$line); # LOADING ALL THE MICROSAT ENTRIES FROM THE CLUSTER INTO @micros
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
479 #print "micros=",printarr(@micros),"\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
480 shift @micros; # EMPTYING THE FIRST, EMTPY ELEMENT OF THE ARRAY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
481
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
482 $no_of_species = adjustCoordinates($micros[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
483 # print "A: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
484 next if $no_of_species != $no;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
485 # print "no = $no ... no_of_species=$no_of_species\n";#<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
486 $counterm++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
487 #------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
488 $nextcounter[0]++ if $line =~ /compound/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
489 next if $line =~ /compound/; # GETTING RID OF COMPOUND MICROSATS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
490 #------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
491 #next if $line =~ /[A-Za-z]>[a-zA-Z]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
492 #------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
493 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
494 my $match_count = ($line =~ s/>/>/g); # COUNTING THE NUMBER OF MICROSAT ENTRIES IN THE CLUSTER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
495 #print "number of species = $match_count\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
496 my $stopper = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
497 foreach my $mic (@micros){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
498 my @local = split(/\t/,$mic);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
499 if ($local[$typecord] =~ /\./ || exists($local[$no_of_interruptionscord+2])) {$stopper = 1; $nextcounter[1]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
500 last; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
501 # REMOVING CLUSTERS WITH THE CYRPTIC, (UNRESOLVABLY COMPLEX) MICROSAT ENTRIES IN THEM
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
502 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
503 next if $stopper ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
504 #------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
505 $nextcounter[2]++ if (scalar(@micros) >$no_of_species);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
506
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
507 next if (scalar(@micros) >$no_of_species); #1 REMOVING MICROSAT CLUSTERS WITH MORE NUMBER OF MICROSAT ENTRIES THAN THE NUMBER OF SPECIES IN THE DATASET.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
508 #2 THIS IS SO BECAUSE SUCH CLUSTERS IMPLY THAT IN AT LEAST ONE SPECIES, THERE IS MORE THAN ONE MICROSAT ENTRY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
509 #3 IN THE CLUSTER. THUS, HERE WE ARE GETTING RID OF MICROSATS CLUSTERS THAT INCLUDE MULTUPLE, NEIGHBORING
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
510 #4 MICROSATS, AND STICK TO CLEAN MICROSATS THAT DO NOT HAVE ANY MICROSATS IN NEIGHBORHOOD.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
511 #5 THIS 'NEIGHBORHOOD-RANGE' HAD BEEN DECIDED PREVIOUSLY IN OUR CODE multiSpecies_orthFinder4.pl
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
512 my $nexter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
513 foreach my $tag (@tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
514 my $tagcount = ($line =~ s/>$tag\t/>$tag\t/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
515 if ($tagcount > 1) { $nexter =1; #print colored ['red'],"multiple entires per species : $tagcount of $tag\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
516 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
517 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
518 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
519
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
520 if ($nexter == 1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
521 $nextcounter[3]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
522 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
523 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
524 #------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
525 foreach my $mic (@micros){ #1 REMOVING MICROSATELLITES WITH ANY 'N's IN THEM
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
526 my @local = split(/\t/,$mic);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
527 if ($local[$microsatcord] =~ /N/) {$stopper =1; $nextcounter[4]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
528 last;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
529 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
530 next if $stopper ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
531 #print "till here 1\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
532 #------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
533 my @micros_copy = @micros;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
534
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
535 my $tempmicro = shift(@micros_copy); #1 CURRENTLY OBTAINING INFORMATION FOR THE FIRST
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
536 #2 MICROSAT IN THE CLUSTER.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
537 my @tempfields = split(/\t/,$tempmicro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
538 my $prevtype = $tempfields[$typecord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
539 my $tempmotif = $tempfields[$motifcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
540
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
541 my $tempfirstmotif = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
542 if (scalar(@tempfields) > $microsatcord + 2){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
543 if ($tempfields[$no_of_interruptionscord] >= 1) { #1 DISCARDING MICROSATS WITH MORE THAN ZERO INTERRUPTIONS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
544 #2 IN THE FIRST MICROSAT OF THE CLUSTER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
545 $nexter =1; #print colored ['blue'],"more than one interruptions \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
546 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
547 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
548 if ($nexter == 1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
549 $nextcounter[6]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
550 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
551 } #1 DONE OBTAINING INFORMATION REGARDING
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
552 #2 THE FIRST MICROSAT FROM THE CLUSTER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
553
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
554 if ($tempmotif =~ /^\[/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
555 $tempmotif =~ s/^\[//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
556 $tempmotif =~ /([a-zA-Z]+)\].*/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
557 $tempfirstmotif = $1; #1 OBTAINING THE FIRTS MOTIF OF MICROSAT
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
558 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
559 else {$tempfirstmotif = $tempmotif;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
560 my $prevmotif = $tempfirstmotif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
561
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
562 my $key = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
563 # print "searching temp micro for 0-9 $focalspec chr0-9a-zA-Z 0-9 0-9 \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
564 # print "tempmicro = $tempmicro .. looking for ([0-9]+)\s+($focalspec_orig)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\n"; <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
565 if ($tempmicro =~ /([0-9]+)\s+($focalspec_orig)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
566 # print "B: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
567 $key = join("_",$2, $3, $4, $5);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
568 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
569 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
570 # print "counld not form a key for temp\n"; # if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
571 $nextcounter[7]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
572 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
573 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
574 #----------------- #1 NOW, AFTER OBTAINING INFORMATION ABOUT
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
575 #2 THE FIRST MICROSAT IN THE CLUSTER, THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
576 #3 FOLLOWING LOOP GOES THROUGH THE OTHER MICROSATS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
577 #4 TO SEE IF THEY SHARE THE REQUIRED FEATURES (BELOW)
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
578
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
579 foreach my $micro (@micros_copy){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
580 my @fields = split(/\t/,$micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
581 #-----------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
582 if (scalar(@fields) > $microsatcord + 2){ #1 DISCARDING MICROSATS WITH MORE THAN ONE INTERRUPTIONS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
583 if ($fields[$no_of_interruptionscord] >= 1) {$nexter =1; #print colored ['blue'],"more than one interruptions \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
584 $nextcounter[6]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
585 last; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
586 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
587 #-----------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
588 if (($prevtype ne "0") && ($prevtype ne $fields[$typecord])) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
589 $nexter =1; #print colored ['yellow'],"microsat of different type \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
590 $nextcounter[8]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
591 last; } #1 DISCARDING MICROSAT CLUSTERS WHERE MICROSATS BELONG
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
592 #----------------- #2 TO DIFFERENT TYPES (MONOS, DIS, TRIS ETC.)
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
593 $prevtype = $fields[$typecord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
594
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
595 my $motif = $fields[$motifcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
596 my $firstmotif = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
597
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
598 if ($motif =~ /^\[/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
599 $motif =~ s/^\[//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
600 $motif =~ /([a-zA-Z]+)\].*/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
601 $firstmotif = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
602 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
603 else {$firstmotif = $motif;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
604
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
605 my $motifpattern = $firstmotif.$firstmotif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
606 my $prevmotifpattern = $prevmotif.$prevmotif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
607
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
608 if (($prevmotif ne "0")&&(($motifpattern !~ /$prevmotif/i)||($prevmotifpattern !~ /$firstmotif/i)) ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
609 $nexter =1; #print colored ['green'],"different motifs used \n$line\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
610 $nextcounter[9]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
611 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
612 } #1 DISCARDING MICROSAT CLUSTERS WHERE MICROSATS BELONG
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
613 #2 TO DIFFERENT MOTIFS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
614 my $prevmotif = $firstmotif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
615 #-----------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
616
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
617 for my $t (0 ... $#tags){ #1 DISCARDING MICROSAT CLUSTERS WHERE MICROSAT ENTRIES BELONG
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
618 #2 DIFFERENT ALIGNMENT BLOCKS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
619 if ($micro =~ /([0-9]+)\s+($focalspec_orig)\s([_0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
620 my $key2 = join("_",$2, $3, $4, $5);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
621 # print "key = $key .. key2 = $key2\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
622 if ($key2 ne $key){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
623 # print "microsats belong to diffferent alignment blocks altogether\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
624 $nextcounter[10]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
625 $nexter = 1; last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
626 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
627 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
628 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
629 # print "counld not form a key for $line\n"; # if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
630 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
631 $nexter = 1; last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
632 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
633 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
634 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
635 #print "D2: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
636
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
637 #####################
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
638 if ($nexter == 1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
639 # print "nexting\n"; # if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
640 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
641 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
642 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
643 # print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n$key:\n$line\nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
644 push (@{$orths{$key}},$line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
645 $loaded++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
646 if ($line =~ /($focalspec_orig)\s([_a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/ ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
647
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
648 # print "$line\n" if $printer == 1; #if $line =~ /Contig/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
649 # print "################ ################\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
650 push @allowedchrs, $2 if !exists $allowedhash{$2};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
651 $allowedhash{$2} = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
652 my $key = join("\t",$1, $2, $3, $4);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
653 # print "C: $no_of_species .. key = $key\n";#<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
654 # print "print the shit: $key\n" ; #if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
655 $seen{$key} = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
656 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
657 else { #print "Key could not be formed in SPUT for ($focalspec_orig) (chrom) ([0-9]+) ([0-9]+)\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
658 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
659 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
660 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
661 close ORTH;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
662 # print "now studying where we lost microsatellites: @nextcounter\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
663 for my $reason (0 ... $#nextcounter){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
664 #print $removalReasons[$reason]."\t".$nextcounter[$reason],"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
665 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
666 # print "\ntotal number of keys formed = ", scalar(keys %orths), " = \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
667 # print "done filtering .. counterm = $counterm and loaded = $loaded\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
668 #----------------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
669 # NOW GENERATING THE ALIGNMENT FILE WITH RELELEVENT ALIGNMENTS STORED ONLY.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
670 # print "adding files @filterseqfiles \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
671 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
672
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
673 while (1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
674 if (-e $megamatchlck){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
675 # print "waiting to write into $megamatchlck\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
676 sleep 10;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
677 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
678 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
679 open (MEGAMLCK, ">$megamatchlck") or die "Cannot open megamatchlck file $megamatchlck: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
680 open (MEGAM, ">$megamatch") or die "Cannot open megamatch file $megamatch: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
681 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
682 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
683 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
684 foreach my $seqfile (@filterseqfiles){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
685 my $fullpath = $seqfile;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
686 open (MATCH, "<$fullpath") or die "Cannot open MATCH file $fullpath: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
687 my $matchlines = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
688
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
689 while (my $line = <MATCH>) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
690 #print "checking $line";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
691 if ($line =~ /($focalspec_orig)\s([a-zA-Z0-9]+)\s([0-9]+)\s([0-9]+)/ ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
692 my $key = join("\t",$1, $2, $3, $4);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
693 # print "key = $key\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
694 #print "------------------------------------------------------\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
695 #print "asking $line\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
696 if (exists $seen{$key}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
697
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
698 #print "seen $line \n"; <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
699 while (1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
700 $matchlines++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
701 print MEGAM $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
702 $line = <MATCH>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
703 print MEGAM "\n" if $line !~ /[0-9a-zA-Z]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
704 last if $line !~/[0-9a-zA-Z]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
705 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
706 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
707 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
708 # print "not seen\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
709 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
710 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
711 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
712 # print "matchlines = $matchlines\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
713 close MATCH;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
714 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
715 close MEGAMLCK;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
716
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
717 unlink $megamatchlck;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
718 close MEGAM;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
719 undef %seen;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
720 # print "done writitn to $megamatch\n";#<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
721 #----------------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
722 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
723 #---------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
724 # NOW, AFTER FILTERING MANY MICROSATS, AND LOADING THE FILTERED ONES INTO
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
725 # THE HASH %orths , WE GO THROUGH THE ALIGNMENT FILE, AND STUDY THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
726 # FLANKING SEQUENCES OF ALL THESE MICROSATS, TO FILTER THEM FURTHER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
727 #$printer = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
728
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
729 my $microreadcounter=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
730 my $contigsentered=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
731 my $contignotrightcounter=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
732 my $keynotformedcounter=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
733 my $keynotfoundcounter= 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
734 my $dotcounter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
735
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
736 # print "opening $megamatch\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
737
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
738 open (BO, "<$megamatch") or die "Cannot open alignment file: $megamatch: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
739 # print "doing $megamatch\n " ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
740
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
741 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
742
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
743 while (my $line = <BO>){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
744 # print $line; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
745 # print "." if $dotcounter % 100 ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
746 # print "\n" if $dotcounter % 5000 ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
747 # print "dotcounter = $dotcounter\n " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
748 next if $line !~ /^[0-9]+/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
749 $dotcounter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
750 # print colored ['green'], "~" x 60, "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
751 # print colored ['green'], $line;# if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
752 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
753 my @fields2 = split(/\t/,$line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
754 my $key2 = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
755 my $alignment_no = (); #1 TEMPORARY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
756 if ($line =~ /([0-9]+)\s+($focalspec_orig)\s([_\-s0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
757 # $key2 = join("\t",$1, $2, $4, $5);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
758 $key2 = join("_",$2, $3, $4, $5);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
759 # print "key = $key2\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
760
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
761 # print "key = $key2\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
762 $alignment_no=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
763 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
764 else {print "seq line $line incompatible\n"; $keynotformedcounter++; next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
765
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
766 $no_of_species = adjustCoordinates($line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
767
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
768 $contignotrightcounter++ if $no_of_species != $no;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
769 # print "contignotrightcounter=$contignotrightcounter\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
770 # print "no_of_species=$no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
771 # print "no=$no\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
772
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
773 next if $no_of_species != $no;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
774 # print "D: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
775 # print "E: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
776 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
777 # print "key = $key2\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
778 my @clusters = (); #1 EXTRACTING MICROSATS CORRESPONDING TO THIS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
779 #2 ALIGNMENT BLOCK
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
780 if (exists($orths{$key2})){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
781 @clusters = @{$orths{$key2}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
782 $contigsentered++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
783 delete $orths{$key2};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
784 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
785 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
786 # print "orth does not exist\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
787 $keynotfoundcounter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
788 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
789 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
790
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
791 my %sequences=(); #1 WILL STORE SEQUENCES IN THE CURRENT ALIGNMENT BLOCK
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
792 my $humseq = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
793 foreach my $tag (@tags){ #1 READING THE ALIGNMENT FILE AND CAPTURING SEQUENCES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
794 my $seq = <BO>; #2 OF ALL SPECIES.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
795 chomp $seq;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
796 $sequences{$tag} = " ".$seq;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
797 #print "sequences = $sequences{$tag}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
798 $humseq = $seq if $tag =~ /H/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
799 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
800
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
801
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
802 foreach my $cluster (@clusters){ #1 NOW, GOING THROUGH THE CLUSTER OF MICROSATS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
803 # print "x" x 60, "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
804 # print colored ['red'],"cluster = $cluster\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
805 $largesttree =~ s/hg18/H/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
806 $largesttree =~ s/panTro2/C/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
807 $largesttree =~ s/ponAbe2/O/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
808 $largesttree =~ s/rheMac2/R/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
809 $largesttree =~ s/calJac1/M/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
810
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
811 $microreadcounter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
812 my @micros = split(/>/,$cluster);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
813
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
814
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
815
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
816
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
817
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
818
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
819 shift @micros;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
820
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
821 my $edge_microsat=0; #1 THIS WILL HAVE VALUE "1" IF MICROSAT IS FOUND
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
822 #2 TO BE TOO CLOSE TO THE EDGES OF ALIGNMENT BLOCK
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
823
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
824 my @starts= (); my %start_hash=(); #1 STORES THE START AND END COORDINATES OF MICROSATELLITES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
825 my @ends = (); my %end_hash=(); #2 SO THAT LATER, WE WILL BE ABLE TO FIND THE EXTREME
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
826 #3 COORDINATE VALUES OF THE ORTHOLOGOUS MIROSATELLITES.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
827
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
828 my %microhash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
829 my %microsathash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
830 my %nonmicrosathash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
831 my $motif=(); #1 BASIC MOTIF OF THE MICROSATELLITE.. THERE'S ONLY 1
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
832 # print "tags=@tags\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
833 for my $i (0 ... $#tags){ #1 FINDING THE MICROSAT, AND THE ALIGNMENT SEQUENCE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
834 #2 CORRESPONDING TO THE PARTICULAR SPECIES (AS PER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
835 #3 THE VARIABLE $TAG;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
836 my $tag = $tags[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
837 # print $seq;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
838 my $locus="NULL"; #1 THIS WILL STORE THE MICROSAT OF THIS SPECIES.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
839 #2 IF THERE IS NO MICROSAT, IT WILL REMAIN "NULL"
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
840
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
841 foreach my $micro (@micros){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
842 # print "micro=$micro, tag=$tag\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
843 if ($micro =~ /^$tag/){ #1 MICROSAT OF THIS SPECIES FOUND..
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
844 $locus = $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
845 my @fields = split(/\t/,$micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
846 $motif = $fields[$motifcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
847 $microsathash{$tag}=$fields[$microsatcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
848 # print "fields=@fields, and startcord=$startcord = $fields[$startcord]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
849 push(@starts, $fields[$startcord]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
850 push(@ends, $fields[$endcord]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
851 $start_hash{$tag}=$fields[$startcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
852 $end_hash{$tag}=$fields[$endcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
853 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
854 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
855 else{$microsathash{$tag}="NULL"}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
856 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
857 $microhash{$tag}=$locus;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
858
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
859 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
860
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
861
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
862
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
863 my $extreme_start = smallest_number(@starts); #1 THESE TWO ARE THE EXTREME COORDINATES OF THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
864 my $extreme_end = largest_number(@ends); #2 MICROSAT CLUSTER ACCROSS ALL THE SPECIES IN
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
865 #3 WHOM IT IS FOUND TO BE ORTHOLOGOUS.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
866
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
867 # print "starts=@starts... ends=@ends\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
868
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
869 my %up_flanks = (); #1 CONTAINS UPSTEAM FLANKING REGIONS FOR EACH SPECIES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
870 my %down_flanks = (); #1 CONTAINS DOWNDTREAM FLANKING REGIONS FOR EACH SPECIES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
871
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
872 my %up_largeflanks = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
873 my %down_largeflanks = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
874
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
875 my %locusandflanks = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
876 my %locusandlargeflanks = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
877
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
878 my %up_internal_flanks=(); #1 CONTAINS SEQUENCE BETWEEN THE $extreme_start and the
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
879 #2 ACTUAL START OF MICROSATELLITE IN THE SPECIES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
880 my %down_internal_flanks=(); #1 CONTAINS SEQUENCE BETWEEN THE $extreme_end and the
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
881 #2 ACTUAL end OF MICROSATELLITE IN THE SPECIES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
882
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
883 my %alignment=(); #1 CONTAINS ACTUAL ALIGNMENT SEQUENCE BETWEEN THE TWO
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
884 #2 EXTEME VALUES.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
885
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
886 my %microsatstarts=(); #1 WITHIN EACH ALIGNMENT, IF THERE EXISTS A MICROSATELLITE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
887 #2 THIS HASH CONTAINS THE START SITE OF THE MICROSATELLITE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
888 #3 WIHIN THE ALIGNMENT
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
889 next if !defined $extreme_start;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
890 next if !defined $extreme_end;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
891 next if $extreme_start > length($sequences{$tags[0]});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
892 next if $extreme_start < 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
893 next if $extreme_end > length($sequences{$tags[0]});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
894
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
895 for my $i (0 ... $#tags){ #1 NOW THAT WE HAVE GATHERED INFORMATION REGARDING
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
896 #2 SEQUENCE ALIGNMENT AND MICROSATELLITE COORDINATES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
897 #3 AS WELL AS THE EXTREME COORDINATES OF THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
898 #4 MICROSAT CLUSTER, WE WILL PROCEED TO EXTRACT THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
899 #5 FLANKING SEQUENCE OF ALL ORGS, AND STUDY IT IN
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
900 #6 MORE DETAIL.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
901 my $tag = $tags[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
902 # print "tag=$tag.. seqlength = ",length($sequences{$tag})," extreme_start=$extreme_start and extreme_end=$extreme_end\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
903 my $upstream_gaps = (substr($sequences{$tag}, 0, $extreme_start) =~ s/\-/-/g); #1 NOW MEASURING THE NUMBER OF GAPS IN THE UPSTEAM
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
904 #2 AND DOWNSTREAM SEQUENCES OF THE MICROSATs IN THIS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
905 #3 CLUSTER.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
906 # print "seq length $tag = $sequences{$tag} = ",length($sequences{$tag})," extreme_end=$extreme_end\n" ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
907 my $downstream_gaps = (substr($sequences{$tag}, $extreme_end) =~ s/\-/-/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
908 if (($extreme_start - $upstream_gaps )< $EDGE_DISTANCE || (length($sequences{$tag}) - $extreme_end - $downstream_gaps) < $EDGE_DISTANCE){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
909 $edge_microsat=1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
910
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
911 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
912 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
913 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
914 $up_flanks{$tag} = substr($sequences{$tag}, $extreme_start - $FLANK_SUPPORT, $FLANK_SUPPORT);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
915 $down_flanks{$tag} = substr($sequences{$tag}, $extreme_end+1, $FLANK_SUPPORT);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
916
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
917 $up_largeflanks{$tag} = substr($sequences{$tag}, $extreme_start - $COMPLEXITY_SUPPORT, $COMPLEXITY_SUPPORT);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
918 $down_largeflanks{$tag} = substr($sequences{$tag}, $extreme_end+1, $COMPLEXITY_SUPPORT);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
919
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
920
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
921 $alignment{$tag} = substr($sequences{$tag}, $extreme_start, $extreme_end-$extreme_start+1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
922 $locusandflanks{$tag} = $up_flanks{$tag}."[".$alignment{$tag}."]".$down_flanks{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
923 $locusandlargeflanks{$tag} = $up_largeflanks{$tag}."[".$alignment{$tag}."]".$down_largeflanks{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
924
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
925 if ($microhash{$tag} ne "NULL"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
926 $up_internal_flanks{$tag} = substr($sequences{$tag}, $extreme_start , $start_hash{$tag}-$extreme_start);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
927 $down_internal_flanks{$tag} = substr($sequences{$tag}, $end_hash{$tag} , $extreme_end-$end_hash{$tag});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
928 $microsatstarts{$tag}=$start_hash{$tag}-$extreme_start;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
929 # print "tag = $tag, internal flanks = $up_internal_flanks{$tag} and $down_internal_flanks{$tag} and start = $microsatstarts{$tag}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
930 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
931 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
932 $nonmicrosathash{$tag}=substr($sequences{$tag}, $extreme_start, $extreme_end-$extreme_start+1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
933
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
934 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
935 # print "up flank for species $tag = $up_flanks{$tag} \ndown flank for species $tag = $down_flanks{$tag} \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
936
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
937 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
938
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
939 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
940 $nextcounter[11]++ if $edge_microsat==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
941 next if $edge_microsat==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
942
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
943
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
944 my $low_complexity = 0; #1 VALUE WILL BE 1 IF ANY OF THE FLANKING REGIONS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
945 #2 IS FOUND TO BE OF LOW COMPLEXITY, BY USING THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
946 #3 FUNCTION sub test_complexity
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
947
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
948
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
949 for my $i (0 ... $#tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
950 # print "i = $tags[$i]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
951 if (test_complexity($up_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT) eq "LOW" || test_complexity($down_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT) eq "LOW"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
952 # print "i = $i, low complexity regions: $up_largeflanks{$tags[$i]}: ",test_complexity($up_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT), " and $down_largeflanks{$tags[$i]} = ",test_complexity($down_largeflanks{$tags[$i]}, $COMPLEXITY_SUPPORT),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
953 $low_complexity =1; last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
954 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
955 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
956
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
957 $nextcounter[12]++ if $low_complexity==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
958 next if $low_complexity == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
959
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
960
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
961 my $sequence_dissimilarity = 0; #1 THIS VALYE WILL BE 1 IF THE SEQUENCE SIMILARITY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
962 #2 BETWEEN ANY OF THE SPECIES AGAINST THE HUMAN
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
963 #3 FLANKING SEQUENCES IS BELOW A CERTAIN THRESHOLD
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
964 #4 AS DESCRIBED IN FUNCTION sub sequence_similarity
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
965 my %donepair = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
966 for my $i (0 ... $#tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
967 # print "i = $tags[$i]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
968 # next if $i == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
969 # print colored ['magenta'],"THIS IS UP\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
970
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
971 for my $b (0 ... $#tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
972 next if $b == $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
973 my $pair = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
974 $pair = $i."_".$b if $i < $b;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
975 $pair = $b."_".$i if $b < $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
976 next if exists $donepair{$pair};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
977 my ($up_similarity,$upnucdiffs, $upindeldiffs) = sequence_similarity($up_flanks{$tags[$i]}, $up_flanks{$tags[$b]}, $SIMILARITY_THRESH, $info);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
978 my ($down_similarity,$downnucdiffs, $downindeldiffs) = sequence_similarity($down_flanks{$tags[$i]}, $down_flanks{$tags[$b]}, $SIMILARITY_THRESH, $info);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
979 $donepair{$pair} = $up_similarity."_".$down_similarity;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
980
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
981 # print RUN "$up_similarity $upnucdiffs $upindeldiffs $down_similarity $downnucdiffs $downindeldiffs\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
982
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
983 if ( $up_similarity < $SIMILARITY_THRESH || $down_similarity < $SIMILARITY_THRESH){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
984 $sequence_dissimilarity =1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
985 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
986 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
987 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
988 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
989 $nextcounter[13]++ if $sequence_dissimilarity==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
990
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
991 next if $sequence_dissimilarity == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
992 my ($simplified_microsat, $Hchrom, $Hstart, $Hend, $locusmotif, $locusmotifsize) = summarize_microsat($cluster, $humseq);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
993 # print "simplified_microsat=$simplified_microsat\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
994 my ($tree_analysis, $conformation) = treeStudy($simplified_microsat);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
995 # print "tree_analysis = $tree_analysis .. conformation=$conformation\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
996 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
997
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
998 # print SELECT "\"$conformation\"\t$tree_analysis\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
999
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1000 next if $tree_analysis =~ /DISCARD/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1001 if (exists $treesToReject{$tree_analysis}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1002 $nextcounter[14]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1003 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1004 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1005
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1006 # print "F: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1007
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1008 # my $adjuster=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1009 # if ($no_of_species == 4){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1010 # my @sields = split(/\t/,$simplified_microsat);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1011 # my $somend = pop(@sields);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1012 # my $somestart = pop(@sields);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1013 # my $somechr = pop(@sields);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1014 # $adjuster = "NA\t" x 13 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1015 # $simplified_microsat = join ("\t", @sields, $adjuster).$somechr."\t".$somestart."\t".$somend;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1016 # }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1017 # if ($no_of_species == 3){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1018 # my @sields = split(/\t/,$simplified_microsat);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1019 # my $somend = pop(@sields);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1020 # my $somestart = pop(@sields);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1021 # my $somechr = pop(@sields);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1022 # $adjuster = "NA\t" x 26 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1023 # $simplified_microsat = join ("\t", @sields, $adjuster).$somechr."\t".$somestart."\t".$somend;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1024 # }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1025 #
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1026 $registeredTrees{$tree_analysis} = 1 if !exists $registeredTrees{$tree_analysis};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1027 $registeredTrees{$tree_analysis}++ if exists $registeredTrees{$tree_analysis};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1028
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1029 if (exists $treesToIgnore{$tree_analysis}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1030 my @appendarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1031
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1032 # print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize], "\t", $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1033 #print "SUMMARY ",$Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t", $thresharr[$locusmotifsize], "\t", $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1034 # print SELECT $Hchrom,"\t",$Hstart,"\t",$Hend,"\t","NOEVENT", "\t\t", $cluster,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1035
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1036 foreach my $lnode (@$lagestnodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1037 my @pair = @$lnode;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1038 my @nodemutarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1039 for my $p (@pair){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1040 my @mutinfoarray1 = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1041 for (1 ... 38){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1042 # push (@mutinfoarray1, "NA")
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1043 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1044 # print SUMMARY join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1045 # print join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1046 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1047
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1048 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1049 for (1 ... 38){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1050 push (@appendarr, "NA")
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1051 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1052 # print SUMMARY join ("\t", @appendarr,"NULL", "NULL"),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1053 # print join ("\t", @appendarr,"NULL", "NULL"),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1054 # print "SUMMARY ",join ("\t", @appendarr,"NULL", "NULL"),"\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1055 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1056 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1057 # print colored ['blue'],"cluster = $cluster\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1058
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1059 my ($mutations_array, $nodes, $branches_hash, $alivehash, $primaryalignment) = peel_onion($tree, \%sequences, \%alignment, \@tags, \%microsathash, \%nonmicrosathash, $motif, $tree_analysis, $thresholdhash{length($motif)}, \%microsatstarts);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1060
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1061 if ($mutations_array eq "NULL"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1062 # print "cluster = $cluster \n"; <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1063 my @appendarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1064
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1065 # print SUMMARY $Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize, "\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1066
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1067 # foreach my $lnode (@$lagestnodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1068 # my @pair = @$lnode;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1069 # my @nodemutarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1070 # for my $p (@pair){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1071 # my @mutinfoarray1 = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1072 # for (1 ... 38){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1073 # push (@mutinfoarray1, "NA")
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1074 # }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1075 # print SUMMARY join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1076 # print join ("\t", @mutinfoarray1[0...($#mutinfoarray1)] ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1077 # }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1078 # }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1079 # for (1 ... 38){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1080 # push (@appendarr, "NA")
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1081 # }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1082 # print SUMMARY join ("\t", @appendarr,"NULL", "NULL"),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1083 # print join ("\t", @appendarr,"NULL", "NULL"),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1084 # print join ("\t","SUMMARY", @appendarr,"NULL", "NULL"),"\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1085 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1086 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1087
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1088
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1089 # print "sent: \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1090 # print "nodes = @$nodes, branches array:\n" if $mutations_array ne "NULL" && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1091
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1092 my ($newstatus, $newmutations_array, $newnodes, $newbranches_hash, $newalivehash, $finalalignment) = fillAlignmentGaps($tree, \%sequences, \%alignment, \@tags, \%microsathash, \%nonmicrosathash, $motif, $tree_analysis, $thresholdhash{length($motif)}, \%microsatstarts);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1093 # print "newmutations_array returned = \n",join("\n",@$newmutations_array),"\n" if $newmutations_array ne "NULL" && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1094 my @finalmutations_array= ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1095 @finalmutations_array = selectMutationArray($mutations_array, $newmutations_array, \@tags, $alivehash, \%alignment, $motif) if $newmutations_array ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1096 @finalmutations_array = selectMutationArray($mutations_array, $mutations_array, \@tags, $alivehash, \%alignment, $motif) if $newmutations_array eq "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1097 # print "alt = $alternate{$conformation}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1098
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1099 my ($besttree, $treescore) = selectBetterTree($tree_analysis, $alternate{$conformation}, \@finalmutations_array);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1100 my $cleancase = "UNCLEAN";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1101 $cleancase = checkCleanCase($besttree, $finalalignment) if $treescore > 0 && $finalalignment ne "NULL" && $finalalignment =~ /\!/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1102 $cleancase = checkCleanCase($besttree, $primaryalignment) if $treescore > 0 && $finalalignment eq "NULL" && $primaryalignment =~ /\!/ && $primaryalignment ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1103 $cleancase = "CLEAN" if $finalalignment eq "NULL" && $primaryalignment !~ /\!/ && $primaryalignment ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1104 $cleancase = "CLEAN" if $finalalignment ne "NULL" && $finalalignment !~ /\!/ ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1105 # print "besttree = $besttree ... cleancase=$cleancase\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1106
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1107 my @selects = ("-C","+C","-H","+H","-HC","+HC","-O","+O","-H.-C","-H.-O","-HC,+C","-HC,+H","-HC.-O","-HCO,+HC","-HCO,+O","-O.-C","-O.-H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1108 "+C.+O","+H.+C","+H.+O","+HC,-C","+HC,-H","+HC.+O","+HCO,-C","+HCO,-H","+HCO,-HC","+HCO,-O","+O.+C","+O.+H","+H.+C.+O","-H.-C.-O","+HCO","-HCO");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1109 next if (oneOf(@selects, $besttree) == 0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1110 if ( ($besttree =~ /,/ || $besttree =~ /\./) && $cleancase eq "UNCLEAN"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1111 $besttree = "$besttree / $tree_analysis";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1112 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1113
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1114 $besttree = "NULL" if $treescore <= 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1115
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1116 while ($besttree =~ /[A-Z][A-Z]/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1117 $besttree =~ s/([A-Z])([A-Z])/$1:$2/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1118 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1119
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1120 if ($besttree !~ /NULL/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1121 my @elements = ($besttree =~ /([A-Z])/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1122
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1123 foreach my $ele (@elements){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1124 # print "replacing $ele with $backReplacementArrTag{$ele}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1125 $besttree =~ s/$ele/$backReplacementArrTag{$ele}/g if exists $backReplacementArrTag{$ele};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1126 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1127 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1128 my $endendstate = $focalspec_orig.".".$Hchrom."\t".$Hstart."\t".$Hend."\t".$locusmotif."\t".$locusmotifsize."\t".$tree_analysis."\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1129 next if $endendstate =~ /NA\tNA\tNA/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1130
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1131 print SUMMARY $focalspec_orig,".",$Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1132 # print "SUMMARY\t", $focalspec_orig,".",$Hchrom,"\t",$Hstart,"\t",$Hend,"\t",$locusmotif,"\t",$locusmotifsize,"\t",$tree_analysis,"\t" ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1133
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1134 my @mutinfoarray =();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1135
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1136 foreach my $lnode (@$lagestnodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1137 my @pair = @$lnode;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1138 my $joint = "(".join(", ",@pair).")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1139 my @nodemutarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1140
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1141 for my $p (@pair){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1142 foreach my $mut (@finalmutations_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1143 $mut =~ /node=([A-Z, \(\)]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1144 push @nodemutarr, $mut if $p eq $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1145 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1146 @mutinfoarray = summarizeMutations(\@nodemutarr, $besttree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1147
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1148 # print SUMMARY join ("\t", @mutinfoarray[0...($#mutinfoarray-1)] ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1149 # print join ("\t", @mutinfoarray[0...($#mutinfoarray-1)] ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1150 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1151 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1152
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1153 # print "G: $no_of_species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1154
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1155 my @alignmentarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1156
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1157 foreach my $key (keys %alignment){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1158 push @alignmentarr, $backReplacementArrTag{$key}.":".$alignment{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1159
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1160 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1161 # print "alignmentarr = @alignmentarr"; <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1162
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1163 @mutinfoarray = summarizeMutations(\@finalmutations_array, $besttree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1164 print SUMMARY join ("\t", @mutinfoarray ),"\t";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1165 print SUMMARY join(",",@alignmentarr),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1166 # print join("\t","--------------","\n",$besttree, join("",@tags)),"\n" if scalar(@tags) < 5;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1167 # <STDIN> if scalar(@tags) < 5;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1168 # print $cleancase, "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1169 # print join ("\t", @mutinfoarray,$cleancase,join(",",@alignmentarr)),"\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1170 # print "summarized\n"; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1171
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1172
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1173
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1174 my %indelcatch = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1175 my %substcatch = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1176 my %typecatch = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1177 my %nodescatch = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1178 my $mutconcat = join("\t", @finalmutations_array)."\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1179 my %indelposcatch = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1180 my %subsposcatch = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1181
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1182 foreach my $fmut ( @finalmutations_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1183 # next if $fmut !~ /indeltype=[a-zA-Z]+/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1184 #print RUN $fmut, "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1185 $fmut =~ /node=([a-zA-Z, \(\)]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1186 my $lnode = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1187 $nodescatch{$1}=1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1188
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1189 if ($fmut =~ /type=substitution/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1190 # print "fmut=$fmut\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1191 $fmut =~ /from=([a-zA-Z\-]+)\tto=([a-zA-Z\-]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1192 my $from=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1193 # print "from=$from\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1194 my $to=$2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1195 # print "to=$to\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1196 push @{$substcatch{$lnode}} , ("from:".$from." to:".$to);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1197 $fmut =~ /position=([0-9]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1198 push @{$subsposcatch{$lnode}}, $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1199 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1200
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1201 if ($fmut =~ /insertion=[a-zA-Z\-]+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1202 $fmut =~ /insertion=([a-zA-Z\-]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1203 push @{$indelcatch{$lnode}} , $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1204 $fmut =~ /indeltype=([a-zA-Z]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1205 push @{$typecatch{$lnode}}, $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1206 $fmut =~ /position=([0-9]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1207 push @{$indelposcatch{$lnode}}, $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1208 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1209 if ($fmut =~ /deletion=[a-zA-Z\-]+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1210 $fmut =~ /deletion=([a-zA-Z\-]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1211 push @{$indelcatch{$lnode}} , $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1212 $fmut =~ /indeltype=([a-zA-Z]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1213 push @{$typecatch{$lnode}}, $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1214 $fmut =~ /position=([0-9]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1215 push @{$indelposcatch{$lnode}}, $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1216 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1217 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1218
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1219 # print $simplified_microsat,"\t", $tree_analysis,"\t", join("",@tags), "\t" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1220 # print join ("<\t>", @mutinfoarray),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1221 # print "where mutinfoarray = @mutinfoarray\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1222 # #print RUN ".";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1223
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1224 # print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1225 # print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1226
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1227 # print colored ['red'],"finalmutations_array=\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1228 # foreach (@finalmutations_array) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1229 # print colored ['red'], "$_\n" if $_ =~ /type=substitution/ && $printer == 1 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1230 # print colored ['yellow'], "$_\n" if $_ !~ /type=substitution/ && $printer == 1 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1231
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1232 # }# if $line =~ /cal/;# && $line =~ /chr4/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1233
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1234 # print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1235 # print colored ['red'], "-------------------------------------------------------------\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1236 # print "tree analysis = $tree_analysis\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1237
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1238 # my $mutations = "@$mutations_array";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1239
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1240
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1241 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1242 for my $keys (@$nodes) {foreach my $key (@$keys){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1243 #print "key = $key, => $branches_hash->{$key}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1244 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1245 # print "x" x 50, "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1246 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1247 my ($birth_steps, $death_steps) = decipher_history($mutations_array,join("",@tags),$nodes,$branches_hash,$tree_analysis,$conformation, $alivehash, $simplified_microsat);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1248 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1249 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1250 close BO;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1251 # print "now studying where we lost microsatellites:\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1252 # print "x" x 60,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1253 for my $reason (0 ... $#nextcounter){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1254 # print $removalReasons[$reason]."\t".$nextcounter[$reason],"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1255 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1256 # print "x" x 60,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1257 # print "In total we read $microreadcounter microsatellites after reading through $contigsentered contigs\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1258 # print " we lost $keynotformedcounter contigs as they did not form the key, \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1259 # print "$contignotrightcounter contigs as they were not of the right species configuration\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1260 # print "$keynotfoundcounter contigs as they did not contain the microsats\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1261 # print "... In total we went through a file that had $dotcounter contigs...\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1262 # print join ("\n","remaining orth keys = ", (keys %orths),"");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1263 # print "------ ------ ------ ------ ------ ------ ------ ------ ------ ------ ------ \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1264 # print "now printing counted trees: \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1265 if (scalar(keys %registeredTrees) > 0){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1266 foreach my $keyb ( sort (keys %registeredTrees) )
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1267 {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1268 # print "$keyb : $registeredTrees{$keyb}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1269 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1270 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1271
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1272
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1273 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1274 close SUMMARY;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1275
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1276 my @summarizarr = ("+C=+C +R.+C -HCOR,+C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1277 "+H=+H +R.+H -HCOR,+H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1278 "-C=-C -R.-C +HCOR,-C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1279 "-H=-H -R.-H +HCOR,-H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1280 "+HC=+HC",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1281 "-HC=-HC",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1282 "+O=+O -HCOR,+O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1283 "-O=-O +HCOR,-O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1284 "+HCO=+HCO",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1285 "-HCO=-HCO",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1286 "+R=+R +R.+C +R.+H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1287 "-R=-R -R.-C -R.-H");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1288
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1289 foreach my $line (@summarizarr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1290 next if $line !~ /[A-Za-z0-9]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1291 # print $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1292 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1293 my @fields = split(/=/,$line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1294 # print "title = $fields[0]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1295 my @parts=split(/ +/, $fields[1]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1296 my %partshash = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1297 foreach my $part (@parts){$partshash{$part}=1;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1298 my $count=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1299 foreach my $key ( sort keys %registeredTrees ){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1300 next if !exists $partshash{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1301 # print "now adding $registeredTrees{$key} from $key\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1302 $count+=$registeredTrees{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1303 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1304 # print "$fields[0] : $count\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1305 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1306 my $rootdir = $dir;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1307 $rootdir =~ s/\/[A-Za-z0-9\-_]+$//;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1308 chdir $rootdir;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1309 remove_tree($dir);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1310
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1311
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1312 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1313 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1314 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1315 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1316 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1317 sub largest_number{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1318 my $counter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1319 my($max) = shift(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1320 foreach my $temp (@_) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1321
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1322 #print "finding largest array: $maxcounter \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1323 if($temp > $max){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1324 $max = $temp;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1325 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1326 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1327 return($max);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1328 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1329
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1330 sub smallest_number{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1331 my $counter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1332 my($min) = shift(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1333 foreach my $temp (@_) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1334 #print "finding largest array: $maxcounter \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1335 if($temp < $min){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1336 $min = $temp;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1337 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1338 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1339 return($min);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1340 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1341 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1342 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1343 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1344 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1345 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1346 sub baseml_parser{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1347 my $outputfile = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1348 open(BOUT,"<$outputfile") or die "Cannot open output of upstream baseml $outputfile: $!";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1349 my @info = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1350 my @branchields = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1351 my @distanceields = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1352 my @bout = <BOUT>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1353 #print colored ['red'], @bout ,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1354 for my $b (0 ... $#bout){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1355 my $bine=$bout[$b];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1356 #print colored ['yellow'], "sentence = ",$bine;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1357 if ($bine =~ /TREE/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1358 $bine=$bout[$b++];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1359 $bine=$bout[$b++];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1360 $bine=$bout[$b++];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1361 #print "FOUND",$bine;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1362 chomp $bine;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1363 $bine =~ s/^\s+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1364 @branchields = split(/\s+/,$bine);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1365 $bine=$bout[$b++];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1366 chomp $bine;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1367 $bine =~ s/^\s+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1368 @distanceields = split(/\s+/,$bine);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1369 #print "LASTING..............\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1370 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1371 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1372 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1373 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1374 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1375
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1376 close BOUT;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1377 # print "branchfields = @branchields and distanceields = @distanceields\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1378 my %distance_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1379 for my $d (0 ... $#branchields){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1380 $distance_hash{$branchields[$d]} = $distanceields[$d];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1381 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1382
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1383 $info[0] = $distance_hash{"9..1"} + $distance_hash{"9..2"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1384 $info[1] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+ $distance_hash{"8..3"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1385 $info[2] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+$distance_hash{"7..8"}+$distance_hash{"7..4"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1386 $info[3] = $distance_hash{"9..1"} + $distance_hash{"8..9"}+$distance_hash{"7..8"}+$distance_hash{"6..7"}+$distance_hash{"6..5"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1387
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1388 # print "\nsending back: @info\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1389
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1390 return join("\t",@info);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1391
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1392 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1393
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1394
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1395 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1396 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1397 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1398 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1399 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1400 sub test_complexity{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1401 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1402 my $sequence = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1403 #print "sequence = $sequence\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1404 my $COMPLEXITY_SUPPORT = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1405 my $complexity=int($COMPLEXITY_SUPPORT * (1/40)); #1 THIS IS AN ARBITRARY THRESHOLD SET FOR LOW COMPLEXITY.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1406 #2 THE INSPIRATION WAS WEB MILLER'S MAIL SENT ON
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1407 #3 19 Apr 2008 WHERE HE CLASSED AS HIGH COMPLEXITY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1408 #4 REGION, IF 40 BP OF SEQUENCE HAS AT LEAST 3 OF
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1409 #5 EACH NUCLEOTIDE. HENCE, I NORMALIZE THIS PARAMETER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1410 #6 FOR THE ACTUAL LENGTH OF $FLANK_SUPPORT SET BY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1411 #7 THE USER.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1412 #8 WEB MILLER SENT THE MAIL TO YDK104@PSU.EDU
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1413
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1414
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1415
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1416 my $As = ($sequence=~ s/A/A/gi);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1417 my $Ts = ($sequence=~ s/T/T/gi);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1418 my $Gs = ($sequence=~ s/G/G/gi);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1419 my $Cs = ($sequence=~ s/C/C/gi);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1420 my $dashes = ($sequence=~ s/\-/-/gi);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1421 $dashes = 0 if $sequence !~ /\-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1422 # print "seq = $sequence, As=$As, Ts=$Ts, Gs=$Gs, Cs=$Cs, dashes=$dashes\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1423 return "LOW" if $dashes > length($sequence)/2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1424
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1425 my $ans = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1426
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1427 return "HIGH" if $As >= $complexity && $Ts >= $complexity && $Cs >= $complexity && $Gs >= $complexity;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1428
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1429 my @nts = ("A","T","G","C","-");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1430
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1431 my $lowcomplex = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1432
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1433 foreach my $nt (@nts){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1434 $lowcomplex =1 if $sequence =~ /(($nt\-*){$mono_flanksimplicityRepno,})/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1435 $lowcomplex =1 if $sequence =~ /(($nt[A-Za-z]){$di_flanksimplicityRepno,})/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1436 $lowcomplex =1 if $sequence =~ /(([A-Za-z]$nt){$di_flanksimplicityRepno,})/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1437 my $nont = ($sequence=~ s/$nt/$nt/gi);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1438 $lowcomplex = 1 if $nont > (length($sequence) * $prop_of_seq_allowedtoAT) && ($nt =~ /[AT\-]/);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1439 $lowcomplex = 1 if $nont > (length($sequence) * $prop_of_seq_allowedtoCG) && ($nt =~ /[CG]/);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1440 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1441 # print "leaving for now.. $sequence\n" if $printer == 1 && $lowcomplex == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1442 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1443 return "HIGH" if $lowcomplex == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1444 return "LOW" ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1445 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1446 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1447 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1448 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1449 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1450 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1451 sub sequence_similarity{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1452 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1453 my @seq1 = split(/\s*/, $_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1454 my @seq2 = split(/\s*/, $_[1]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1455 my $similarity_thresh = $_[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1456 my $info = $_[3];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1457 # print "input = @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1458 my $seq1str = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1459 my $seq2str = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1460 $seq1str=~s/\-//g; $seq2str=~s/\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1461 my $similarity=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1462
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1463 my $nucdiffs=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1464 my $nucsims=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1465 my $indeldiffs=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1466
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1467 for my $i (0...$#seq1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1468 $similarity++ if $seq1[$i] =~ /$seq2[$i]/i ; #|| $seq1[$i] =~ /\-/i || $seq2[$i] =~ /\-/i ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1469 $nucsims++ if $seq1[$i] =~ /$seq2[$i]/i && ($seq1[$i] =~ /[a-zA-Z]/i && $seq2[$i] =~ /[a-zA-Z]/i);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1470 $nucdiffs++ if $seq1[$i] !~ /$seq2[$i]/i && ($seq1[$i] =~ /[a-zA-Z]/i && $seq2[$i] =~ /[a-zA-Z]/i);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1471 $indeldiffs++ if $seq1[$i] !~ /$seq2[$i]/i && $seq1[$i] =~ /\-/i || $seq2[$i] =~ /\-/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1472 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1473 my $sim = $similarity/length($_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1474 return ( $sim, $nucdiffs, $indeldiffs ); #<= $similarity_thresh;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1475 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1476 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1477 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1478 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1479
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1480 sub load_treesToReject{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1481 my @rejectlist = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1482 my $alltags = join("",@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1483 @rejectlist = qw (-HCOR +HCOR) if $alltags eq "HCORM";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1484 @rejectlist = qw ( -HCO|+R +HCO|-R) if $alltags eq "HCOR";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1485 @rejectlist = qw ( -HC|+O +HC|-O) if $alltags eq "HCO";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1486
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1487 %treesToReject=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1488 $treesToReject{$_} = $_ foreach (@rejectlist);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1489 #print "loaded to reject for $alltags; ", $treesToReject{$_},"\n" foreach (@rejectlist); #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1490 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1491 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1492 sub load_treesToIgnore{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1493 my @rejectlist = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1494 my $alltags = join("",@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1495 @rejectlist = qw (-HCOR +HCOR +HCORM -HCORM) if $alltags eq "HCORM";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1496 @rejectlist = qw ( -HCO|+R +HCO|-R +HCOR -HCOR) if $alltags eq "HCOR";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1497 @rejectlist = qw ( -HC|+O +HC|-O +HCO -HCO) if $alltags eq "HCO";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1498
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1499 %treesToIgnore=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1500 $treesToIgnore{$_} = $_ foreach (@rejectlist);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1501 #print "loaded ", $treesToIgnore{$_},"\n" foreach (@rejectlist);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1502 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1503 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1504 sub load_thresholds{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1505 my @threshold_array=split(/[,_]/,$_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1506 unshift @threshold_array, "0";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1507 for my $size (1 ... 4){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1508 $thresholdhash{$size}=$threshold_array[$size];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1509 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1510 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1511 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1512 sub load_allPossibleTrees{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1513 #1 THIS FILE STORES ALL POSSIBLE SCENARIOS OF MICROSATELLITE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1514 #2 BIRTH AND DEATH EVENTS ON A 5-PRIMATE TREE OF H,C,O,R,M
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1515 #3 IN FORM OF A TEXT FILE. THIS WILL BE USED AS A TEMPLET
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1516 #4 TO COMPARE EACH MICROSATELLITE CLUSTER TO UNDERSTAND THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1517 #5 EVOLUTION OF EACH LOCUS. WE WILL THEN DISCARD SOME
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1518 #6 MICROSATS ACCRODING TO THEIR EVOLUTIONARY BEHAVIOUR ON
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1519 #7 THE TREE. MOST PROBABLY WE WILL REMOVE THOSE MICROSATS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1520 #8 THAT ARE NOT SUFFICIENTLY INFORMATIVE, LIKE IN CASE OF
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1521 #9 AN OUTGROUP MICROSATELLITE BEING DIFFERENT FRON ALL OTHER
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1522 #10 SPECIES IN THE TREE.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1523 my $tree_list = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1524 # print "file to be loaded: $tree_list\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1525
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1526 my @trarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1527 @trarr = ("#H C O CONCLUSION ALTERNATE",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1528 "+ + + +HCO NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1529 "+ _ _ +H NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1530 "_ + _ +C NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1531 "_ _ + -HC|+O NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1532 "+ _ + -C +H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1533 "_ + + -H +C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1534 "+ + _ +HC|-O NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1535 "_ _ _ -HCO NA") if $tree_list =~ /_HCO\.txt/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1536 @trarr = ("#H C O R CONCLUSION ALTERNATE",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1537 "_ _ _ _ -HCOR NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1538 "+ + + + +HCOR NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1539 "+ + + _ +HCO|-R +H.+C.+O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1540 "+ + _ _ +HC +H.+C;-O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1541 "+ _ _ _ +H +HC,-C;+HC,-C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1542 "_ + _ _ +C +HC,-H;+HC,-H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1543 "_ _ + _ +O -HC|-H.-C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1544 "_ _ + + -HC -H.-C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1545 "+ _ _ + +H|-C.-O +HC,-C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1546 "_ + _ + +C -H.-O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1547 "_ + + _ -H +C.+O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1548 "_ _ _ + -HCO|+R NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1549 "+ _ + _ +H.+O|-C NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1550 "_ + + + -H -HC,+C",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1551 "+ _ + + -C -HC,+H",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1552 "+ + _ + -O +HC") if $tree_list =~ /_HCOR\.txt/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1553
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1554 @trarr = ("#H C O R M CONCLUSION ALTERNATE",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1555 "+ + _ + + -O -HCO,+HC|-HCO,+HC;-HCO,(+H.+C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1556 "+ _ + + + -C -HC,+H;+HCO,(+H.+O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1557 "_ + + + + -H -HC,+C;-HCO,(+C.+O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1558 "_ _ + _ _ +O +HCO,-HC;+HCO,(-H.-C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1559 "_ + _ _ _ +C +HC,-H;+HCO,(-H.-O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1560 "+ _ _ _ _ +H +HC,-C;+HCO,(-C.-O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1561 "+ + + _ _ +HCO +H.+C.+O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1562 "_ _ _ + + -HCO -HC.-O;-H.-C.-O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1563 "+ _ _ + + -O.-C|-HCO,+H +R.+H;-HCO,(+R.+H)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1564 "_ + _ + + -O.-H|-HCO,+C +R.+C;-HCO,(+R.+C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1565 "_ + + _ _ +HCO,-H|+O.+C NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1566 "+ _ + _ _ +HCO,-C|+O.+H NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1567 "_ _ + + + -HC -H.-C|-HCO,+O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1568 "+ + _ _ _ +HC +H.+C|+HCO,-O|-HCO,+HC;-HCO,(+H.+C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1569 "+ + + + + +HCORM NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1570 "_ _ + _ + DISCARD +O;+HCO,-HC;+HCO,(-H.-C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1571 "_ + _ _ + +C +HC,-H;+HCO,(-H.-O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1572 "+ _ _ _ + +H +HC,-C;+HCO,(-C.-O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1573 "+ + _ _ + +HC -R.-O|+HCO,-O|+H.+C;-HCO,+HC;-HCO,(+H.+C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1574 "+ _ + _ + DISCARD -R.-C|+HCO,-C|+H.+O NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1575 "_ + + _ + DISCARD -R.-H|+HCO,-H|+C.+O NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1576 "_ _ _ _ + DISCARD -HCOR NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1577 "_ _ _ + _ DISCARD +R;-HC.-O;-H.-C.-O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1578 "+ + _ + _ -O +R.+HC|-HCO,+HC;+H.+C.+R|-HCO,(+H.+C)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1579 "+ + + + _ +HCOR NA",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1580 "+ + + _ + DISCARD -R;+HCO;+HC.+O;+H.+C.+O",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1581 "+ _ + + _ -C -HC,+H;+H.+O.+R|-HCO,(+H.+O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1582 "_ + + + _ -H -HC,+C;+C.+O.+R|-HCO,(+C.+O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1583 "_ _ + + _ -HC +R.+O|-HCO,+O|+HCO,-HC",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1584 "_ + _ + _ +C +R.+C|-HCO,+C|-HC,+C +HCO,(-H.-O)",
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1585 "+ _ _ + _ +H +R.+H|-C.-O +HCO,(-C.-O)"
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1586 ) if $tree_list =~ /_HCORM\.txt/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1587
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1588
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1589 my $template_p = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1590 my $alternate_p = $_[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1591 #1 THIS IS THE HASH IN WHICH INFORMATION FROM THE ABOVE FILE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1592 #2 GETS STORED, USING THE WHILE LOOP BELOW. HERE, THE KEY
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1593 #3 OF EACH ROW IS THE EVOLUTIONARY CONFIGURATION OF A LOCUS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1594 #4 ON THE PRIMATE TREE, BASED ON PRESENCE/ABSENCE OF A MICROSAT
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1595 #5 AT THAT LOCUS, LIKE SAY "+ + + _ _" .. EACH COLUMN BELONGS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1596 #6 TO ONE SPECIES; HERE THE COLUMN NAMES ARE "H C O R M".
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1597 #7 THE VALUE FOR EACH ENTRY IS THE MEANING OF THE ABOVE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1598 #8 CONFIGURATION (I.E., CONFIGURAION OF THE KEY. HERE, THE
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1599 #9 VALUE WILL BE +HCO, SIGNIFYING A BIRTH IN HUMAN-CHIMP-ORANG
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1600 #10 COMMON ANCESTOR. THIS HASH HAS BEEN LOADED HERE TO BE USED
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1601 #11 LATER BY THE SUBROUTINE sub treeStudy{} THAT STUDIES
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1602 #12 EVOLUTIONARY CONFIGURAION OF EACH MICROSAT LOCUS, AS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1603 #13 MENTIONED ABOVE.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1604 my @keys_array=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1605 foreach my $line (@trarr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1606 # print $line,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1607 next if $line =~ /^#/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1608 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1609 my @fields = split("\t", $line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1610 push @keys_array, $fields[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1611 # print "loading: $fields[0]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1612 $template_p->{$fields[0]}[0] = $fields[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1613 $template_p->{$fields[0]}[1] = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1614 $alternate_p->{$fields[0]} = $fields[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1615 # $alternate_p->{$fields[1]} = $fields[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1616 # print "loading alternate_p $fields[1] $fields[2]\n"; #<STDIN> if $fields[1] eq "+H";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1617 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1618 # print "loaded the trees with keys: @keys_array\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1619 return $template_p, \@keys_array, $alternate_p;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1620 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1621
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1622 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1623 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1624 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1625 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1626 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1627 sub checkCleanCase{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1628 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1629 my $tree = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1630 my $finalalignment = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1631
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1632 #print "IN checkCleanCase: @_\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1633 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1634 my @indivspecies = $tree =~ /[A-Z]/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1635 $finalalignment =~ s/\./_/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1636 my @captured = $finalalignment =~ /[A-Za-z, \(\):]+\![:A-Za-z, \(\)]/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1637
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1638 my $unclean = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1639
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1640 foreach my $sp (@indivspecies){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1641 foreach my $cap (@captured){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1642 $cap =~ s/:[A-Za-z\-]+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1643 my @sps = $cap =~ /[A-Z]+/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1644 my $spsc = join("", @sps);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1645 # print "checking whether imp species $sp is present in $cap i.e, in $spsc\n " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1646 if ($spsc =~ /$sp/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1647 # print "foind : $sp\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1648 $unclean = 1; last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1649 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1650 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1651 last if $unclean == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1652 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1653 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1654 return "CLEAN" if $unclean == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1655 return "UNCLEAN";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1656 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1657
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1658 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1659 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1660 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1661 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1662 #--------------------------------------------------------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1663
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1664
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1665 sub adjustCoordinates{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1666 my $line = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1667 return 0 if !defined $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1668 #print "------x------x------x------x------x------x------x------x------\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1669 #print $line,"\n\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1670 my $no_of_species = $line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)|(scaffold[0-9a-zA-Z\._\-]+)|(supercontig[0-9a-zA-Z\._\-]+)/x/ig;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1671 #print $line,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1672 #print "------x------x------x------x------x------x------x------x------\n\n\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1673 # my @got = ($line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)/x/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1674 # print "line = $line\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1675 $infocord = 2 + (4*$no_of_species) - 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1676 $typecord = 2 + (4*$no_of_species) + 1 - 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1677 $motifcord = 2 + (4*$no_of_species) + 2 - 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1678 $gapcord = $motifcord+1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1679 $startcord = $gapcord+1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1680 $strandcord = $startcord+1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1681 $endcord = $strandcord + 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1682 $microsatcord = $endcord + 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1683 $sequencepos = 2 + (5*$no_of_species) + 1 -1 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1684 $interr_poscord = $microsatcord + 3;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1685 $no_of_interruptionscord = $microsatcord + 4;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1686 $interrcord = $microsatcord + 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1687 # print "$line\n startcord = $startcord, and endcord = $endcord and no_of_species = $no_of_species\n" if $line !~ /calJac/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1688
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1689 return $no_of_species;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1690 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1691
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1692
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1693 sub printhash{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1694 my $alivehash = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1695 my @tags = @$_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1696 # print "print hash\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1697 foreach my $tag (@tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1698 # print "$tag=",$alivehash->{$tag},"\n" if exists $alivehash->{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1699 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1700
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1701 return "\n"
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1702 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1703 sub peel_onion{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1704 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1705 # print "received: @_\n" ; #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1706 $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1707 my ($tree, $sequences, $alignment, $tagarray, $microsathash, $nonmicrosathash, $motif, $tree_analysis, $threshold, $microsatstarts) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1708 # print "in peel onion.. tree = $tree \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1709 my %sequence_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1710
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1711
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1712 # for my $i (0 ... $#sequences){ $sequence_hash{$species[$i]}=$sequences->[$i]; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1713
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1714
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1715 my %node_sequences=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1716
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1717 my %node_alignments = (); #NEW, Nov 28 2008
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1718 my @tags=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1719 my @locus_sequences=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1720 my %alivehash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1721 foreach my $tag (@$tagarray) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1722 #print "adding: $tag\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1723 push(@tags, $tag);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1724 $node_sequences{$tag}=join ".",split(/\s*/,$microsathash->{$tag}) if $microsathash->{$tag} ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1725 $alivehash{$tag}= $tag if $microsathash->{$tag} ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1726 $node_sequences{$tag}=join ".",split(/\s*/,$nonmicrosathash->{$tag}) if $microsathash->{$tag} eq "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1727 $node_alignments{$tag}=join ".",split(/\s*/,$alignment->{$tag}) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1728 push @locus_sequences, $node_sequences{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1729 # print "adding to node_seq: $tag = ",$node_alignments{$tag},"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1730 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1731
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1732 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1733
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1734 my ($nodes_arr, $branches_hash) = get_nodes($tree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1735 my @nodes=@$nodes_arr;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1736 # print "recieved nodes = " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1737 # foreach my $key (@nodes) {print "@$key " if $printer == 1;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1738
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1739 # print "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1740
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1741 #POPULATE branches_hash WITH INFORMATION ABOUT LIVESTATUS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1742 foreach my $keys (@nodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1743 my @pair = @$keys;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1744 my $joint = "(".join(", ",@pair).")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1745 my $copykey = join "", @pair;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1746 $copykey =~ s/[\W ]+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1747 # print "for node: $keys, copykey = $copykey and joint = $joint\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1748 my $livestatus = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1749 foreach my $copy (split(/\s*/,$copykey)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1750 $livestatus = 0 if !exists $alivehash{$copy};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1751 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1752 $alivehash{$joint} = $joint if !exists $alivehash{$joint} && $livestatus == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1753 # print "alivehash = $alivehash{$joint}\n" if exists $alivehash{$joint} && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1754 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1755
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1756 @nodes = reverse(@nodes); #1 THIS IS IN ORDER TO GO THROUGH THE TREE FROM LEAVES TO ROOT.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1757
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1758 my @mutations_array=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1759
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1760 my $joint = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1761 foreach my $node (@nodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1762 my @pair = @$node;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1763 # print "now in the nodes for loop, pair = @pair\n and sequences=\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1764 $joint = "(".join(", ",@pair).")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1765 my @pair_sequences=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1766
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1767 foreach my $tag (@pair){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1768 # print "$tag: $node_alignments{$tag}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1769 # print $node_alignments{$tag},"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1770 push @pair_sequences, $node_alignments{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1771 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1772 # print "ppeel onion joint = $joint , pair_sequences=>@pair_sequences< , pair=>@pair<\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1773
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1774 my ($compared, $substitutions_list) = base_by_base_simple($motif,\@pair_sequences, scalar(@pair_sequences), @pair, $joint);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1775 $node_alignments{$joint}=$compared;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1776 push( @mutations_array,split(/:/,$substitutions_list));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1777 # print "newly added to node_sequences: $node_alignments{$joint} and list of mutations =\n", join("\n",@mutations_array),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1778 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1779
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1780
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1781 my $analayzed_mutations = analyze_mutations(\@mutations_array, \@nodes, $branches_hash, $alignment, \@tags, \%alivehash, \%node_sequences, $microsatstarts, $motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1782
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1783 return ($analayzed_mutations, \@nodes, $branches_hash, \%alivehash, $node_alignments{$joint}) if scalar @mutations_array > 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1784 return ("NULL",\@nodes,$branches_hash, \%alivehash, "NULL") if scalar @mutations_array == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1785 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1786
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1787 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1788 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1789
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1790 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1791 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1792
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1793 sub get_nodes{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1794 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1795
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1796 my $tree=$_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1797 #$tree =~ s/ +//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1798 $tree =~ s/\t+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1799 $tree=~s/;//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1800 # print "tree=$tree\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1801 my @nodes = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1802 my @onions=($tree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1803 my %branches=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1804 foreach my $bite (@onions){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1805 $bite=~ s/^\(|\)$//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1806 chomp $bite;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1807 # print "tree = $bite \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1808 # <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1809 $bite=~ /([ ,\(\)A-Z]+)\,\s*([ ,\(\)A-Z]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1810 #$tree =~ /(\(\(\(H, C\), O\), R\))\, (M)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1811 my @raw_nodes = ($1, $2);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1812 # print "raw nodes = $1 and $2\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1813 push(@nodes, [@raw_nodes]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1814 foreach my $node (@raw_nodes) {push (@onions, $node) if $node =~ /,/;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1815 foreach my $node (@raw_nodes) {$branches{$node}="(".$bite.")"; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1816 # print "onions = @onions\n" if $printer == 1;<STDIN> if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1817 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1818 $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1819 return \@nodes, \%branches;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1820 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1821
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1822
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1823 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1824 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1825 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1826 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1827 sub analyze_mutations{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1828 my ($mutations_array, $nodes, $branches_hash, $alignment, $tags, $alivehash, $node_sequences, $microsatstarts, $motif) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1829 my $locuslength = length($alignment->{$tags->[0]});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1830 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1831
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1832
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1833 # print " IN analyzed_mutations....\n" if $printer == 1; # \n mutations array = @$mutations_array, \nAND locuslength = $locuslength\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1834 my %mutation_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1835 my %froms_megahash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1836 my %tos_megahash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1837 my %position_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1838 my @solutions_array=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1839 foreach my $mutation (@$mutations_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1840 # print "loadin mutation: $mutation\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1841 my %localhash= $mutation =~ /([\S ]+)=([\S ]+)/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1842 $mutation_hash{$localhash{"position"}} = {%localhash};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1843 push @{$position_hash{$localhash{"position"}}},$localhash{"node"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1844 # print "feeding position hash with $localhash{position}: $position_hash{$localhash{position}}[0]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1845 $froms_megahash{$localhash{"position"}}{$localhash{"node"}}=$localhash{"from"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1846 $tos_megahash{$localhash{"position"}}{$localhash{"node"}}=$localhash{"to"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1847 # print "just a trial: $mutation_hash{$localhash{position}}{position}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1848 # print "loadin in tos_megahash: $localhash{position} {$localhash{node} = $localhash{to}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1849 # print "loadin in from: $localhash{position} {$localhash{node} = $localhash{from}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1850 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1851
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1852 # print "now going through each position in loculength:\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1853 ## <STDIN> if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1854
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1855 for my $pos (0 ... $locuslength-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1856 # print "at position: $pos\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1857
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1858 if (exists($mutation_hash{$pos})){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1859 my @local_nodes=@{$position_hash{$pos}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1860 # print "found mutation: @{$position_hash{$pos}} : @local_nodes\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1861
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1862 foreach my $local_node (@local_nodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1863 # print "at local node: $local_node ... from state = $froms_megahash{$pos}{$local_node}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1864 my $open_insertion=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1865 my $open_deletion=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1866 my $open_to_substitution=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1867 my $open_from_substitution=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1868 if ($froms_megahash{$pos}{$local_node} eq "-"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1869 # print "here exists a microsatellite from $local_node to $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1870 # print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node}, nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}} && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1871 #next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1872 $open_insertion=$tos_megahash{$pos}{$local_node};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1873 for my $posnext ($pos+1 ... $locuslength-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1874 # print "in first if .... studying posnext: $posnext\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1875 last if !exists ($froms_megahash{$posnext}{$local_node});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1876 # print "for posnext: $posnext, there exists $froms_megahash{$posnext}{$local_node}.. already, open_insertion = $open_insertion.. checking is $froms_megahash{$posnext}{$local_node} matters\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1877 $open_insertion = $open_insertion.$tos_megahash{$posnext}{$local_node} if $froms_megahash{$posnext}{$local_node} eq "-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1878 # print "now open_insertion=$open_insertion\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1879 delete $mutation_hash{$posnext} if $froms_megahash{$posnext}{$local_node} eq "-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1880 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1881 # print "1 Feeding in: ", join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_insertion", "deletion="),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1882 push (@solutions_array, join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_insertion", "deletion="));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1883 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1884 elsif ($tos_megahash{$pos}{$local_node} eq "-"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1885 # print "here exists a microsatellite to $local_node from $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1886 # print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node}, nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1887 #next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1888 $open_deletion=$froms_megahash{$pos}{$local_node};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1889 for my $posnext ($pos+1 ... $locuslength-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1890 # print "in 1st elsif studying posnext: $posnext\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1891 # print "nexting as nextpos does not exist\n" if !exists ($tos_megahash{$posnext}{$local_node}) && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1892 last if !exists ($tos_megahash{$posnext}{$local_node});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1893 # print "for posnext: $posnext, there exists $tos_megahash{$posnext}{$local_node}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1894 $open_deletion = $open_deletion.$froms_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} eq "-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1895 delete $mutation_hash{$posnext} if $tos_megahash{$posnext}{$local_node} eq "-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1896 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1897 # print "2 Feeding in:", join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_deletion"), "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1898 push (@solutions_array, join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_deletion"));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1899 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1900 elsif ($tos_megahash{$pos}{$local_node} ne "-"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1901 # print "here exists a microsatellite from $local_node to $branches_hash->{$local_node}\n" if $printer == 1 && exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1902 # print "for localnode $local_node, amd the realated branches_hash:$branches_hash->{$local_node}, nexting as exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}}\n" if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1903 #next if exists $alivehash->{$local_node} && exists $alivehash->{$branches_hash->{$local_node}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1904 # print "microsatstart = $microsatstarts->{$local_node} \n" if exists $microsatstarts->{$local_node} && $pos < $microsatstarts->{$local_node} && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1905 next if exists $microsatstarts->{$local_node} && $pos < $microsatstarts->{$local_node};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1906 $open_to_substitution=$tos_megahash{$pos}{$local_node};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1907 $open_from_substitution=$froms_megahash{$pos}{$local_node};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1908 # print "open from substitution: $open_from_substitution \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1909 for my $posnext ($pos+1 ... $locuslength-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1910 #print "in last elsif studying posnext: $posnext\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1911 last if !exists ($tos_megahash{$posnext}{$local_node});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1912 # print "for posnext: $posnext, there exists $tos_megahash{$posnext}{$local_node}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1913 $open_to_substitution = $open_to_substitution.$tos_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} ne "-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1914 $open_from_substitution = $open_from_substitution.$froms_megahash{$posnext}{$local_node} if $tos_megahash{$posnext}{$local_node} ne "-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1915 delete $mutation_hash{$posnext} if $tos_megahash{$posnext}{$local_node} ne "-" && $froms_megahash{$posnext}{$local_node} ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1916 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1917 # print "open from substitution: $open_from_substitution \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1918
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1919 #IS THE STRETCH OF SUBSTITUTION MICROSATELLITE-LIKE?
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1920 my @motif_parts=split(/\s*/,$motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1921 #GENERATING THE FLEXIBLE LEFT END
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1922 my $left_query=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1923 for my $k (1 ... $#motif_parts) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1924 $left_query= $motif_parts[$k]."|)";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1925 $left_query="(".$left_query;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1926 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1927 $left_query=$left_query."?";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1928 # print "left_quewry = $left_query\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1929 #GENERATING THE FLEXIBLE RIGHT END
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1930 my $right_query=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1931 for my $k (0 ... ($#motif_parts-1)) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1932 $right_query= "(|".$motif_parts[$k];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1933 $right_query=$right_query.")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1934 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1935 $right_query=$right_query."?";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1936 # print "right_query = $right_query\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1937 # print "Hence, searching for: ^$left_query($motif)+$right_query\$\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1938
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1939 my $motifcomb=$motif x 50;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1940 # print "motifcomb = $motifcomb\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1941 if ( ($motifcomb =~/$open_to_substitution/i) && (length ($open_to_substitution) >= length($motif)) ){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1942 # print "sequence microsat-like\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1943 my $all_microsat_like = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1944 # print "3 feeding in: ", join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_from_substitution"), "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1945 push (@solutions_array, join("\t", "node=$local_node","type=deletion" ,"position=$pos", "from=", "to=", "insertion=", "deletion=$open_from_substitution"));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1946 # print "4 feeding in: ", join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_to_substitution", "deletion="), "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1947 push (@solutions_array, join("\t", "node=$local_node","type=insertion" ,"position=$pos", "from=", "to=", "insertion=$open_to_substitution", "deletion="));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1948
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1949 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1950 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1951 # print "5 feeding in: ", join("\t", "node=$local_node","type=substitution" ,"position=$pos", "from=$open_from_substitution", "to=$open_to_substitution", "insertion=", "deletion="), "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1952 push (@solutions_array, join("\t", "node=$local_node","type=substitution" ,"position=$pos", "from=$open_from_substitution", "to=$open_to_substitution", "insertion=", "deletion="));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1953 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1954 #IS THE FROM-SEQUENCE MICROSATELLITE-LIKE?
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1955
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1956 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1957 #<STDIN> if $printer ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1958 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1959 #<STDIN> if $printer ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1960 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1961 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1962 # print "\n", "#" x 50, "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1963 foreach my $tag (@$tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1964 # print "$tag: $alignment->{$tag}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1965 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1966 # print "\n", "#" x 50, "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1967 # print "returning SOLUTIONS ARRAY : \n",join("\n", @solutions_array),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1968 #print "end\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1969 #<STDIN> if
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1970 return \@solutions_array;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1971 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1972 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1973 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1974 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1975 #+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#+++++++++++#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1976
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1977 sub base_by_base_simple{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1978 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1979 my ($motif, $locus, $no, $pair0, $pair1, $joint) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1980 my @seq_array=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1981 # print "IN SUBROUTUNE base_by_base_simple.. information received = @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1982 # print "pair0 = $pair0 and pair1 = $pair1\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1983
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1984 my @example=split(/\./,$locus->[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1985 # print "example, for length = @example\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1986 for my $i (0...$no-1){push(@seq_array, [split(/\./,$locus->[$i])]); }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1987
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1988 my @compared_sequence=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1989 my @substitutions_list;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1990 for my $i (0...scalar(@example)-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1991
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1992 #print "i = $i\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1993 #print "comparing $seq_array[0][$i] and $seq_array[1][$i] \n" ;#if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1994 if ($seq_array[0][$i] =~ /!/ && $seq_array[1][$i] !~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1995
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1996 my $resolution= resolve_base($seq_array[0][$i],$seq_array[1][$i], $pair1 ,"keep" );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1997 # print "ancestral = $resolution\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1998
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
1999 if ($resolution =~ /$seq_array[1][$i]/i && $resolution !~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2000 push @substitutions_list, add_mutation($i, $pair0, $seq_array[0][$i], $resolution );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2001 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2002 elsif ( $resolution !~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2003 push @substitutions_list, add_mutation($i, $pair1, $seq_array[1][$i], $resolution);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2004 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2005 push @compared_sequence,$resolution;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2006 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2007 elsif ($seq_array[0][$i] !~ /!/ && $seq_array[1][$i] =~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2008
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2009 my $resolution= resolve_base($seq_array[1][$i],$seq_array[0][$i], $pair0, "invert" );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2010 # print "ancestral = $resolution\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2011
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2012 if ($resolution =~ /$seq_array[0][$i]/i && $resolution !~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2013 push @substitutions_list, add_mutation($i, $pair1, $seq_array[1][$i], $resolution);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2014 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2015 elsif ( $resolution !~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2016 push @substitutions_list, add_mutation($i, $pair0, $seq_array[0][$i], $resolution);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2017 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2018 push @compared_sequence,$resolution;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2019 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2020 elsif($seq_array[0][$i] =~ /!/ && $seq_array[1][$i] =~ /!/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2021 push @compared_sequence, add_bases($seq_array[0][$i],$seq_array[1][$i], $pair0, $pair1, $joint );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2022 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2023 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2024 if($seq_array[0][$i] !~ /^$seq_array[1][$i]$/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2025 push @compared_sequence, $pair0.":".$seq_array[0][$i]."!".$pair1.":".$seq_array[1][$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2026 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2027 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2028 # print "perfect match\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2029 push @compared_sequence, $seq_array[0][$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2030 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2031 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2032 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2033 # print "returning: comared = @compared_sequence \nand substitutions list =\n", join("\n",@substitutions_list),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2034 return join(".",@compared_sequence), join(":", @substitutions_list) if scalar (@substitutions_list) > 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2035 return join(".",@compared_sequence), "" if scalar (@substitutions_list) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2036 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2037
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2038
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2039 sub resolve_base{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2040 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2041 # print "IN SUBROUTUNE resolve_base.. information received = @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2042 my ($optional, $single, $singlesp, $arg) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2043 my @options=split(/!/,$optional);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2044 foreach my $option(@options) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2045 $option=~s/[A-Z\(\) ,]+://g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2046 if ($option =~ /$single/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2047 # print "option = $option , returning single: $single\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2048 return $single;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2049 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2050 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2051 # print "returning ",$optional."!".$singlesp.":".$single. "\n" if $arg eq "keep" && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2052 # print "returning ",$singlesp.":".$single."!".$optional. "\n" if $arg eq "invert" && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2053 return $optional."!".$singlesp.":".$single if $arg eq "keep";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2054 return $singlesp.":".$single."!".$optional if $arg eq "invert";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2055
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2056 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2057
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2058 sub same_length{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2059 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2060 my @locus = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2061 my $temp = shift @locus;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2062 $temp=~s/-|,//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2063 foreach my $l (@locus){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2064 $l=~s/-|,//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2065 return 0 if length($l) != length($temp);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2066 $temp = $l;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2067 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2068 return 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2069 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2070 sub treeStudy{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2071 my $printer = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2072 # print "template DEFINED.. received: @_\n" if defined %template;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2073 # print "only received = @_" if !defined %template;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2074 my $stopper = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2075 # if (!defined %template){ TEMP MASKED OCT 18 2012
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2076 $stopper = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2077 %template=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2078 # print "tree decipherer = $tree_decipherer\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2079 my ( $template_ref, $keys_array)=load_allPossibleTrees($tree_decipherer, \%template);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2080 # print "return = $template_ref and @{$keys_array}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2081 foreach my $key (@$keys_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2082 # print "addding : $template_ref->{$key} for $key\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2083 $template{$key} = $template_ref->{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2084 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2085 # } TEMP MASK OCT 18 2012 END
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2086 # <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2087 for my $templet ( keys %template ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2088 # print "$templet => @{$template{$templet}}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2089 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2090 # <STDIN> if !defined %template;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2091
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2092 my $strict = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2093
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2094 my $H = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2095 my $Hchr = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2096 my $Hstart = 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2097 my $Hend = 3;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2098 my $Hmotif = 4;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2099 my $Hmotiflen = 5;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2100 my $Hmicro = 6;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2101 my $Hstrand = 7;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2102 my $Hmicrolen = 8;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2103 my $Hinterpos = 9;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2104 my $Hrelativepos = 10;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2105 my $Hinter = 11;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2106 my $Hinterlen = 12;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2107
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2108 my $C = 13;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2109 my $Cchr = 14;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2110 my $Cstart = 15;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2111 my $Cend = 16;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2112 my $Cmotif = 17;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2113 my $Cmotiflen = 18;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2114 my $Cmicro = 19;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2115 my $Cstrand = 20;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2116 my $Cmicrolen = 21;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2117 my $Cinterpos = 22;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2118 my $Crelativepos = 23;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2119 my $Cinter = 24;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2120 my $Cinterlen = 25;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2121
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2122 my $O = 26;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2123 my $Ochr = 27;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2124 my $Ostart = 28;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2125 my $Oend = 29;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2126 my $Omotif = 30;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2127 my $Omotiflen = 31;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2128 my $Omicro = 32;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2129 my $Ostrand = 33;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2130 my $Omicrolen = 34;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2131 my $Ointerpos = 35;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2132 my $Orelativepos = 36;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2133 my $Ointer = 37;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2134 my $Ointerlen = 38;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2135
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2136 my $R = 39;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2137 my $Rchr = 40;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2138 my $Rstart = 41;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2139 my $Rend = 42;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2140 my $Rmotif = 43;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2141 my $Rmotiflen = 44;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2142 my $Rmicro = 45;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2143 my $Rstrand = 46;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2144 my $Rmicrolen = 47;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2145 my $Rinterpos = 48;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2146 my $Rrelativepos = 49;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2147 my $Rinter = 50;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2148 my $Rinterlen = 51;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2149
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2150 my $Mchr = 52;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2151 my $Mstart = 53;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2152 my $Mend = 54;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2153 my $M = 55;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2154 my $Mmotif = 56;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2155 my $Mmotiflen = 57;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2156 my $Mmicro = 58;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2157 my $Mstrand = 59;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2158 my $Mmicrolen = 60;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2159 my $Minterpos = 61;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2160 my $Mrelativepos = 62;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2161 my $Minter = 63;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2162 my $Minterlen = 64;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2163
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2164 #-------------------------------------------------------------------------------#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2165 my @analysis=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2166
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2167
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2168 my %speciesOrder = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2169 $speciesOrder{"H"} = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2170 $speciesOrder{"C"} = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2171 $speciesOrder{"O"} = 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2172 $speciesOrder{"R"} = 3;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2173 $speciesOrder{"M"} = 4;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2174 #-------------------------------------------------------------------------------#
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2175
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2176 my $line = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2177 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2178
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2179 my @f = split(/\t/,$line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2180 # print "received array : @f.. recieved tags = @tags\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2181
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2182 # collect all motifs
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2183 my @motifs=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2184 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif], $f[$Mmotif]) if $tags[$#tags] =~ /M/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2185 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif]) if $tags[$#tags] =~ /R/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2186 @motifs = ($f[$Hmotif], $f[$Cmotif], $f[$Omotif]) if $tags[$#tags] =~ /O/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2187 # print "motifs in the array = $f[$Hmotif], $f[$Cmotif], $f[$Omotif], $f[$Rmotif]\n" if $tags[$#tags] =~ /R/;;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2188 # print "motifs = @motifs\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2189 my @translation = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2190 foreach my $motif (@motifs){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2191 push(@translation, "_") if $motif eq "NA";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2192 push(@translation, "+") if $motif ne "NA";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2193 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2194 my $translate = join(" ", @translation);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2195 # print "translate = >$translate< and analysis = $template{$translate}[0].. on the other hand, ",$template{"- - +"}[0],"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2196 my @analyses = split(/\|/,$template{$translate}[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2197 # print "motifs = @motifs, analyses = @analyses\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2198
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2199 if (scalar(@analyses) == 1) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2200 #print "analysis = $analyses[0]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2201 if ($analyses[0] !~ /,|\./ ){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2202 if ($analyses[0] =~ /\+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2203 my $analysis = $analyses[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2204 $analysis =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2205 my @species = split(/\s*/,$analysis);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2206 my @currentMotifs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2207 foreach my $specie (@species){ push(@currentMotifs, $motifs[$speciesOrder{$specie}]); #print "pushing into currentMotifs: $speciesOrder{$specie}: $motifs[$speciesOrder{$specie}]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2208 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2209 # print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2210 $template{$translate}[1]++ if $strict == 1 && consistency(@currentMotifs) ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2211 $template{$translate}[1]++ if $strict == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2212 # print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2213 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2214 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2215 my $analysis = $analyses[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2216 $analysis =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2217 my @species = split(/\s*/,$analysis);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2218 my @currentMotifs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2219 my @complementarySpecies = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2220 my $allSpecies = join("",@tags);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2221 foreach my $specie (@species){ $allSpecies =~ s/$specie//g; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2222 foreach my $specie (split(/\s*/,$allSpecies)){ push(@currentMotifs, $motifs[$speciesOrder{$specie}]); #print "pushing into currentMotifs: $speciesOrder{$specie}: $motifs[$speciesOrder{$specie}]\n" if $printer == 1;;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2223 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2224 # print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2225 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2226 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2227 # print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2228 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2229 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2230
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2231 elsif ($analyses[0] =~ /,/) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2232 my @events = split(/,/,$analyses[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2233 # print "events = @events \n " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2234 if ($events[0] =~ /\+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2235 my $analysis1 = $events[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2236 $analysis1 =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2237 my $analysis2 = $events[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2238 $analysis2 =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2239 my @nSpecies = split(/\s*/,$analysis2);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2240 # print "original anslysis = $analysis1 " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2241 foreach my $specie (@nSpecies){ $analysis1=~ s/$specie//g;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2242 # print "processed anslysis = $analysis1 \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2243 my @currentMotifs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2244 foreach my $specie (split(/\s*/,$analysis1)){push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2245 # print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2246 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2247 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2248 # print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2249 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2250 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2251 my $analysis1 = $events[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2252 $analysis1 =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2253 my $analysis2 = $events[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2254 $analysis2 =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2255 my @pSpecies = split(/\s*/,$analysis2);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2256 my @currentMotifs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2257 foreach my $specie (@pSpecies){ push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2258 # print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2259 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2260 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2261 # print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2262
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2263 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2264
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2265 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2266 elsif ($analyses[0] =~ /\./) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2267 my @events = split(/\./,$analyses[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2268 foreach my $event (@events){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2269 # print "event = $event \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2270 if ($event =~ /\+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2271 my $analysis = $event;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2272 $analysis =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2273 my @species = split(/\s*/,$analysis);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2274 my @currentMotifs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2275 foreach my $specie (@species){ push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2276 #print consistency(@currentMotifs),"<- \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2277 # print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2278 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2279 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2280 # print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2281 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2282 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2283 my $analysis = $event;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2284 $analysis =~ s/\+|\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2285 my @species = split(/\s*/,$analysis);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2286 my @currentMotifs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2287 my @complementarySpecies = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2288 my $allSpecies = join("",@tags);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2289 foreach my $specie (@species){ $allSpecies =~ s/$specie//g; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2290 foreach my $specie (split(/\s*/,$allSpecies)){ push(@currentMotifs, $motifs[$speciesOrder{$specie}]); }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2291 #print consistency(@currentMotifs),"<- \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2292 # print "current motifs = @currentMotifs and consistency? ", (consistency(@currentMotifs))," \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2293 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 1 && consistency(@currentMotifs) ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2294 $template{$translate}[1]=$template{$translate}[1]+1 if $strict == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2295 # print "adding to template $translate: $template{$translate}[1]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2296 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2297 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2298
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2299 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2300 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2301 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2302 my $finalanalysis = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2303 $template{$translate}[1]++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2304 foreach my $analysis (@analyses){ ;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2305 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2306 # test if motifs where microsats are present, as indeed of same the motif composition
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2307
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2308
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2309
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2310 for my $templet ( keys %template ) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2311 if (@{ $template{$templet} }[1] > 0){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2312
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2313 $template{$templet}[1] = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2314 # print "now returning: @{$template{$templet}}[0], $templet\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2315 return (@{$template{$templet}}[0], $templet);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2316 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2317 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2318 undef %template;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2319 # print "sending NULL\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2320 return ("NULL", "NULL");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2321
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2322 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2323
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2324
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2325 sub consistency{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2326 my @motifs = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2327 # print "in consistency \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2328 # print "motifs sent = >",join("|",@motifs),"< \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2329 return $motifs[0] if scalar(@motifs) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2330 my $prevmotif = shift(@motifs);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2331 my $stopper = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2332 for my $i (0 ... $#motifs){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2333 next if $motifs[$i] eq "NA";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2334 my $templet = $motifs[$i].$motifs[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2335 if ($templet !~ /$prevmotif/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2336 $stopper = 1; last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2337 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2338 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2339 return $prevmotif if $stopper == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2340 return "NULL" if $stopper == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2341 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2342 sub summarize_microsat{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2343 my $printer = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2344 my $line = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2345 my $humseq = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2346
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2347 my @gaps = $line =~ /[0-9]+\t[0-9]+\t[\+\-]/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2348 my @starts = $line =~ /[0-9]+\t[\+\-]/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2349 my @ends = $line =~ /[\+\-]\t[0-9]+/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2350 # print "starts = @starts\tends = @ends\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2351 for my $i (0 ... $#gaps) {$gaps[$i] =~ s/\t[0-9]+\t[\+\-]//g;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2352 for my $i (0 ... $#starts) {$starts[$i] =~ s/\t[\+\-]//g;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2353 for my $i (0 ... $#ends) {$ends[$i] =~ s/[\+\-]\t//g;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2354
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2355 my $minstart = array_smallest_number(@starts);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2356 my $maxend = array_largest_number(@ends);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2357
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2358 my $humupstream_st = substr($humseq, 0, $minstart);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2359 my $humupstream_en = substr($humseq, 0, $maxend);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2360 my $no_of_gaps_to_start = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2361 my $no_of_gaps_to_end = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2362 $no_of_gaps_to_start = ($humupstream_st =~ s/\-/x/g) if $humupstream_st=~/\-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2363 $no_of_gaps_to_end = ($humupstream_en =~ s/\-/x/g) if $humupstream_en=~/\-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2364
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2365 my $locusmotif = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2366 # print "IN SUB SUMMARIZE_MICROSAT $line\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2367 #return "NULL" if $line =~ /compound/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2368 my $Hstart = "NA";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2369 my $Hend = "NA";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2370 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2371 my $match_count = ($line =~ s/>/>/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2372 #print "number of species = $match_count\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2373 my @micros = split(/>/,$line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2374 shift @micros;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2375 my $stopper = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2376
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2377
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2378 foreach my $mic (@micros){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2379 my @local = split(/\t/,$mic);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2380 if ($local[$microsatcord] =~ /N/) {$stopper =1; last;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2381 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2382 return "NULL" if $stopper ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2383
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2384 #------------------------------------------------------
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2385
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2386 my @arranged = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2387 for my $arr (0 ... $#exacttags) {$arranged[$arr] = '0';}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2388
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2389 foreach my $micro (@micros){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2390 for my $i (0 ... $#exacttags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2391 if ($micro =~ /^$exacttags[$i]/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2392 $arranged[$i] = $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2393 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2394 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2395 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2396 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2397 # print "arranged = @arranged \n" ; <STDIN>;;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2398
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2399 my @endstatement = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2400 my $turn = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2401 my $species_counter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2402 # print scalar(@arranged),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2403
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2404 my $species_no=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2405
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2406 my $orthHchr = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2407
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2408 foreach my $micro (@arranged) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2409 $micro =~ s/\t\t/\t \t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2410 $micro =~ s/\t,/\t ,/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2411 $micro =~ s/,\t/, \t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2412 # print "------------------------------------------------------------------------------------------\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2413 chomp $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2414 if ($micro eq '0'){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2415 push(@endstatement, join("\t",$exacttags[$species_counter],"NA","NA","NA","NA",0 ,"NA", "NA", 0,"NA","NA","NA", "NA" ));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2416 $species_counter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2417 # print join("|","ENDSTATEMENT:",@endstatement),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2418 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2419 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2420 # print $micro,"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2421 # print "micro = $micro \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2422 my @fields = split(/\t/,$micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2423 my $microcopy = $fields[$microsatcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2424 $microcopy =~ s/\[|\]|-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2425 my $microsatlength = length($microcopy);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2426 # print "microsat = $fields[$microsatcord] and microsatlength = $microsatlength\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2427 # print "sp_ident = @sp_ident.. species_no=$species_no\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2428 $micro =~ /$sp_ident[$species_no]\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2429 # print "$micro =~ /$sp_ident[$species_no] ([0-9a-zA-Z_]+) ([0-9]+) ([0-9]+)/\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2430 my $sp_chr=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2431 my $sp_start=$2 + $fields[$startcord] - $fields[$gapcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2432 my $sp_end= $sp_start + $microsatlength - 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2433
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2434 $species_no++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2435
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2436 $micro =~ /$focalspec_orig\s(\S+)\s([0-9]+)\s([0-9]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2437 $orthHchr=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2438 $Hstart=$2+$minstart-$no_of_gaps_to_start;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2439 $Hend=$2+$maxend-$no_of_gaps_to_end;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2440 # print "Hstart = $Hstart = $fields[4] + $fields[$startcord] - $fields[$gapcord]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2441
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2442 my $motif = $fields[$motifcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2443 my $firstmotif = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2444 my $strand = $fields[$strandcord];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2445 # print "strand = $strand\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2446
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2447
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2448 if ($motif =~ /^\[/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2449 $motif =~ s/^\[//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2450 $motif =~ /([a-zA-Z]+)\].*/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2451 $firstmotif = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2452 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2453
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2454 else {$firstmotif = $motif;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2455 # print "firstmotif =$firstmotif : \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2456 $firstmotif = allCaps($firstmotif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2457
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2458 if (exists $revHash{$firstmotif} && $turn == 0) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2459 $turn=1 if $species_counter==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2460 $firstmotif = $revHash{$firstmotif};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2461 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2462
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2463 elsif (exists $revHash{$firstmotif} && $turn == 1) {$firstmotif = $revHash{$firstmotif}; $turn = 1;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2464 # print "changed firstmotif =$firstmotif\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2465 # <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2466 $locusmotif = $firstmotif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2467
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2468 if (scalar(@fields) > $microsatcord + 2){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2469 # print "fields = @fields ... interr_poscord=$interr_poscord=$fields[$interr_poscord] .. interrcord=$interrcord=$fields[$interrcord]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2470
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2471 my @interposes = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2472 @interposes = split(",",$fields[$interr_poscord]) if $fields[$interr_poscord] =~ /,/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2473 $interposes[0] = $fields[$interr_poscord] if $fields[$interr_poscord] !~ /,/ ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2474 # print "interposes=@interposes\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2475 my @relativeposes = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2476 my @interruptions = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2477 @interruptions = split(",",$fields[$interrcord]) if $fields[$interrcord] =~ /,/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2478 $interruptions[0] = $fields[$interrcord] if $fields[$interrcord] !~ /,/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2479 my @interlens = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2480
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2481
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2482 for my $i (0 ... $#interposes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2483
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2484 my $interpos = $interposes[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2485 my $nexter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2486 my $interruption = $interruptions[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2487 my $interlen = length($interruption);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2488 push (@interlens, $interlen);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2489
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2490
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2491 my $relativepos = (100 * $interpos) / $microsatlength;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2492 # print "relativepos = $relativepos ,interpos=$interpos, interruption=$interruption, interlen=$interlen \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2493 $relativepos = (100 * ($interpos-$interlen)) / $microsatlength if $relativepos > 50;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2494 # print "--> = $relativepos\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2495 $interruption = "IND" if length($interruption) < 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2496
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2497 if ($turn == 1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2498 $fields[$microsatcord] = switch_micro($fields[$microsatcord]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2499 $interruption = switch_nucl($interruption) unless $interruption eq "IND";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2500 $interpos = ($microsatlength - $interpos) - $interlen + 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2501 # print "turn interpos = $interpos for $fields[$microsatcord]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2502 $relativepos = (100 * $interpos) / $microsatlength;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2503 $relativepos = (100 * ($interpos-$interlen)) / $microsatlength if $relativepos > 50;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2504
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2505
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2506 $strand = '+' if $strand eq '-';
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2507 $strand = '-' if $strand eq '+';
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2508 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2509 # print "final relativepos = $relativepos\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2510 push(@relativeposes, $relativepos);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2511 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2512 push(@endstatement,join("\t",($exacttags[$species_counter],$sp_chr, $sp_start, $sp_end, $firstmotif,length($firstmotif),$fields[$microsatcord],$strand,$microsatlength,join(",",@interposes),join(",",@relativeposes),join(",",@interruptions), join(",",@interlens))));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2513 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2514
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2515 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2516 push(@endstatement, join("\t",$exacttags[$species_counter],$sp_chr, $sp_start, $sp_end, $firstmotif,length($firstmotif),$fields[$microsatcord],$strand,$microsatlength,"NA","NA","NA", "NA"));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2517 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2518
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2519 $species_counter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2520 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2521
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2522 $locusmotif = $sameHash{$locusmotif} if exists $sameHash{$locusmotif};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2523 $locusmotif = $revHash{$locusmotif} if exists $revHash{$locusmotif};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2524
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2525 my $endst = join("\t", @endstatement, $orthHchr, $Hstart, $Hend);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2526 # print join("\t", @endstatement, $orthHchr, $Hstart, $Hend), "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2527
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2528
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2529 return (join("\t", @endstatement, $orthHchr, $Hstart, $Hend), $orthHchr, $Hstart, $Hend, $locusmotif, length($locusmotif));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2530
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2531 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2532
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2533 sub switch_nucl{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2534 my @strand = split(/\s*/,$_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2535 for my $i (0 ... $#strand){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2536 if ($strand[$i] =~ /c/i) {$strand[$i] = "G";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2537 if ($strand[$i] =~ /a/i) {$strand[$i] = "T";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2538 if ($strand[$i] =~ /t/i) { $strand[$i] = "A";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2539 if ($strand[$i] =~ /g/i) {$strand[$i] = "C";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2540 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2541 return join("",@strand);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2542 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2543
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2544
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2545 sub switch_micro{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2546 my $micro = reverse($_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2547 my @strand = split(/\s*/,$micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2548 for my $i (0 ... $#strand){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2549 if ($strand[$i] =~ /c/i) {$strand[$i] = "G";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2550 if ($strand[$i] =~ /a/i) {$strand[$i] = "T";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2551 if ($strand[$i] =~ /t/i) { $strand[$i] = "A";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2552 if ($strand[$i] =~ /g/i) {$strand[$i] = "C";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2553 if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2554 if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2555 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2556 return join("",@strand);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2557 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2558 sub decipher_history{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2559 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2560 my ($mutations_array, $tags_string, $nodes, $branches_hash, $tree_analysis, $confirmation_string, $alivehash) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2561 my %mutations_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2562 foreach my $mutation (@$mutations_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2563 # print "mutation = $mutation\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2564 my %local = $mutation =~ /([\S ]+)=([\S ]+)/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2565 push @{$mutations_hash{$local{"node"}}},$mutation;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2566 # print "just for confirmation: $local{node} pushed as: $mutation\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2567 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2568 my @nodes;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2569 my @birth_steps=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2570 my @death_steps=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2571
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2572 my @tags=split(/\s*/,$tags_string);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2573 my @confirmation=split(/\s+/,$confirmation_string);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2574 my %info=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2575
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2576 for my $i (0 ... $#tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2577 $info{$tags[$i]}=$confirmation[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2578 # print "feeding info: $tags[$i] = $info{$tags[$i]}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2579 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2580
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2581 for my $keys (@$nodes) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2582 foreach my $key (@$keys){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2583 # print "current key = $key\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2584 my $copykey = $key;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2585 $copykey =~ s/[\W ]+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2586 my @copykeys=split(/\s*/,$copykey);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2587 my $states=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2588 foreach my $copy (@copykeys){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2589 $states=$states.$info{$copy};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2590 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2591 # print "reduced key = $copykey and state = $states\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2592
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2593 if (exists $mutations_hash{$key}) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2594
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2595 if ($states=~/\+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2596 push @birth_steps, @{$mutations_hash{$key}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2597 $birth_steps[$#birth_steps] =~ s/\S+=//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2598 delete $mutations_hash{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2599 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2600 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2601 push @death_steps, @{$mutations_hash{$key}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2602 $death_steps[$#death_steps] =~ s/\S+=//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2603 delete $mutations_hash{$key};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2604 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2605 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2606 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2607 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2608 # print "conformation = $confirmation_string\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2609 push (@birth_steps, "NULL") if scalar(@birth_steps) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2610 push (@death_steps, "NULL") if scalar(@death_steps) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2611 # print "birth steps = ",join("\n",@birth_steps)," and death steps = ",join("\n",@death_steps),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2612 return \@birth_steps, \@death_steps;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2613 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2614
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2615 sub fillAlignmentGaps{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2616 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2617 # print "received: @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2618 my ($tree, $sequences, $alignment, $tagarray, $microsathash, $nonmicrosathash, $motif, $tree_analysis, $threshold, $microsatstarts) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2619 # print "in fillAlignmentGaps.. tree = $tree \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2620 my %sequence_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2621
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2622 my @phases = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2623 my $concat = $motif.$motif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2624 my $motifsize = length($motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2625
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2626 for my $i (1 ... $motifsize){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2627 push @phases, substr($concat, $i, $motifsize);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2628 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2629
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2630 my $concatalignment = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2631 foreach my $tag (@tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2632 $concatalignment = $concatalignment.$alignment->{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2633 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2634 # print "returningg NULL","NULL","NULL", "NULL\n" if $concatalignment !~ /-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2635 return 0, "NULL","NULL","NULL", "NULL","NULL" if $concatalignment !~ /-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2636
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2637
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2638
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2639 my %node_sequences_temp=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2640 my %node_alignments_temp =(); #NEW, Nov 28 2008
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2641
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2642 my @tags=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2643 my @locus_sequences=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2644 my %alivehash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2645
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2646 # print "IN fillAlignmentGaps\n";# <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2647 my %fillrecord = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2648
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2649 my $change = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2650 foreach my $tag (@$tagarray) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2651 #print "adding: $tag\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2652 push(@tags, $tag);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2653 if (exists $microsathash->{$tag}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2654 my $micro = $microsathash->{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2655 my $orig_micro = $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2656 ($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2657 $change = 1 if uc($micro) ne uc($orig_micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2658 $node_sequences_temp{$tag}=$micro if $microsathash->{$tag} ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2659 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2660 if (exists $nonmicrosathash->{$tag}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2661 my $micro = $nonmicrosathash->{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2662 my $orig_micro = $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2663 ($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2664 $change = 1 if uc($micro) ne uc($orig_micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2665 $node_sequences_temp{$tag}=$micro if $nonmicrosathash->{$tag} ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2666 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2667
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2668 if (exists $alignment->{$tag}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2669 my $micro = $alignment->{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2670 my $orig_micro = $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2671 ($micro, $fillrecord{$tag}) = fillgaps($micro, \@phases);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2672 $change = 1 if uc($micro) ne uc($orig_micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2673 $node_alignments_temp{$tag}=$micro if $alignment->{$tag} ne "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2674 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2675
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2676 #print "adding to node_sequences: $tag = ",$node_sequences_temp{$tag},"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2677 #print "adding to node_alignments: $tag = ",$node_alignments_temp{$tag},"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2678 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2679
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2680
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2681 my %node_sequences=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2682 my %node_alignments =(); #NEW, Nov 28 2008
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2683 foreach my $tag (@$tagarray) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2684 $node_sequences{$tag} = join ".",split(/\s*/,$node_sequences_temp{$tag});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2685 $node_alignments{$tag} = join ".",split(/\s*/,$node_alignments_temp{$tag});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2686 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2687 # print "\n", "#" x 50, "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2688 foreach my $tag (@tags){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2689 # print "$tag: $alignment->{$tag} = $node_alignments{$tag}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2690 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2691 # print "\n", "#" x 50, "\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2692 # print "change = $change\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2693 #<STDIN> if $concatalignment=~/\-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2694
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2695 # <STDIN> if $printer == 1 && $concatalignment =~ /\-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2696
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2697 return 0, "NULL","NULL","NULL", "NULL", "NULL" if $change == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2698
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2699 my ($nodes_arr, $branches_hash) = get_nodes($tree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2700 my @nodes=@$nodes_arr;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2701 # print "recieved nodes = @nodes\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2702
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2703
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2704 #POPULATE branches_hash WITH INFORMATION ABOUT LIVESTATUS
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2705 foreach my $keys (@nodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2706 my @pair = @$keys;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2707 my $joint = "(".join(", ",@pair).")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2708 my $copykey = join "", @pair;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2709 $copykey =~ s/[\W ]+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2710 # print "for node: $keys, copykey = $copykey and joint = $joint\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2711 my $livestatus = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2712 foreach my $copy (split(/\s*/,$copykey)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2713 $livestatus = 0 if !exists $alivehash{$copy};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2714 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2715 $alivehash{$joint} = $joint if !exists $alivehash{$joint} && $livestatus == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2716 # print "alivehash = $alivehash{$joint}\n" if exists $alivehash{$joint} && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2717 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2718
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2719
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2720
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2721 @nodes = reverse(@nodes); #1 THIS IS IN ORDER TO GO THROUGH THE TREE FROM LEAVES TO ROOT.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2722
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2723 my @mutations_array=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2724
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2725 my $joint = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2726 foreach my $node (@nodes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2727 my @pair = @$node;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2728 # print "now in the nodes for loop, pair = @pair\n and sequences=\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2729 $joint = "(".join(", ",@pair).")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2730 # print "joint = $joint \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2731 my @pair_sequences=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2732
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2733 foreach my $tag (@pair){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2734 # print "tag = $tag: " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2735 # print $node_alignments{$tag},"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2736 push @pair_sequences, $node_alignments{$tag};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2737 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2738 # print "fillgap\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2739 my ($compared, $substitutions_list) = base_by_base_simple($motif,\@pair_sequences, scalar(@pair_sequences), @pair, $joint);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2740 $node_alignments{$joint}=$compared;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2741 push( @mutations_array,split(/:/,$substitutions_list));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2742 # print "newly added to node_sequences: $node_alignments{$joint} and list of mutations = @mutations_array\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2743 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2744 # print "now sending for analyze_mutations: mutation_array=@mutations_array, nodes=@nodes, branches_hash=$branches_hash, alignment=$alignment, tags=@tags, alivehash=%alivehash, node_sequences=\%node_sequences, microsatstarts=$microsatstarts, motif=$motif\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2745 # <STDIN> if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2746
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2747 my $analayzed_mutations = analyze_mutations(\@mutations_array, \@nodes, $branches_hash, $alignment, \@tags, \%alivehash, \%node_sequences, $microsatstarts, $motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2748
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2749 # print "returningt: ", $analayzed_mutations, \@nodes,"\n" if scalar @mutations_array > 0;;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2750 # print "returningy: NULL, NULL, NULL " if scalar @mutations_array == 0 && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2751 # print "final node alignment after filling for $joint= " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2752 # print "$node_alignments{$joint}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2753
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2754
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2755 return 1, $analayzed_mutations, \@nodes, $branches_hash, \%alivehash, $node_alignments{$joint} if scalar @mutations_array > 0 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2756 return 1, "NULL","NULL","NULL", "NULL", "NULL" if scalar @mutations_array == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2757 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2758
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2759
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2760
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2761 sub add_mutation{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2762 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2763 # print "IN SUBROUTUNE add_mutation.. information received = @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2764 my ($i , $bite, $to, $from) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2765 # print "bite = $bite.. all received info = ",join("^", @_),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2766 # print "to=$to\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2767 # print "tis split = ",join(" and ",split(/!/,$to)),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2768 my @toields = split "!",$to;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2769 # print "toilds = @toields\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2770 my @mutations=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2771
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2772 foreach my $toield (@toields){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2773 my @toinfo=split(":",$toield);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2774 # print " at toinfo=@toinfo \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2775 next if $toinfo[1] =~ /$from/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2776 my @mutation = @toinfo if $toinfo[1] !~ /$from/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2777 # print "adding to mutaton list: ", join(",", "node=$mutation[0]","type=substitution" ,"position=$i", "from=$from", "to=$mutation[1]", "insertion=", "deletion="),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2778 push (@mutations, join("\t", "node=$mutation[0]","type=substitution" ,"position=$i", "from=$from", "to=$mutation[1]", "insertion=", "deletion="));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2779 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2780 return @mutations;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2781 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2782
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2783
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2784 sub add_bases{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2785
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2786 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2787 # print "IN SUBROUTUNE add_bases.. information received = @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2788 my ($optional0, $optional1, $pair0, $pair1,$joint) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2789 my $total_list=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2790
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2791 my @total_list0=split(/!/,$optional0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2792 my @total_list1=split(/!/,$optional1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2793 my @all_list=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2794 my %total_hash0=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2795 foreach my $entry (@total_list0) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2796 $entry = uc $entry;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2797 $entry =~ /(\S+):(\S+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2798 $total_hash0{$2}=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2799 push @all_list, $2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2800 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2801
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2802 my %total_hash1=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2803 foreach my $entry (@total_list1) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2804 $entry = uc $entry;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2805 $entry =~ /(\S+):(\S+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2806 $total_hash1{$2}=$1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2807 push @all_list, $2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2808 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2809
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2810 my %alphabetical_hash=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2811 my @return_options=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2812
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2813 for my $i (0 ... $#all_list){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2814 my $alph = $all_list[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2815 if (exists $total_hash0{$alph} && exists $total_hash1{$alph}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2816 push(@return_options, $joint.":".$alph);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2817 delete $total_hash0{$alph}; delete $total_hash1{$alph};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2818 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2819 if (exists $total_hash0{$alph} && !exists $total_hash1{$alph}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2820 push(@return_options, $pair0.":".$alph);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2821 delete $total_hash0{$alph};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2822 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2823 if (!exists $total_hash0{$alph} && exists $total_hash1{$alph}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2824 push(@return_options, $pair1.":".$alph);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2825 delete $total_hash1{$alph};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2826 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2827
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2828 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2829 # print "returning ",join "!",@return_options,"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2830 return join "!",@return_options;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2831
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2832 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2833
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2834
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2835 sub fillgaps{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2836 # print "IN fillgaps: @_\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2837 my ($micro, $phasesinput) = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2838 #print "in microsathash ,,.. micro = $micro\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2839 return $micro if $micro !~ /\-/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2840 my $orig_micro = $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2841 my @phases = @$phasesinput;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2842
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2843 my %tested_patterns = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2844
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2845 foreach my $phase (@phases){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2846 # print "considering phase: $phase\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2847 my @phase_prefixes = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2848 my @prephase_left_contexts = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2849 my @prephase_right_contexts = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2850 my @pregapsize = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2851 my @prepostfilins = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2852
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2853 my @phase_suffixes;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2854 my @suffphase_left_contexts;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2855 my @suffphase_right_contexts;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2856 my @suffgapsize;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2857 my @suffpostfilins;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2858
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2859 my @postfilins = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2860 my $motifsize = length($phases[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2861
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2862 my $change = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2863
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2864 for my $u (0 ... $motifsize-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2865 my $concat = $phase.$phase.$phase.$phase;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2866 my @concatarr = split(/\s*/, $concat);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2867 my $l = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2868 while ($l < $u){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2869 shift @concatarr;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2870 $l++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2871 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2872 $concat = join ("", @concatarr);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2873
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2874 for my $t (0 ... $motifsize-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2875 for my $k (1 ... $motifsize-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2876 push @phase_prefixes, substr($concat, $motifsize+$t, $k);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2877 push @prephase_left_contexts, substr ($concat, $t, $motifsize);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2878 push @prephase_right_contexts, substr ($concat, $motifsize+$t+$k+($motifsize-$k), 1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2879 push @pregapsize, $k;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2880 push @prepostfilins, substr($concat, $motifsize+$t+$k, ($motifsize-$k));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2881 # print "reading: $concat, t=$t, k=$k prefix: $prephase_left_contexts[$#prephase_left_contexts] $phase_prefixes[$#phase_prefixes] -x$pregapsize[$#pregapsize] $prephase_right_contexts[$#prephase_right_contexts]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2882 # print "phase_prefixes = $phase_prefixes[$#phase_prefixes]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2883 # print "prephase_left_contexts = $prephase_left_contexts[$#prephase_left_contexts]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2884 # print "prephase_right_contexts = $prephase_right_contexts[$#prephase_right_contexts]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2885 # print "pregapsize = $pregapsize[$#pregapsize]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2886 # print "prepostfilins = $prepostfilins[$#prepostfilins]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2887 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2888 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2889 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2890
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2891 # print "looking if $micro =~ /($phase\-{$motifsize})/i || $micro =~ /^(\-{$motifsize,}$phase)/i\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2892 if ($micro =~ /($phase\-{$motifsize,})$/i || $micro =~ /^(\-{$motifsize,}$phase)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2893 # print "micro: $micro needs further gap removal: $1\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2894 while ($micro =~ /$phase(\-{$motifsize,})$/i || $micro =~ /^(\-{$motifsize,})$phase/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2895 # print "micro: $micro needs further gap removal: $1\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2896
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2897 # print "phase being considered = $phase\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2898 my $num = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2899 $num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi if $micro =~ /$phase\-{$motifsize,}/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2900 $num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi if $micro =~ /\-{$motifsize,}$phase/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2901 # print "num = $num\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2902 $change = 1 if $num == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2903 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2904 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2905
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2906 elsif ($micro =~ /(($phase)+)\-{$motifsize,}(($phase)+)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2907 while ($micro =~ /(($phase)+)\-{$motifsize,}(($phase)+)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2908 # print "checking lengths of $1 and $3 for $micro... \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2909 my $num = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2910 if (length($1) >= length($3)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2911 # print "$micro matches (($phase)+)\-{$motifsize,}(($phase)+) = $1, >= , $3 \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2912 $num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2913 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2914 if (length($1) < length($3)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2915 # print "$micro matches (($phase)+)\-{$motifsize,}(($phase)+) = $1, < , $3 \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2916 $num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2917 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2918 # print "micro changed to $micro\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2919 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2920 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2921 elsif ($micro =~ /([A-Z]+)\-{$motifsize,}(($phase)+)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2922 while ($micro =~ /([A-Z]+)\-{$motifsize,}(($phase)+)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2923 # print "$micro matches ([A-Z]+)\-{$motifsize}(($phase)+) = 1=$1, - , 3=$3 \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2924 my $num = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2925 $num = $micro =~ s/\-{$motifsize}$phase/$phase$phase/gi ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2926 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2927 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2928 elsif ($micro =~ /(($phase)+)\-{$motifsize,}([A-Z]+)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2929 while ($micro =~ /(($phase)+)\-{$motifsize,}([A-Z]+)/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2930 # print "$micro matches (($phase)+)\-{$motifsize,}([A-Z]+) = 1=$1, - , 3=$3 \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2931 my $num = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2932 $num = $micro =~ s/$phase\-{$motifsize}/$phase$phase/gi ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2933 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2934 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2935
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2936 # print "$orig_micro to $micro\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2937
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2938 #s <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2939
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2940 for my $h (0 ... $#phase_prefixes){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2941 # print "searching using prefix : $prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2942 my $pattern = $prephase_left_contexts[$h].$phase_prefixes[$h].$pregapsize[$h].$prephase_right_contexts[$h];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2943 # print "returning orig_micro = $orig_micro, micro = $micro \n" if exists $tested_patterns{$pattern};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2944 if ($micro =~ /$prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2945 return $orig_micro if exists $tested_patterns{$pattern};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2946 while ($micro =~ /($prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h])/i){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2947 $tested_patterns{$pattern} = $pattern;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2948 # print "micro: $micro needs further gap removal: $1\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2949
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2950 # print "prefix being considered = $phase_prefixes[$h]\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2951 my $num = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2952 $num = ($micro =~ s/$prephase_left_contexts[$h]$phase_prefixes[$h]\-{$pregapsize[$h]}$prephase_right_contexts[$h]/$prephase_left_contexts[$h]$phase_prefixes[$h]$prepostfilins[$h]$prephase_right_contexts[$h]/gi) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2953 # print "num = $num, micro = $micro\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2954 $change = 1 if $num == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2955
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2956 return $orig_micro if $num > 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2957 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2958 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2959
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2960 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2961 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2962 return $orig_micro if length($micro) != length($orig_micro);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2963 return $micro;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2964 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2965
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2966 sub selectMutationArray{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2967 my $printer =0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2968
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2969 my $oldmutspt = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2970 my $newmutspt = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2971 my $tagstringpt = $_[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2972 my $alivehashpt = $_[3];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2973 my $alignmentpt = $_[4];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2974 my $motif = $_[5];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2975
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2976 my @alivehasharr=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2977
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2978 my @tags = @$tagstringpt;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2979 my $alignmentln = length($alignmentpt->{$tags[0]});
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2980
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2981 foreach my $key (keys %$alivehashpt) { push @alivehasharr, $key; }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2982
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2983 my %newside = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2984 my %oldside = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2985 my %newmuts = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2986
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2987 my %commons = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2988 my %olds = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2989 foreach my $old (@$oldmutspt){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2990 $olds{$old} = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2991 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2992 foreach my $new (@$newmutspt){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2993 $commons{$new} = 1 if exists $olds{$new};;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2994 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2995
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2996
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2997 foreach my $pos ( 0 ... $alignmentln){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2998 #print "pos = $pos\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
2999 my $newyes = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3000 foreach my $mut (@$newmutspt){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3001 $newmuts{$mut} = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3002 chomp $mut;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3003 $newyes++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3004 $mut =~ s/=\t/= \t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3005 $mut =~ s/=$/= /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3006
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3007 $mut =~ /node=([A-Z\(\), ]+)\stype=([a-zA-Z ]+)\sposition=([0-9 ]+)\sfrom=([a-zA-Z\- ]+)\sto=([a-zA-Z\- ]+)\sinsertion=([a-zA-Z\- ]+)\sdeletion=([a-zA-Z\- ]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3008 my $node = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3009 next if $3 != $pos;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3010 # print "new mut = $mut\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3011 # print "node = $node, pos = $3 ... and alivehasharr = >@alivehasharr<\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3012 my $alivenode = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3013 foreach my $key (@alivehasharr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3014 $alivenode = 1 if $key =~ /$node/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3015 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3016 # next if $alivenode == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3017 my $indel_type = " ";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3018 if ($2 eq "insertion" || $2 eq "deletion"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3019 my $thisindel = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3020 $thisindel = $6 if $2 eq "insertion";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3021 $thisindel = $7 if $2 eq "deletion";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3022
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3023 $indel_type = "i".checkIndelType($node, $thisindel, $motif,$alignmentpt,$3, $2) if $2 eq "insertion";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3024 $indel_type = "d".checkIndelType($node, $thisindel, $motif,$alignmentpt, $3, $2) if $2 eq "deletion";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3025 $indel_type = $indel_type."f" if $indel_type =~ /mot/ && length($thisindel) >= length($motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3026 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3027 # print "indeltype = $indel_type\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3028 my $added = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3029
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3030 if (exists $newside{$pos} && $indel_type =~ /[a-z]+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3031 # print "we have a preexisting one for $pos\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3032 my @preexisting = @{$newside{$pos}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3033 foreach my $pre (@preexisting){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3034 # print "looking at $pre\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3035 next if $pre !~ /node=$node/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3036 next if $pre !~ /indeltype=([a-z]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3037 my $currtype = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3038
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3039 if ($currtype =~ /inon/ && $indel_type =~ /dmot/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3040 delete $newside{$pos};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3041 push @{$newside{$pos}}, $pre;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3042 $added = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3043 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3044 if ($currtype =~ /dnon/ && $indel_type =~ /imot/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3045 delete $newside{$pos};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3046 push @{$newside{$pos}}, $pre;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3047 $added = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3048 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3049 if ($currtype =~ /dmot/ && $indel_type =~ /inon/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3050 delete $newside{$pos};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3051 push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3052 $added = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3053 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3054 if ($currtype =~ /imot/ && $indel_type =~ /dnon/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3055 delete $newside{$pos};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3056 push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3057 $added = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3058 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3059 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3060 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3061 # print "added = $added\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3062 push @{$newside{$pos}}, $mut."\tindeltype=$indel_type" if $added == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3063 # print "for new pos,: $pos we have: @{$newside{$pos}}\n " if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3064 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3065 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3066
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3067 foreach my $pos ( 0 ... $alignmentln){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3068 my $oldyes = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3069 foreach my $mut (@$oldmutspt){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3070 chomp $mut;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3071 $oldyes++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3072 $mut =~ s/=\t/= \t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3073 $mut =~ s/=$/= /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3074 $mut =~ /node=([A-Z\(\), ]+)\ttype=([a-zA-Z ]+)\tposition=([0-9 ]+)\tfrom=([a-zA-Z\- ]+)\tto=([a-zA-Z\- ]+)\tinsertion=([a-zA-Z\- ]+)\tdeletion=([a-zA-Z\- ]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3075 my $node = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3076 next if $3 != $pos;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3077 # print "old mut = $mut\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3078 my $alivenode = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3079 foreach my $key (@alivehasharr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3080 $alivenode = 1 if $key =~ /$node/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3081 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3082 #next if $alivenode == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3083 my $indel_type = " ";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3084 if ($2 eq "insertion" || $2 eq "deletion"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3085 $indel_type = "i".checkIndelType($node, $6, $motif,$alignmentpt, $3, $2) if $2 eq "insertion";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3086 $indel_type = "d".checkIndelType($node, $7, $motif,$alignmentpt, $3, $2) if $2 eq "deletion";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3087 next if $indel_type =~/non/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3088 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3089 else{ next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3090
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3091 my $imp=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3092 $imp = 1 if $indel_type =~ /dmot/ && $alivenode == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3093 $imp = 1 if $indel_type =~ /imot/ && $alivenode == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3094
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3095
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3096 if (exists $newside{$pos} && $indel_type =~ /[a-z]+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3097 my @preexisting = @{$newside{$pos}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3098 # print "we have a preexisting one for $pos: @preexisting\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3099 next if $imp == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3100
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3101 if (scalar(@preexisting) == 1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3102 my $foundmut = $preexisting[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3103 $foundmut=~ /node=([A-Z, \(\)]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3104 next if $1 eq $node;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3105
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3106 if (exists $oldside{$pos} || exists $commons{$foundmut}){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3107 # print "not replacing, but just adding\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3108 push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3109 push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3110 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3111 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3112
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3113 delete $newside{$pos};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3114 push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3115 push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3116 # print "now new one is : @{$newside{$pos}}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3117 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3118 # print "for pos: $pos: @{$newside{$pos}}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3119 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3120 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3121
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3122
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3123 my @news = @{$newside{$pos}} if exists $newside{$pos};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3124 # print "mut = $mut and news = @news\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3125 push @{$oldside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3126 push @{$newside{$pos}}, $mut."\tindeltype=$indel_type";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3127 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3128 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3129 # print "in the end, our collected mutations = \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3130 my @returnarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3131 foreach my $key (keys %newside) {push @returnarr,@{$newside{$key}};}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3132 # print join("\n", @returnarr),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3133 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3134 return @returnarr;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3135
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3136 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3137
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3138
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3139 sub checkIndelType{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3140 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3141 my $node = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3142 my $indel = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3143 my $motif = $_[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3144 my $alignmentpt = $_[3];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3145 my $posit = $_[4];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3146 my $type = $_[5];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3147 my @phases =();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3148 my %prephases = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3149 my %postphases = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3150 #print "motif = $motif\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3151 # print "IN checkIndelType ... received: @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3152 my $concat = $motif.$motif.$motif.$motif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3153 my $motiflength = length($motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3154
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3155 if ($motiflength > length ($indel)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3156 return "non" if $motif !~ /$indel/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3157 return checkIndelType_ComplexAnalysis($node, $indel, $motif, $alignmentpt, $posit, $type);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3158 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3159
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3160 my $firstpass = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3161 for my $y (0 ... $motiflength-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3162 my $phase = substr($concat, $motiflength+$y, $motiflength);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3163 push @phases, $phase;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3164 $firstpass = 1 if $indel =~ /$phase/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3165 for my $k (0 ... length($motif)-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3166 # print "at: motiflength=$motiflength , y=$y , k=$k.. for pre: $motiflength+$y-$k and post: $motiflength+$y-$k+$motiflength in $concat\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3167 my $pre = substr($concat, $motiflength+$y-$k, $k );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3168 my $post = substr($concat, $motiflength+$y+$motiflength, $k);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3169 # print "adding to phases : $phase - $pre and $post\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3170 push @{$prephases{$phase}} , $pre;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3171 push @{$postphases{$phase}} , $post;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3172 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3173
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3174 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3175 # print "firstpass 1= $firstpass\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3176 return "non" if $firstpass ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3177 $firstpass =0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3178
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3179 foreach my $phase (@phases){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3180 my @pres = @{$prephases{$phase}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3181 my @posts = @{$postphases{$phase}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3182
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3183 foreach my $pre (@pres){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3184 foreach my $post (@posts){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3185
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3186 $firstpass = 1 if $indel =~ /($pre)?($phase)+($post)?/i && length($indel) > (3 * length($motif));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3187 $firstpass = 1 if $indel =~ /^($pre)?($phase)+($post)?$/i && length($indel) < (3 * length($motif));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3188 # print "matched here : ($pre)?($phase)+($post)?\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3189 last if $firstpass == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3190 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3191 last if $firstpass == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3192 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3193 last if $firstpass == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3194 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3195 # print "firstpass 2= $firstpass\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3196 return "non" if $firstpass ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3197 return "mot" if $firstpass ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3198 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3199
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3200
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3201 sub checkIndelType_ComplexAnalysis{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3202 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3203 my $node = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3204 my $indel = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3205 my $motif = $_[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3206 my $alignmentpt = $_[3];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3207 my $pos = $_[4];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3208 my $type = $_[5];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3209 my @speciesinvolved = $node =~ /[A-Z]+/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3210
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3211 my @seqs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3212 my $residualseq = length($motif) - length($indel);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3213 # print "IN COMPLEX ANALYSIS ... received: @_ .... speciesinvolved = @speciesinvolved\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3214 # print "we have position = $pos, sseq = $alignmentpt->{$speciesinvolved[0]}\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3215 # print "residualseq = $residualseq\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3216 # print "pos=$pos... got: @_\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3217 foreach my $sp (@speciesinvolved){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3218 my $spseq = $alignmentpt->{$sp};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3219 #print "orig spseq = $spseq\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3220 my $subseq = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3221
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3222 if ($type eq "deletion"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3223 my @indelparts = split(/\s*/,$indel);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3224 my @seqparts = split(/\s*/,$spseq);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3225
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3226 for my $p ($pos ... $pos+length($indel)-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3227 $seqparts[$p] = shift @indelparts;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3228 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3229 $spseq = join("",@seqparts);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3230 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3231 #print "mod spseq = $spseq\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3232 # $spseq=~ s/\-//g if $type !~ /deletion/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3233 # print "substr($spseq, $pos-($residualseq), length($indel)+$residualseq+$residualseq)\n" if $pos > 0 && $pos < (length($spseq) - length($motif)) && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3234 # print "substr($spseq, 0, length($indel)+$residualseq)\n" if $pos == 0 && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3235 # print "substr($spseq, $pos - $residualseq, length($indel)+$residualseq)\n" if $pos >= (length($spseq) - length($motif)) && $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3236
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3237 $subseq = substr($spseq, $pos-($residualseq), length($indel)+$residualseq+$residualseq) if $pos > 0 && $pos < (length($spseq) - length($motif)) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3238 $subseq = substr($spseq, 0, length($indel)+$residualseq) if $pos == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3239 $subseq = substr($spseq, $pos - $residualseq, length($indel)+$residualseq) if $pos >= (length($spseq) - length($motif)) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3240 # print "spseq = $spseq . subseq=$subseq . type = $type\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3241 #<STDIN> if $subseq !~ /[a-z\-]/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3242 $subseq =~ s/\-/$indel/g if $type =~ /insertion/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3243 push @seqs, $subseq;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3244 # print "seqs = @seqs\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3245 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3246 return "non" if checkIfSeqsIdentical(@seqs) eq "NO";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3247 # print "checking for $seqs[0] \n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3248
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3249 my @phases =();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3250 my %prephases = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3251 my %postphases = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3252 my $concat = $motif.$motif.$motif.$motif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3253 my $motiflength = length($motif);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3254
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3255 my $firstpass = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3256
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3257 for my $y (0 ... $motiflength-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3258 my $phase = substr($concat, $motiflength+$y, $motiflength);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3259 push @phases, $phase;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3260 $firstpass = 1 if $seqs[0] =~ /$phase/i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3261 for my $k (0 ... length($motif)-1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3262 my $pre = substr($concat, $motiflength+$y-$k, $k );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3263 my $post = substr($concat, $motiflength+$y+$motiflength, $k);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3264 # print "adding to phases : $phase - $pre and $post\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3265 push @{$prephases{$phase}} , $pre;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3266 push @{$postphases{$phase}} , $post;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3267 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3268
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3269 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3270 # print "firstpass 1= $firstpass.. also, res-d = ",(length($seqs[0]))%(length($motif)),"\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3271 return "non" if $firstpass ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3272 $firstpass =0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3273 foreach my $phase (@phases){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3274
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3275 $firstpass = 1 if $seqs[0] =~ /^($phase)+$/i && ((length($seqs[0]))%(length($motif))) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3276
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3277 if (((length($seqs[0]))%(length($motif))) != 0){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3278 my @pres = @{$prephases{$phase}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3279 my @posts = @{$postphases{$phase}};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3280 foreach my $pre (@pres){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3281 foreach my $post (@posts){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3282 next if $pre !~ /\S/ && $post !~ /\S/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3283 $firstpass = 1 if ($seqs[0] =~ /^($pre)($phase)+($post)$/i || $seqs[0] =~ /^($pre)($phase)+$/i || $seqs[0] =~ /^($phase)+($post)$/i);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3284 # print "caught with $pre $phase $post\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3285 last if $firstpass == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3286 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3287 last if $firstpass == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3288 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3289 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3290
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3291 last if $firstpass == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3292 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3293
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3294 #print "indel = $indel.. motif = $motif.. firstpass 2= mot\n" if $firstpass ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3295 #print "indel = $indel.. motif = $motif.. firstpass 2= non\n" if $firstpass ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3296 #<STDIN>;# if $firstpass ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3297 return "non" if $firstpass ==0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3298 return "mot" if $firstpass ==1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3299
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3300 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3301
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3302 sub checkIfSeqsIdentical{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3303 my @seqs = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3304 my $identical = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3305
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3306 for my $j (1 ... $#seqs){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3307 $identical = 0 if uc($seqs[0]) ne uc($seqs[$j]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3308 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3309 return "NO" if $identical == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3310 return "YES" if $identical == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3311
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3312 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3313
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3314 sub summarizeMutations{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3315 my $mutspt = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3316 my @muts = @$mutspt;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3317 my $tree = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3318
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3319 my @returnarr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3320
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3321 for (1 ... 38){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3322 push @returnarr, "NA";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3323 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3324 push @returnarr, "NULL";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3325 return @returnarr if $tree eq "NULL" || scalar(@muts) < 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3326
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3327
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3328 my @bspecies = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3329 my @dspecies = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3330 my $treecopy = $tree;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3331 $treecopy =~ s/[\(\)]//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3332 my @treeparts = split(/[\.,]+/, $treecopy);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3333
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3334 for my $part (@treeparts){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3335 if ($part =~ /\+/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3336 $part =~ s/\+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3337 #my @sp = split(/\s*/, $part);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3338 #foreach my $p (@sp) {push @bspecies, $p;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3339 push @bspecies, $part;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3340 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3341 if ($part =~ /\-/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3342 $part =~ s/\-//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3343 #my @sp = split(/\s*/, $part);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3344 #foreach my $p (@sp) {push @dspecies, $p;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3345 push @dspecies, $part;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3346 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3347
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3348 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3349 #print "-------------------------------------------------------\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3350
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3351 my ($insertions, $deletions, $motinsertions, $motinsertionsf, $motdeletions, $motdeletionsf, $noninsertions, $nondeletions) = (0,0,0,0,0,0,0,0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3352 my ($binsertions, $bdeletions, $bmotinsertions,$bmotinsertionsf, $bmotdeletions, $bmotdeletionsf, $bnoninsertions, $bnondeletions) = (0,0,0,0,0,0,0,0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3353 my ($dinsertions, $ddeletions, $dmotinsertions,$dmotinsertionsf, $dmotdeletions, $dmotdeletionsf, $dnoninsertions, $dnondeletions) = (0,0,0,0,0,0,0,0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3354 my ($ninsertions, $ndeletions, $nmotinsertions,$nmotinsertionsf, $nmotdeletions, $nmotdeletionsf, $nnoninsertions, $nnondeletions) = (0,0,0,0,0,0,0,0);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3355 my ($substitutions, $bsubstitutions, $dsubstitutions, $nsubstitutions, $indels, $subs) = (0,0,0,0,"NA","NA");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3356
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3357 my @insertionsarr = (" ");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3358 my @deletionsarr = (" ");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3359
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3360 my @substitutionsarr = (" ");
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3361
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3362
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3363 foreach my $mut (@muts){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3364 # print "mut = $mut\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3365 chomp $mut;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3366 $mut =~ s/=\t/= /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3367 $mut =~ s/=$/= /g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3368 my %mhash = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3369 my @mields = split(/\t/,$mut);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3370
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3371 foreach my $m (@mields){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3372 my @fields = split(/=/,$m);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3373 next if $fields[1] eq " ";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3374 $mhash{$fields[0]} = $fields[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3375 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3376
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3377 my $myutype = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3378 my $decided = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3379
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3380 my $localnode = $mhash{"node"};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3381 $localnode =~ s/[\(\)\. ,]//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3382
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3383
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3384 foreach my $s (@bspecies){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3385 if ($localnode eq $s) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3386 $decided = 1; $myutype = "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3387 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3388 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3389
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3390 foreach my $s (@dspecies){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3391 if ($localnode eq $s) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3392 $decided = 1; $myutype = "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3393 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3394 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3395
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3396 $myutype = "n" if $decided != 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3397
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3398
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3399 # print "tree=$tree, birth species=@bspecies, death species=@dspecies, node=$mhash{node} .. myutype=$myutype .. \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3400 # <STDIN> if $mhash{"type"} eq "insertion" && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3401
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3402
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3403 if ($mhash{"type"} eq "substitution"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3404 $substitutions++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3405 $bsubstitutions++ if $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3406 $dsubstitutions++ if $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3407 $nsubstitutions++ if $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3408 # print "substitution: from= $mhash{from}, to = $mhash{to}, and type = myutype\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3409 push @substitutionsarr, "$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3410 push @substitutionsarr, "$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3411 push @substitutionsarr, "$mhash{position}:".$mhash{"from"}.">".$mhash{"to"} if $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3412 # print "substitutionsarr = @substitutionsarr\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3413 # <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3414 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3415 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3416 #print "tree=$tree, birth species=@bspecies, death species=@dspecies, node=$mhash{node} .. myutype=$myutype .. indeltype=$mhash{indeltype}\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3417 if ($mhash{"type"} eq "deletion"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3418 $deletions++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3419
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3420 $motdeletions++ if $mhash{"indeltype"} =~ /dmot/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3421 $motdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3422
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3423 $nondeletions++ if $mhash{"indeltype"} =~ /dnon/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3424
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3425 $bdeletions++ if $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3426 $ddeletions++ if $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3427 $ndeletions++ if $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3428
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3429 $bmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3430 $bmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3431 $bnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3432
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3433 $dmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3434 $dmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3435 $dnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3436
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3437 $nmotdeletions++ if $mhash{"indeltype"} =~ /dmot/ && $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3438 $nmotdeletionsf++ if $mhash{"indeltype"} =~ /dmotf/ && $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3439 $nnondeletions++ if $mhash{"indeltype"} =~ /dnon/ && $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3440
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3441 push @deletionsarr, "$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3442 push @deletionsarr, "$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3443 push @deletionsarr, "$mhash{indeltype}:$mhash{position}:".$mhash{"deletion"} if $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3444 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3445
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3446 if ($mhash{"type"} eq "insertion"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3447 $insertions++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3448
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3449 $motinsertions++ if $mhash{"indeltype"} =~ /imot/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3450 $motinsertionsf++ if $mhash{"indeltype"} =~ /imotf/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3451 $noninsertions++ if $mhash{"indeltype"} =~ /inon/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3452
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3453 $binsertions++ if $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3454 $dinsertions++ if $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3455 $ninsertions++ if $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3456
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3457 $bmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3458 $bmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3459 $bnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3460
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3461 $dmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3462 $dmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3463 $dnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3464
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3465 $nmotinsertions++ if $mhash{"indeltype"} =~ /imot/ && $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3466 $nmotinsertionsf++ if $mhash{"indeltype"} =~ /imotf/ && $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3467 $nnoninsertions++ if $mhash{"indeltype"} =~ /inon/ && $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3468
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3469 push @insertionsarr, "$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "b";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3470 push @insertionsarr, "$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "d";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3471 push @insertionsarr, "$mhash{indeltype}:$mhash{position}:".$mhash{"insertion"} if $myutype eq "n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3472
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3473 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3474 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3475 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3476
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3477
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3478
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3479 $indels = "ins=".join(",",@insertionsarr).";dels=".join(",",@deletionsarr) if scalar(@insertionsarr) > 1 || scalar(@deletionsarr) > 1 ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3480 $subs = join(",",@substitutionsarr) if scalar(@substitutionsarr) > 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3481 $indels =~ s/ //g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3482 $subs =~ s/ //g ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3483
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3484 #print "indels = $indels, subs=$subs\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3485 ##<STDIN> if $indels =~ /[a-zA-Z0-9]/ || $subs =~ /[a-zA-Z0-9]/ ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3486 #print "tree = $tree, indels = $indels, subs = $subs, bspecies = @bspecies, dspecies = @dspecies \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3487 my @returnarray = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3488
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3489 push (@returnarray, $indels, $subs) ;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3490
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3491 push @returnarray, $tree;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3492
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3493 my @copy = @returnarray;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3494 #print "\n\nreturnarray = @returnarray ... binsertions=$binsertions dinsertions=$dinsertions bsubstitutions=$bsubstitutions dsubstitutions=$dsubstitutions\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3495 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3496 return (@returnarray);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3497
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3498 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3499
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3500 sub selectBetterTree{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3501 my $printer = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3502 my $treestudy = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3503 my $alt = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3504 my $mutspt = $_[2];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3505 my @muts = @$mutspt;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3506 my @trees = (); my @alternatetrees=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3507
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3508 @trees = split(/\|/,$treestudy) if $treestudy =~ /\|/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3509 @alternatetrees = split(/[\|;]/,$alt) if $alt =~ /[\|;\(\)]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3510
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3511 $trees[0] = $treestudy if $treestudy !~ /\|/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3512 $alternatetrees[0] = $alt if $alt !~ /[\|;\(\)]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3513
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3514 my @alltrees = (@trees, @alternatetrees);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3515 # push(@alltrees,@alternatetrees);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3516
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3517 my %mutspecies = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3518 # print "IN selectBetterTree..treestudy=$treestudy. alt=$alt. for: @_. trees=@trees<. alternatetrees=@alternatetrees\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3519 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3520 foreach my $mut (@muts){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3521 # print colored ['green'],"mut = $mut\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3522 $mut =~ /node=([A-Z,\(\) ]+)/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3523 my $node = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3524 $node =~s/[,\(\) ]+//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3525 my @indivspecies = $node =~ /[A-Z]+/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3526 #print "adding node: $node\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3527 $mutspecies{$node} = $node;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3528
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3529 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3530
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3531 my @treerecords = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3532 my $treecount = -1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3533 foreach my $tree (@alltrees){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3534 # print "checking with tree $tree\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3535 $treecount++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3536 $treerecords[$treecount] = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3537 my @indivspecies = ($tree =~ /[A-Z]+/g);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3538 # print "indivspecies=@indivspecies\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3539 foreach my $species (@indivspecies){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3540 # print "checkin if exists species: $species\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3541 $treerecords[$treecount]+=2 if exists $mutspecies{$species} && $mutspecies{$species} !~ /indeltype=[a-z]mot/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3542 $treerecords[$treecount]+=1.5 if exists $mutspecies{$species} && $mutspecies{$species} =~ /indeltype=[a-z]mot/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3543 $treerecords[$treecount]-- if !exists $mutspecies{$species};
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3544 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3545 # print "for tree $tree, our treecount = $treerecords[$treecount]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3546 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3547
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3548 my @best_tree = array_largest_number_arrayPosition(@treerecords);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3549 # print "treerecords = @treerecords. hence, best tree = @best_tree = $alltrees[$best_tree[0]], $treerecords[$best_tree[0]]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3550 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3551 return ($alltrees[$best_tree[0]], $treerecords[$best_tree[0]]) if scalar(@best_tree) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3552 # print "best_tree[0] = $best_tree[0], and treerecords = $treerecords[$best_tree[0]]\n" if $printer == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3553 return ("NULL", -1) if $treerecords[$best_tree[0]] < 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3554 my $rando = int(rand($#trees));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3555 return ($alltrees[$rando], $treerecords[$rando]) if scalar(@best_tree) > 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3556
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3557 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3558
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3559
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3560
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3561
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3562 sub load_sameHash{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3563 #my $g = %$_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3564 $sameHash{"CAGT"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3565 $sameHash{"ATGA"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3566 $sameHash{"CAAC"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3567 $sameHash{"GGAA"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3568 $sameHash{"TAAG"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3569 $sameHash{"CGAG"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3570 $sameHash{"TAGG"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3571 $sameHash{"GCAG"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3572 $sameHash{"TAGA"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3573 $sameHash{"TGA"}="ATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3574 $sameHash{"CAAG"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3575 $sameHash{"CTAA"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3576 $sameHash{"CAAT"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3577 $sameHash{"GTAG"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3578 $sameHash{"GAAG"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3579 $sameHash{"CGA"}="ACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3580 $sameHash{"GTAA"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3581 $sameHash{"ACAA"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3582 $sameHash{"GCGG"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3583 $sameHash{"ATCA"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3584 $sameHash{"TAAC"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3585 $sameHash{"GGCA"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3586 $sameHash{"TGAG"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3587 $sameHash{"AACA"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3588 $sameHash{"GAGC"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3589 $sameHash{"ACCA"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3590 $sameHash{"TGAA"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3591 $sameHash{"ACA"}="AAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3592 $sameHash{"GAAC"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3593 $sameHash{"GCA"}="AGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3594 $sameHash{"CCAC"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3595 $sameHash{"CATA"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3596 $sameHash{"CAC"}="ACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3597 $sameHash{"TACA"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3598 $sameHash{"GGAC"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3599 $sameHash{"AGA"}="AAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3600 $sameHash{"ATAA"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3601 $sameHash{"CA"}="AC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3602 $sameHash{"CCCA"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3603 $sameHash{"TCAA"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3604 $sameHash{"CAGA"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3605 $sameHash{"AATA"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3606 $sameHash{"CCA"}="ACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3607 $sameHash{"AGAA"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3608 $sameHash{"AGTA"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3609 $sameHash{"GACG"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3610 $sameHash{"TCAG"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3611 $sameHash{"ACGA"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3612 $sameHash{"CGCA"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3613 $sameHash{"GAGT"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3614 $sameHash{"GA"}="AG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3615 $sameHash{"TA"}="AT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3616 $sameHash{"TAA"}="AAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3617 $sameHash{"CAG"}="AGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3618 $sameHash{"GATA"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3619 $sameHash{"GTA"}="AGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3620 $sameHash{"CCAA"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3621 $sameHash{"TAG"}="AGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3622 $sameHash{"CAAA"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3623 $sameHash{"AAGA"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3624 $sameHash{"CACG"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3625 $sameHash{"GTCA"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3626 $sameHash{"GGA"}="AGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3627 $sameHash{"GGAT"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3628 $sameHash{"CGGG"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3629 $sameHash{"CGGA"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3630 $sameHash{"AGGA"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3631 $sameHash{"TAAA"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3632 $sameHash{"GAGA"}="AGAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3633 $sameHash{"ACTA"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3634 $sameHash{"GCGA"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3635 $sameHash{"CACA"}="ACAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3636 $sameHash{"AGAT"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3637 $sameHash{"GAGG"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3638 $sameHash{"CGAC"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3639 $sameHash{"GGAG"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3640 $sameHash{"GCCA"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3641 $sameHash{"CCAG"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3642 $sameHash{"GAAA"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3643 $sameHash{"CAGG"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3644 $sameHash{"GAC"}="ACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3645 $sameHash{"CAA"}="AAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3646 $sameHash{"GACC"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3647 $sameHash{"GGCG"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3648 $sameHash{"GGTA"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3649 $sameHash{"AGCA"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3650 $sameHash{"GATG"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3651 $sameHash{"GTGA"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3652 $sameHash{"ACAG"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3653 $sameHash{"CGG"}="GGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3654 $sameHash{"ATA"}="AAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3655 $sameHash{"GACA"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3656 $sameHash{"GCAA"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3657 $sameHash{"CAGC"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3658 $sameHash{"GGGA"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3659 $sameHash{"GAG"}="AGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3660 $sameHash{"ACAT"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3661 $sameHash{"GAAT"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3662 $sameHash{"CACC"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3663 $sameHash{"GAT"}="ATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3664 $sameHash{"GCG"}="GGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3665 $sameHash{"GCAC"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3666 $sameHash{"GAA"}="AAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3667 $sameHash{"TGGA"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3668 $sameHash{"CCGA"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3669 $sameHash{"CGAA"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3670 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3671
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3672
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3673
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3674 sub load_revHash{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3675 $revHash{"CTGA"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3676 $revHash{"TCTT"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3677 $revHash{"CTAG"}="AGCT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3678 $revHash{"GGTG"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3679 $revHash{"GCC"}="GGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3680 $revHash{"GCTT"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3681 $revHash{"GCGT"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3682 $revHash{"GTTG"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3683 $revHash{"CTCC"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3684 $revHash{"ATC"}="ATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3685 $revHash{"CGAT"}="ATCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3686 $revHash{"TTAA"}="AATT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3687 $revHash{"GTTC"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3688 $revHash{"CTGC"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3689 $revHash{"TCGA"}="ATCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3690 $revHash{"ATCT"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3691 $revHash{"GGTT"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3692 $revHash{"CTTA"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3693 $revHash{"TGGC"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3694 $revHash{"CCG"}="GGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3695 $revHash{"CGGC"}="GGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3696 $revHash{"TTAG"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3697 $revHash{"GTG"}="ACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3698 $revHash{"CTTT"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3699 $revHash{"TGCA"}="ATGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3700 $revHash{"CGCT"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3701 $revHash{"TTCC"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3702 $revHash{"CT"}="AG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3703 $revHash{"C"}="G";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3704 $revHash{"CTCT"}="AGAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3705 $revHash{"ACTT"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3706 $revHash{"GGTC"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3707 $revHash{"ATTC"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3708 $revHash{"GGGT"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3709 $revHash{"CCTA"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3710 $revHash{"CGCG"}="GCGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3711 $revHash{"GTGT"}="ACAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3712 $revHash{"GCCC"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3713 $revHash{"GTCG"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3714 $revHash{"TCCC"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3715 $revHash{"TTCA"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3716 $revHash{"AGTT"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3717 $revHash{"CCCT"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3718 $revHash{"CCGC"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3719 $revHash{"CTT"}="AAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3720 $revHash{"TTGG"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3721 $revHash{"ATT"}="AAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3722 $revHash{"TAGC"}="AGCT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3723 $revHash{"ACTG"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3724 $revHash{"TCAC"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3725 $revHash{"CTGT"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3726 $revHash{"TGTG"}="ACAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3727 $revHash{"ATCC"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3728 $revHash{"GTGG"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3729 $revHash{"TGGG"}="ACCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3730 $revHash{"TCGG"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3731 $revHash{"CGGT"}="ACCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3732 $revHash{"GCTC"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3733 $revHash{"TACG"}="ACGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3734 $revHash{"GTTT"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3735 $revHash{"CAT"}="ATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3736 $revHash{"CATG"}="ATGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3737 $revHash{"GTTA"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3738 $revHash{"CACT"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3739 $revHash{"TCAT"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3740 $revHash{"TTA"}="AAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3741 $revHash{"TGTA"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3742 $revHash{"TTTC"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3743 $revHash{"TACT"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3744 $revHash{"TGTT"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3745 $revHash{"CTA"}="AGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3746 $revHash{"GACT"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3747 $revHash{"TTGC"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3748 $revHash{"TTC"}="AAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3749 $revHash{"GCT"}="AGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3750 $revHash{"GCAT"}="ATGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3751 $revHash{"TGGT"}="AACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3752 $revHash{"CCT"}="AGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3753 $revHash{"CATC"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3754 $revHash{"CCAT"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3755 $revHash{"CCCG"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3756 $revHash{"TGCC"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3757 $revHash{"TG"}="AC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3758 $revHash{"TGCT"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3759 $revHash{"GCCG"}="GGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3760 $revHash{"TCTG"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3761 $revHash{"TGT"}="AAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3762 $revHash{"TTAT"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3763 $revHash{"TAGT"}="AACT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3764 $revHash{"TATG"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3765 $revHash{"TTTA"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3766 $revHash{"CGTA"}="ACGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3767 $revHash{"TA"}="AT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3768 $revHash{"TGTC"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3769 $revHash{"CTAT"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3770 $revHash{"TATA"}="ATAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3771 $revHash{"TAC"}="AGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3772 $revHash{"TC"}="AG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3773 $revHash{"CATT"}="AATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3774 $revHash{"TCG"}="ACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3775 $revHash{"ATTT"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3776 $revHash{"CGTG"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3777 $revHash{"CTG"}="AGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3778 $revHash{"TCGT"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3779 $revHash{"TCCG"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3780 $revHash{"GTT"}="AAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3781 $revHash{"ATGT"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3782 $revHash{"CTTG"}="AAGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3783 $revHash{"CCTT"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3784 $revHash{"GATC"}="ATCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3785 $revHash{"CTGG"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3786 $revHash{"TTCT"}="AAAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3787 $revHash{"CGTC"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3788 $revHash{"CG"}="GC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3789 $revHash{"TATT"}="AAAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3790 $revHash{"CTCG"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3791 $revHash{"TCTC"}="AGAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3792 $revHash{"TCCT"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3793 $revHash{"TGG"}="ACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3794 $revHash{"ACTC"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3795 $revHash{"CTC"}="AGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3796 $revHash{"CGC"}="GGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3797 $revHash{"TTG"}="AAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3798 $revHash{"ACCT"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3799 $revHash{"TCTA"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3800 $revHash{"GTAC"}="ACGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3801 $revHash{"TTGA"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3802 $revHash{"GTCC"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3803 $revHash{"GATT"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3804 $revHash{"T"}="A";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3805 $revHash{"CGTT"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3806 $revHash{"GTC"}="ACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3807 $revHash{"GCCT"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3808 $revHash{"TGC"}="AGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3809 $revHash{"TTTG"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3810 $revHash{"GGCT"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3811 $revHash{"TCA"}="ATG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3812 $revHash{"GTGC"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3813 $revHash{"TGAT"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3814 $revHash{"TAT"}="AAT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3815 $revHash{"CTAC"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3816 $revHash{"TGCG"}="ACGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3817 $revHash{"CTCA"}="AGTG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3818 $revHash{"CTTC"}="AAGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3819 $revHash{"GCTG"}="AGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3820 $revHash{"TATC"}="ATAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3821 $revHash{"TAAT"}="AATT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3822 $revHash{"ACT"}="AGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3823 $revHash{"TCGC"}="AGCG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3824 $revHash{"GGT"}="ACC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3825 $revHash{"TCC"}="AGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3826 $revHash{"TTGT"}="AAAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3827 $revHash{"TGAC"}="AGTC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3828 $revHash{"TTAC"}="AAGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3829 $revHash{"CGT"}="ACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3830 $revHash{"ATTA"}="AATT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3831 $revHash{"ATTG"}="AATC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3832 $revHash{"CCTC"}="AGGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3833 $revHash{"CCGG"}="GGCC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3834 $revHash{"CCGT"}="ACGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3835 $revHash{"TCCA"}="ATGG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3836 $revHash{"CGCC"}="GGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3837 $revHash{"GT"}="AC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3838 $revHash{"TTCG"}="AACG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3839 $revHash{"CCTG"}="AGGC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3840 $revHash{"TCT"}="AAG";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3841 $revHash{"GTAT"}="ATAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3842 $revHash{"GTCT"}="AGAC";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3843 $revHash{"GCTA"}="AGCT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3844 $revHash{"TACC"}="AGGT";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3845 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3846
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3847
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3848 sub allCaps{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3849 my $motif = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3850 $motif =~ s/a/A/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3851 $motif =~ s/c/C/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3852 $motif =~ s/t/T/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3853 $motif =~ s/g/G/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3854 return $motif;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3855 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3856
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3857
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3858 sub all_caps{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3859 my @strand = split(/\s*/,$_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3860 for my $i (0 ... $#strand){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3861 if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3862 if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3863 if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3864 if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3865 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3866 return join("",@strand);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3867 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3868 sub array_mean{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3869 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3870 my $sum = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3871 foreach my $val (@_){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3872 $sum = $sum + $val;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3873 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3874 return ($sum/scalar(@_));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3875 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3876 sub array_sum{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3877 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3878 my $sum = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3879 foreach my $val (@_){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3880 $sum = $sum + $val;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3881 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3882 return ($sum);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3883 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3884
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3885 sub variance{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3886 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3887 return 0 if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3888 my $mean = array_mean(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3889 my $num = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3890 return 0 if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3891 # print "mean = $mean .. array = >@_<\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3892 foreach my $ele (@_){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3893 # print "$num = $num + ($ele-$mean)*($ele-$mean)\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3894 $num = $num + ($ele-$mean)*($ele-$mean);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3895 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3896 my $var = $num / scalar(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3897 return $var;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3898 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3899
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3900 sub array_95confIntervals{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3901 return "NA" if scalar(@_) <= 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3902 my @sorted = sort { $a <=> $b } @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3903 # print "@sorted=",scalar(@sorted), "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3904 my $aDeechNo = int((scalar(@sorted) * 2.5) / 100);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3905 my $saaDeNo = int((scalar(@sorted) * 97.5) / 100);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3906
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3907 return ($sorted[$aDeechNo], $sorted[$saaDeNo]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3908 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3909
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3910 sub array_median{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3911 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3912 return $_[0] if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3913 my @sorted = sort { $a <=> $b } @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3914 my $totalno = scalar(@sorted);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3915
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3916 #print "sorted = @sorted\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3917
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3918 my $pick = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3919 if ($totalno % 2 == 1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3920 #print "odd set .. totalno = $totalno\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3921 my $mid = $totalno / 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3922 my $onehalfno = $mid - $mid % 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3923 my $secondhalfno = $onehalfno + 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3924 my $onehalf = $sorted[$onehalfno-1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3925 my $secondhalf = $sorted[$secondhalfno-1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3926 #print "onehalfno = $onehalfno and secondhalfno = $secondhalfno \n onehalf = $onehalf and secondhalf = $secondhalf\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3927
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3928 $pick = $secondhalf;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3929 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3930 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3931 #print "even set .. totalno = $totalno\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3932 my $mid = $totalno / 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3933 my $onehalfno = $mid;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3934 my $secondhalfno = $onehalfno + 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3935 my $onehalf = $sorted[$onehalfno-1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3936 my $secondhalf = $sorted[$secondhalfno-1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3937 #print "onehalfno = $onehalfno and secondhalfno = $secondhalfno \n onehalf = $onehalf and secondhalf = $secondhalf\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3938 $pick = ($onehalf + $secondhalf )/2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3939
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3940 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3941 #print "pick = $pick..\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3942 return $pick;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3943
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3944 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3945
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3946
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3947 sub array_numerical_sort{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3948 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3949 my @sorted = sort { $a <=> $b } @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3950 return (@sorted);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3951 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3952
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3953 sub array_smallest_number{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3954 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3955 return $_[0] if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3956 my @sorted = sort { $a <=> $b } @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3957 return $sorted[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3958 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3959
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3960
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3961 sub array_largest_number{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3962 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3963 return $_[0] if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3964 my @sorted = sort { $a <=> $b } @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3965 return $sorted[$#sorted];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3966 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3967
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3968
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3969 sub array_largest_number_arrayPosition{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3970 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3971 return 0 if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3972 my $maxpos = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3973 my @maxposes = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3974 my @maxvals = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3975 my $maxval = array_smallest_number(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3976 for my $i (0 ... $#_){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3977 if ($_[$i] > $maxval){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3978 $maxval = $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3979 $maxpos = $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3980 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3981 if ($_[$i] == $maxval){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3982 $maxval = $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3983 if (scalar(@maxposes) == 0){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3984 push @maxposes, $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3985 push @maxvals, $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3986
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3987 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3988 elsif ($maxvals[0] == $maxval){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3989 push @maxposes, $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3990 push @maxvals, $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3991 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3992 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3993 @maxposes = (); @maxvals = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3994 push @maxposes, $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3995 push @maxvals, $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3996 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3997
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3998 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
3999
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4000 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4001 return $maxpos if scalar(@maxposes) < 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4002 return (@maxposes);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4003 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4004
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4005 sub array_smallest_number_arrayPosition{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4006 return "NA" if scalar(@_) == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4007 return 0 if scalar(@_) == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4008 my $minpos = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4009 my @minposes = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4010 my @minvals = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4011 my $minval = array_largest_number(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4012 my $maxval = array_smallest_number(@_);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4013 #print "starting with $maxval, ending with $minval\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4014 for my $i (0 ... $#_){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4015 if ($_[$i] < $minval){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4016 $minval = $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4017 $minpos = $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4018 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4019 if ($_[$i] == $minval){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4020 $minval = $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4021 if (scalar(@minposes) == 0){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4022 push @minposes, $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4023 push @minvals, $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4024
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4025 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4026 elsif ($minvals[0] == $minval){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4027 push @minposes, $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4028 push @minvals, $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4029 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4030 else{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4031 @minposes = (); @minvals = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4032 push @minposes, $i;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4033 push @minvals, $_[$i];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4034 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4035
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4036 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4037
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4038 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4039 #print "minposes=@minposes\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4040
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4041 return $minpos if scalar(@minposes) < 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4042 return (@minposes);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4043 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4044
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4045 sub basic_stats{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4046 my @arr = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4047 # print " array_smallest_number= ", array_smallest_number(@arr)," array_largest_number= ", array_largest_number(@arr), " array_mean= ",array_mean(@arr),"\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4048 return ":";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4049 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4050 #xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4051
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4052 sub maftoAxt_multispecies {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4053 my $printer = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4054 #print "in maftoAxt_multispecies : got @_\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4055 my $fname=$_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4056 open(IN,"<$_[0]") or die "Cannot open $_[0]: $! \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4057 my $treedefinition = $_[1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4058 #print "treedefinition= $treedefinition\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4059
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4060 my @treedefinitions = MakeTrees($treedefinition);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4061
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4062
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4063 open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4064 my $counter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4065 my $exactspeciesset = $_[3];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4066 my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4067
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4068 $treedefinition=~s/[\)\(, ]/\t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4069 my @species=split(/\t+/,$treedefinition);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4070 @exactspeciesset_unarranged = @species;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4071 # print "species=@species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4072
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4073 my @exactspeciesarr=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4074
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4075
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4076 foreach my $def (@treedefinitions){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4077 $def=~s/[\)\(, ]/\t/g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4078 my @specs = split(/\t+/,$def);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4079 my @exactspecies=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4080 foreach my $spec (@specs){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4081 foreach my $espec (@exactspeciesset_unarranged){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4082 # print "pushing >$spec< nd >$espec<\n" if $spec eq $espec && $espec =~ /[a-zA-Z0-9]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4083 push @exactspecies, $spec if $spec eq $espec && $espec =~ /[a-zA-Z0-9]/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4084 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4085
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4086 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4087 #print "exactspecies = >@exactspecies<\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4088 push @exactspeciesarr, [@exactspecies];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4089 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4090 #<STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4091 #print "exactspeciesarr=@exactspeciesarr\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4092
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4093 ###########
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4094 my $select = 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4095 #select = 1 if all species need sequences to be present for each block otherwise, it is 0
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4096 #select = 2 only the allowed set make up the alignment. use the removeset
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4097 # information to detect alignmenets that have other important genomes aligned.
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4098 ###########
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4099 my @allowedset = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4100 @allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4101 @allowedset = join("_",0,@species) if $select == 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4102 #print "species = @species , allowedset =",join("\n", @allowedset) ," \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4103
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4104
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4105 foreach my $set (@exactspeciesarr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4106 my @openset = @$set;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4107 push @allowedset, join("_",0,@openset) if $select == 2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4108 # print "openset = >@openset<, allowedset = @allowedset and exactspecies = @exactspecies\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4109 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4110 # <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4111 my $start = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4112 my @sequences = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4113 my @titles = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4114 my $species_counter = "0";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4115 my $countermatch = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4116 my $outsideSpecies=0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4117
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4118 while(my $line = <IN>){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4119 next if $line =~ /^#/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4120 next if $line =~ /^i/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4121 # print "$line .. species = @species\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4122 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4123 my @fields = split(/\s+/,$line);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4124 chomp $line;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4125 if ($line =~ /^a /){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4126 $start = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4127 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4128
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4129 if ($line =~ /^s /){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4130 # print "fields1 = $fields[1] , start = $start\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4131
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4132 foreach my $sp (@species){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4133 if ($fields[1] =~ /$sp/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4134 $species_counter = $species_counter."_".$sp;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4135 push(@sequences, $fields[6]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4136 my @sp_info = split(/\./,$fields[1]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4137 my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4138 push(@titles, $title);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4139
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4140 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4141 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4142 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4143
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4144 if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start = 1)){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4145
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4146 my $arranged = reorderSpecies($species_counter, @species);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4147 # print "species_counter=$species_counter .. arranged = $arranged\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4148
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4149 my $stopper = 1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4150 my $arrno = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4151 foreach my $set (@allowedset){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4152 # print "checking $set with $arranged\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4153 if ($arranged eq $set){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4154 # print "checked $set with $arranged\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4155 $stopper = 0; last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4156 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4157 $arrno++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4158 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4159
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4160 if ($stopper == 0) {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4161 # print " accepted\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4162 @titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4163
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4164 @sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4165 my $filteredseq = filter_gaps(@sequences);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4166
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4167 if ($filteredseq ne "SHORT"){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4168 $counter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4169 print OUT join (" ",$counter, @titles), "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4170 print OUT $filteredseq, "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4171 print OUT "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4172 $countermatch++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4173 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4174 # my @filtered_seq = split(/\t/,filter_gaps(@sequences) );
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4175 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4176 else{#print "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4177 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4178
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4179 @sequences = (); @titles = (); $start = 0;$species_counter = "0";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4180 next;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4181
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4182 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4183 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4184 close IN;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4185 close OUT;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4186 # print "countermatch = $countermatch\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4187 # <STDIN>;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4188 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4189
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4190 sub reorderSpecies{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4191 my @inarr=@_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4192 my $currSpecies = shift (@inarr);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4193 my $ordered_species = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4194 my @species=@inarr;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4195 foreach my $order (@species){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4196 $ordered_species = $ordered_species."_".$order if $currSpecies=~ /$order/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4197 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4198 return $ordered_species;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4199
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4200 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4201
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4202 sub filter_gaps{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4203 my @sequences = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4204 # print "sequences sent are @sequences\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4205 my $seq_length = length($sequences[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4206 my $seq_no = scalar(@sequences);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4207 my $allgaps = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4208 for (1 ... $seq_no){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4209 $allgaps = $allgaps."-";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4210 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4211
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4212 my @seq_array = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4213 my $seq_counter = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4214 foreach my $seq (@sequences){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4215 # my @sequence = split(/\s*/,$seq);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4216 $seq_array[$seq_counter] = [split(/\s*/,$seq)];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4217 # push @seq_array, [@sequence];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4218 $seq_counter++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4219 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4220 my $g = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4221 while ( $g < $seq_length){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4222 last if (!exists $seq_array[0][$g]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4223 my $bases = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4224 for my $u (0 ... $#seq_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4225 $bases = $bases.$seq_array[$u][$g];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4226 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4227 # print $bases, "\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4228 if ($bases eq $allgaps){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4229 # print "bases are $bases, position is $g \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4230 for my $seq (@seq_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4231 splice(@$seq , $g, 1);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4232 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4233 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4234 else {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4235 $g++;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4236 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4237 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4238
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4239 my @outs = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4240
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4241 foreach my $seq (@seq_array){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4242 push(@outs, join("",@$seq));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4243 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4244 return "SHORT" if length($outs[0]) <=100;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4245 return (join("\n", @outs));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4246 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4247
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4248
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4249 sub allowedSetOfSpecies{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4250 my @allowed_species = split(/_/,$_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4251 unshift @allowed_species, 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4252 # print "allowed set = @allowed_species \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4253 my @output = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4254 for (0 ... scalar(@allowed_species) - 4){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4255 push(@output, join("_",@allowed_species));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4256 pop @allowed_species;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4257 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4258 return join(";",reverse(@output));
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4259
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4260 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4261
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4262
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4263 sub orderInfo{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4264 my @info = split(/;/,$_[0]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4265 # print "info = @info";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4266 my @old = split(/_/,$_[1]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4267 my @new = split(/_/,$_[2]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4268 shift @old; shift @new;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4269 my @outinfo = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4270 foreach my $spe (@new){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4271 for my $no (0 ... $#old){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4272 if ($spe eq $old[$no]){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4273 push(@outinfo, $info[$no]);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4274 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4275 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4276 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4277 # print "outinfo = @outinfo \n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4278 return join(";", @outinfo);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4279 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4280
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4281 #xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4282
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4283 sub printarr {
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4284 # print ">::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4285 # foreach my $line (@_) {print "$line\n";}
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4286 # print "::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::<\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4287 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4288
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4289 sub oneOf{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4290 my @arr = @_;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4291 my $element = $arr[$#arr];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4292 @arr = @arr[0 ... $#arr-1];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4293 my $present = 0;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4294
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4295 foreach my $el (@arr){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4296 $present = 1 if $el eq $element;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4297 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4298 return $present;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4299 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4300
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4301 #xxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4302
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4303 sub MakeTrees{
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4304 my $tree = $_[0];
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4305 # my @parts=($tree);
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4306 my @parts=();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4307
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4308 # print "parts=@parts\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4309
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4310 while (1){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4311 $tree =~ s/^\(//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4312 $tree =~ s/\)$//g;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4313 my @arr = ();
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4314
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4315 if ($tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)\)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4316 @arr = $tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)$/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4317 push @parts, "(".$tree.")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4318 $tree = $2;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4319 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4320 elsif ($tree =~ /^\(([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4321 @arr = $tree =~ /^([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4322 push @parts, "(".$tree.")";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4323 $tree = $1;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4324 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4325 elsif ($tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_]+)$/){
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4326 last;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4327 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4328 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4329 #print "parts=@parts\n";
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4330 return @parts;
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4331 }
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4332
4e31fad3f08e Uploaded tool tarball.
devteam
parents:
diff changeset
4333