Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/ParseBlastForUniqueMatch.pl @ 7:3f7b0788a1c4 draft
Uploaded
author | mcharles |
---|---|
date | Tue, 07 Oct 2014 10:34:34 -0400 |
parents | 442a7c88b886 |
children | 0a6c1cfe4dc8 |
comparison
equal
deleted
inserted
replaced
6:1776b8ddd87e | 7:3f7b0788a1c4 |
---|---|
1 #!/usr/bin/perl | 1 #!/usr/bin/perl |
2 #V1.0.1 added log, option parameters | |
2 use strict; | 3 use strict; |
3 use warnings; | 4 use warnings; |
5 use Getopt::Long; | |
4 | 6 |
5 | 7 |
6 my $input_variant_file = $ARGV[0]; | 8 my $input_variant_file; |
7 my $input_blast_file = $ARGV[1]; | 9 my $input_blast_file; |
8 my $window_length = $ARGV[2]; | 10 my $log_file; |
9 my $nb_mismatch_max = $ARGV[3]; | 11 my $WINDOW_LENGTH= 50; |
12 my $NB_MISMATCH_MAX = 3; | |
10 | 13 |
14 GetOptions ( | |
15 "input_variant_file=s" => \$input_variant_file, | |
16 "input_blast_file=s" => \$input_blast_file, | |
17 "window_length=s" => \$WINDOW_LENGTH, | |
18 "log_file=s" => \$log_file, | |
19 "nb_mismatch_max=s" => \$NB_MISMATCH_MAX | |
20 ) or die("Error in command line arguments\n"); | |
21 | |
22 my $nb_variant_checked=0; | |
23 my $nb_variant_selected=0; | |
11 my %hash_name; | 24 my %hash_name; |
12 | 25 |
13 open(INB, $input_blast_file) or die ("Can't open $input_blast_file\n"); | 26 open(INB, $input_blast_file) or die ("Can't open $input_blast_file\n"); |
14 while (my $line =<INB>){ | 27 while (my $line =<INB>){ |
15 my @fields = split (/\s+/,$line); | 28 my @fields = split (/\s+/,$line); |
69 chomp($subject_aln); | 82 chomp($subject_aln); |
70 | 83 |
71 my $nb_gap_query=0; | 84 my $nb_gap_query=0; |
72 | 85 |
73 if (length($query_aln) == length($subject_aln)){ | 86 if (length($query_aln) == length($subject_aln)){ |
74 if (length($query_aln)<$window_length-$nb_mismatch_max){ | 87 if (length($query_aln)<$WINDOW_LENGTH-$NB_MISMATCH_MAX){ |
75 } | 88 } |
76 else { | 89 else { |
77 my @q = split(//,$query_aln); | 90 my @q = split(//,$query_aln); |
78 my @s = split(//,$subject_aln); | 91 my @s = split(//,$subject_aln); |
79 for (my $i=0;$i<=$#q;$i++){ | 92 for (my $i=0;$i<=$#q;$i++){ |
80 my $global_idx = $query_start-1+$i-$nb_gap_query; | 93 my $global_idx = $query_start-1+$i-$nb_gap_query; |
81 if ($q[$i] eq "-"){ | 94 if ($q[$i] eq "-"){ |
82 if ($global_idx < $window_length){ | 95 if ($global_idx < $WINDOW_LENGTH){ |
83 $compt_mismatch_5p++; | 96 $compt_mismatch_5p++; |
84 } | 97 } |
85 elsif ($global_idx > $window_length){ | 98 elsif ($global_idx > $WINDOW_LENGTH){ |
86 $compt_mismatch_3p++; | 99 $compt_mismatch_3p++; |
87 } | 100 } |
88 $nb_gap_query++; #On compte les gap dans la query pour les soustraire de l'index global | 101 $nb_gap_query++; #On compte les gap dans la query pour les soustraire de l'index global |
89 } | 102 } |
90 else { | 103 else { |
91 if ($q[$i] ne $s[$i]){ | 104 if ($q[$i] ne $s[$i]){ |
92 if ($global_idx < $window_length){ | 105 if ($global_idx < $WINDOW_LENGTH){ |
93 $compt_mismatch_5p++; | 106 $compt_mismatch_5p++; |
94 } | 107 } |
95 elsif ($global_idx > $window_length){ | 108 elsif ($global_idx > $WINDOW_LENGTH){ |
96 $compt_mismatch_3p++; | 109 $compt_mismatch_3p++; |
97 } | 110 } |
98 } | 111 } |
99 } | 112 } |
100 } | 113 } |
101 $compt_mismatch_5p += $query_start-1; | 114 $compt_mismatch_5p += $query_start-1; |
102 $compt_mismatch_3p += $window_length *2 + 1 - $query_stop; | 115 $compt_mismatch_3p += $WINDOW_LENGTH *2 + 1 - $query_stop; |
103 | 116 |
104 # for (my $i=0;$i<$window_length;$i++){ | 117 # for (my $i=0;$i<$window_length;$i++){ |
105 # if ($tbl_q_aln[$i] eq "#"){ | 118 # if ($tbl_q_aln[$i] eq "#"){ |
106 # $compt_mismatch_5p++; | 119 # $compt_mismatch_5p++; |
107 # } | 120 # } |
120 # $compt_mismatch_3p++; | 133 # $compt_mismatch_3p++; |
121 # } | 134 # } |
122 # else { | 135 # else { |
123 # } | 136 # } |
124 # } | 137 # } |
125 if (($compt_mismatch_5p <= $nb_mismatch_max)||($compt_mismatch_3p <= $nb_mismatch_max)){ | 138 if (($compt_mismatch_5p <= $NB_MISMATCH_MAX)||($compt_mismatch_3p <= $NB_MISMATCH_MAX)){ |
126 $hash_name{$name}++; | 139 $hash_name{$name}++; |
127 } | 140 } |
128 | 141 |
129 } | 142 } |
130 } | 143 } |
149 close (INB); | 162 close (INB); |
150 | 163 |
151 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); | 164 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); |
152 | 165 |
153 while (my $ligne = <INV>) { | 166 while (my $ligne = <INV>) { |
167 $nb_variant_checked++; | |
154 | 168 |
155 my @champs = split (/\s+/,$ligne); | 169 my @champs = split (/\s+/,$ligne); |
156 my $header = $champs[0]."_".$champs[1]; | 170 my $header = $champs[0]."_".$champs[1]; |
157 | 171 |
158 if ($hash_name{$header}){ | 172 if ($hash_name{$header}){ |
159 if ($hash_name{$header}==1){ | 173 if ($hash_name{$header}==1){ |
160 print $ligne; | 174 print $ligne; |
175 $nb_variant_selected++; | |
161 } | 176 } |
162 } | 177 } |
163 else { | 178 else { |
164 #print STDERR "No blast result for ",$header,"\n"; | 179 #print STDERR "No blast result for ",$header,"\n"; |
165 } | 180 } |
167 | 182 |
168 } | 183 } |
169 | 184 |
170 close(INV); | 185 close(INV); |
171 | 186 |
187 open (LF,">$log_file") or die("Can't open $log_file\n"); | |
188 print LF "\n####\t Blast filtering \n"; | |
189 print LF "Variant checked :\t$nb_variant_checked\n"; | |
190 print LF "Variant selected :\t$nb_variant_selected\n"; | |
191 close (LF); | |
192 | |
172 | 193 |
173 # foreach my $key (sort keys %hash_name){ | 194 # foreach my $key (sort keys %hash_name){ |
174 # print $key,"\t",$hash_name{$key},"\n"; | 195 # print $key,"\t",$hash_name{$key},"\n"; |
175 | 196 |
176 # } | 197 # } |