annotate mpileupfilter/mpileupfilter.pl @ 5:26d2c851d48e draft

Uploaded
author mcharles
date Mon, 16 Jun 2014 06:18:07 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
2 use strict;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
3 use Getopt::Long;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
4
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
5 #
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
6 # Filter a pileup file on forward/reverse presence and %read having the variant
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
7 # The error code
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
8 # 1 : multiple variant type detected insertion/deletion/mutation
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
9 # 1i : inconsistency in insertion
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
10 # 1d : inconsistency in deletion
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
11 # 1m : inconsistency in mutation
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
12 # 2 : insufficient depth
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
13 # 3 : insufficient variant frequency
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
14 # 4 : variant position not covered by forward and reverse reads
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
15 # 5 : variant with other variant in neighbourhood
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
16 # 6 : too much depth
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
17 # 8 : parsing error (couldn't parse the mpileup line correctly)
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
18 # 9 : parsing error (couldn't parse the readbase string correctly)
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
19
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
20
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
21 my $inputfile;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
22 my $logfile;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
23 my $MIN_DISTANCE=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
24 my $MIN_VARIANTFREQUENCY=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
25 my $MIN_FORWARDREVERSE=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
26 my $MIN_DEPTH=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
27 my $MAX_DEPTH=500;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
28 my $VERBOSE=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
29 my $ONLY_UNFILTERED_VARIANT="OFF";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
30
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
31 if ($#ARGV<0){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
32 print "\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
33 print "perl 020_FilterPileupv6 -input_file <mpileup_file> [OPTION]\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
34 print "-input_file \tinputfile in mpileup format\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
35 print "-log_file \tlogfile containing discarded mpileup lines and the errorcode associated\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
36 print "-min_depth \tminimum depth required [1]\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
37 print "-max_depth \tmaximim depth (position with more coverage will be discarded) [100]\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
38 print "-min_frequency \tminimum variant frequency (0->1) [1] (default 1 => 100% reads show the variant at this position)\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
39 print "-min_distance \tminimum distance between variant [0]\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
40 print "-min_forward_and_reverse \tminimum number of reads in forward and reverse covering the variant required [0]\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
41 print "\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
42 exit(0);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
43 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
44
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
45 GetOptions (
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
46 "input_file=s" => \$inputfile,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
47 "log_file=s" => \$logfile,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
48 "min_depth=i" => \$MIN_DEPTH,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
49 "max_depth=i" => \$MAX_DEPTH,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
50 "min_frequency=f" => \$MIN_VARIANTFREQUENCY,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
51 "min_distance=i" => \$MIN_DISTANCE,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
52 "min_forward_and_reverse=i" => \$MIN_FORWARDREVERSE,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
53 "variant_only=s" => \$ONLY_UNFILTERED_VARIANT,
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
54 "v=i" => \$VERBOSE
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
55 ) or die("Error in command line arguments\n");
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
56
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
57
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
58 open(IF, $inputfile) or die("Can't open $inputfile\n");
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
59
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
60 my @tbl_line;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
61 my @tbl_variant_position;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
62 my @tbl_variant_chr;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
63 my @tbl_variant_refbase;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
64 my @tbl_variant_coverage;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
65 my @tbl_variant_readbase_string;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
66 my @tbl_variant_quality_string;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
67
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
68 #Extraction des variants
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
69 my $nb_line=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
70 while (my $line=<IF>){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
71 $nb_line++;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
72 if (($nb_line % 1000000 == 0)&&($VERBOSE==1)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
73 print "$nb_line\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
74 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
75 my $error_code=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
76 if ($line=~/(.*?)\s+(\d+)\s+([ATGCN])\s+(\d+)\s+(.*?)\s+(.*?)$/){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
77 my $current_chromosome = $1;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
78 my $current_position = $2;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
79 my $current_refbase = $3;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
80 my $current_coverage = $4;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
81 my $current_readbase_string = $5;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
82 my $current_quality_string = $6;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
83
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
84 #Suppression of mPileUp special character
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
85 $current_readbase_string =~ s/\$//g; #the read start at this position
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
86 $current_readbase_string =~ s/\^.//g; #the read end at this position followed by quality char
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
87
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
88 if ($current_readbase_string =~ /[ATGCNatgcn\d]/){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
89 push(@tbl_line,$line);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
90 push(@tbl_variant_chr,$current_chromosome);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
91 push(@tbl_variant_position,$current_position);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
92 push(@tbl_variant_refbase,$current_refbase);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
93 push(@tbl_variant_coverage,$current_coverage);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
94 push(@tbl_variant_readbase_string,$current_readbase_string);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
95 push(@tbl_variant_quality_string,$current_quality_string);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
96 if ($ONLY_UNFILTERED_VARIANT eq "ON"){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
97 print $line;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
98 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
99
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
100 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
101 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
102 #Position with no variant
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
103 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
104
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
105 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
106 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
107 #Error Parsing
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
108 print STDERR "$line #8";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
109 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
110 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
111 close(IF);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
112
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
113 if ($ONLY_UNFILTERED_VARIANT eq "ON"){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
114 exit(0);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
115 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
116
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
117 ####Checking the distance between variant and other filter
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
118
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
119 if ($logfile){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
120 open(LF,">$logfile") or die ("Cant't open $logfile\n");
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
121 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
122
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
123 for (my $i=0;$i<=$#tbl_line;$i++){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
124 # print "ligne : $tbl_line[$i]\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
125
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
126 my $error_code=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
127 if ($i==0){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
128 #Comparing $i and $i+1 for neighbourhood filter;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
129 if ($#tbl_line>0){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
130 if (($tbl_variant_chr[$i+1] eq $tbl_variant_chr[$i])&&($tbl_variant_position[$i]+$MIN_DISTANCE>=$tbl_variant_position[$i+1])){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
131 $error_code=5;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
132 chomp($tbl_line[$i]);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
133 if ($logfile){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
134 print LF "$tbl_line[$i]\tcode:$error_code\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
135 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
136 next;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
137 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
138 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
139
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
140 #Additionnal filters
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
141 $error_code = check_error($tbl_variant_chr[$i],$tbl_variant_position[$i],$tbl_variant_refbase[$i],$tbl_variant_coverage[$i],$tbl_variant_readbase_string[$i]);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
142
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
143 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
144 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
145 #Compairing $i and $i-1 for neighbourhood filter
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
146 if (($tbl_variant_chr[$i-1] eq $tbl_variant_chr[$i])&&($tbl_variant_position[$i-1]+$MIN_DISTANCE>=$tbl_variant_position[$i])){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
147 $error_code=5;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
148 chomp($tbl_line[$i]);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
149 if ($logfile){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
150 print LF "$tbl_line[$i]\tcode:$error_code\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
151 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
152 next;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
153 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
154 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
155 #Additionnal filters
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
156 $error_code = check_error($tbl_variant_chr[$i],$tbl_variant_position[$i],$tbl_variant_refbase[$i],$tbl_variant_coverage[$i],$tbl_variant_readbase_string[$i]);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
157 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
158 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
159 if ($error_code == 0){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
160 print $tbl_line[$i];
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
161 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
162 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
163 chomp($tbl_line[$i]);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
164 if ($logfile){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
165 print LF "$tbl_line[$i]\tcode:$error_code\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
166 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
167 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
168 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
169
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
170 if ($logfile){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
171 close (LF);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
172 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
173
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
174 sub check_error{
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
175 my $current_chromosome = shift;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
176 my $current_position = shift;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
177 my $current_refbase = shift;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
178 my $current_coverage = shift;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
179 my $current_readbase_string = shift;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
180
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
181 # print "test : $current_readbase_string\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
182
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
183
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
184
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
185 #Extraction of insertions
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
186
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
187 ##################################################################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
188 # my @IN = $current_readbase_string =~ m/\+[0-9]+[ACGTNacgtn]+/g;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
189 # my @DEL = $current_readbase_string =~ m/\-[0-9]+[ACGTNacgtn]+/g;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
190 # print "IN : @IN\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
191 # print "DEL :@DEL\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
192 #$current_readbase_string=~s/[\+\-][0-9]+[ACGTNacgtn]+//g;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
193 ##################################################################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
194 #!!! marche pas : exemple .+1Ct. correspond a . / +1C / t /. mais le match de l'expression vire +1Ct
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
195 ##################################################################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
196
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
197 # => parcours de boucle
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
198 my @readbase = split(//,$current_readbase_string);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
199 my $cleaned_readbase_string="";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
200 my @IN;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
201 my @DEL;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
202 my $current_IN="";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
203 my $current_DEL="";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
204 my $current_size=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
205
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
206 for (my $i=0;$i<=$#readbase;$i++){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
207 if ($readbase[$i] eq "+"){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
208 #Ouverture de IN
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
209 $current_IN="+";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
210
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
211 #Recuperation de la taille
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
212 my $sub = substr $current_readbase_string,$i;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
213 if ($sub=~/^\+(\d+)/){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
214 $current_size = $1;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
215 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
216 my $remaining_size = $current_size;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
217 while (($remaining_size>0)&&($i<=$#readbase)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
218 $i++;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
219 $current_IN.=$readbase[$i];
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
220 if ($readbase[$i]=~ /[ATGCNatgcn]/){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
221 $remaining_size--;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
222 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
223 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
224 push(@IN,$current_IN);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
225 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
226 elsif ($readbase[$i] eq "-"){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
227 #Ouverture de DEL
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
228 $current_DEL="-";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
229
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
230 #Recuperation de la taille
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
231 my $sub = substr $current_readbase_string,$i;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
232 if ($sub=~/^\-(\d+)/){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
233 $current_size = $1;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
234 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
235 my $remaining_size = $current_size;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
236 while (($remaining_size>0)&&($i<=$#readbase)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
237 $i++;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
238 $current_DEL.=$readbase[$i];
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
239 if ($readbase[$i]=~ /[ATGCNatgcn]/){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
240 $remaining_size--;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
241 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
242 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
243 push(@DEL,$current_DEL);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
244
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
245 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
246 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
247 #Ajout a la string
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
248 $cleaned_readbase_string .= $readbase[$i];
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
249 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
250 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
251
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
252
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
253 # print "IN : @IN\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
254 # print "DEL :@DEL\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
255 # print "$cleaned_readbase_string\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
256
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
257 my @current_readbase_array = split(//,$cleaned_readbase_string);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
258
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
259 #Filtering : error detection
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
260
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
261 if ($#current_readbase_array+1 != $current_coverage){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
262 return 9;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
263 #parsing error (couldn't parse the readbase string correctly)
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
264 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
265 elsif ($current_coverage<$MIN_DEPTH){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
266 return 2;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
267 # 2 : insufficient depth
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
268 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
269 elsif ($current_coverage>$MAX_DEPTH){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
270 return 6;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
271 # 6 : too much depth
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
272 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
273 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
274 if ($#IN>=0){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
275 if (($cleaned_readbase_string=~/[ACGTNacgtn]/)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
276 return 1;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
277 # 1 : variant type overload (multiple variant type detected insertion/deletion/mutation)
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
278 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
279 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
280 ########## TEST de coherence des insertions ################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
281 # for (my $i=0;$i<=$#IN;$i++){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
282 # if (uc($IN[0]) ne uc($IN[$i])){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
283 # print uc($IN[0]),"\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
284 # print uc($IN[$i]),"\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
285 # return "1i";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
286 # }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
287 # }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
288 ###########################################################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
289
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
290 if($#IN+1 < $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
291 return 3;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
292 # 3 : insufficient variant frequency
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
293 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
294 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
295 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
296 elsif ($#DEL>=0){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
297 if (($cleaned_readbase_string=~/[ACGTNacgtn]/)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
298 return 1;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
299 # 1 : variant type overload (multiple variant type detected insertion/deletion/mutation)
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
300 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
301 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
302 ########## TEST de coherence des deletions ################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
303 # for (my $i=0;$i<=$#DEL;$i++){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
304 # if (uc($DEL[0]) ne uc($DEL[$i])){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
305 # print uc($DEL[0]),"\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
306 # print uc($DEL[$i]),"\n";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
307 # return "1d";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
308 # }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
309 # }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
310 ###########################################################
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
311
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
312 if($#DEL+1 < $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
313 return 3;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
314 # 3 : insufficient variant frequency
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
315 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
316 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
317 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
318 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
319 my $nbA=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
320 $nbA++ while ($current_readbase_string =~ m/A/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
321 my $nbC=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
322 $nbC++ while ($current_readbase_string =~ m/C/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
323 my $nbT=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
324 $nbT++ while ($current_readbase_string =~ m/T/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
325 my $nbG=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
326 $nbG++ while ($current_readbase_string =~ m/G/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
327 my $nbN=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
328 $nbN++ while ($current_readbase_string =~ m/N/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
329 my $nba=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
330 $nba++ while ($current_readbase_string =~ m/a/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
331 my $nbc=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
332 $nbc++ while ($current_readbase_string =~ m/c/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
333 my $nbt=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
334 $nbt++ while ($current_readbase_string =~ m/t/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
335 my $nbg=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
336 $nbg++ while ($current_readbase_string =~ m/g/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
337 my $nbn=0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
338 $nbn++ while ($current_readbase_string =~ m/n/g);
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
339
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
340 if (($nbA+$nba>0)&&($nbT+$nbt+$nbG+$nbg+$nbC+$nbc+$nbN+$nbn>0)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
341 return "1m";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
342 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
343 if (($nbT+$nbt>0)&&($nbA+$nba+$nbG+$nbg+$nbC+$nbc+$nbN+$nbn>0)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
344 return "1m";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
345 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
346 if (($nbG+$nbg>0)&&($nbA+$nba+$nbT+$nbt+$nbC+$nbc+$nbN+$nbn>0)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
347 return "1m";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
348 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
349 if (($nbC+$nbc>0)&&($nbA+$nba+$nbT+$nbt+$nbG+$nbg+$nbN+$nbn>0)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
350 return "1m";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
351 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
352 if (($nbN+$nbn>0)&&($nbA+$nba+$nbT+$nbt+$nbG+$nbg+$nbC+$nbc>0)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
353 return "1m";
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
354 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
355
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
356 if ($nbA+$nba >= $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
357 if (($nbA<$MIN_FORWARDREVERSE)||($nba<$MIN_FORWARDREVERSE)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
358 return 4;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
359 # 4 : variant position not covered by forward and reverse reads
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
360 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
361 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
362 elsif ($nbT+$nbt >= $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
363 if (($nbT<$MIN_FORWARDREVERSE)||($nbt<$MIN_FORWARDREVERSE)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
364 return 4;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
365 # 4 : variant position not covered by forward and reverse reads
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
366 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
367 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
368 elsif ($nbG+$nbg >= $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
369 if (($nbG<$MIN_FORWARDREVERSE)||($nbg<$MIN_FORWARDREVERSE)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
370 return 4;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
371 # 4 : variant position not covered by forward and reverse reads
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
372 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
373 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
374 elsif ($nbC+$nbc >= $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
375 if (($nbC<$MIN_FORWARDREVERSE)||($nbc<$MIN_FORWARDREVERSE)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
376 return 4;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
377 # 4 : variant position not covered by forward and reverse reads
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
378 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
379 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
380 elsif ($nbN+$nbn >= $current_coverage*$MIN_VARIANTFREQUENCY){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
381 if (($nbN<$MIN_FORWARDREVERSE)||($nbn<$MIN_FORWARDREVERSE)){
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
382 return 4;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
383 # 4 : variant position not covered by forward and reverse reads
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
384 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
385 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
386 else {
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
387 return 3;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
388 # 3 : insufficient variant frequency
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
389 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
390 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
391 }
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
392
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
393 return 0;
26d2c851d48e Uploaded
mcharles
parents:
diff changeset
394 }