Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/PrepareFastqLight.pl @ 15:56d328bce3a7 draft default tip
Uploaded
author | mcharles |
---|---|
date | Thu, 29 Jan 2015 08:54:06 -0500 |
parents | 0a6c1cfe4dc8 |
children |
comparison
equal
deleted
inserted
replaced
14:93e6f2af1ce2 | 15:56d328bce3a7 |
---|---|
1 #!/usr/bin/perl | 1 #!/usr/bin/perl |
| 2 #v1.1.1 new check on read synchro |
2 #v1.1.0 manage empty files | 3 #v1.1.0 manage empty files |
3 #v1.0.4 bug correction, last read not considered | 4 #v1.0.4 bug correction, last read not considered |
4 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) | 5 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) |
5 #V1.0.2 added auto type detection | 6 #V1.0.2 added auto type detection |
6 #V1.0.1 added log, option parameters | 7 #V1.0.1 added log, option parameters |
130 my $ligne4_r1 =<READ1>; | 131 my $ligne4_r1 =<READ1>; |
131 my $ligne1_r2 =<READ2>; | 132 my $ligne1_r2 =<READ2>; |
132 my $ligne2_r2 =<READ2>; | 133 my $ligne2_r2 =<READ2>; |
133 my $ligne3_r2 =<READ2>; | 134 my $ligne3_r2 =<READ2>; |
134 my $ligne4_r2 =<READ2>; | 135 my $ligne4_r2 =<READ2>; |
135 # chomp($ligne1_r1); | |
136 # chomp($ligne2_r1); | |
137 # chomp($ligne3_r1); | |
138 # chomp($ligne4_r1); | |
139 # chomp($ligne2_r1); | |
140 | 136 |
141 $compt++; | 137 $compt++; |
142 $nb_read1++; | 138 $nb_read1++; |
143 $nb_read2++; | 139 $nb_read2++; |
144 | 140 |
145 #@ 1 sec | 141 |
146 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ | 142 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ |
147 if ($VERBOSE eq "ON"){ | 143 if ($VERBOSE eq "ON"){ |
148 print "Error in file format"; | 144 print "Error in file format"; |
149 if ($ligne1_r1){print $ligne1_r1;} | 145 if ($ligne1_r1){print $ligne1_r1;} |
150 if ($ligne2_r1){print $ligne2_r1;} | 146 if ($ligne2_r1){print $ligne2_r1;} |
171 print $ligne4_r2; | 167 print $ligne4_r2; |
172 print "\n"; | 168 print "\n"; |
173 } | 169 } |
174 $error2++; | 170 $error2++; |
175 } | 171 } |
176 #@ 1 - 2 sec | 172 |
177 else { | 173 else { |
178 | 174 |
179 my $length_seq1 = length(chomp($ligne2_r1)); | 175 my $length_seq1 = length(chomp($ligne2_r1)); |
180 my $length_qual1 =length(chomp($ligne4_r1)); | 176 my $length_qual1 =length(chomp($ligne4_r1)); |
181 my $seq1; | 177 my $seq1; |
188 my $header1=""; | 184 my $header1=""; |
189 my $header2=""; | 185 my $header2=""; |
190 my $repheader1=""; | 186 my $repheader1=""; |
191 my $repheader2=""; | 187 my $repheader2=""; |
192 | 188 |
193 | 189 my @tbl_header1; |
194 if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){ | 190 my @tbl_header2; |
| 191 if ($ligne1_r1 =~/^\@(.*?)\s*$/){ |
195 $header1 = $1; | 192 $header1 = $1; |
196 } | 193 @tbl_header1 = split(//,$header1); |
197 | 194 } |
198 if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){ | 195 |
| 196 if ($ligne3_r1 =~/^\+(.*?)\s*$/){ |
199 $repheader1 = $1; | 197 $repheader1 = $1; |
200 } | 198 } |
201 | 199 |
202 if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){ | 200 if ($ligne1_r2 =~/^\@(.*?)\s*$/){ |
203 $header2 = $1; | 201 $header2 = $1; |
| 202 @tbl_header2 = split(//,$header2); |
204 } | 203 } |
205 | 204 |
206 if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){ | 205 if ($ligne3_r2 =~/^\+(.*?)\s*$/){ |
207 $repheader2 = $1; | 206 $repheader2 = $1; |
208 } | 207 } |
209 #@ 2 sec | 208 my $diffheader=0; |
210 | 209 if ($#tbl_header1 == $#tbl_header2){ |
211 ### Verification de la coherence sequence /qualité @ 1 sec | 210 for (my $i=0;$i<=$#tbl_header1;$i++){ |
212 if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){ | 211 if ($tbl_header1[$i] ne $tbl_header2[$i]){ |
| 212 $diffheader++; |
| 213 } |
| 214 } |
| 215 } |
| 216 |
| 217 |
| 218 |
| 219 ### Verification de la coherence sequence /qualité |
| 220 if ((!$header1)||(!$header2)){ |
213 if ($VERBOSE eq "ON"){ | 221 if ($VERBOSE eq "ON"){ |
214 print "Error in header : empty\n"; | 222 print "Error in header : empty\n"; |
215 print $ligne1_r1; | 223 print $ligne1_r1; |
216 print $ligne2_r1; | 224 print $ligne2_r1; |
217 print $ligne3_r1; | 225 print $ligne3_r1; |
222 print $ligne4_r2; | 230 print $ligne4_r2; |
223 print "\n"; | 231 print "\n"; |
224 } | 232 } |
225 $error3++; | 233 $error3++; |
226 } | 234 } |
227 elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){ | 235 elsif ((($repheader1)&&($header1 ne $repheader1))||(($repheader2)&&($header2 ne $repheader2))){ |
228 if ($VERBOSE eq "ON"){ | 236 if ($VERBOSE eq "ON"){ |
229 print "Error in header ref : empty\n"; | 237 print "Error : difference in header and header repeat\n"; |
230 print $ligne1_r1; | 238 print $ligne1_r1; |
231 print $ligne2_r1; | 239 print $ligne2_r1; |
232 print $ligne3_r1; | 240 print $ligne3_r1; |
233 print $ligne4_r1; | 241 print $ligne4_r1; |
234 print $ligne1_r2; | 242 print $ligne1_r2; |
235 print $ligne2_r2; | 243 print $ligne2_r2; |
236 print $ligne3_r2; | 244 print $ligne3_r2; |
237 print $ligne4_r2; | 245 print $ligne4_r2; |
238 print "\n"; | 246 print "\n"; |
239 } | 247 } |
240 $error3++; | 248 $error4++; |
241 } | 249 } |
242 elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){ | 250 elsif ($#tbl_header1 != $#tbl_header2){ |
243 if ($VERBOSE eq "ON"){ | 251 if ($VERBOSE eq "ON"){ |
244 print "Error in header : different\n"; | 252 print "Error : difference in header size between reads\n"; |
245 print $ligne1_r1; | 253 print $ligne1_r1; |
246 print $ligne2_r1; | 254 print $ligne2_r1; |
247 print $ligne3_r1; | 255 print $ligne3_r1; |
248 print $ligne4_r1; | 256 print $ligne4_r1; |
249 print $ligne1_r2; | 257 print $ligne1_r2; |
252 print $ligne4_r2; | 260 print $ligne4_r2; |
253 print "\n"; | 261 print "\n"; |
254 } | 262 } |
255 $error4++; | 263 $error4++; |
256 } | 264 } |
257 elsif (($TYPE eq "sanger")&&($header1 ne $header2)){ | 265 elsif ($diffheader > 1 ){ # More than ...1 and ...2 difference in read1 and read2 header |
258 if ($VERBOSE eq "ON"){ | 266 if ($VERBOSE eq "ON"){ |
259 print "Error in header : different\n"; | 267 print "Error can't establish synchro between reads, more than 1 difference between headers\n"; |
260 print $ligne1_r1; | 268 print $ligne1_r1; |
261 print $ligne2_r1; | 269 print $ligne2_r1; |
262 print $ligne3_r1; | 270 print $ligne3_r1; |
263 print $ligne4_r1; | 271 print $ligne4_r1; |
264 print $ligne1_r2; | 272 print $ligne1_r2; |