comparison rapsodyn/PrepareFastqLight.pl @ 15:56d328bce3a7 draft default tip

Uploaded
author mcharles
date Thu, 29 Jan 2015 08:54:06 -0500
parents 0a6c1cfe4dc8
children
comparison
equal deleted inserted replaced
14:93e6f2af1ce2 15:56d328bce3a7
1 #!/usr/bin/perl 1 #!/usr/bin/perl
2 #v1.1.1 new check on read synchro
2 #v1.1.0 manage empty files 3 #v1.1.0 manage empty files
3 #v1.0.4 bug correction, last read not considered 4 #v1.0.4 bug correction, last read not considered
4 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) 5 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...)
5 #V1.0.2 added auto type detection 6 #V1.0.2 added auto type detection
6 #V1.0.1 added log, option parameters 7 #V1.0.1 added log, option parameters
130 my $ligne4_r1 =<READ1>; 131 my $ligne4_r1 =<READ1>;
131 my $ligne1_r2 =<READ2>; 132 my $ligne1_r2 =<READ2>;
132 my $ligne2_r2 =<READ2>; 133 my $ligne2_r2 =<READ2>;
133 my $ligne3_r2 =<READ2>; 134 my $ligne3_r2 =<READ2>;
134 my $ligne4_r2 =<READ2>; 135 my $ligne4_r2 =<READ2>;
135 # chomp($ligne1_r1);
136 # chomp($ligne2_r1);
137 # chomp($ligne3_r1);
138 # chomp($ligne4_r1);
139 # chomp($ligne2_r1);
140 136
141 $compt++; 137 $compt++;
142 $nb_read1++; 138 $nb_read1++;
143 $nb_read2++; 139 $nb_read2++;
144 140
145 #@ 1 sec 141
146 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ 142 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){
147 if ($VERBOSE eq "ON"){ 143 if ($VERBOSE eq "ON"){
148 print "Error in file format"; 144 print "Error in file format";
149 if ($ligne1_r1){print $ligne1_r1;} 145 if ($ligne1_r1){print $ligne1_r1;}
150 if ($ligne2_r1){print $ligne2_r1;} 146 if ($ligne2_r1){print $ligne2_r1;}
171 print $ligne4_r2; 167 print $ligne4_r2;
172 print "\n"; 168 print "\n";
173 } 169 }
174 $error2++; 170 $error2++;
175 } 171 }
176 #@ 1 - 2 sec 172
177 else { 173 else {
178 174
179 my $length_seq1 = length(chomp($ligne2_r1)); 175 my $length_seq1 = length(chomp($ligne2_r1));
180 my $length_qual1 =length(chomp($ligne4_r1)); 176 my $length_qual1 =length(chomp($ligne4_r1));
181 my $seq1; 177 my $seq1;
188 my $header1=""; 184 my $header1="";
189 my $header2=""; 185 my $header2="";
190 my $repheader1=""; 186 my $repheader1="";
191 my $repheader2=""; 187 my $repheader2="";
192 188
193 189 my @tbl_header1;
194 if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){ 190 my @tbl_header2;
191 if ($ligne1_r1 =~/^\@(.*?)\s*$/){
195 $header1 = $1; 192 $header1 = $1;
196 } 193 @tbl_header1 = split(//,$header1);
197 194 }
198 if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){ 195
196 if ($ligne3_r1 =~/^\+(.*?)\s*$/){
199 $repheader1 = $1; 197 $repheader1 = $1;
200 } 198 }
201 199
202 if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){ 200 if ($ligne1_r2 =~/^\@(.*?)\s*$/){
203 $header2 = $1; 201 $header2 = $1;
202 @tbl_header2 = split(//,$header2);
204 } 203 }
205 204
206 if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){ 205 if ($ligne3_r2 =~/^\+(.*?)\s*$/){
207 $repheader2 = $1; 206 $repheader2 = $1;
208 } 207 }
209 #@ 2 sec 208 my $diffheader=0;
210 209 if ($#tbl_header1 == $#tbl_header2){
211 ### Verification de la coherence sequence /qualité @ 1 sec 210 for (my $i=0;$i<=$#tbl_header1;$i++){
212 if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){ 211 if ($tbl_header1[$i] ne $tbl_header2[$i]){
212 $diffheader++;
213 }
214 }
215 }
216
217
218
219 ### Verification de la coherence sequence /qualité
220 if ((!$header1)||(!$header2)){
213 if ($VERBOSE eq "ON"){ 221 if ($VERBOSE eq "ON"){
214 print "Error in header : empty\n"; 222 print "Error in header : empty\n";
215 print $ligne1_r1; 223 print $ligne1_r1;
216 print $ligne2_r1; 224 print $ligne2_r1;
217 print $ligne3_r1; 225 print $ligne3_r1;
222 print $ligne4_r2; 230 print $ligne4_r2;
223 print "\n"; 231 print "\n";
224 } 232 }
225 $error3++; 233 $error3++;
226 } 234 }
227 elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){ 235 elsif ((($repheader1)&&($header1 ne $repheader1))||(($repheader2)&&($header2 ne $repheader2))){
228 if ($VERBOSE eq "ON"){ 236 if ($VERBOSE eq "ON"){
229 print "Error in header ref : empty\n"; 237 print "Error : difference in header and header repeat\n";
230 print $ligne1_r1; 238 print $ligne1_r1;
231 print $ligne2_r1; 239 print $ligne2_r1;
232 print $ligne3_r1; 240 print $ligne3_r1;
233 print $ligne4_r1; 241 print $ligne4_r1;
234 print $ligne1_r2; 242 print $ligne1_r2;
235 print $ligne2_r2; 243 print $ligne2_r2;
236 print $ligne3_r2; 244 print $ligne3_r2;
237 print $ligne4_r2; 245 print $ligne4_r2;
238 print "\n"; 246 print "\n";
239 } 247 }
240 $error3++; 248 $error4++;
241 } 249 }
242 elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){ 250 elsif ($#tbl_header1 != $#tbl_header2){
243 if ($VERBOSE eq "ON"){ 251 if ($VERBOSE eq "ON"){
244 print "Error in header : different\n"; 252 print "Error : difference in header size between reads\n";
245 print $ligne1_r1; 253 print $ligne1_r1;
246 print $ligne2_r1; 254 print $ligne2_r1;
247 print $ligne3_r1; 255 print $ligne3_r1;
248 print $ligne4_r1; 256 print $ligne4_r1;
249 print $ligne1_r2; 257 print $ligne1_r2;
252 print $ligne4_r2; 260 print $ligne4_r2;
253 print "\n"; 261 print "\n";
254 } 262 }
255 $error4++; 263 $error4++;
256 } 264 }
257 elsif (($TYPE eq "sanger")&&($header1 ne $header2)){ 265 elsif ($diffheader > 1 ){ # More than ...1 and ...2 difference in read1 and read2 header
258 if ($VERBOSE eq "ON"){ 266 if ($VERBOSE eq "ON"){
259 print "Error in header : different\n"; 267 print "Error can't establish synchro between reads, more than 1 difference between headers\n";
260 print $ligne1_r1; 268 print $ligne1_r1;
261 print $ligne2_r1; 269 print $ligne2_r1;
262 print $ligne3_r1; 270 print $ligne3_r1;
263 print $ligne4_r1; 271 print $ligne4_r1;
264 print $ligne1_r2; 272 print $ligne1_r2;