comparison rapsodyn/PrepareFastqLight.pl @ 10:0a6c1cfe4dc8 draft

Uploaded
author mcharles
date Mon, 19 Jan 2015 04:33:21 -0500
parents d857538d9fea
children 56d328bce3a7
comparison
equal deleted inserted replaced
9:0e7c6fe60646 10:0a6c1cfe4dc8
1 #!/usr/bin/perl 1 #!/usr/bin/perl
2 #v1.1.0 manage empty files
3 #v1.0.4 bug correction, last read not considered
2 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) 4 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...)
3 #V1.0.2 added auto type detection 5 #V1.0.2 added auto type detection
4 #V1.0.1 added log, option parameters 6 #V1.0.1 added log, option parameters
5 use strict; 7 use strict;
6 use warnings; 8 use warnings;
46 open(READ1, $read1_file) or die ("Can't open $read1_file\n"); 48 open(READ1, $read1_file) or die ("Can't open $read1_file\n");
47 open(READ2, $read2_file) or die ("Can't open $read2_file\n"); 49 open(READ2, $read2_file) or die ("Can't open $read2_file\n");
48 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); 50 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n");
49 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); 51 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n");
50 open (LF,">$log_file") or die("Can't open $log_file\n"); 52 open (LF,">$log_file") or die("Can't open $log_file\n");
53
54 if (( -z READ1)&&( -z READ2)){
55 exit(0);
56 }
57 elsif (( -z READ1)||( -z READ2)){
58 print STDERR "One empty File\n";
59 exit(0);
60 }
51 61
52 62
53 my $error1=0; 63 my $error1=0;
54 my $error2=0; 64 my $error2=0;
55 my $error3=0; 65 my $error3=0;
111 } 121 }
112 } 122 }
113 123
114 124
115 125
116 126 my $compt=0;
117 while (my $ligne1_r1 =<READ1>){ 127 while (my $ligne1_r1 =<READ1>){
118 my $ligne2_r1 =<READ1>; 128 my $ligne2_r1 =<READ1>;
119 my $ligne3_r1 =<READ1>; 129 my $ligne3_r1 =<READ1>;
120 my $ligne4_r1 =<READ1>; 130 my $ligne4_r1 =<READ1>;
121 my $ligne1_r2 =<READ2>; 131 my $ligne1_r2 =<READ2>;
122 my $ligne2_r2 =<READ2>; 132 my $ligne2_r2 =<READ2>;
123 my $ligne3_r2 =<READ2>; 133 my $ligne3_r2 =<READ2>;
124 my $ligne4_r2 =<READ2>; 134 my $ligne4_r2 =<READ2>;
125 135 # chomp($ligne1_r1);
136 # chomp($ligne2_r1);
137 # chomp($ligne3_r1);
138 # chomp($ligne4_r1);
139 # chomp($ligne2_r1);
140
141 $compt++;
126 $nb_read1++; 142 $nb_read1++;
127 $nb_read2++; 143 $nb_read2++;
128 144
129 #@ 1 sec 145 #@ 1 sec
130 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ 146 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){
158 $error2++; 174 $error2++;
159 } 175 }
160 #@ 1 - 2 sec 176 #@ 1 - 2 sec
161 else { 177 else {
162 178
163 my $length_seq1 = length($ligne2_r1); 179 my $length_seq1 = length(chomp($ligne2_r1));
164 my $length_qual1 =length($ligne4_r1); 180 my $length_qual1 =length(chomp($ligne4_r1));
165 my $seq1; 181 my $seq1;
166 my $qual1; 182 my $qual1;
167 183
168 my $length_seq2 = length($ligne2_r2); 184 my $length_seq2 = length(chomp($ligne2_r2));
169 my $length_qual2 =length($ligne4_r2); 185 my $length_qual2 =length(chomp($ligne4_r2));
170 my $seq2; 186 my $seq2;
171 my $qual2; 187 my $qual2;
172 my $header1=""; 188 my $header1="";
173 my $header2=""; 189 my $header2="";
174 my $repheader1=""; 190 my $repheader1="";
208 } 224 }
209 $error3++; 225 $error3++;
210 } 226 }
211 elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){ 227 elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){
212 if ($VERBOSE eq "ON"){ 228 if ($VERBOSE eq "ON"){
213 print "Error in header refgsd : empty\n"; 229 print "Error in header ref : empty\n";
214 print $ligne1_r1; 230 print $ligne1_r1;
215 print $ligne2_r1; 231 print $ligne2_r1;
216 print $ligne3_r1; 232 print $ligne3_r1;
217 print $ligne4_r1; 233 print $ligne4_r1;
218 print $ligne1_r2; 234 print $ligne1_r2;
254 $error4++; 270 $error4++;
255 } 271 }
256 elsif (($length_seq1 != $length_qual1)||($length_seq2 != $length_qual2)){ 272 elsif (($length_seq1 != $length_qual1)||($length_seq2 != $length_qual2)){
257 if ($VERBOSE eq "ON"){ 273 if ($VERBOSE eq "ON"){
258 print "Error in seq/qual length\n"; 274 print "Error in seq/qual length\n";
275 print "$length_seq1 / $length_qual1 \t $length_seq2 / $length_qual2\n";
259 print $ligne1_r1; 276 print $ligne1_r1;
260 print $ligne2_r1; 277 print $ligne2_r1;
261 print $ligne3_r1; 278 print $ligne3_r1;
262 print $ligne4_r1; 279 print $ligne4_r1;
280 print "\n";
263 print $ligne1_r2; 281 print $ligne1_r2;
264 print $ligne2_r2; 282 print $ligne2_r2;
265 print $ligne3_r2; 283 print $ligne3_r2;
266 print $ligne4_r2; 284 print $ligne4_r2;
267 print "\n"; 285 print "\n";
268 } 286 }
269 $error5++; 287 $error5++;
270 } 288 }
271 #@ 1 - 2 sec 289 #@ 1 - 2 sec
272 else { 290 else {
291 #print "TEST : $compt\n";
273 ### Parsing sequence & qualité 292 ### Parsing sequence & qualité
274 if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){ 293 if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){
275 $seq1 = $1; 294 $seq1 = $1;
276 $nb_base_read1 += length($seq1); 295 $nb_base_read1 += length($seq1);
277 } 296 }