comparison rapsodyn/PrepareFastqLight.pl @ 8:d857538d9fea draft

Uploaded
author mcharles
date Fri, 10 Oct 2014 07:05:36 -0400
parents 3f7b0788a1c4
children 0a6c1cfe4dc8
comparison
equal deleted inserted replaced
7:3f7b0788a1c4 8:d857538d9fea
1 #!/usr/bin/perl 1 #!/usr/bin/perl
2 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...)
3 #V1.0.2 added auto type detection
2 #V1.0.1 added log, option parameters 4 #V1.0.1 added log, option parameters
3 use strict; 5 use strict;
4 use warnings; 6 use warnings;
5 use Getopt::Long; 7 use Getopt::Long;
6 8
39 my $nb_read2_t=0; 41 my $nb_read2_t=0;
40 my $nb_base_read2_t=0; 42 my $nb_base_read2_t=0;
41 43
42 my $nb_base_current_t=0; 44 my $nb_base_current_t=0;
43 45
44
45 open(READ1, $read1_file) or die ("Can't open $read1_file\n"); 46 open(READ1, $read1_file) or die ("Can't open $read1_file\n");
46 open(READ2, $read2_file) or die ("Can't open $read2_file\n"); 47 open(READ2, $read2_file) or die ("Can't open $read2_file\n");
47 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); 48 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n");
48 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); 49 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n");
50 open (LF,">$log_file") or die("Can't open $log_file\n");
51
49 52
50 my $error1=0; 53 my $error1=0;
51 my $error2=0; 54 my $error2=0;
52 my $error3=0; 55 my $error3=0;
53 my $error4=0; 56 my $error4=0;
55 my $error6=0; 58 my $error6=0;
56 my $error7=0; 59 my $error7=0;
57 my $error8=0; 60 my $error8=0;
58 my $error9=0; 61 my $error9=0;
59 my $error10=0; 62 my $error10=0;
63
64 my $auto_type="";
65 my %qual;
66 if ($TYPE eq "auto"){
67 my $compt=0;
68 open(DETECT, $read1_file) or die ("Can't open $read1_file\n");
69 while (my $ligne1_r1 =<DETECT>){
70 my $ligne2_r1 =<DETECT>;
71 my $ligne3_r1 =<DETECT>;
72 my $ligne4_r1 =<DETECT>;
73 $compt++;
74 if ($ligne4_r1 =~ /^(.*)\s*$/i){
75 my $qual = $1;
76 my @q = split(//,$qual);
77 for (my $i=0;$i<=$#q;$i++){
78 my $num = ord($q[$i]);
79 if ($qual{$num}){
80 $qual{$num}++;
81 }
82 else {
83 $qual{$num} = 1;
84 }
85 #range sanger / illumina 1.8+ : 33->94
86 #range illumina 1.3->1.7 : 64->105
87 if ($num > 94){$auto_type = "illumina";last;}
88 if ($num < 64){$auto_type = "sanger";last;}
89 }
90 }
91 else {
92 print STDERR "Error in format detection : quality not recognized\n$ligne4_r1";
93 exit(0);
94 }
95
96 if ($auto_type ne ""){
97 last;
98 }
99
100 }
101 close (DETECT);
102 if ($auto_type eq ""){
103 print STDERR "Error in format detection : type not recognized parsing read1\n";
104 foreach my $key (sort {$a <=> $b} keys %qual){
105 print "$key\t:\t",$qual{$key},"\n";
106 }
107 exit(0);
108 }
109 else {
110 $TYPE = $auto_type;
111 }
112 }
113
114
115
60 116
61 while (my $ligne1_r1 =<READ1>){ 117 while (my $ligne1_r1 =<READ1>){
62 my $ligne2_r1 =<READ1>; 118 my $ligne2_r1 =<READ1>;
63 my $ligne3_r1 =<READ1>; 119 my $ligne3_r1 =<READ1>;
64 my $ligne4_r1 =<READ1>; 120 my $ligne4_r1 =<READ1>;
117 my $header2=""; 173 my $header2="";
118 my $repheader1=""; 174 my $repheader1="";
119 my $repheader2=""; 175 my $repheader2="";
120 176
121 177
122 if ($ligne1_r1 =~/^\@(.*?)\#/){ 178 if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){
123 $header1 = $1; 179 $header1 = $1;
124 } 180 }
125 181
126 if ($ligne3_r1 =~/^\+(.*?)\#/){ 182 if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){
127 $repheader1 = $1; 183 $repheader1 = $1;
128 } 184 }
129 185
130 if ($ligne1_r2 =~/^\@(.*?)\#/){ 186 if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){
131 $header2 = $1; 187 $header2 = $1;
132 } 188 }
133 189
134 if ($ligne3_r2 =~/^\+(.*?)\#/){ 190 if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){
135 $repheader2 = $1; 191 $repheader2 = $1;
136 } 192 }
137 #@ 2 sec 193 #@ 2 sec
138 194
139 ### Verification de la coherence sequence /qualité @ 1 sec 195 ### Verification de la coherence sequence /qualité @ 1 sec
296 close (READ1); 352 close (READ1);
297 close (READ2); 353 close (READ2);
298 close (OUT1); 354 close (OUT1);
299 close (OUT2); 355 close (OUT2);
300 356
301 open (LF,">$log_file") or die("Can't open $log_file\n");
302 print LF "\n####\t Fastq preparation \n"; 357 print LF "\n####\t Fastq preparation \n";
358 print LF "Fastq format : $TYPE\n";
303 print LF "## Before preparation\n"; 359 print LF "## Before preparation\n";
304 print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n"; 360 print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n";
305 print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n"; 361 print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n";
306 print LF "## After preparation\n"; 362 print LF "## After preparation\n";
307 print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n"; 363 print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n";