Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/PrepareFastqLight.pl @ 8:d857538d9fea draft
Uploaded
author | mcharles |
---|---|
date | Fri, 10 Oct 2014 07:05:36 -0400 |
parents | 3f7b0788a1c4 |
children | 0a6c1cfe4dc8 |
line wrap: on
line diff
--- a/rapsodyn/PrepareFastqLight.pl Tue Oct 07 10:34:34 2014 -0400 +++ b/rapsodyn/PrepareFastqLight.pl Fri Oct 10 07:05:36 2014 -0400 @@ -1,4 +1,6 @@ #!/usr/bin/perl +#v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) +#V1.0.2 added auto type detection #V1.0.1 added log, option parameters use strict; use warnings; @@ -41,11 +43,12 @@ my $nb_base_current_t=0; - open(READ1, $read1_file) or die ("Can't open $read1_file\n"); open(READ2, $read2_file) or die ("Can't open $read2_file\n"); open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); +open (LF,">$log_file") or die("Can't open $log_file\n"); + my $error1=0; my $error2=0; @@ -58,6 +61,59 @@ my $error9=0; my $error10=0; +my $auto_type=""; +my %qual; +if ($TYPE eq "auto"){ + my $compt=0; + open(DETECT, $read1_file) or die ("Can't open $read1_file\n"); + while (my $ligne1_r1 =<DETECT>){ + my $ligne2_r1 =<DETECT>; + my $ligne3_r1 =<DETECT>; + my $ligne4_r1 =<DETECT>; + $compt++; + if ($ligne4_r1 =~ /^(.*)\s*$/i){ + my $qual = $1; + my @q = split(//,$qual); + for (my $i=0;$i<=$#q;$i++){ + my $num = ord($q[$i]); + if ($qual{$num}){ + $qual{$num}++; + } + else { + $qual{$num} = 1; + } + #range sanger / illumina 1.8+ : 33->94 + #range illumina 1.3->1.7 : 64->105 + if ($num > 94){$auto_type = "illumina";last;} + if ($num < 64){$auto_type = "sanger";last;} + } + } + else { + print STDERR "Error in format detection : quality not recognized\n$ligne4_r1"; + exit(0); + } + + if ($auto_type ne ""){ + last; + } + + } + close (DETECT); + if ($auto_type eq ""){ + print STDERR "Error in format detection : type not recognized parsing read1\n"; + foreach my $key (sort {$a <=> $b} keys %qual){ + print "$key\t:\t",$qual{$key},"\n"; + } + exit(0); + } + else { + $TYPE = $auto_type; + } +} + + + + while (my $ligne1_r1 =<READ1>){ my $ligne2_r1 =<READ1>; my $ligne3_r1 =<READ1>; @@ -119,19 +175,19 @@ my $repheader2=""; - if ($ligne1_r1 =~/^\@(.*?)\#/){ + if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){ $header1 = $1; } - if ($ligne3_r1 =~/^\+(.*?)\#/){ + if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){ $repheader1 = $1; } - if ($ligne1_r2 =~/^\@(.*?)\#/){ + if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){ $header2 = $1; } - if ($ligne3_r2 =~/^\+(.*?)\#/){ + if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){ $repheader2 = $1; } #@ 2 sec @@ -298,8 +354,8 @@ close (OUT1); close (OUT2); -open (LF,">$log_file") or die("Can't open $log_file\n"); print LF "\n####\t Fastq preparation \n"; +print LF "Fastq format : $TYPE\n"; print LF "## Before preparation\n"; print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n"; print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n";