changeset 13:827da1a9a326 draft

Uploaded
author mcharles
date Mon, 19 Jan 2015 10:38:29 -0500
parents c794dafd1ae5
children 93e6f2af1ce2
files rapsodyn/MergeLogFiles.pl
diffstat 1 files changed, 253 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/rapsodyn/MergeLogFiles.pl	Mon Jan 19 04:46:31 2015 -0500
+++ b/rapsodyn/MergeLogFiles.pl	Mon Jan 19 10:38:29 2015 -0500
@@ -11,6 +11,257 @@
 ) or die("Error in command line arguments\n");
 
 my @files = split(/,/,$input_log_files);
-for (my $i=0;$i<=$#files;$i++){
-	print $files[$i],"\n";
+
+
+
+my $FastqPrep_detected=0;	# set to 1 once a "Fastq preparation" header is found in any input log
+my $FastqPrep_type="NA";	# word captured from the "Fastq format :" line; the last log parsed wins
+my $FastqPrep_before_read1_nbreads=0;	# the eight counters below are running sums over all input logs
+my $FastqPrep_before_read1_nbbases=0;
+my $FastqPrep_before_read2_nbreads=0;
+my $FastqPrep_before_read2_nbbases=0;
+my $FastqPrep_after_read1_nbreads=0;
+my $FastqPrep_after_read1_nbbases=0;
+my $FastqPrep_after_read2_nbreads=0;
+my $FastqPrep_after_read2_nbbases=0;
+# Sam filtering: score => summed count tables, plus the grand totals of those counts.
+my $Samfilter_detected = 0;
+my %Samfilter_before_hash;
+my %Samfilter_after_hash;
+my $Samfilter_before_nbreads=0;
+my $Samfilter_after_nbreads=0;
+# Variant extraction: section text is copied verbatim and concatenated across logs.
+my $Pileupvariant_detected = 0;
+my $Pileupvariant="";
+# List Filtering: section text is copied verbatim and concatenated across logs.
+my $Listfiltering_detected = 0;
+my $Listfiltering="";
+# MPileup filtering: section text is copied verbatim and concatenated across logs.
+my $Pileupfiltering_detected = 0;
+my $Pileupfiltering="";
+# Blast filtering: sums of the first integer found on each of the two lines after the header.
+my $Blastfiltering_detected = 0;
+my $Blastfiltering_checked = 0;
+my $Blastfiltering_selected = 0;
+
+
+
+for (my $f=0;$f<=$#files;$f++){	# merge pass: scan every input log and fold its sections into the accumulators
+	my $current_file = $files[$f];
+	open(IN, '<', $current_file) or die ("Can't open $current_file: $!\n");	# 3-arg open: the name is never parsed as a mode or pipe
+	if ( -z IN){	# zero-size log: nothing to merge
+		next;
+	}
+	# A header line selects the section; the lines belonging to it are consumed inside the branch.
+	while (my $line =<IN>){
+		if ($line =~ /Fastq preparation/){
+			$FastqPrep_detected=1;
+			my $line1 = <IN>;	# matched against "Fastq format : <word>"
+			my $line2 = <IN>;	# read only to advance past it
+			my $line3 = <IN>;	# two counts -> "before" read1 reads/bases
+			my $line4 = <IN>;	# two counts -> "before" read2 reads/bases
+			my $line5 = <IN>;	# read only to advance past it
+			my $line6 = <IN>;	# two counts -> "after" read1 reads/bases
+			my $line7 = <IN>;	# two counts -> "after" read2 reads/bases
+			if ($line1=~/Fastq format \:\s*(\w+)\s*/){
+				$FastqPrep_type=$1;
+			}
+			if ($line3=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){	# first integer after each of two colons
+				$FastqPrep_before_read1_nbreads += $1;
+				$FastqPrep_before_read1_nbbases += $2;
+			}
+			if ($line4=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_before_read2_nbreads += $1;
+				$FastqPrep_before_read2_nbbases += $2;
+			}
+			if ($line6=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_after_read1_nbreads += $1;
+				$FastqPrep_after_read1_nbbases += $2;
+			}
+			if ($line7=~/.*?\:\s*(\d+).*?\:\s*(\d+)/){
+				$FastqPrep_after_read2_nbreads += $1;
+				$FastqPrep_after_read2_nbbases += $2;
+			}
+			
+		}
+		elsif ($line =~ /Blast filtering/){
+			$Blastfiltering_detected=1;
+			my $line1 = <IN>;	# first integer on this line is added to the "checked" total
+			my $line2 = <IN>;	# first integer on this line is added to the "selected" total
+			my $current_checked = 0;
+			my $current_selected = 0;
+			if ($line1=~/(\d+)/){
+				$current_checked = $1;
+				$Blastfiltering_checked += $current_checked;
+			}
+			if ($line2=~/(\d+)/){
+				$current_selected = $1;
+				$Blastfiltering_selected += $current_selected;
+			}
+			
+		}
+		elsif ($line=~/Sam filtering/){
+			$Samfilter_detected=1;
+			my $line1 = <IN>;	# read only to advance past it
+			my $line2 = <IN>;	# score row, before filtering
+			my $line3 = <IN>;	# count row, before filtering
+			my $line4 = <IN>;	# read only to advance past it
+			my $line5 = <IN>;	# score row, after filtering
+			my $line6 = <IN>;	# count row, after filtering
+			my @tbl_score_before = split(/[\*\:]/,$line2);	# cells are delimited by '*' or ':'
+			my @tbl_number_before = split(/[\*\:]/,$line3);
+			# Cell i of a score row is paired with cell i of the matching count row.
+			my @tbl_score_after = split(/[\*\:]/,$line5);
+			my @tbl_number_after = split(/[\*\:]/,$line6);
+			
+			if ($#tbl_score_before != $#tbl_number_before){
+				print STDERR "Error Formating in Sam Filtering\n";
+				exit(1);	# malformed input is a failure: report a non-zero status
+			}
+			else {
+				for (my $i=0;$i<=$#tbl_score_before;$i++){
+					if ($tbl_score_before[$i] =~ /(\d+)/){
+						my $current_score_before = $1;
+						if ($tbl_number_before[$i] =~ /(\d+)/){
+							my $current_number_before = $1;
+							$Samfilter_before_nbreads += $current_number_before;
+							if ($Samfilter_before_hash{$current_score_before}){
+								$Samfilter_before_hash{$current_score_before} += $current_number_before;
+							}
+							else {
+								$Samfilter_before_hash{$current_score_before} = $current_number_before;
+							}
+						}
+						else {	# a score cell without a numeric count cell
+							print STDERR "Error Formating in Sam Filtering\n";
+							exit(1);
+						}
+					}
+					else {	# cell holds no score (row label or trailing newline): ignore it
+						next;
+					}
+				}
+			}
+			# Same pairing and accumulation for the "after filtering" rows.
+			if ($#tbl_score_after != $#tbl_number_after){
+				print STDERR "Error Formating in Sam Filtering\n";
+				exit(1);
+			}
+			else {
+				for (my $i=0;$i<=$#tbl_score_after;$i++){
+					if ($tbl_score_after[$i] =~ /(\d+)/){
+						my $current_score_after = $1;
+						if ($tbl_number_after[$i] =~ /(\d+)/){
+							my $current_number_after = $1;
+							$Samfilter_after_nbreads += $current_number_after;
+							if ($Samfilter_after_hash{$current_score_after}){
+								$Samfilter_after_hash{$current_score_after} += $current_number_after;
+							}
+							else {
+								$Samfilter_after_hash{$current_score_after} = $current_number_after;
+							}
+						}
+						else {
+							print STDERR "Error Formating in Sam Filtering\n";
+							exit(1);
+						}
+					}
+					else {
+						next;
+					}
+				}
+			}
+		}
+		elsif ($line=~/Variant extraction/){
+			$Pileupvariant_detected=1;
+			$Pileupvariant .= $line;
+			while ($line = <IN>){	# copy lines verbatim up to and including the first blank line
+				$Pileupvariant .= $line;
+				if ($line=~/^\s*$/){
+					last;
+				}
+			}
+		}
+		elsif ($line=~/List Filtering/){
+			$Listfiltering_detected =1;
+			$Listfiltering .= $line;
+			while ($line = <IN>){	# copy lines verbatim up to and including the first blank line
+				$Listfiltering .= $line;
+				if ($line=~/^\s*$/){
+					last;
+				}
+			}
+		}
+		elsif ($line=~/MPileup filtering/){
+			$Pileupfiltering_detected =1;
+			$Pileupfiltering.= $line;
+			while ($line = <IN>){	# copy lines verbatim up to and including the first blank line
+				$Pileupfiltering .= $line;
+				if ($line=~/^\s*$/){
+					last;
+				}
+			}
+		}
+		# Lines that match no section header are ignored.
+	}
+	close (IN);
+}
+
+if ($FastqPrep_detected == 1){	# print the merged Fastq section only if some log contained one
+	print "####\tFastq preparation\n";
+	print "Fastq format : ",$FastqPrep_type,"\n";
+	print "## Before preparation\n";
+	print "#Read1 :	",$FastqPrep_before_read1_nbreads,"\t#Base : ",$FastqPrep_before_read1_nbbases,"\n";
+	print "#Read2 :	",$FastqPrep_before_read2_nbreads,"\t#Base : ",$FastqPrep_before_read2_nbbases,"\n";
+	print "## After preparation\n";
+	print "#Read1 :	",$FastqPrep_after_read1_nbreads,"\t#Base : ",$FastqPrep_after_read1_nbbases,"\n";
+	print "#Read2 :	",$FastqPrep_after_read2_nbreads,"\t#Base : ",$FastqPrep_after_read2_nbbases,"\n";
+	print "\n";	# blank line closes the section
+}
+
+
+if ($Samfilter_detected == 1){	# print the merged Sam filtering tables only if some log contained them
+	print "####	 Sam filtering \n";
+	print "## Before filtering ($Samfilter_before_nbreads)\n";
+	print "bitscore	:";
+	foreach my $key (sort{$Samfilter_before_hash{$b}<=>$Samfilter_before_hash{$a}} keys %Samfilter_before_hash){	# scores by descending summed count
+		print "\t$key\t*";
+	}
+	print "\n";
+	print " number 	:";
+	foreach my $key (sort{$Samfilter_before_hash{$b}<=>$Samfilter_before_hash{$a}} keys %Samfilter_before_hash){	# same ordering expression as the score row
+		print "\t",$Samfilter_before_hash{$key},"\t*";
+	}
+	print "\n";
+	print "## After filtering ($Samfilter_after_nbreads)\n";
+	print "bitscore	:";
+	foreach my $key (sort{$Samfilter_after_hash{$b}<=>$Samfilter_after_hash{$a}} keys %Samfilter_after_hash){
+		print "\t$key\t*";	# '*' after every cell, as in the other rows, so a split on '*' or ':' gives equal field counts
+	}
+	print "\n";
+	print " number 	:";
+	foreach my $key (sort{$Samfilter_after_hash{$b}<=>$Samfilter_after_hash{$a}} keys %Samfilter_after_hash){
+		print "\t",$Samfilter_after_hash{$key},"\t*";
+	}
+	print "\n";
+	print "\n";	# blank line closes the section
+}
+
+if ($Pileupvariant_detected == 1){	# replay the collected "Variant extraction" text unchanged
+	print $Pileupvariant,"\n";
+}
+
+if ($Listfiltering_detected == 1){	# replay the collected "List Filtering" text unchanged
+	print $Listfiltering,"\n";
+}
+
+if ($Blastfiltering_detected == 1){	# print the Blast filtering totals summed over all logs
+	print "####	 Blast filtering\n";
+	print "Variant checked  :\t$Blastfiltering_checked\n";
+	print "Variant selected :\t$Blastfiltering_selected\n";
+	print "\n";	# blank line closes the section
+}
+
+if ($Pileupfiltering_detected == 1){	# replay the collected "MPileup filtering" text unchanged
+	print $Pileupfiltering,"\n";
 }
\ No newline at end of file