diff rapsodyn/PileupVariant.pl @ 10:0a6c1cfe4dc8 draft

Uploaded
author mcharles
date Mon, 19 Jan 2015 04:33:21 -0500
parents 3f7b0788a1c4
children
line wrap: on
line diff
--- a/rapsodyn/PileupVariant.pl	Mon Oct 20 05:58:31 2014 -0400
+++ b/rapsodyn/PileupVariant.pl	Mon Jan 19 04:33:21 2015 -0500
@@ -3,39 +3,94 @@
 use strict;
 use warnings;
 use Getopt::Long;
+#v1.1.0 choose exclusion file
+#v1.0.2 manage empty files
+#v1.0.1 bug correction
+#V1.0.1 added log, option parameters
 
 my $input_pileup_file;
 my $output_pileup_file;
+my $input_exclusion_file;
 my $log_file;
 
 my $nb_base_covered=0;
 my $nb_variant=0;
+my $nb_variant_conserved=0;
+my $do_exclusion=0;
+my $nb_exclusion=0;
+my $nb_excluded=0;
+my %exclusion;
 GetOptions (
 "input_pileup_file=s" => \$input_pileup_file,
+"input_exclusion_file=s" => \$input_exclusion_file,
 "log_file=s" => \$log_file
 ) or die("Error in command line arguments\n");
 
 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n");
+if ( -z IN){
+	exit(0);
+}
+
+if ($input_exclusion_file){
+	open (EX,$input_exclusion_file)  or die ("Can't open $input_exclusion_file\n");
+	if ( -z EX){
+		
+	}
+	else {
+		$do_exclusion=1;
+		while (my $line=<EX>){
+			$nb_exclusion++;
+			chomp($line);
+			my @fields = split(/\s+/,$line);
+			if (($fields[0])&&($fields[1])){
+				$exclusion{"$fields[0]\t$fields[1]"}=1;
+			}
+			else {
+				print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n";
+			}
+		}
+		
+	}
+	close (EX);
+}
 
 #Extraction des variants
 my $nb_line=0;
 while (my $line=<IN>){
 	#print $line;
-	$nb_base_covered++;
-	$line =~ s/\$//g; #the read start at this position
-	$line =~ s/\^.//g; #the read end at this position followed by quality char
+	if ($line !~ /^\s*$/){
+		my @fields = split(/\s+/,$line);
+		if ($fields[4]){
+			$nb_base_covered++;
+			my $pile = $fields[4];
+			$pile =~ s/\$//g; #the read start at this position
+			$pile =~ s/\^.//g; #the read end at this position followed by quality char
+			if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp
+				$nb_variant++;
+				if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){
+					
+				}
+				else {
+					print $line;
+					$nb_variant_conserved++;
+				}
+			}
+		}
+		elsif ($fields[3]==0){
+		}
+		else {
+			print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n"; 
+		}
+	}
 	#print $line;
 	
-	my @field = split(/\s+/,$line);
-	if ($field[4]=~/[ATGCN]/i){
-		print $line;
-		$nb_variant++;
-	}
+	
 }
 close(IN);
 
 open (LF,">$log_file") or die("Can't open $log_file\n");
 print LF "\n####\t Variant extraction \n";
-print LF "Position covered :\t$nb_base_covered\n";
-print LF "Variant detected :\t$nb_variant\n";
+print LF "Position covered\t:\t$nb_base_covered\n";
+print LF "Variant detected\t:\t$nb_variant\n";
+print LF "Variant conserved\t:\t$nb_variant_conserved\n";
 close (LF);