diff rapsodyn/rapsosnp_stats.pl @ 1:7f36bd129321 draft

Uploaded
author mcharles
date Wed, 10 Sep 2014 10:24:01 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rapsodyn/rapsosnp_stats.pl	Wed Sep 10 10:24:01 2014 -0400
@@ -0,0 +1,215 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+my $read1_row = $ARGV[0];
+my $read2_row = $ARGV[1];
+
+my $read1_trimmed = $ARGV[2];
+my $read2_trimmed = $ARGV[3];
+
+my $sam_row = $ARGV[4];
+my $sam_filtered = $ARGV[5];
+
+my $mpileup_variant = $ARGV[6];
+
+my $list_filtered = $ARGV[7];
+
+my $blast_filtered = $ARGV[8];
+
+my $snp_selected = $ARGV[9];
+
+
+open(INR1R, $read1_row) or die ("Can't open $read1_row\n");
+my $nbread=0;
+my $nbbase =0;
+while (my $line1=<INR1R>){
+	my $line2 = <INR1R>;
+	my $line3 = <INR1R>;
+	my $line4 = <INR1R>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+	}
+}
+print "Row Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+close (INR1R);
+
+open(INR2R, $read2_row) or die ("Can't open $read2_row\n");
+$nbread=0;
+$nbbase =0;
+while (my $line1=<INR2R>){
+	my $line2 = <INR2R>;
+	my $line3 = <INR2R>;
+	my $line4 = <INR2R>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+	}
+}
+print "Row Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+close (INR2R);
+
+open(INR1T, $read1_trimmed) or die ("Can't open $read1_trimmed\n");
+$nbread=0;
+$nbbase =0;
+while (my $line1=<INR1T>){
+	my $line2 = <INR1T>;
+	my $line3 = <INR1T>;
+	my $line4 = <INR1T>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+print "Trimmed Reads 1\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+close (INR1T);
+
+open(INR2T, $read2_trimmed) or die ("Can't open $read2_trimmed\n");
+$nbread=0;
+$nbbase =0;
+while (my $line1=<INR2T>){
+	my $line2 = <INR2T>;
+	my $line3 = <INR2T>;
+	my $line4 = <INR2T>;
+	if ($line1 =~ /^@/){
+		$nbread++;
+		if ($line2=~/([ATGCNX]+)/i){
+			$nbbase += length($1);
+		}
+		else {
+			print STDERR "$line1\n$line2\n";
+		}
+	}
+}
+print "Trimmed Reads 2\t\tNumber of reads : ",$nbread,"\tnumber of bases : ",$nbbase,"\n";
+close (INR2T);
+
+print "\nSAM row\n";
+open(SAM, $sam_row) or die ("Can't open $sam_row\n");
+my %bitscore;
+while (my $line=<SAM>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+
+print "bitscore\t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $key,"\t*\t";
+}
+print "\n";
+
+print " number \t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $bitscore{$key},"\t*\t";
+}
+print "\n";
+close (SAM);
+
+print "\nSAM filtered\n";
+open(SAMF, $sam_filtered) or die ("Can't open $sam_filtered\n");
+undef %bitscore;
+while (my $line=<SAMF>){
+	if (($line !~ /^\@SQ/)&&($line !~ /^\@PG/)){
+		my @fields = split(/\s+/,$line);
+		my $bit = $fields[1];
+		if ($bitscore{$bit}){
+			$bitscore{$bit}++;
+		}
+		else {
+			$bitscore{$bit}=1;
+		}
+	}
+}
+
+print "bitscore\t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $key,"\t*\t";
+}
+print "\n";
+
+print " number \t";
+foreach my $key (sort {$bitscore{$b} <=> $bitscore{$a}} keys %bitscore) {
+	print $bitscore{$key},"\t*\t";
+}
+print "\n";
+close (SAMF);
+
+print "\nMPILEUP variant\n";
+open(MPV, $mpileup_variant) or die ("Can't open $mpileup_variant\n");
+
+my $nbvariant=0;
+while (my $line=<MPV>){
+	my @fields = split(/\s+/,$line);
+	if ($#fields >= 4){
+		my $match = $fields[4];
+		$match =~ s/\$//g; #the read start at this position
+		$match =~ s/\^.//g; #the read end at this position followed by quality char
+		if ($match =~/[ACGTNacgtn]+/){
+			$nbvariant++;
+		}
+	}
+	else {
+		#print STDERR "Erreur : $line\n";
+	}
+}
+
+print "Variant detected :\t$nbvariant\n";
+close (MPV);
+
+
+print "\nMPILEUP filtered without dubious position\n";
+open(LF, $list_filtered) or die ("Can't open $list_filtered\n");
+$nbvariant=0;
+while (my $line=<LF>){
+	$nbvariant++;
+}
+
+print "Variant selected :\t$nbvariant\n";
+close (LF);
+
+print "\nMPILEUP filtered without dubious position and BLAST\n";
+open(BF, $blast_filtered) or die ("Can't open $blast_filtered\n");
+$nbvariant=0;
+while (my $line=<BF>){
+	$nbvariant++;
+}
+
+print "Variant selected :\t$nbvariant\n";
+close (BF);
+
+
+print "\nSNP selected after mpileup filtering : \t";
+open(SNP, $snp_selected) or die ("Can't open $snp_selected\n");
+$nbvariant=0;
+while (my $line=<SNP>){
+	$nbvariant++;
+}
+
+print "$nbvariant\n";
+close (SNP);
+
+
+
+
+
+
+
+