diff AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl @ 6:ebb0ac9b6fa9 draft

planemo upload
author gandres
date Mon, 23 May 2016 17:49:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl	Mon May 23 17:49:17 2016 -0400
@@ -0,0 +1,127 @@
+#!/usr/bin/perl
+
+use strict;
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -v, --vcf           <VCF input>
+    -o, --out           <output>
+    -s, --step          <step (in bp)>
+~;
+$usage .= "\n";
+
+my ($vcf,$out,$step,$min_depth);
+$min_depth = 5;
+
+GetOptions(
+	"vcf=s"      => \$vcf,
+	"out=s"      => \$out,
+	"min_depth=s"=> \$min_depth,
+	"step=s"     => \$step,
+);
+
+
+die $usage
+  if ( !$vcf || !$step || !$out );
+
+if ($step =~/^(\d+)\s*$/){
+	$step = $1;
+}
+else{
+	die "Error: step size must be an integer\n";
+}
+
+
+my $VCFTOOLS_EXE = "vcftools"; 
+ 
+my %max;
+my %counts_ns;
+my %counts_s;
+my $nb_gene = 0;
+my $nb_intergenic = 0;
+my $nb_exon = 0;
+my $nb_intron = 0;
+my $nb_UTR = 0;
+my $nb_syn = 0;
+my $nb_nsyn = 0;
+if ($vcf =~/\.bcf/){
+	system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out");
+	$vcf = "$out.recode.vcf";
+}
+open(my $VCF,$vcf);
+while(<$VCF>)
+{
+	my @infos = split(/\t/,$_);
+	if (scalar @infos > 8 && !/#CHROM/)
+	{
+		my $chrom = $infos[0];
+		my $position = $infos[1];
+		my $id = $infos[2];
+		
+		
+		my $classe_position = int($position/$step);
+		if (/=NON_SYNONYMOUS_CODING/){
+			$counts_ns{$chrom}{$classe_position}++;
+			$nb_nsyn++;
+		}
+		if (/=SYNONYMOUS_CODING/){
+			$counts_s{$chrom}{$classe_position}++;
+			$nb_syn++;
+		}
+		if (/INTERGENIC/){
+			$nb_intergenic++;
+		}
+		elsif (/EFF=/){
+			$nb_gene++;
+		}
+		if (/CODING/){
+			}
+		elsif (/UTR/){
+			$nb_UTR++;
+		}
+		elsif (/INTRON/){
+			$nb_intron++;
+		}
+		$max{$chrom} = $classe_position;
+	}
+}
+my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
+
+open(my $OUT,">$out");
+#print $OUT "Chrom	Bin	Nb synonymous SNPs	Nb non-synonymous SNPs	dN/dS ratio\n";
+print $OUT "Chrom	Bin	dN/dS ratio\n";
+foreach my $chrom(sort keys(%counts_s))
+{
+	my $maximum = $max{$chrom};
+	for(my $i=1;$i<=$maximum;$i++)
+	{
+		my $classe_position = $i;
+		my $nb_s = 0;
+		my $nb_ns = 0;
+		my $ratio = 0;
+		if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};}
+		if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};}
+		if ($nb_s){$ratio = $nb_ns/$nb_s;}
+		my $bin = $classe_position * $step;
+		#print $OUT "$chrom	$classe_position	$nb_s	$nb_ns	$ratio\n";
+		print $OUT "$chrom	$bin	$ratio\n";
+	}
+}
+close($OUT);
+
+
+
+open(my $A2, ">$out.location");
+print $A2 "Intergenic	$nb_intergenic	Intergenic:$nb_intergenic\n";
+print $A2 "Genic	$nb_gene	Exon:$nb_exon	Intron:$nb_intron	UTR:$nb_UTR\n";
+close($A2);
+
+open(my $A2, ">$out.effect");
+print $A2 "Intron	$nb_intron	Intron:$nb_intron\n";
+print $A2 "UTR	$nb_UTR	UTR:$nb_UTR\n";
+print $A2 "Exon	$nb_exon	Synonym:$nb_syn	Non-syn:$nb_nsyn\n";
+close($A2);
+