annotate AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl @ 6:ebb0ac9b6fa9 draft

planemo upload
author gandres
date Mon, 23 May 2016 17:49:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
1 #!/usr/bin/perl
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
2
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
3 use strict;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
4 use Switch;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
5 use Getopt::Long;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
6 use Bio::SeqIO;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
7
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
8 my $usage = qq~Usage:$0 <args> [<opts>]
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
9 where <args> are:
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
10 -v, --vcf <VCF input>
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
11 -o, --out <output>
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
12 -s, --step <step (in bp)>
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
13 ~;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
14 $usage .= "\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
15
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
16 my ($vcf,$out,$step,$min_depth);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
17 $min_depth = 5;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
18
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
19 GetOptions(
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
20 "vcf=s" => \$vcf,
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
21 "out=s" => \$out,
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
22 "min_depth=s"=> \$min_depth,
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
23 "step=s" => \$step,
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
24 );
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
25
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
26
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
27 die $usage
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
28 if ( !$vcf || !$step || !$out );
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
29
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
30 if ($step =~/^(\d+)\s*$/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
31 $step = $1;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
32 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
33 else{
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
34 die "Error: step size must be an integer\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
35 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
36
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
37
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
38 my $VCFTOOLS_EXE = "vcftools";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
39
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
40 my %max;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
41 my %counts_ns;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
42 my %counts_s;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
43 my $nb_gene = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
44 my $nb_intergenic = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
45 my $nb_exon = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
46 my $nb_intron = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
47 my $nb_UTR = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
48 my $nb_syn = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
49 my $nb_nsyn = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
50 if ($vcf =~/\.bcf/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
51 system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out");
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
52 $vcf = "$out.recode.vcf";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
53 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
54 open(my $VCF,$vcf);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
55 while(<$VCF>)
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
56 {
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
57 my @infos = split(/\t/,$_);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
58 if (scalar @infos > 8 && !/#CHROM/)
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
59 {
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
60 my $chrom = $infos[0];
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
61 my $position = $infos[1];
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
62 my $id = $infos[2];
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
63
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
64
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
65 my $classe_position = int($position/$step);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
66 if (/=NON_SYNONYMOUS_CODING/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
67 $counts_ns{$chrom}{$classe_position}++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
68 $nb_nsyn++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
69 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
70 if (/=SYNONYMOUS_CODING/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
71 $counts_s{$chrom}{$classe_position}++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
72 $nb_syn++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
73 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
74 if (/INTERGENIC/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
75 $nb_intergenic++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
76 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
77 elsif (/EFF=/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
78 $nb_gene++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
79 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
80 if (/CODING/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
81 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
82 elsif (/UTR/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
83 $nb_UTR++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
84 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
85 elsif (/INTRON/){
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
86 $nb_intron++;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
87 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
88 $max{$chrom} = $classe_position;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
89 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
90 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
91 my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
92
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
93 open(my $OUT,">$out");
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
94 #print $OUT "Chrom Bin Nb synonymous SNPs Nb non-synonymous SNPs dN/dS ratio\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
95 print $OUT "Chrom Bin dN/dS ratio\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
96 foreach my $chrom(sort keys(%counts_s))
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
97 {
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
98 my $maximum = $max{$chrom};
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
99 for(my $i=1;$i<=$maximum;$i++)
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
100 {
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
101 my $classe_position = $i;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
102 my $nb_s = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
103 my $nb_ns = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
104 my $ratio = 0;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
105 if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};}
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
106 if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};}
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
107 if ($nb_s){$ratio = $nb_ns/$nb_s;}
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
108 my $bin = $classe_position * $step;
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
109 #print $OUT "$chrom $classe_position $nb_s $nb_ns $ratio\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
110 print $OUT "$chrom $bin $ratio\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
111 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
112 }
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
113 close($OUT);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
114
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
115
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
116
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
117 open(my $A2, ">$out.location");
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
118 print $A2 "Intergenic $nb_intergenic Intergenic:$nb_intergenic\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
119 print $A2 "Genic $nb_gene Exon:$nb_exon Intron:$nb_intron UTR:$nb_UTR\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
120 close($A2);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
121
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
122 open(my $A2, ">$out.effect");
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
123 print $A2 "Intron $nb_intron Intron:$nb_intron\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
124 print $A2 "UTR $nb_UTR UTR:$nb_UTR\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
125 print $A2 "Exon $nb_exon Synonym:$nb_syn Non-syn:$nb_nsyn\n";
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
126 close($A2);
ebb0ac9b6fa9 planemo upload
gandres
parents:
diff changeset
127