annotate AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
2
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
3 use strict;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
4
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
7
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
8 my $usage = qq~Usage:$0 <args> [<opts>]
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
9 where <args> are:
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
10 -v, --vcf <VCF input>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
11 -o, --out <output>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
12 -s, --step <step (in bp)>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
13 ~;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
14 $usage .= "\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
15
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
16 my ($vcf,$out,$step,$min_depth);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
17 $min_depth = 5;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
18
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
19 GetOptions(
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
20 "vcf=s" => \$vcf,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
21 "out=s" => \$out,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
22 "min_depth=s"=> \$min_depth,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
23 "step=s" => \$step,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
24 );
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
25
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
26
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
27 die $usage
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
28 if ( !$vcf || !$step || !$out );
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
29
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
30 if ($step =~/^(\d+)\s*$/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
31 $step = $1;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
32 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
33 else{
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
34 die "Error: step size must be an integer\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
35 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
36
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
37
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
38 my $VCFTOOLS_EXE = "vcftools";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
39
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
40 my %max;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
41 my %counts_ns;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
42 my %counts_s;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
43 my $nb_gene = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
44 my $nb_intergenic = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
45 my $nb_exon = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
46 my $nb_intron = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
47 my $nb_UTR = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
48 my $nb_syn = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
49 my $nb_nsyn = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
50 if ($vcf =~/\.bcf/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
51 system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
52 $vcf = "$out.recode.vcf";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
53 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
54 open(my $VCF,$vcf);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
55 while(<$VCF>)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
56 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
57 my @infos = split(/\t/,$_);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
58 if (scalar @infos > 8 && !/#CHROM/)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
59 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
60 my $chrom = $infos[0];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
61 my $position = $infos[1];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
62 my $id = $infos[2];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
63
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
64
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
65 my $classe_position = int($position/$step);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
66 if (/=NON_SYNONYMOUS_CODING/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
67 $counts_ns{$chrom}{$classe_position}++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
68 $nb_nsyn++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
69 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
70 if (/=SYNONYMOUS_CODING/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
71 $counts_s{$chrom}{$classe_position}++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
72 $nb_syn++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
73 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
74 if (/INTERGENIC/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
75 $nb_intergenic++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
76 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
77 elsif (/EFF=/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
78 $nb_gene++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
79 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
80 if (/CODING/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
81 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
82 elsif (/UTR/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
83 $nb_UTR++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
84 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
85 elsif (/INTRON/){
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
86 $nb_intron++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
87 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
88 $max{$chrom} = $classe_position;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
89 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
90 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
91 my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
92
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
93 open(my $OUT,">$out");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
94 #print $OUT "Chrom Bin Nb synonymous SNPs Nb non-synonymous SNPs dN/dS ratio\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
95 print $OUT "Chrom Bin dN/dS ratio\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
96 foreach my $chrom(sort keys(%counts_s))
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
97 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
98 my $maximum = $max{$chrom};
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
99 for(my $i=1;$i<=$maximum;$i++)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
100 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
101 my $classe_position = $i;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
102 my $nb_s = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
103 my $nb_ns = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
104 my $ratio = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
105 if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};}
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
106 if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};}
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
107 if ($nb_s){$ratio = $nb_ns/$nb_s;}
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
108 my $bin = $classe_position * $step;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
109 #print $OUT "$chrom $classe_position $nb_s $nb_ns $ratio\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
110 print $OUT "$chrom $bin $ratio\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
111 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
112 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
113 close($OUT);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
114
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
115
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
116
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
117 open(my $A2, ">$out.location");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
118 print $A2 "Intergenic $nb_intergenic Intergenic:$nb_intergenic\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
119 print $A2 "Genic $nb_gene Exon:$nb_exon Intron:$nb_intron UTR:$nb_UTR\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
120 close($A2);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
121
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
122 open(my $A2, ">$out.effect");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
123 print $A2 "Intron $nb_intron Intron:$nb_intron\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
124 print $A2 "UTR $nb_UTR UTR:$nb_UTR\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
125 print $A2 "Exon $nb_exon Synonym:$nb_syn Non-syn:$nb_nsyn\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
126 close($A2);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
127