Mercurial > repos > dereeper > sniplay
comparison AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 my $usage = qq~Usage:$0 <args> [<opts>] | |
9 where <args> are: | |
10 -v, --vcf <VCF input> | |
11 -o, --out <output> | |
12 -s, --step <step (in bp)> | |
13 ~; | |
14 $usage .= "\n"; | |
15 | |
16 my ($vcf,$out,$step,$min_depth); | |
17 $min_depth = 5; | |
18 | |
19 GetOptions( | |
20 "vcf=s" => \$vcf, | |
21 "out=s" => \$out, | |
22 "min_depth=s"=> \$min_depth, | |
23 "step=s" => \$step, | |
24 ); | |
25 | |
26 | |
27 die $usage | |
28 if ( !$vcf || !$step || !$out ); | |
29 | |
30 if ($step =~/^(\d+)\s*$/){ | |
31 $step = $1; | |
32 } | |
33 else{ | |
34 die "Error: step size must be an integer\n"; | |
35 } | |
36 | |
37 | |
38 my $VCFTOOLS_EXE = "vcftools"; | |
39 | |
40 my %max; | |
41 my %counts_ns; | |
42 my %counts_s; | |
43 my $nb_gene = 0; | |
44 my $nb_intergenic = 0; | |
45 my $nb_exon = 0; | |
46 my $nb_intron = 0; | |
47 my $nb_UTR = 0; | |
48 my $nb_syn = 0; | |
49 my $nb_nsyn = 0; | |
50 if ($vcf =~/\.bcf/){ | |
51 system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out"); | |
52 $vcf = "$out.recode.vcf"; | |
53 } | |
54 open(my $VCF,$vcf); | |
55 while(<$VCF>) | |
56 { | |
57 my @infos = split(/\t/,$_); | |
58 if (scalar @infos > 8 && !/#CHROM/) | |
59 { | |
60 my $chrom = $infos[0]; | |
61 my $position = $infos[1]; | |
62 my $id = $infos[2]; | |
63 | |
64 | |
65 my $classe_position = int($position/$step); | |
66 if (/=NON_SYNONYMOUS_CODING/){ | |
67 $counts_ns{$chrom}{$classe_position}++; | |
68 $nb_nsyn++; | |
69 } | |
70 if (/=SYNONYMOUS_CODING/){ | |
71 $counts_s{$chrom}{$classe_position}++; | |
72 $nb_syn++; | |
73 } | |
74 if (/INTERGENIC/){ | |
75 $nb_intergenic++; | |
76 } | |
77 elsif (/EFF=/){ | |
78 $nb_gene++; | |
79 } | |
80 if (/CODING/){ | |
81 } | |
82 elsif (/UTR/){ | |
83 $nb_UTR++; | |
84 } | |
85 elsif (/INTRON/){ | |
86 $nb_intron++; | |
87 } | |
88 $max{$chrom} = $classe_position; | |
89 } | |
90 } | |
91 my $nb_exon = $nb_gene - $nb_intron - $nb_UTR; | |
92 | |
93 open(my $OUT,">$out"); | |
94 #print $OUT "Chrom Bin Nb synonymous SNPs Nb non-synonymous SNPs dN/dS ratio\n"; | |
95 print $OUT "Chrom Bin dN/dS ratio\n"; | |
96 foreach my $chrom(sort keys(%counts_s)) | |
97 { | |
98 my $maximum = $max{$chrom}; | |
99 for(my $i=1;$i<=$maximum;$i++) | |
100 { | |
101 my $classe_position = $i; | |
102 my $nb_s = 0; | |
103 my $nb_ns = 0; | |
104 my $ratio = 0; | |
105 if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};} | |
106 if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};} | |
107 if ($nb_s){$ratio = $nb_ns/$nb_s;} | |
108 my $bin = $classe_position * $step; | |
109 #print $OUT "$chrom $classe_position $nb_s $nb_ns $ratio\n"; | |
110 print $OUT "$chrom $bin $ratio\n"; | |
111 } | |
112 } | |
113 close($OUT); | |
114 | |
115 | |
116 | |
117 open(my $A2, ">$out.location"); | |
118 print $A2 "Intergenic $nb_intergenic Intergenic:$nb_intergenic\n"; | |
119 print $A2 "Genic $nb_gene Exon:$nb_exon Intron:$nb_intron UTR:$nb_UTR\n"; | |
120 close($A2); | |
121 | |
122 open(my $A2, ">$out.effect"); | |
123 print $A2 "Intron $nb_intron Intron:$nb_intron\n"; | |
124 print $A2 "UTR $nb_UTR UTR:$nb_UTR\n"; | |
125 print $A2 "Exon $nb_exon Synonym:$nb_syn Non-syn:$nb_nsyn\n"; | |
126 close($A2); | |
127 |