9
|
1 #!/usr/bin/perl
|
|
2
|
|
3 use strict;
|
|
4
|
|
5 use Getopt::Long;
|
|
6 use Bio::SeqIO;
|
|
7
|
|
8 my $usage = qq~Usage:$0 <args> [<opts>]
|
|
9 where <args> are:
|
|
10 -v, --vcf <VCF input>
|
|
11 -o, --out <output>
|
|
12 -s, --step <step (in bp)>
|
|
13 ~;
|
|
14 $usage .= "\n";
|
|
15
|
|
16 my ($vcf,$out,$step,$min_depth);
|
|
17 $min_depth = 5;
|
|
18
|
|
19 GetOptions(
|
|
20 "vcf=s" => \$vcf,
|
|
21 "out=s" => \$out,
|
|
22 "min_depth=s"=> \$min_depth,
|
|
23 "step=s" => \$step,
|
|
24 );
|
|
25
|
|
26
|
|
27 die $usage
|
|
28 if ( !$vcf || !$step || !$out );
|
|
29
|
|
30 if ($step =~/^(\d+)\s*$/){
|
|
31 $step = $1;
|
|
32 }
|
|
33 else{
|
|
34 die "Error: step size must be an integer\n";
|
|
35 }
|
|
36
|
|
37
|
|
38 my $VCFTOOLS_EXE = "vcftools";
|
|
39
|
|
40 my %max;
|
|
41 my %counts_ns;
|
|
42 my %counts_s;
|
|
43 my $nb_gene = 0;
|
|
44 my $nb_intergenic = 0;
|
|
45 my $nb_exon = 0;
|
|
46 my $nb_intron = 0;
|
|
47 my $nb_UTR = 0;
|
|
48 my $nb_syn = 0;
|
|
49 my $nb_nsyn = 0;
|
|
50 if ($vcf =~/\.bcf/){
|
|
51 system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out");
|
|
52 $vcf = "$out.recode.vcf";
|
|
53 }
|
|
54 open(my $VCF,$vcf);
|
|
55 while(<$VCF>)
|
|
56 {
|
|
57 my @infos = split(/\t/,$_);
|
|
58 if (scalar @infos > 8 && !/#CHROM/)
|
|
59 {
|
|
60 my $chrom = $infos[0];
|
|
61 my $position = $infos[1];
|
|
62 my $id = $infos[2];
|
|
63
|
|
64
|
|
65 my $classe_position = int($position/$step);
|
|
66 if (/=NON_SYNONYMOUS_CODING/){
|
|
67 $counts_ns{$chrom}{$classe_position}++;
|
|
68 $nb_nsyn++;
|
|
69 }
|
|
70 if (/=SYNONYMOUS_CODING/){
|
|
71 $counts_s{$chrom}{$classe_position}++;
|
|
72 $nb_syn++;
|
|
73 }
|
|
74 if (/INTERGENIC/){
|
|
75 $nb_intergenic++;
|
|
76 }
|
|
77 elsif (/EFF=/){
|
|
78 $nb_gene++;
|
|
79 }
|
|
80 if (/CODING/){
|
|
81 }
|
|
82 elsif (/UTR/){
|
|
83 $nb_UTR++;
|
|
84 }
|
|
85 elsif (/INTRON/){
|
|
86 $nb_intron++;
|
|
87 }
|
|
88 $max{$chrom} = $classe_position;
|
|
89 }
|
|
90 }
|
|
91 my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
|
|
92
|
|
93 open(my $OUT,">$out");
|
|
94 #print $OUT "Chrom Bin Nb synonymous SNPs Nb non-synonymous SNPs dN/dS ratio\n";
|
|
95 print $OUT "Chrom Bin dN/dS ratio\n";
|
|
96 foreach my $chrom(sort keys(%counts_s))
|
|
97 {
|
|
98 my $maximum = $max{$chrom};
|
|
99 for(my $i=1;$i<=$maximum;$i++)
|
|
100 {
|
|
101 my $classe_position = $i;
|
|
102 my $nb_s = 0;
|
|
103 my $nb_ns = 0;
|
|
104 my $ratio = 0;
|
|
105 if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};}
|
|
106 if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};}
|
|
107 if ($nb_s){$ratio = $nb_ns/$nb_s;}
|
|
108 my $bin = $classe_position * $step;
|
|
109 #print $OUT "$chrom $classe_position $nb_s $nb_ns $ratio\n";
|
|
110 print $OUT "$chrom $bin $ratio\n";
|
|
111 }
|
|
112 }
|
|
113 close($OUT);
|
|
114
|
|
115
|
|
116
|
|
117 open(my $A2, ">$out.location");
|
|
118 print $A2 "Intergenic $nb_intergenic Intergenic:$nb_intergenic\n";
|
|
119 print $A2 "Genic $nb_gene Exon:$nb_exon Intron:$nb_intron UTR:$nb_UTR\n";
|
|
120 close($A2);
|
|
121
|
|
122 open(my $A2, ">$out.effect");
|
|
123 print $A2 "Intron $nb_intron Intron:$nb_intron\n";
|
|
124 print $A2 "UTR $nb_UTR UTR:$nb_UTR\n";
|
|
125 print $A2 "Exon $nb_exon Synonym:$nb_syn Non-syn:$nb_nsyn\n";
|
|
126 close($A2);
|
|
127
|