Mercurial > repos > dereeper > sniplay
comparison SNP_density/CalculateSlidingWindowsSNPdensitiesFromVCF.pl @ 6:ebb0ac9b6fa9 draft
planemo upload
author | gandres |
---|---|
date | Mon, 23 May 2016 17:49:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ec22fcacb66c | 6:ebb0ac9b6fa9 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Switch; | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 my $usage = qq~Usage:$0 <args> [<opts>] | |
9 where <args> are: | |
10 -i, --input <VCF/Hapmap input> | |
11 -o, --out <output in tabular format> | |
12 -s, --step <step (in bp)> | |
13 ~; | |
14 $usage .= "\n"; | |
15 | |
16 my ($input,$out,$step); | |
17 | |
18 GetOptions( | |
19 "input=s" => \$input, | |
20 "out=s" => \$out, | |
21 "step=s" => \$step, | |
22 ); | |
23 | |
24 | |
25 die $usage | |
26 if ( !$input || !$step || !$out ); | |
27 | |
28 my $max_chr_num = 100; | |
29 | |
30 my %counts; | |
31 my %counts_by_ind; | |
32 | |
33 my $VCFTOOLS_EXE = "vcftools"; | |
34 | |
35 my $is_vcf = `head -4000 $input | grep -c CHROM`; | |
36 my $is_bcf = 0; | |
37 if ($input =~/\.bcf/){ | |
38 $is_bcf = 1; | |
39 } | |
40 | |
41 | |
42 my $IN; | |
43 my $headers; | |
44 my $start_indiv_retrieval = 12; | |
45 my $chrom_retrieval = 2; | |
46 my $pos_retrieval = 3; | |
47 if ($is_vcf or $is_bcf){ | |
48 $start_indiv_retrieval = 9; | |
49 $chrom_retrieval = 0; | |
50 $pos_retrieval = 1; | |
51 if ($is_vcf){ | |
52 $headers = `grep '#CHROM' $input`; | |
53 open($IN,$input); | |
54 } | |
55 elsif ($is_bcf){ | |
56 my $cmd = "$VCFTOOLS_EXE --bcf $input --stdout --recode | head -4000 | grep CHROM"; | |
57 $headers = `$cmd`; | |
58 my $cmd2 = "$VCFTOOLS_EXE --bcf $input --stdout --recode"; | |
59 open $IN, '-|' , "$cmd2" or die "Can not run Vcftools"; | |
60 } | |
61 } | |
62 else{ | |
63 $headers= <$IN>; | |
64 open($IN,$input); | |
65 } | |
66 $headers=~s/\n//g; | |
67 $headers=~s/\r//g; | |
68 my @ind_names = split(/\t/,$headers); | |
69 my @individual_names; | |
70 for (my $i = $start_indiv_retrieval; $i <= $#ind_names; $i++) | |
71 { | |
72 push(@individual_names,$ind_names[$i]); | |
73 } | |
74 my %maximums; | |
75 while(<$IN>) | |
76 { | |
77 my $line = $_; | |
78 $line=~s/\n//g; | |
79 $line=~s/\r//g; | |
80 my @infos = split(/\t/,$line); | |
81 if (scalar @infos > 8 && !/#CHROM/){ | |
82 my $chrom = $infos[$chrom_retrieval]; | |
83 my $position = $infos[$pos_retrieval]; | |
84 if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;} | |
85 my $classe_position = int($position/$step); | |
86 $counts{$chrom}{$classe_position}++; | |
87 | |
88 my $ref_allele = $infos[11]; | |
89 if ($is_vcf or $is_bcf){ | |
90 $ref_allele = "0/0"; | |
91 } | |
92 for (my $i = $start_indiv_retrieval; $i <= $#infos; $i++){ | |
93 if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;} | |
94 if ($infos[$i] ne $ref_allele){ | |
95 $counts_by_ind{$chrom}{$classe_position}{$i}++; | |
96 } | |
97 } | |
98 } | |
99 } | |
100 close($IN); | |
101 | |
102 ####################################################### | |
103 # global | |
104 ####################################################### | |
105 open(my $OUT,">$out"); | |
106 print $OUT "Chromosome Position SNPs\n"; | |
107 my $chr_num = 0; | |
108 foreach my $chrom(sort keys(%counts)) | |
109 { | |
110 $chr_num++; | |
111 my $ref_counts = $counts{$chrom}; | |
112 my %final_counts = %$ref_counts; | |
113 my $x = 0; | |
114 #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts)) | |
115 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) | |
116 { | |
117 my $nb = 0; | |
118 if ($counts{$chrom}{$classe_position}) | |
119 { | |
120 $nb = $counts{$chrom}{$classe_position}; | |
121 } | |
122 $x += $step; | |
123 print $OUT "$chrom $x $nb\n"; | |
124 } | |
125 if ($chr_num >= $max_chr_num){last;} | |
126 } | |
127 close($OUT); | |
128 | |
129 ####################################################### | |
130 # For each individual | |
131 ####################################################### | |
132 open(my $OUT2,">$out.by_sample"); | |
133 $chr_num = 0; | |
134 print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n"; | |
135 foreach my $chrom(sort keys(%counts_by_ind)) | |
136 { | |
137 $chr_num++; | |
138 my $ref_counts = $counts_by_ind{$chrom}; | |
139 my %final_counts = %$ref_counts; | |
140 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) | |
141 { | |
142 print $OUT2 "$chrom"; | |
143 my $num_ind = $start_indiv_retrieval; | |
144 foreach my $indiv(@individual_names) | |
145 { | |
146 my $val = 0; | |
147 | |
148 if ($counts_by_ind{$chrom}{$classe_position}{$num_ind}) | |
149 { | |
150 $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind}; | |
151 } | |
152 print $OUT2 " $val"; | |
153 $num_ind++; | |
154 } | |
155 print $OUT2 "\n"; | |
156 } | |
157 if ($chr_num >= $max_chr_num){last;} | |
158 } | |
159 close($OUT2); |