Mercurial > repos > dereeper > sniplay
diff SNP_density/CalculateSlidingWindowsSNPdensitiesFromVCF.pl @ 6:ebb0ac9b6fa9 draft
planemo upload
author | gandres |
---|---|
date | Mon, 23 May 2016 17:49:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/CalculateSlidingWindowsSNPdensitiesFromVCF.pl Mon May 23 17:49:17 2016 -0400 @@ -0,0 +1,159 @@ +#!/usr/bin/perl + +use strict; +use Switch; +use Getopt::Long; +use Bio::SeqIO; + +my $usage = qq~Usage:$0 <args> [<opts>] +where <args> are: + -i, --input <VCF/Hapmap input> + -o, --out <output in tabular format> + -s, --step <step (in bp)> +~; +$usage .= "\n"; + +my ($input,$out,$step); + +GetOptions( + "input=s" => \$input, + "out=s" => \$out, + "step=s" => \$step, +); + + +die $usage + if ( !$input || !$step || !$out ); + +my $max_chr_num = 100; + +my %counts; +my %counts_by_ind; + +my $VCFTOOLS_EXE = "vcftools"; + +my $is_vcf = `head -4000 $input | grep -c CHROM`; +my $is_bcf = 0; +if ($input =~/\.bcf/){ + $is_bcf = 1; +} + + +my $IN; +my $headers; +my $start_indiv_retrieval = 12; +my $chrom_retrieval = 2; +my $pos_retrieval = 3; +if ($is_vcf or $is_bcf){ + $start_indiv_retrieval = 9; + $chrom_retrieval = 0; + $pos_retrieval = 1; + if ($is_vcf){ + $headers = `grep '#CHROM' $input`; + open($IN,$input); + } + elsif ($is_bcf){ + my $cmd = "$VCFTOOLS_EXE --bcf $input --stdout --recode | head -4000 | grep CHROM"; + $headers = `$cmd`; + my $cmd2 = "$VCFTOOLS_EXE --bcf $input --stdout --recode"; + open $IN, '-|' , "$cmd2" or die "Can not run Vcftools"; + } +} +else{ + $headers= <$IN>; + open($IN,$input); +} +$headers=~s/\n//g; +$headers=~s/\r//g; +my @ind_names = split(/\t/,$headers); +my @individual_names; +for (my $i = $start_indiv_retrieval; $i <= $#ind_names; $i++) +{ + push(@individual_names,$ind_names[$i]); +} +my %maximums; +while(<$IN>) +{ + my $line = $_; + $line=~s/\n//g; + $line=~s/\r//g; + my @infos = split(/\t/,$line); + if (scalar @infos > 8 && !/#CHROM/){ + my $chrom = $infos[$chrom_retrieval]; + my $position = $infos[$pos_retrieval]; + if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;} + my $classe_position = int($position/$step); + $counts{$chrom}{$classe_position}++; + + my $ref_allele = $infos[11]; + if ($is_vcf or $is_bcf){ + $ref_allele = "0/0"; + } + for (my $i = $start_indiv_retrieval; $i <= $#infos; $i++){ + if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;} + if ($infos[$i] ne $ref_allele){ + $counts_by_ind{$chrom}{$classe_position}{$i}++; + } + } + } +} +close($IN); + +####################################################### +# global +####################################################### +open(my $OUT,">$out"); +print $OUT "Chromosome Position SNPs\n"; +my $chr_num = 0; +foreach my $chrom(sort keys(%counts)) +{ + $chr_num++; + my $ref_counts = $counts{$chrom}; + my %final_counts = %$ref_counts; + my $x = 0; + #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts)) + for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) + { + my $nb = 0; + if ($counts{$chrom}{$classe_position}) + { + $nb = $counts{$chrom}{$classe_position}; + } + $x += $step; + print $OUT "$chrom $x $nb\n"; + } + if ($chr_num >= $max_chr_num){last;} +} +close($OUT); + +####################################################### +# For each individual +####################################################### +open(my $OUT2,">$out.by_sample"); +$chr_num = 0; +print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n"; +foreach my $chrom(sort keys(%counts_by_ind)) +{ + $chr_num++; + my $ref_counts = $counts_by_ind{$chrom}; + my %final_counts = %$ref_counts; + for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) + { + print $OUT2 "$chrom"; + my $num_ind = $start_indiv_retrieval; + foreach my $indiv(@individual_names) + { + my $val = 0; + + if ($counts_by_ind{$chrom}{$classe_position}{$num_ind}) + { + $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind}; + } + print $OUT2 " $val"; + $num_ind++; + } + print $OUT2 "\n"; + } + if ($chr_num >= $max_chr_num){last;} +} +close($OUT2);