Mercurial > repos > dereeper > sniplay
diff SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl @ 1:420b57c3c185 draft
Uploaded
author | dereeper |
---|---|
date | Fri, 10 Jul 2015 04:39:30 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl Fri Jul 10 04:39:30 2015 -0400 @@ -0,0 +1,124 @@ +#!/usr/bin/perl + +use strict; +use Switch; +use Getopt::Long; +use Bio::SeqIO; + +my $usage = qq~Usage:$0 <args> [<opts>] +where <args> are: + -i, --input <Hapmap input> + -o, --out <output in tabular format> + -s, --step <step (in bp)> +~; +$usage .= "\n"; + +my ($input,$out,$step); + +GetOptions( + "input=s" => \$input, + "out=s" => \$out, + "step=s" => \$step, +); + + +die $usage + if ( !$input || !$step || !$out ); + +my $max_chr_num = 100; + +my %counts; +my %counts_by_ind; +open(my $HAPMAP,$input); +my $headers= <$HAPMAP>; +$headers=~s/\n//g; +$headers=~s/\r//g; +my @ind_names = split(/\t/,$headers); +my @individual_names; +for (my $i = 12; $i <= $#ind_names; $i++) +{ + push(@individual_names,$ind_names[$i]); +} +my %maximums; +while(<$HAPMAP>) +{ + my $line = $_; + $line=~s/\n//g; + $line=~s/\r//g; + my @infos = split(/\t/,$line); + my $chrom = $infos[2]; + my $position = $infos[3]; + if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;} + my $classe_position = int($position/$step); + $counts{$chrom}{$classe_position}++; + + my $ref_allele = $infos[11]; + for (my $i = 12; $i <= $#infos; $i++) + { + if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;} + if ($infos[$i] ne $ref_allele) + { + $counts_by_ind{$chrom}{$classe_position}{$i}++; + } + } +} +close($HAPMAP); + +####################################################### +# global +####################################################### +open(my $OUT,">$out"); +print $OUT "Chromosome Position SNPs\n"; +my $chr_num = 0; +foreach my $chrom(sort keys(%counts)) +{ + $chr_num++; + my $ref_counts = $counts{$chrom}; + my %final_counts = %$ref_counts; + my $x = 0; + #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts)) + for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) + { + my $nb = 0; + if ($counts{$chrom}{$classe_position}) + { + $nb = $counts{$chrom}{$classe_position}; + } + $x += $step; + print $OUT "$chrom $x $nb\n"; + } + if ($chr_num >= $max_chr_num){last;} +} +close($OUT); + +####################################################### +# For each individual +####################################################### +open(my $OUT2,">$out.by_sample"); +$chr_num = 0; +print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n"; +foreach my $chrom(sort keys(%counts_by_ind)) +{ + $chr_num++; + my $ref_counts = $counts_by_ind{$chrom}; + my %final_counts = %$ref_counts; + for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) + { + print $OUT2 "$chrom"; + my $num_ind = 12; + foreach my $indiv(@individual_names) + { + my $val = 0; + + if ($counts_by_ind{$chrom}{$classe_position}{$num_ind}) + { + $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind}; + } + print $OUT2 " $val"; + $num_ind++; + } + print $OUT2 "\n"; + } + if ($chr_num >= $max_chr_num){last;} +} +close($OUT2);