view SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl

use strict;
use Switch;
use Getopt::Long;
use Bio::SeqIO;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input         <Hapmap input>
    -o, --out           <output in tabular format>
    -s, --step          <step (in bp)>
~;
$usage .= "\n";

my ($input,$out,$step);

GetOptions(
	"input=s"    => \$input,
	"out=s"      => \$out,
	"step=s"     => \$step,
);


die $usage
  if ( !$input || !$step || !$out );
  
my $max_chr_num = 100;

my %counts;
my %counts_by_ind;
open(my $HAPMAP,$input);
my $headers= <$HAPMAP>;
$headers=~s/\n//g;
$headers=~s/\r//g;
my @ind_names = split(/\t/,$headers);
my @individual_names;
for (my $i = 12; $i <= $#ind_names; $i++)
{
	push(@individual_names,$ind_names[$i]);
}
my %maximums;
while(<$HAPMAP>)
{
	my $line = $_;
	$line=~s/\n//g;
	$line=~s/\r//g;
	my @infos = split(/\t/,$line);
	my $chrom = $infos[2];
	my $position = $infos[3];
	if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;}
	my $classe_position = int($position/$step);
	$counts{$chrom}{$classe_position}++;
		
	my $ref_allele = $infos[11];
	for (my $i = 12; $i <= $#infos; $i++)
	{
		if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;}
		if ($infos[$i] ne $ref_allele)
		{
			$counts_by_ind{$chrom}{$classe_position}{$i}++;
		}
	}
}
close($HAPMAP);

#######################################################
# global
#######################################################
open(my $OUT,">$out");
print $OUT "Chromosome	Position	SNPs\n";
my $chr_num = 0;
foreach my $chrom(sort keys(%counts))
{
	$chr_num++;
	my $ref_counts = $counts{$chrom};
	my %final_counts = %$ref_counts;
	my $x = 0;
	#foreach my $classe_position(sort {$a<=>$b} keys(%final_counts))
	for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
	{
		my $nb = 0;
		if ($counts{$chrom}{$classe_position})
		{
			$nb = $counts{$chrom}{$classe_position};
		}
		$x += $step;
		print $OUT "$chrom	$x	$nb\n";
	}
	if ($chr_num >= $max_chr_num){last;}
}
close($OUT);

#######################################################
# For each individual
#######################################################
open(my $OUT2,">$out.by_sample");
$chr_num = 0;
print $OUT2 "Chromosome	".join("\t",@individual_names) . "\n";
foreach my $chrom(sort keys(%counts_by_ind))
{
	$chr_num++;
        my $ref_counts = $counts_by_ind{$chrom};
        my %final_counts = %$ref_counts;
	for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
        {
			print $OUT2 "$chrom";
			my $num_ind = 12;
			foreach my $indiv(@individual_names)
			{
				my $val = 0;
				
				if ($counts_by_ind{$chrom}{$classe_position}{$num_ind})
				{
					$val = $counts_by_ind{$chrom}{$classe_position}{$num_ind};
				}
				print $OUT2 "	$val";
				$num_ind++;
			}
			print $OUT2 "\n";
        }
	if ($chr_num >= $max_chr_num){last;}
}
close($OUT2);