diff SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl	Fri Jul 10 04:39:30 2015 -0400
@@ -0,0 +1,124 @@
+#!/usr/bin/perl
+
+use strict;
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -i, --input         <Hapmap input>
+    -o, --out           <output in tabular format>
+    -s, --step          <step (in bp)>
+~;
+$usage .= "\n";
+
+my ($input,$out,$step);
+
+GetOptions(
+	"input=s"    => \$input,
+	"out=s"      => \$out,
+	"step=s"     => \$step,
+);
+
+
+die $usage
+  if ( !$input || !$step || !$out );
+  
+my $max_chr_num = 100;
+
+my %counts;
+my %counts_by_ind;
+open(my $HAPMAP,$input);
+my $headers= <$HAPMAP>;
+$headers=~s/\n//g;
+$headers=~s/\r//g;
+my @ind_names = split(/\t/,$headers);
+my @individual_names;
+for (my $i = 12; $i <= $#ind_names; $i++)
+{
+	push(@individual_names,$ind_names[$i]);
+}
+my %maximums;
+while(<$HAPMAP>)
+{
+	my $line = $_;
+	$line=~s/\n//g;
+	$line=~s/\r//g;
+	my @infos = split(/\t/,$line);
+	my $chrom = $infos[2];
+	my $position = $infos[3];
+	if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;}
+	my $classe_position = int($position/$step);
+	$counts{$chrom}{$classe_position}++;
+		
+	my $ref_allele = $infos[11];
+	for (my $i = 12; $i <= $#infos; $i++)
+	{
+		if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;}
+		if ($infos[$i] ne $ref_allele)
+		{
+			$counts_by_ind{$chrom}{$classe_position}{$i}++;
+		}
+	}
+}
+close($HAPMAP);
+
+#######################################################
+# global
+#######################################################
+open(my $OUT,">$out");
+print $OUT "Chromosome	Position	SNPs\n";
+my $chr_num = 0;
+foreach my $chrom(sort keys(%counts))
+{
+	$chr_num++;
+	my $ref_counts = $counts{$chrom};
+	my %final_counts = %$ref_counts;
+	my $x = 0;
+	#foreach my $classe_position(sort {$a<=>$b} keys(%final_counts))
+	for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
+	{
+		my $nb = 0;
+		if ($counts{$chrom}{$classe_position})
+		{
+			$nb = $counts{$chrom}{$classe_position};
+		}
+		$x += $step;
+		print $OUT "$chrom	$x	$nb\n";
+	}
+	if ($chr_num >= $max_chr_num){last;}
+}
+close($OUT);
+
+#######################################################
+# For each individual
+#######################################################
+open(my $OUT2,">$out.by_sample");
+$chr_num = 0;
+print $OUT2 "Chromosome	".join("\t",@individual_names) . "\n";
+foreach my $chrom(sort keys(%counts_by_ind))
+{
+	$chr_num++;
+        my $ref_counts = $counts_by_ind{$chrom};
+        my %final_counts = %$ref_counts;
+	for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
+        {
+			print $OUT2 "$chrom";
+			my $num_ind = 12;
+			foreach my $indiv(@individual_names)
+			{
+				my $val = 0;
+				
+				if ($counts_by_ind{$chrom}{$classe_position}{$num_ind})
+				{
+					$val = $counts_by_ind{$chrom}{$classe_position}{$num_ind};
+				}
+				print $OUT2 "	$val";
+				$num_ind++;
+			}
+			print $OUT2 "\n";
+        }
+	if ($chr_num >= $max_chr_num){last;}
+}
+close($OUT2);