annotate SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3 use strict;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 use Switch;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 my $usage = qq~Usage:$0 <args> [<opts>]
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 where <args> are:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10 -i, --input <Hapmap input>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 -o, --out <output in tabular format>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 -s, --step <step (in bp)>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 ~;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 $usage .= "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 my ($input,$out,$step);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 GetOptions(
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19 "input=s" => \$input,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 "out=s" => \$out,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 "step=s" => \$step,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22 );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 die $usage
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26 if ( !$input || !$step || !$out );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 my $max_chr_num = 100;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 my %counts;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 my %counts_by_ind;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32 open(my $HAPMAP,$input);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 my $headers= <$HAPMAP>;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 $headers=~s/\n//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 $headers=~s/\r//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 my @ind_names = split(/\t/,$headers);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 my @individual_names;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 for (my $i = 12; $i <= $#ind_names; $i++)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 push(@individual_names,$ind_names[$i]);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 my %maximums;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 while(<$HAPMAP>)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 my $line = $_;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 $line=~s/\n//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 $line=~s/\r//g;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 my @infos = split(/\t/,$line);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 my $chrom = $infos[2];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 my $position = $infos[3];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 my $classe_position = int($position/$step);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 $counts{$chrom}{$classe_position}++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 my $ref_allele = $infos[11];
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 for (my $i = 12; $i <= $#infos; $i++)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 if ($infos[$i] ne $ref_allele)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 $counts_by_ind{$chrom}{$classe_position}{$i}++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 close($HAPMAP);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 #######################################################
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 # global
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 #######################################################
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 open(my $OUT,">$out");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 print $OUT "Chromosome Position SNPs\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 my $chr_num = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 foreach my $chrom(sort keys(%counts))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 $chr_num++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 my $ref_counts = $counts{$chrom};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 my %final_counts = %$ref_counts;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 my $x = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 my $nb = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 if ($counts{$chrom}{$classe_position})
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 $nb = $counts{$chrom}{$classe_position};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 $x += $step;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 print $OUT "$chrom $x $nb\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 if ($chr_num >= $max_chr_num){last;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 close($OUT);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 #######################################################
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 # For each individual
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 #######################################################
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 open(my $OUT2,">$out.by_sample");
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 $chr_num = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99 print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100 foreach my $chrom(sort keys(%counts_by_ind))
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 $chr_num++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 my $ref_counts = $counts_by_ind{$chrom};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 my %final_counts = %$ref_counts;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 print $OUT2 "$chrom";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 my $num_ind = 12;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 foreach my $indiv(@individual_names)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111 my $val = 0;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 if ($counts_by_ind{$chrom}{$classe_position}{$num_ind})
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind};
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 print $OUT2 " $val";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 $num_ind++;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 print $OUT2 "\n";
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 if ($chr_num >= $max_chr_num){last;}
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 close($OUT2);