annotate 2.4/script/cluster.pair.pl @ 16:8eb7d93f7e58 draft

Uploaded
author plus91-technologies-pvt-ltd
date Sat, 31 May 2014 11:23:36 -0400
parents e3609c8714fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #!/usr/bin/perl
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2 use strict;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use POSIX;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 my $usage = "cluster.pair.pl maxdist\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 my $maxdist = shift or die $usage;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 my %count;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10 while (<STDIN>){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11 chomp;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12 my ($sample, $chrstart, $start, $chrend, $end) = split /\t/;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
13 my $nstart = floor ($start/$maxdist);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
14 my $nend = floor ($end/$maxdist);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
15 my $coord = {start=>$start, end=>$end};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
16
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
17 push @{$count{$chrstart}->{$nstart}->{$chrend}->{$nend}->{$sample}}, $coord;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
18 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
19
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
20 print_groups (\%count);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
21
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22 sub print_groups {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
23 my ($rcount) = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
24 my %count = %{$rcount};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
25
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26 foreach my $chrstart (sort {$a<=>$b} keys %count) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
27 foreach my $posstart (sort {$a<=>$b} keys %{$count{$chrstart}}) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
28 my %fcoord = %{$count{$chrstart}->{$posstart}};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
29
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
30 foreach my $chrend (sort {$a<=>$b} keys %fcoord) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31 foreach my $posend (sort {$a<=>$b} keys %{$fcoord{$chrend}}){
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
32 my @nsamples = sort {$a cmp $b} (keys %{$fcoord{$chrend}->{$posend}});
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
33
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34 my $cpos = $fcoord{$chrend}->{$posend};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
35
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
36 my @coords;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
37 my $totnum=0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
38
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
39 foreach my $sample (@nsamples) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
40 my ($num, $avgx, $avgy) = calc_moments(@{$cpos->{$sample}});
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
41 push (@coords, {start=>$avgx, end=>$avgy});
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
42 $totnum+=$num;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
43 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
45 my ($num, $avgx, $avgy) = calc_moments(@coords);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
46
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
47 print $chrstart."\t".$avgx."\t".$chrend."\t".$avgy ."\t".$num."\t".$totnum."\t" ;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
48
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
49 print $_."\t" foreach (@nsamples);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
50 print "\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
51 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
52 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
53 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
54 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
55 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
56
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
57 sub calc_moments {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58 my (@pos) = @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
59
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
60 my ($num, $sumx, $sumy) = (0,0,0);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61 foreach my $cpos (@pos) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62 $num++;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
63 $sumx+=$cpos->{start};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
64 $sumy+=$cpos->{end};
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
65 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
66 my $avgx = sprintf ("%d", $sumx/$num);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
67 my $avgy = sprintf ("%d", $sumy/$num);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
68
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
69 return ($num, $avgx, $avgy);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
70 }