annotate nibls.pl @ 0:87fe81de0931 draft default tip

Uploaded
author bigrna
date Sun, 04 Jan 2015 02:47:25 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
1 #!/usr/bin/perl
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
2 #####################################################################################################
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
3 #LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
4 #
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
5 # Dan MacLean
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
6 # dan.maclean@sainsbury-laboratory.ac.uk
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
7 #
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
8 # This program is free academic software; academic and non-profit
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
9 # users may redistribute it freely.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
10 #
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
14 #
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
15 # This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
16 # see included file GPL3.txt
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
17 #
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
18 #
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
19
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
20
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
21 ###Dont forget you will need ...
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
22 #####################################################################################################
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
23 # Boost::Graph
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
24 #Copyright 2005 by David Burdick
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
25 # Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
26 #Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
27 #####################################################################################################
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
28
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
29
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
30
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
31 use strict;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
32 use warnings;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
33 use Boost::Graph;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
34 use Getopt::Long;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
35
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
36
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
37 my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
38
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
39 my $gff_file ;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
40 my $min_inc = 5;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
41 my $clus = 0.6;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
42 my $buff = 0;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
43 my $output_file;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
44 my $temp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
45 my $mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
46
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
47 GetOptions(
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
48
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
49 'c=f' => \$clus,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
50 'm=i' => \$min_inc,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
51 'f|file=s' => \$gff_file,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
52 'b=i' => \$buff,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
53 'o=s' => \$output_file,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
54 't=s' => \$temp,
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
55 'k=s' => \$mark
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
56 ) ;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
57
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
58
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
59 die $usage unless $gff_file;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
60
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
61
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
62 my $starttime = time;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
63 warn "started $starttime\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
64
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
65 ## load in data
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
66 my %molecules; # stores starts and ends of srnas
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
67 open GFF, "<$gff_file";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
68
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
69 while (my $entry = <GFF>){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
70
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
71 chomp $entry;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
72 next if($entry=~/^\#/);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
73 my @data = split(/\t/,$entry);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
74 my $chr=shift @data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
75 my $strand=shift @data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
76 my $start=shift @data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
77 my $end=shift @data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
78 # my $length1=$end-$start+1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
79 # if ($length1>30) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
80 # $length1=40;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
81 # }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
82 my $total;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
83 for (my $s=0;$s<@data ;$s++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
84 $total+=$data[$s];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
85 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
86 push @data,$total;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
87 # push @data,$length1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
88 # if (defined $molecules{$chr}{$start}{$end}{$strand}) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
89 # my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
90 # for (my $i=0;$i<$#old_data ;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
91 # $data[$i]+=$old_data[$i];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
92 # }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
93 # }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
94 my $data=join ";",@data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
95 $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
96 #print "$chr\t$start\t$end\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
97 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
98
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
99 close GFF;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
100
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
101 warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
102
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
103 my @sample=split/\#/,$mark;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
104 $mark=join"\"\t\"",@sample;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
105 open OUT, ">$output_file";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
106 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
107 open CLUSTER,">$temp";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
108 print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
109 foreach my $chromosome (keys %molecules){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
110 my $g = new Boost::Graph(directed=>0);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
111 my @starts = keys(%{$molecules{$chromosome}} );
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
112 @starts = sort {$a <=> $b} @starts;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
113
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
114 while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
115
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
116
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
117 foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
118
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
119
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
120 ###use new graph if the next srna is too far away from this one..
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
121 if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
122
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
123
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
124 ##dump the info from the old graph
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
125 if (scalar(@{$g->get_nodes()}) > 2){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
126
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
127 my $cluster_coeff = get_cc($g);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
128 if ($cluster_coeff >= $clus){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
129 dump_locus($g, $cluster_coeff);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
130 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
131 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
132
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
133
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
134 $g = new Boost::Graph(directed=>0);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
135
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
136 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
137
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
138 foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
139
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
140 unless ($e eq $srna_end){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
141 my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
142 my $en = $chromosome. ':' . $srna_start . ':' . $e;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
143 $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1');
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
144 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
145
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
146 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
147
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
148 foreach my $start (@starts){ ##build graph of overlaps
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
149 my $new = 0;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
150 last if $start - $min_inc > $srna_end;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
151 if ($start - $min_inc <= $srna_end){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
152
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
153 my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
154 foreach my $end (keys %{$molecules{$chromosome}{$start}}){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
155
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
156 my $end_node = $chromosome . ':' . $start . ':' . $end;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
157 $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1');
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
158 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
159
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
160 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
161 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
162 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
163
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
164 if (!(defined $starts[0])) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
165 ##dump the info from the last graph
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
166 if (scalar(@{$g->get_nodes()}) > 2){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
167
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
168 my $cluster_coeff = get_cc($g);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
169 if ($cluster_coeff >= $clus){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
170 dump_locus($g, $cluster_coeff);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
171 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
172 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
173 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
174 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
175 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
176
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
177 warn "Loci printed\nFinished\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
178
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
179 my $endtime = time;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
180
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
181 my $elapsed = $endtime - $starttime;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
182
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
183 warn "Time elapsed = $elapsed s\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
184 close OUT;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
185 close CLUSTER;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
186 #########################################################################################
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
187 sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
188
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
189 my $graph = shift;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
190
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
191 my @component = @{$graph->get_nodes()}; #number of nodes
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
192 my @clustering_coefficients;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
193
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
194 foreach my $vertex (@component)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
195 {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
196
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
197 my @neighbours = @{$graph->neighbors($vertex)};
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
198
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
199 my %edges_in_graph;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
200
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
201 my $n = @neighbours; #n = the number of neighbours
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
202 my $k = ($n * ($n - 1))/2; #k = total number of possible connections
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
203
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
204 my $e= 0; #actual number of connections within sub-graph
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
205
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
206 foreach my $neighbour (@neighbours)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
207 {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
208 foreach my $neighbour_2 (@neighbours)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
209 {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
210 my $edge1 = "$neighbour\t$neighbour_2";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
211 my $edge2 = "$neighbour_2\t$neighbour";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
212 unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2})
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
213 {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
214 if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour))
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
215 {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
216 ++$e;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
217 $edges_in_graph{$edge1}=1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
218 $edges_in_graph{$edge2}=1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
219 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
220 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
221 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
222 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
223
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
224 if ($k >= 1)
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
225 {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
226 my $c = $e / $k;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
227 push @clustering_coefficients, $c;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
228 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
229 else {push @clustering_coefficients, '0';}
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
230 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
231
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
232 my $graph_n = scalar(@clustering_coefficients);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
233 my $graph_cc = 0;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
234 foreach my $cc (@clustering_coefficients){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
235
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
236 $graph_cc = $graph_cc + $cc;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
237
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
238 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
239 $graph_cc = $graph_cc / $graph_n;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
240
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
241 return $graph_cc;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
242 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
243
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
244 ############################################################################################################
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
245
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
246 sub dump_locus{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
247
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
248 my $g = shift;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
249 my $cc = shift;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
250 my $chr;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
251 my $start = 1000000000000000000000000000000000000000000000;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
252 my $end = -1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
253 my @sample;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
254 my @tag;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
255 foreach my $node (@{$g->get_nodes()}){
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
256
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
257 $node =~ m/^(\S+):(\d+):(\d+)$/;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
258 my $c=$1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
259 my $s=$2;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
260 my $e=$3;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
261 # my @data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
262 foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
263 my @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
264 push @tag,($s.",".$e.",".$str.",".$data[-1]);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
265 # for (my $i=0;$i<$#old_data ;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
266 # $data[$i]+=$old_data[$i];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
267 # }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
268 my $length=$e-$s+1;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
269 if ($length>30) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
270 $length=40;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
271 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
272 push @data,$length;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
273 my $data=join ";",@data;#sample_exp/total_exp/length;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
274 push @sample,$data;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
275 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
276
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
277 $chr = $c;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
278 $start = $s if $s < $start;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
279 $end = $e if $e > $end;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
280 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
281 my $tag=join";",@tag;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
282 my $tag_number=@tag;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
283 my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
284 if ($max_length==40) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
285 $max_length="\>30";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
286 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
287 my $cluster_exp=join"\t",@cluster_exp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
288 my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
289 print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
290 print OUT $gff, "\n";
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
291 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
292
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
293 sub Max_length{
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
294 my @exp=@{$_[0]};
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
295 my %sample_length;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
296 my $total_exp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
297 my @each;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
298 for (my $i=0;$i<=$#exp ;$i++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
299 my @tag=split/;/,$exp[$i];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
300 my $length=pop(@tag);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
301 my $exp=pop(@tag);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
302 $sample_length{$length}+=$exp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
303 $total_exp+=$exp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
304 for (my $j=0;$j<=$#tag ;$j++) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
305 $each[$j]+=$tag[$j];
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
306 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
307 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
308 my $max=0;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
309 my $max_key;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
310 foreach my $key (sort keys %sample_length) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
311 my $p=$sample_length{$key}/$total_exp;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
312 if ($p>$max) {
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
313 $max=$p;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
314 $max_key=$key;
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
315 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
316 $sample_length{$key}=sprintf("%.2f",$p);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
317 }
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
318 return($max_key,$sample_length{$max_key},@each);
87fe81de0931 Uploaded
bigrna
parents:
diff changeset
319 }