annotate mirplant2/nibls.pl @ 1:a2f4ef376417 draft default tip

Change the R revision
author adefelicibus
date Mon, 11 Apr 2016 12:20:48 -0400
parents 6006e58458ae
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
1 #!/usr/bin/perl
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
2 #####################################################################################################
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
3 #LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it.
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
4 #
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
5 # Dan MacLean
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
6 # dan.maclean@sainsbury-laboratory.ac.uk
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
7 #
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
8 # This program is free academic software; academic and non-profit
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
9 # users may redistribute it freely.
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
10 #
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
14 #
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
15 # This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
16 # see included file GPL3.txt
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
17 #
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
18 #
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
19
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
20
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
21 ###Dont forget you will need ...
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
22 #####################################################################################################
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
23 # Boost::Graph
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
24 #Copyright 2005 by David Burdick
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
25 # Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
26 #Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
27 #####################################################################################################
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
28
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
29
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
30
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
31 use strict;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
32 use warnings;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
33 use Boost::Graph;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
34 use Getopt::Long;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
35
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
36
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
37 my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
38
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
39 my $gff_file ;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
40 my $min_inc = 5;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
41 my $clus = 0.6;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
42 my $buff = 0;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
43 my $output_file;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
44 my $temp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
45 my $mark;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
46
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
47 GetOptions(
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
48
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
49 'c=f' => \$clus,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
50 'm=i' => \$min_inc,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
51 'f|file=s' => \$gff_file,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
52 'b=i' => \$buff,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
53 'o=s' => \$output_file,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
54 't=s' => \$temp,
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
55 'k=s' => \$mark
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
56 ) ;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
57
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
58
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
59 die $usage unless $gff_file;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
60
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
61
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
62 my $starttime = time;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
63 warn "started $starttime\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
64
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
65 ## load in data
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
66 my %molecules; # stores starts and ends of srnas
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
67 open GFF, "<$gff_file";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
68
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
69 while (my $entry = <GFF>){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
70
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
71 chomp $entry;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
72 next if($entry=~/^\#/);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
73 my @data = split(/\t/,$entry);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
74 my $chr=shift @data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
75 my $strand=shift @data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
76 my $start=shift @data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
77 my $end=shift @data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
78 # my $length1=$end-$start+1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
79 # if ($length1>30) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
80 # $length1=40;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
81 # }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
82 my $total;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
83 for (my $s=0;$s<@data ;$s++) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
84 $total+=$data[$s];
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
85 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
86 push @data,$total;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
87 # push @data,$length1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
88 # if (defined $molecules{$chr}{$start}{$end}{$strand}) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
89 # my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand});
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
90 # for (my $i=0;$i<$#old_data ;$i++) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
91 # $data[$i]+=$old_data[$i];
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
92 # }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
93 # }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
94 my $data=join ";",@data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
95 $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
96 #print "$chr\t$start\t$end\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
97 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
98
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
99 close GFF;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
100
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
101 warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
102
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
103 my @sample=split/\#/,$mark;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
104 $mark=join"\"\t\"",@sample;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
105 open OUT, ">$output_file";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
106 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
107 open CLUSTER,">$temp";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
108 print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
109 foreach my $chromosome (keys %molecules){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
110 my $g = new Boost::Graph(directed=>0);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
111 my @starts = keys(%{$molecules{$chromosome}} );
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
112 @starts = sort {$a <=> $b} @starts;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
113
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
114 while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
115
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
116
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
117 foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
118
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
119
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
120 ###use new graph if the next srna is too far away from this one..
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
121 if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
122
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
123
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
124 ##dump the info from the old graph
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
125 if (scalar(@{$g->get_nodes()}) > 2){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
126
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
127 my $cluster_coeff = get_cc($g);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
128 if ($cluster_coeff >= $clus){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
129 dump_locus($g, $cluster_coeff);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
130 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
131 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
132
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
133
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
134 $g = new Boost::Graph(directed=>0);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
135
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
136 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
137
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
138 foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
139
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
140 unless ($e eq $srna_end){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
141 my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
142 my $en = $chromosome. ':' . $srna_start . ':' . $e;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
143 $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1');
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
144 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
145
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
146 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
147
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
148 foreach my $start (@starts){ ##build graph of overlaps
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
149 my $new = 0;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
150 last if $start - $min_inc > $srna_end;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
151 if ($start - $min_inc <= $srna_end){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
152
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
153 my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
154 foreach my $end (keys %{$molecules{$chromosome}{$start}}){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
155
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
156 my $end_node = $chromosome . ':' . $start . ':' . $end;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
157 $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1');
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
158 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
159
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
160 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
161 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
162 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
163
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
164 if (!(defined $starts[0])) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
165 ##dump the info from the last graph
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
166 if (scalar(@{$g->get_nodes()}) > 2){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
167
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
168 my $cluster_coeff = get_cc($g);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
169 if ($cluster_coeff >= $clus){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
170 dump_locus($g, $cluster_coeff);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
171 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
172 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
173 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
174 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
175 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
176
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
177 warn "Loci printed\nFinished\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
178
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
179 my $endtime = time;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
180
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
181 my $elapsed = $endtime - $starttime;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
182
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
183 warn "Time elapsed = $elapsed s\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
184 close OUT;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
185 close CLUSTER;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
186 #########################################################################################
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
187 sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
188
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
189 my $graph = shift;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
190
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
191 my @component = @{$graph->get_nodes()}; #number of nodes
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
192 my @clustering_coefficients;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
193
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
194 foreach my $vertex (@component)
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
195 {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
196
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
197 my @neighbours = @{$graph->neighbors($vertex)};
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
198
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
199 my %edges_in_graph;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
200
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
201 my $n = @neighbours; #n = the number of neighbours
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
202 my $k = ($n * ($n - 1))/2; #k = total number of possible connections
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
203
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
204 my $e= 0; #actual number of connections within sub-graph
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
205
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
206 foreach my $neighbour (@neighbours)
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
207 {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
208 foreach my $neighbour_2 (@neighbours)
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
209 {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
210 my $edge1 = "$neighbour\t$neighbour_2";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
211 my $edge2 = "$neighbour_2\t$neighbour";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
212 unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2})
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
213 {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
214 if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour))
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
215 {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
216 ++$e;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
217 $edges_in_graph{$edge1}=1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
218 $edges_in_graph{$edge2}=1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
219 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
220 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
221 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
222 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
223
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
224 if ($k >= 1)
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
225 {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
226 my $c = $e / $k;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
227 push @clustering_coefficients, $c;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
228 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
229 else {push @clustering_coefficients, '0';}
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
230 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
231
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
232 my $graph_n = scalar(@clustering_coefficients);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
233 my $graph_cc = 0;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
234 foreach my $cc (@clustering_coefficients){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
235
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
236 $graph_cc = $graph_cc + $cc;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
237
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
238 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
239 $graph_cc = $graph_cc / $graph_n;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
240
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
241 return $graph_cc;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
242 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
243
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
244 ############################################################################################################
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
245
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
246 sub dump_locus{
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
247
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
248 my $g = shift;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
249 my $cc = shift;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
250 my $chr;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
251 my $start = 1000000000000000000000000000000000000000000000;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
252 my $end = -1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
253 my @sample;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
254 my @tag;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
255 foreach my $node (@{$g->get_nodes()}){
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
256
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
257 $node =~ m/^(\S+):(\d+):(\d+)$/;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
258 my $c=$1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
259 my $s=$2;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
260 my $e=$3;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
261 # my @data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
262 foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
263 my @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
264 push @tag,($s.",".$e.",".$str.",".$data[-1]);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
265 # for (my $i=0;$i<$#old_data ;$i++) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
266 # $data[$i]+=$old_data[$i];
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
267 # }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
268 my $length=$e-$s+1;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
269 if ($length>30) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
270 $length=40;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
271 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
272 push @data,$length;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
273 my $data=join ";",@data;#sample_exp/total_exp/length;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
274 push @sample,$data;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
275 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
276
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
277 $chr = $c;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
278 $start = $s if $s < $start;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
279 $end = $e if $e > $end;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
280 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
281 my $tag=join";",@tag;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
282 my $tag_number=@tag;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
283 my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
284 if ($max_length==40) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
285 $max_length="\>30";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
286 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
287 my $cluster_exp=join"\t",@cluster_exp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
288 my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
289 print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
290 print OUT $gff, "\n";
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
291 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
292
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
293 sub Max_length{
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
294 my @exp=@{$_[0]};
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
295 my %sample_length;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
296 my $total_exp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
297 my @each;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
298 for (my $i=0;$i<=$#exp ;$i++) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
299 my @tag=split/;/,$exp[$i];
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
300 my $length=pop(@tag);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
301 my $exp=pop(@tag);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
302 $sample_length{$length}+=$exp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
303 $total_exp+=$exp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
304 for (my $j=0;$j<=$#tag ;$j++) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
305 $each[$j]+=$tag[$j];
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
306 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
307 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
308 my $max=0;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
309 my $max_key;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
310 foreach my $key (sort keys %sample_length) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
311 my $p=$sample_length{$key}/$total_exp;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
312 if ($p>$max) {
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
313 $max=$p;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
314 $max_key=$key;
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
315 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
316 $sample_length{$key}=sprintf("%.2f",$p);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
317 }
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
318 return($max_key,$sample_length{$max_key},@each);
6006e58458ae Uploaded
adefelicibus
parents:
diff changeset
319 }