0
|
1 #!/usr/bin/perl
|
|
2 use strict;
|
|
3 use File::Basename;
|
|
4 use List::MoreUtils qw( minmax );
|
|
5
|
|
6 # --------------------------------------------------------------------------------------------------
|
|
7 # author: steffen lott
|
|
8 # mail: steffen.lott@uni-freiburg.de
|
|
9 # date: 06-10-2014
|
|
10 # version: 1.6
|
|
11 #
|
|
12 # description:
|
|
13 # The tool converts an output from MEGAN in a special network which can visuallized with
|
|
14 # cytoscape. Gaper produces two files, the first one contains the network and the second one
|
|
15 # describes the attributes of the network.
|
|
16 # --------------------------------------------------------------------------------------------------
|
|
17
|
|
18 # return version number
|
|
19 if (@ARGV == 0) {
|
|
20 print "CoVennTree-Version 1.6\n";
|
|
21 print "COMMAND\n";
|
|
22 print "coventree argv0 argv1 argv2 argv3 argv4\n";
|
|
23 print "--------------\n";
|
|
24 print "argv0 = input file\n";
|
|
25 print "argv1 = color mode [1,4]\n";
|
|
26 print "argv2 = transformation function [1,7]\n";
|
|
27 print "argv3 = only leaf information => 0 ; all information => 1\n";
|
|
28 print "argv4 = output file name network\n";
|
|
29 print "argv5 = output file name attributes\n";
|
|
30 exit;
|
|
31 }
|
|
32
|
|
33
|
|
34
|
|
35
|
|
36 # container to represent the network
|
|
37 my @network = ();
|
|
38
|
|
39
|
|
40
|
|
41 # 0 PARAMETER_______________
|
|
42 # read argument from command-line
|
|
43 # important: DSV -> taxon-path, count(s) -> assigned -> tab
|
|
44 my $megan_file = $ARGV[0];
|
|
45
|
|
46
|
|
47 # 1 PARAMETER_______________
|
|
48 my $colorMode;
|
|
49 # color mode for venn-diagrams 0,1,2,3,4
|
|
50 if(defined $ARGV[1]){
|
|
51 $colorMode = $ARGV[1];
|
|
52 }else{
|
|
53 $colorMode = 3;
|
|
54 }
|
|
55
|
|
56
|
|
57 # 2 PARAMETER_______________
|
|
58 # 2 different transformations functions
|
|
59 my $transFnc = "";
|
|
60 if(defined $ARGV[2]){ # small datasets
|
|
61 $transFnc = $ARGV[2];
|
|
62 }else{
|
|
63 $transFnc = 1;
|
|
64 }
|
|
65
|
|
66
|
|
67 # 3 PARAMETER_______________
|
|
68 # the user can switch between "only leaf information"
|
|
69 # or the complete tree information. the last one takes also the not assigned reads
|
|
70 # and creates artificial nodes to keep this number
|
|
71 my $onlyLeafs;
|
|
72 if(defined $ARGV[3]){
|
|
73 if($ARGV[3] == 0){
|
|
74 $onlyLeafs = "on";
|
|
75 }elsif($ARGV[3] == 1){
|
|
76 $onlyLeafs = "off";
|
|
77 }
|
|
78 }else{ # all information will be used! not assigned and assigned
|
|
79 $onlyLeafs = "off";
|
|
80 }
|
|
81
|
|
82 # 4 PARAMETER_______________
|
|
83 # output -> network
|
|
84 my $out_network = $ARGV[4];
|
|
85
|
|
86 # 5 PARAMETER_______________
|
|
87 # output -> attributes
|
|
88 my $out_attributes = $ARGV[5];
|
|
89
|
|
90
|
|
91
|
|
92 # check the input format of the file. only a file with exactly three datasets are excepted. the other one will fill up with zeros
|
|
93
|
|
94
|
|
95 # read-in MEGAN-file
|
|
96 # if #{data-sets} = 1 -> no heade line
|
|
97 # if #{data-sets} > 1 -> heade line " #Datasets set1 set2 ..."
|
|
98 open(inFile , "<$megan_file") || die "File not found - \"Path-File\"!\n";
|
|
99 my @pairIds = ();
|
|
100 my $header = "";
|
|
101 my @input_file = ();
|
|
102 my @numberOfSets = ();
|
|
103
|
|
104 while(<inFile>){
|
|
105 chomp($_);
|
|
106 if($_ =~ /^#/){
|
|
107 $header = $_;
|
|
108 @numberOfSets = split("\t", $_);
|
|
109 }else{
|
|
110 #print @numberOfSets . "\n";
|
|
111 # check the number of datasets are included
|
|
112 if(@numberOfSets == 0 || @numberOfSets == 1 || @numberOfSets > 4){ # no set is in the file
|
|
113 print "Error: File doesn't contain any dataset or contain more than three!";
|
|
114 exit;
|
|
115 }elsif(@numberOfSets == 2){ # only one set is in the file -> add 2x zeros
|
|
116 $_ .= "\t" . 0 . "\t" . 0;
|
|
117 }elsif(@numberOfSets == 3){ # only two sets are in the file -> add 1x zeros
|
|
118 $_ .= "\t" . 0;
|
|
119 }
|
|
120
|
|
121 addToNetwork($_);
|
|
122 push(@input_file, $_);
|
|
123 }
|
|
124 }
|
|
125 close(inFile);
|
|
126
|
|
127
|
|
128 # --------------------------------------------------------------------------------------------------------------
|
|
129 # --------------------------------------------------------------------------------------------------------------
|
|
130 # (1) PREPROCESSING: detect all leaf nodes
|
|
131 my $modifiedInput = detectNonLeafs();
|
|
132
|
|
133 # (2) MAIN COMPUTATION: compute deep by deep (path deep ex. root;Viruses; => deep 2)
|
|
134 my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp();
|
|
135
|
|
136 # (3) VENN-END-PREPERATION: sum up all single values (d1-d3), transform abs values into
|
|
137 my $vennToStore = vennForCytoscape($vennClusterOut, $specialNumberOut);
|
|
138
|
|
139 # (4) SAVE RESULTS INTO FILES: one file contains the network (.sif), the other one contains the attributes
|
|
140 storeNetwork();
|
|
141 store2FileVenn($vennToStore);
|
|
142 # --------------------------------------------------------------------------------------------------------------
|
|
143 # --------------------------------------------------------------------------------------------------------------
|
|
144
|
|
145
|
|
146
|
|
147
|
|
148 sub store2FileVenn{
|
|
149 my $outVenn = $_[0];
|
|
150 # test
|
|
151 my $tmpFileName = $out_attributes;
|
|
152
|
|
153 #my $tmpFileName = "./network.venn";
|
|
154 open(FILE , ">$tmpFileName") || die "File can't be written - \"venn - File\"!\n";
|
|
155 print FILE join("\n", @{$outVenn}) . "\n";
|
|
156 close(FILE);
|
|
157 }
|
|
158
|
|
159
|
|
160 sub vennForCytoscape{
|
|
161 my $vennCluster = $_[0];
|
|
162 my $specialNum = $_[1];
|
|
163 my $specNformat = 0;
|
|
164 my @out = ();
|
|
165 # datastructure $vennCluster=> vennCluster[]{}{} => values
|
|
166 #delete $vennCluster->[0]{"no"};
|
|
167 my $frameSize = 0;
|
|
168 my $values = 0;
|
|
169 my $googleURL = "";
|
|
170 my $outStr = "";
|
|
171
|
|
172 for(my $i = 0 ; $i < @{$vennCluster}; $i++){
|
|
173 while ( my($key, $value) = each %{$vennCluster->[$i]} ){
|
|
174 while ( my($key2, $value2) = each %{$vennCluster->[$i]{$key}} ){
|
|
175 $values = $vennCluster->[$i]{$key}{$key2};
|
|
176 $frameSize = getCorrectedFrameSize($values);
|
|
177
|
|
178 if(defined $specialNum->[$i]{$key}{$key2}){
|
|
179 $specNformat = $key2 . "[" . sprintf("%.3f", $specialNum->[$i]{$key}{$key2}) . "]";
|
|
180 }else{
|
|
181 $specNformat = $key2;
|
|
182 }
|
|
183
|
|
184 # old version, this version works pretty well
|
|
185 #$frameSize = getFrameSize($values);
|
|
186 $googleURL = computeGoogleApiStrRotation($frameSize,$values,$colorMode);
|
|
187 $outStr = $key2 . "\t" . $googleURL . "\t" . $specNformat . "\t" . $values;
|
|
188 push(@out, $outStr);
|
|
189 }
|
|
190 }
|
|
191 }
|
|
192 return \@out;
|
|
193 }
|
|
194
|
|
195
|
|
196 # this function keep the biggest node in the lowest depth,...
|
|
197 sub computeGoogleApiStrRotation{
|
|
198 my $frameSize = $_[0];
|
|
199 my $values = $_[1];
|
|
200 my $colMode = $_[2];
|
|
201 my @relVal = ();
|
|
202 my @col = ();
|
|
203 my %sort = ();
|
|
204 my @store = ();
|
|
205 my %ovHash = ();
|
|
206 my @storeOldPos = ();
|
|
207 my @spVal = split(" ", $values);
|
|
208 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
|
|
209
|
|
210 # user color-mode
|
|
211 if($colMode == 0){
|
|
212 $col[0] = "18A3F2"; $col[1] = "FA0800"; $col[2] = "FFF905";
|
|
213 }elsif($colMode == 1){
|
|
214 $col[0] = "FF2A00"; $col[1] = "9CFF00"; $col[2] = "00CCFF";
|
|
215 }elsif($colMode == 2){
|
|
216 $col[0] = "B4FF00"; $col[1] = "FF00C6"; $col[2] = "00AEFF";
|
|
217 }elsif($colMode == 3){
|
|
218 $col[0] = "82FF00"; $col[1] = "7E00FF"; $col[2] = "FF003B";
|
|
219 }elsif($colMode == 4){
|
|
220 $col[0] = "1A1A1A"; $col[1] = "8A8A8A"; $col[2] = "C7C7C7";
|
|
221 }
|
|
222
|
|
223 $sort{"0"} = $spVal[0]; $sort{"1"} = $spVal[1]; $sort{"2"} = $spVal[2];
|
|
224
|
|
225 my $tmp = 0;
|
|
226 foreach(@spVal){
|
|
227 if($sum != 0){
|
|
228 $tmp = $_ * 100 / $sum;
|
|
229 }else{
|
|
230 $tmp = 0;
|
|
231 }
|
|
232 push(@relVal,$tmp);
|
|
233 }
|
|
234
|
|
235 $ovHash{"01"} = $relVal[3]; $ovHash{"10"} = $relVal[3];
|
|
236 $ovHash{"02"} = $relVal[4]; $ovHash{"20"} = $relVal[4];
|
|
237 $ovHash{"21"} = $relVal[5]; $ovHash{"12"} = $relVal[5];
|
|
238
|
|
239 my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco=";
|
|
240 # change color position in the google output string corresponding to the highest value
|
|
241 foreach my $k( sort {$sort{$b}<=>$sort{$a}} keys %sort) {
|
|
242 $url .= $col[$k] . ",";
|
|
243 push(@store, $k);
|
|
244 }
|
|
245 chop($url);
|
|
246
|
|
247 $url .= "&cht=v&chd=t:";
|
|
248 # sort node values in the right order
|
|
249 for(my $i = 0 ; $i < @relVal - 4 ; $i++){
|
|
250 #print $i . "\t" . $store[$i] . "\t" . $relVal[$store[$i]] . "\n";
|
|
251 $url .= sprintf("%.1f", $relVal[$store[$i]]) . ",";
|
|
252 }
|
|
253 # sort intersection values in the right order
|
|
254 my $tStr0 = $store[0] . $store[1];
|
|
255 my $tStr1 = $store[0] . $store[2];
|
|
256 my $tStr2 = $store[1] . $store[2];
|
|
257 $url .= sprintf("%.1f", $ovHash{$tStr0}) . "," . sprintf("%.1f", $ovHash{$tStr1}) . "," . sprintf("%.1f", $ovHash{$tStr2}) . ",";
|
|
258 $url .= "0.0";
|
|
259 $url .= "&chf=bg,s,e0dede00";
|
|
260 }
|
|
261
|
|
262
|
|
263 # original function without any node rotation. the order of the nodes is always the same
|
|
264 sub computeGoogleApiStr{
|
|
265 my $frameSize = $_[0];
|
|
266 my $values = $_[1];
|
|
267 my @relVal = ();
|
|
268 my @spVal = split(" ", $values);
|
|
269 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
|
|
270
|
|
271 foreach(@spVal){
|
|
272 my $tmp = $_ * 100 / $sum;
|
|
273 push(@relVal,$tmp);
|
|
274 }
|
|
275 my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize .
|
|
276 #color
|
|
277 "&chco=FF6342,ADDE63,63C6DE" .
|
|
278 #"&chco=0000FF,0099FF,00FFFF" .
|
|
279
|
|
280 "&cht=v&chd=t:";
|
|
281
|
|
282 for(my $i = 0 ; $i < @relVal - 1 ; $i++){
|
|
283 $url .= sprintf("%.1f", $relVal[$i]) . ",";
|
|
284 }
|
|
285 $url .= sprintf("%.1f", $relVal[-1]);
|
|
286
|
|
287 $url .= "&chf=bg,s,e0dede00";
|
|
288 }
|
|
289
|
|
290
|
|
291 sub getCorrectedFrameSize{
|
|
292 my $values = $_[0];
|
|
293 my @spVal = split(" ", $values);
|
|
294 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
|
|
295 my $frame = lookupPixelSQRT($sum);
|
|
296
|
|
297 # find maxValue position ([0] - [2])
|
|
298 my $arrPos = getMaxPos($spVal[0], $spVal[1], $spVal[2]);
|
|
299 my $addOver= -1;
|
|
300 my $addNode= -1;
|
|
301 my $addSum = -1;
|
|
302
|
|
303 if($arrPos == 0){
|
|
304 $addNode = $spVal[1] + $spVal[2];
|
|
305 $addOver = $spVal[3] + $spVal[4];
|
|
306 }elsif($arrPos == 1){
|
|
307 $addNode = $spVal[0] + $spVal[2];
|
|
308 $addOver = $spVal[3] + $spVal[5];
|
|
309 }elsif($arrPos == 2){
|
|
310 $addNode = $spVal[0] + $spVal[1];
|
|
311 $addOver = $spVal[4] + $spVal[5];
|
|
312 }
|
|
313 # if the 2 of 3 nodes have no overlap to the largest one, than the complete value
|
|
314 # will be used to compute a frame and add this to the existing frame
|
|
315 $addSum = $addNode - $addOver;
|
|
316
|
|
317 my $addFrame = lookupPixelSQRT($addSum);
|
|
318 my $sumFrame = $frame + $addFrame;
|
|
319 return $sumFrame;
|
|
320 }
|
|
321
|
|
322
|
|
323 sub getMaxPos{
|
|
324 my $pos = -1;
|
|
325 if( ($_[0] >= $_[1]) && ($_[0] >= $_[2]) ){
|
|
326 $pos = 0;
|
|
327 }elsif( ($_[1] >= $_[0]) && ($_[1] >= $_[2]) ){
|
|
328 $pos = 1;
|
|
329 }else{
|
|
330 $pos = 2;
|
|
331 }
|
|
332 return $pos;
|
|
333 }
|
|
334
|
|
335
|
|
336 sub getFrameSize{
|
|
337 my $values = $_[0];
|
|
338 my @spVal = split(" ", $values);
|
|
339 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
|
|
340 my $frame = lookupPixel($sum);
|
|
341 return $frame;
|
|
342 }
|
|
343
|
|
344
|
|
345
|
|
346 sub clusterVennBottomUp{
|
|
347 # transform $modifiedInput into datastructure
|
|
348 # container => [deep]{parent}{child}
|
|
349 my @container = ();
|
|
350 my @containerSpecial = ();
|
|
351 my @nodeValues = ();
|
|
352 my $maxDeep = 0;
|
|
353 my %helperHash = ();
|
|
354 my %specialMatrixAll = ();
|
|
355
|
|
356 foreach(@{$modifiedInput}){
|
|
357 my @tmpArr = split('\t', $_);
|
|
358 my @path = split(';' , $tmpArr[0]);
|
|
359 my $deep = @path - 1;
|
|
360
|
|
361 if(($deep - 1) >= 0){
|
|
362 $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1];
|
|
363 $nodeValues[$deep]{$path[-2]}{$path[-1]} = "f";
|
|
364 }else{
|
|
365 $container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
|
|
366 }
|
|
367 }
|
|
368 # start computation from the deepest path to the root node
|
|
369 for(my $i = (@container-1) ; $i >= 0 ; $i--){
|
|
370 while ( my($key, $value) = each %{$container[$i]} ){
|
|
371 # update all predecessor nodes
|
|
372 while ( my($keyUp, $valueUp) = each %helperHash ){
|
|
373 if(exists $container[$i]{$key}{$keyUp}){
|
|
374 $container[$i]{$key}{$keyUp} = $valueUp;
|
|
375 # compute special value by decompose venn's and add special value
|
|
376 $containerSpecial[$i]{$key}{$keyUp} = vennCongruousness(\@{$specialMatrixAll{$keyUp}});
|
|
377 }
|
|
378 }
|
|
379 # group all nodes which has the same predecessor id and sum up the values
|
|
380 while ( my($key2, $value2) = each %{$container[$i]{$key}} ){
|
|
381 if(exists $helperHash{$key}){
|
|
382 $helperHash{$key} = addValues($helperHash{$key}, $value2);
|
|
383 #push(@{$specialMatrixAll{$key}}, $value2);
|
|
384 #print $key . "\t" . $value2 . "\n";
|
|
385 }else{
|
|
386 $helperHash{$key} = $value2;
|
|
387 }
|
|
388 #print $key . "\t" . $value2 . "\n";
|
|
389 push(@{$specialMatrixAll{$key}}, $value2);
|
|
390 }
|
|
391 }
|
|
392 }
|
|
393 return \@container, \@containerSpecial;
|
|
394 }
|
|
395
|
|
396
|
|
397 sub vennCongruousness{
|
|
398 my $inSpecMatrix = $_[0];
|
|
399 my $numOfSets = @numberOfSets - 1;
|
|
400 my @arrVal = (); my @matrix = ();
|
|
401 my @sum = ();
|
|
402 my $numVenn = 0;
|
|
403 my %actSet = ();
|
|
404 my %actOvp = ();
|
|
405 $actSet{"result"} = 0;
|
|
406 $actOvp{"result"} = 0;
|
|
407
|
|
408 # (step 1) - sum up rows
|
|
409 foreach (@{$inSpecMatrix}){
|
|
410 @arrVal = split(" ", $_);
|
|
411 $sum[0] += $arrVal[0]; $sum[1] += $arrVal[1]; $sum[2] += $arrVal[2];
|
|
412 $sum[3] += $arrVal[3]; $sum[4] += $arrVal[4]; $sum[5] += $arrVal[5];
|
|
413
|
|
414 if($arrVal[0] > 0){
|
|
415 if(!(exists $actSet{1})){
|
|
416 $actSet{1} = 1;
|
|
417 $actSet{"result"} += 1;
|
|
418 }
|
|
419 }
|
|
420 if($arrVal[1] > 0){
|
|
421 if(!(exists $actSet{2})){
|
|
422 $actSet{2} = 1;
|
|
423 $actSet{"result"} += 1;
|
|
424 }
|
|
425 }
|
|
426 if($arrVal[2] > 0){
|
|
427 if(!(exists $actSet{3})){
|
|
428 $actSet{3} = 1;
|
|
429 $actSet{"result"} += 1;
|
|
430 }
|
|
431 }
|
|
432 if($arrVal[3] > 0){
|
|
433 if(!(exists $actOvp{1})){
|
|
434 $actOvp{1} = 1;
|
|
435 $actOvp{"result"} += 1;
|
|
436 }
|
|
437 }
|
|
438 if($arrVal[4] > 0){
|
|
439 if(!(exists $actOvp{2})){
|
|
440 $actOvp{2} = 1;
|
|
441 $actOvp{"result"} += 1;
|
|
442 }
|
|
443 }
|
|
444 if($arrVal[5] > 0){
|
|
445 if(!(exists $actOvp{3})){
|
|
446 $actOvp{3} = 1;
|
|
447 $actOvp{"result"} += 1;
|
|
448 }
|
|
449 }
|
|
450 }
|
|
451
|
|
452 # (step 2) - calc ratios (-1)
|
|
453 my $i = 0;
|
|
454 foreach (@{$inSpecMatrix}){
|
|
455 @arrVal = split(" ", $_);
|
|
456 for(my $j = 0 ; $j < @arrVal ; $j++){ # eventuell -1 da index von 0 - 6 anstatt 0 - 5 laeuft
|
|
457 # div zero !
|
|
458 if($arrVal[$j] == 0){
|
|
459 $matrix[$i][$j] = 0;
|
|
460 }else{
|
|
461 #print $j . "\t" . $sum[$j] . " \t" . $arrVal[$j] . "\n";
|
|
462 $matrix[$i][$j] = $sum[$j] / $arrVal[$j];
|
|
463 }
|
|
464 }
|
|
465 $i++;
|
|
466 }
|
|
467
|
|
468 $numVenn = $i;
|
|
469 # (step 3) - sum up data set ratios d1-d3
|
|
470 @sum = ();
|
|
471 for(my $j = 0 ; $j < @matrix; $j++){
|
|
472 $sum[0] += $matrix[$j][0]; $sum[1] += $matrix[$j][1]; $sum[2] += $matrix[$j][2];
|
|
473 $sum[3] += $matrix[$j][3]; $sum[4] += $matrix[$j][4]; $sum[5] += $matrix[$j][5];
|
|
474 }
|
|
475 # (step 4) - calc ratios -> max(d_i, #{V}) / min(d_i, #{V})
|
|
476 my @condensedM = (); my $max = 0; my $min = 0;
|
|
477 for(my $j = 0 ; $j < @sum ; $j++){
|
|
478 $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]];
|
|
479 $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]];
|
|
480
|
|
481 if($min == 0){
|
|
482 $sum[$j] = 0;
|
|
483 }else{
|
|
484 #$sum[$j] = $max / $min;
|
|
485 $sum[$j] = $sum[$j] / $numVenn;
|
|
486 }
|
|
487 #print "-> " . $j . "\t" . $sum[$j] . "\t" . $max . "\t" . $min . "\n";
|
|
488 }
|
|
489 # (step 5) - normalize values between zero and one -> [0..1]
|
|
490 for(my $j = 0 ; $j < @sum ; $j++){
|
|
491 $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]];
|
|
492 $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]];
|
|
493
|
|
494 if($max == 0){
|
|
495 $sum[$j] = 0;
|
|
496 }else{
|
|
497 $sum[$j] = $min / $max;
|
|
498 }
|
|
499 #print "=> " . $j . "\t" . $sum[$j] . "\t" . $min . "\t" . $max . "\n";
|
|
500 }
|
|
501 # (step 6) - combine all decomposed values and create only one value
|
|
502 # case a: only one dataset -> $numOfSets == 1
|
|
503 if($numOfSets == 1){
|
|
504 #print "res: " . $sum[0] . "\n";
|
|
505 return $sum[0];
|
|
506 }elsif($numOfSets == 2){
|
|
507 # evtl fallunteruntescheiung
|
|
508 print "sum1: " . $sum[0] . "\t" . "sum2: " . $sum[1] . "\t" . "ovp1-2: " . $sum[3] . "\t" . "sets: " . $actSet{"result"} . "\t" . "ovp: " . $actOvp{"result"} . "\n";
|
|
509 if($actOvp{"result"} == 0){
|
|
510 my $t = ((($sum[0] + $sum[1]) / $actSet{"result"}) );
|
|
511 #print "res2 " . $t . " ***\n";
|
|
512 return ((($sum[0] + $sum[1]) / $actSet{"result"}) );
|
|
513 }else{
|
|
514 print "foobar\n";
|
|
515 my $t = ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) );
|
|
516 #print "res2* " . $t . " ***\n";
|
|
517 return ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) );
|
|
518 }
|
|
519 }elsif($numOfSets == 3){
|
|
520 #print $sum[0] . "\t" . $sum[1] . "\t" . $sum[2] . "\t" . $sum[3] . "\t" . $sum[4] . "\t" . $sum[5] . "\n";
|
|
521 #print $actSet{"result"} . "\t" . $actOvp{"result"} . "\n";
|
|
522 #return ((((($sum[0] + $sum[1] + $sum[2]) / $numOfSets) + (($sum[3] + $sum[4] + $sum[5]) / $numOfSets) ) / 2) );
|
|
523
|
|
524 if($actOvp{"result"} == 0){
|
|
525 my $t = (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"});
|
|
526 #print ">>>>>>> " . $t. "\n";
|
|
527 return (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"});
|
|
528 }else{
|
|
529 my $t = ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2));
|
|
530 #print ">>>>>>> " . $t. "\n";
|
|
531 return ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2));
|
|
532 }
|
|
533 }else{
|
|
534 return -1;
|
|
535 }
|
|
536 }
|
|
537
|
|
538
|
|
539 # save version of function clusterVennBottomUp()
|
|
540 #sub clusterVennBottomUp{
|
|
541 # # transform $modifiedInput into datastructure
|
|
542 # # container => [deep]{parent}{child}
|
|
543 # my @container = ();
|
|
544 # my $maxDeep = 0;
|
|
545 # my %helperHash = ();
|
|
546 #
|
|
547 # foreach(@{$modifiedInput}){
|
|
548 # my @tmpArr = split('\t', $_);
|
|
549 # my @path = split(';' , $tmpArr[0]);
|
|
550 # my $deep = @path - 1;
|
|
551 #
|
|
552 # if(($deep - 1) >= 0){
|
|
553 # $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1];
|
|
554 # }else{
|
|
555 # $container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
|
|
556 # }
|
|
557 # }
|
|
558 # # start computation from the deepest path to the root node
|
|
559 # for(my $i = (@container-1) ; $i >= 0 ; $i--){
|
|
560 # while ( my($key, $value) = each %{$container[$i]} ){
|
|
561 # # update all predecessor nodes
|
|
562 # while ( my($keyUp, $valueUp) = each %helperHash ){
|
|
563 # if(exists $container[$i]{$key}{$keyUp}){
|
|
564 # $container[$i]{$key}{$keyUp} = $valueUp;
|
|
565 # }
|
|
566 # }
|
|
567 # # group all nodes which has the same predecessor id and sum up the values
|
|
568 # while ( my($key2, $value2) = each %{$container[$i]{$key}} ){
|
|
569 # if(exists $helperHash{$key}){
|
|
570 # $helperHash{$key} = addValues($helperHash{$key}, $value2);
|
|
571 # }else{
|
|
572 # $helperHash{$key} = $value2;
|
|
573 # }
|
|
574 # }
|
|
575 # }
|
|
576 # }
|
|
577 # return \@container;
|
|
578 #}
|
|
579
|
|
580
|
|
581
|
|
582 sub addValues{
|
|
583 my $val1 = $_[0];
|
|
584 my $val2 = $_[1];
|
|
585
|
|
586 my @sV1 = split(" ", $val1);
|
|
587 my @sV2 = split(" ", $val2);
|
|
588
|
|
589 my $tmp = $sV1[0] + $sV2[0];
|
|
590 my $out = $tmp;
|
|
591
|
|
592 for(my $i = 1 ; $i < @sV1 ; $i++){
|
|
593 $tmp = $sV1[$i] + $sV2[$i];
|
|
594 $out .= " " . $tmp;
|
|
595 }
|
|
596 return $out;
|
|
597 }
|
|
598
|
|
599
|
|
600
|
|
601 # detect non leaf nodes and remove the values
|
|
602 # works on @input_file !!!
|
|
603 # this version works only with 3 depths!
|
|
604 sub detectNonLeafs{
|
|
605 my %recursiveValues = ();
|
|
606 my @modifiedFile = ();
|
|
607 my $convertedPath = "";
|
|
608
|
|
609 my @additionalNetwork = ();
|
|
610
|
|
611 # read last line
|
|
612 my @tmpArr1 = split('\t',$input_file[($#input_file)],2);
|
|
613 # -2 path direction from reward instead from the beginning. (-1 leaf,child , -2 parent,inner node)
|
|
614 my $parent1 = getId($tmpArr1[0],-2);
|
|
615 my $child1 = getId($tmpArr1[0],-1);
|
|
616 my $deep1 = getPathDeep($tmpArr1[0]);
|
|
617 my $parent2 = "";
|
|
618 my $child2 = "";
|
|
619 my $deep2 = 0;
|
|
620
|
|
621
|
|
622 # if "if-statement is true, only root node exists"
|
|
623 my $outStr = "";
|
|
624 if($parent1 == -1){
|
|
625 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
|
|
626 push(@modifiedFile, $outStr);
|
|
627 }else{
|
|
628 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
|
|
629 push(@modifiedFile, $outStr);
|
|
630
|
|
631 for(my $i = (@input_file-2) ; $i >= 0 ; $i--){
|
|
632 @tmpArr1 = split('\t',$input_file[$i],2);
|
|
633 $parent2 = getId($tmpArr1[0],-2);
|
|
634 $child2 = getId($tmpArr1[0],-1);
|
|
635 $deep2 = getPathDeep($tmpArr1[0]);
|
|
636
|
|
637 #print $parent2 . "\t" . $child2 . "\n";
|
|
638
|
|
639 # if eq true -> new leaf
|
|
640 if($parent2 eq $parent1){
|
|
641 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
|
|
642 push(@modifiedFile, $outStr);
|
|
643 }elsif($parent1 eq $child2){
|
|
644 $outStr = convertPath($tmpArr1[0]) . "\t" . "undef";
|
|
645 push(@modifiedFile, $outStr);
|
|
646
|
|
647 my @check = split('\t', $tmpArr1[1]);
|
|
648 my $tSum = 0;
|
|
649 foreach(@check){
|
|
650 $tSum += $_;
|
|
651 }
|
|
652 if(($onlyLeafs eq "off") && ($tSum > 0)){
|
|
653 $outStr = convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t" . computeLeafValues($tmpArr1[1]);
|
|
654 push(@modifiedFile, $outStr);
|
|
655 $outStr = $child2 . " pp " . "not_assigned_" . $child2;
|
|
656 push(@network, $outStr);
|
|
657 }
|
|
658 }else{
|
|
659 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
|
|
660 push(@modifiedFile, $outStr);
|
|
661 }
|
|
662
|
|
663 if($parent1 == -1){
|
|
664 push(@modifiedFile, convertPath($tmpArr1[0]));
|
|
665 last;
|
|
666 }
|
|
667 $parent1 = $parent2;
|
|
668 $child1 = $child2;
|
|
669 $deep1 = $deep2;
|
|
670 }
|
|
671 }
|
|
672 # store @additionalNetwork in .sif file!!! at this point, the sif file exists!
|
|
673 # it is stored into @network container. this container is globel defined!
|
|
674
|
|
675 return \@modifiedFile;
|
|
676 }
|
|
677
|
|
678
|
|
679 # helper function for detectNonLeafs
|
|
680 sub getPathDeep{
|
|
681 my $inPath = $_[0];
|
|
682 my @deep = split(';', $inPath);
|
|
683 my $size = $#deep;
|
|
684 return $size;
|
|
685 }
|
|
686
|
|
687 sub convertPath{
|
|
688 my $inString = $_[0];
|
|
689 $inString =~ s/"//g;
|
|
690 $inString =~ s/\s+/_/g;
|
|
691 return $inString;
|
|
692 }
|
|
693
|
|
694 sub getId{
|
|
695 my $lineToParse = $_[0];
|
|
696 my $idPos = $_[1];
|
|
697 my $stringId = "";
|
|
698 my @path = ();
|
|
699
|
|
700 $lineToParse =~ s/"//g;
|
|
701 $lineToParse =~ s/\s+/_/g;
|
|
702 @path = split(';',$lineToParse);
|
|
703 my $num = @path;
|
|
704
|
|
705 if(($num + $idPos) < 0){
|
|
706 return -1;
|
|
707 }else{
|
|
708 return $path[$idPos];
|
|
709 }
|
|
710 }
|
|
711
|
|
712 sub computeLeafValues{
|
|
713 my $meganValues = $_[0];
|
|
714 my @rawValues = split('\t', $meganValues);
|
|
715 my @nodeRelVal = ();
|
|
716
|
|
717 my $outValues = $rawValues[0] . " " . $rawValues[1] . " " . $rawValues[2];
|
|
718
|
|
719 if($rawValues[0] <= $rawValues[1]){
|
|
720 $outValues .= " " . $rawValues[0];
|
|
721 }else{
|
|
722 $outValues .= " " . $rawValues[1];
|
|
723 }
|
|
724 if($rawValues[0] <= $rawValues[2]){
|
|
725 $outValues .= " " . $rawValues[0];
|
|
726 }else{
|
|
727 $outValues .= " " . $rawValues[2];
|
|
728 }
|
|
729 if($rawValues[1] <= $rawValues[2]){
|
|
730 $outValues .= " " . $rawValues[1];
|
|
731 }else{
|
|
732 $outValues .= " " . $rawValues[2];
|
|
733 }
|
|
734 #my ($min, $max) = minmax @rawValues;
|
|
735 my $min = 0;
|
|
736 $outValues .= " " . $min;
|
|
737
|
|
738 return $outValues;
|
|
739 }
|
|
740 # -----------------------------------------------------------------------------
|
|
741
|
|
742
|
|
743 # compute network (.sif)
|
|
744 sub addToNetwork{
|
|
745 my $inLine = $_[0];
|
|
746 my @splitInLine = split('\t',$inLine);
|
|
747 # remove ' " ' from line
|
|
748 $splitInLine[0] =~ s/"//g;
|
|
749 $splitInLine[0] =~ s/\s+/_/g;
|
|
750 my @elements = split(';' ,$splitInLine[0]);
|
|
751
|
|
752 if(@elements > 1){
|
|
753 my $outString = $elements[-2] . " pp " . $elements[-1];
|
|
754 push(@network, $outString);
|
|
755 }
|
|
756 }
|
|
757
|
|
758
|
|
759 # store network in .sif file
|
|
760 sub storeNetwork{
|
|
761 # test
|
|
762 my $tmpFileName = $out_network;
|
|
763
|
|
764 #my $tmpFileName = "./network.sif";
|
|
765 open(FILE , ">$tmpFileName") || die "File can't be written - \"sif - File\"!\n";
|
|
766 print FILE join("\n", @network) . "\n";
|
|
767 close(FILE);
|
|
768 }
|
|
769
|
|
770
|
|
771 # ---------------------------------------------------------------------------------------------
|
|
772 # two different lookup-tables are available!
|
|
773 # lookupPixel() => static ; lookupPixelSQRT() => dynamic
|
|
774 #
|
|
775 # lookup absolute node-size to pixel (frame-size for venn-diagram)
|
|
776 sub lookupPixel{
|
|
777 my $query = $_[0];
|
|
778
|
|
779 if($query < 10){
|
|
780 return 30;
|
|
781 }elsif($query < 100){
|
|
782 return 40;
|
|
783 }elsif($query < 1000){
|
|
784 return 50;
|
|
785 }elsif($query < 10000){
|
|
786 return 60;
|
|
787 }elsif($query < 100000){
|
|
788 return 80;
|
|
789 }elsif($query < 1000000){
|
|
790 return 100;
|
|
791 }elsif($query < 10000000){
|
|
792 return 140;
|
|
793 }elsif($query < 20000000){
|
|
794 return 180;
|
|
795 }elsif($query < 30000000){
|
|
796 return 220;
|
|
797 }else{
|
|
798 return 250;
|
|
799 }
|
|
800 }
|
|
801
|
|
802 # lookup absolute node-size to pixel (frame-size for venn-diagram) <- this is currently used!
|
|
803 sub lookupPixelSQRT{
|
|
804
|
|
805 if ($transFnc == 0) {
|
|
806 return int(($_[0] ** (1/(1.6))) * 1.8 + 8); # 3,000 datapoints in sum
|
|
807 }elsif($transFnc == 1){
|
|
808 return int(($_[0] ** (1/(2.1))) * 1.8 + 8); # 30,000 datapoints in sum
|
|
809 }elsif($transFnc == 2){
|
|
810 return int(($_[0] ** (1/(2.6))) * 1.8 + 8); # 300,000 datapoints in sum
|
|
811 }elsif($transFnc == 3){
|
|
812 return int(($_[0] ** (1/(3.1))) * 1.8 + 8); # 3,000,000 datapoints in sum
|
|
813 }elsif($transFnc == 4){
|
|
814 return int(($_[0] ** (1/(3.7))) * 1.8 + 8); # 30,000,000 datapoints in sum
|
|
815 }elsif($transFnc == 5){
|
|
816 return int(($_[0] ** (1/(4))) * 1.8 + 8); # 300,000,000 datapoints in sum
|
|
817 }elsif($transFnc == 6){
|
|
818 return int(($_[0] ** (1/(4.7))) * 1.8 + 8); # 3,000,000,000 datapoints in sum
|
|
819 }
|
|
820
|
|
821 #return int(($_[0] ** (1/(3.3))) * 1.8 + 30); # test version for small and large datasets?
|
|
822 #return int(($_[0] ** (1/(3.3))) * 1.8 + 5); # test version for small and large datasets?
|
|
823 #return int(($_[0] ** (1/(4))) * 1.8 + 8); # test version for small and large datasets?
|
|
824 #return int(($_[0] ** (1/6)) * 12); # old version this version is good for large datasets
|
|
825 }
|
|
826
|
|
827
|
|
828
|
|
829
|
|
830
|
|
831
|
|
832
|
|
833
|