# HG changeset patch
# User steffen
# Date 1422629745 18000
# Node ID 745aede829e932a6a7dbf99f25ba5fcdb8302b12
Imported from capsule None

diff -r 000000000000 -r 745aede829e9 coVennTree/._.DS_Store
Binary file coVennTree/._.DS_Store has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/coVennTree.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coVennTree/coVennTree.pl	Fri Jan 30 09:55:45 2015 -0500
@@ -0,0 +1,833 @@
+#!/usr/bin/perl
+use strict;
+use File::Basename;
+use List::MoreUtils qw( minmax );
+
+# --------------------------------------------------------------------------------------------------
+# author:  steffen lott
+# mail:    steffen.lott@uni-freiburg.de
+# date:    06-10-2014
+# version: 1.6
+#
+# description:
+# The tool converts an output from MEGAN into a network that can be visualized with
+# Cytoscape. CoVennTree produces two files: the first one contains the network and the second one
+# describes the attributes of the network.
+# --------------------------------------------------------------------------------------------------
+
+# print usage and version number if no arguments are given
+if (@ARGV == 0) {
+    print "CoVennTree-Version 1.6\n";
+    print "COMMAND\n";
+    print "coventree argv0 argv1 argv2 argv3 argv4 argv5\n";
+    print "--------------\n";
+    print "argv0 = input file\n";
+    print "argv1 = color mode [0,4]\n";
+    print "argv2 = transformation function [0,6]\n";
+    print "argv3 = only leaf information => 0 ; all information => 1\n";
+    print "argv4 = output file name network\n";
+    print "argv5 = output file name attributes\n";
+    exit;
+}
+
+
+# container to represent the network
+my @network = ();
+
+
+# 0 PARAMETER_______________
+# read argument from command-line
+# important: DSV -> taxon-path, count(s) -> assigned -> tab
+my $megan_file = $ARGV[0];
+
+
+# 1 PARAMETER_______________
+my $colorMode;
+# color mode for venn-diagrams 0,1,2,3,4
+if(defined $ARGV[1]){
+    $colorMode = $ARGV[1];
+}else{
+    $colorMode = 3;
+}
+
+
+# 2 PARAMETER_______________
+# 7 different transformation functions
+my $transFnc = "";
+if(defined $ARGV[2]){ # small datasets
+    $transFnc = $ARGV[2];
+}else{
+    $transFnc = 1;
+}
+
+
+# 3 PARAMETER_______________
+# the user can switch between "only leaf information"
+# and the complete tree information. the latter also takes the not-assigned reads into account
+# and creates artificial nodes to keep this number
+my $onlyLeafs;
+if(defined $ARGV[3]){
+    if($ARGV[3] == 0){
+        $onlyLeafs = "on";
+    }elsif($ARGV[3] == 1){
+        $onlyLeafs = "off";
+    }
+}else{ # all information will be used! not assigned and assigned
+    $onlyLeafs = "off";
+}
+
+# 4 PARAMETER_______________
+# output -> network
+my $out_network = $ARGV[4];
+
+# 5 PARAMETER_______________
+# output -> attributes
+my $out_attributes = $ARGV[5];
+
+
+
+# check the input format of the file. only files with up to three datasets are accepted; files with fewer datasets are filled up with zeros
+
+
+# read-in MEGAN-file
+# if #{data-sets} = 1 -> no header line
+# if #{data-sets} > 1 -> header line " #Datasets set1 set2 ..."
+open(inFile , "<$megan_file") || die "File not found - \"Path-File\"!\n";
+my @pairIds = ();
+my $header = "";
+my @input_file = ();
+my @numberOfSets = ();
+
+while(<inFile>){
+    chomp($_);
+    if($_ =~ /^#/){
+        $header = $_;
+        @numberOfSets = split("\t", $_);
+    }else{
+        #print @numberOfSets . "\n";
+        # check the number of datasets included
+        if(@numberOfSets == 0 || @numberOfSets == 1 || @numberOfSets > 4){ # no dataset is in the file, or more than three
+            print "Error: File doesn't contain any dataset or contains more than three!";
+            exit;
+        }elsif(@numberOfSets == 2){ # only one set is in the file -> add 2x zeros
+            $_ .= "\t" . 0 . "\t" . 0;
+        }elsif(@numberOfSets == 3){ # only two sets are in the file -> add 1x zeros
+            $_ .= "\t" . 0;
+        }
+
+        addToNetwork($_);
+        push(@input_file, $_);
+    }
+}
+close(inFile);
+
+
+# --------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------
+# (1) PREPROCESSING: detect all leaf nodes
+my $modifiedInput = detectNonLeafs();
+
+# (2) MAIN COMPUTATION: compute depth by depth (path depth, e.g. root;Viruses; => depth 2)
+my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp();
+
+# (3) VENN-END-PREPARATION: sum up all single values (d1-d3), transform absolute values into relative ones
+my $vennToStore = vennForCytoscape($vennClusterOut, $specialNumberOut);
+
+# (4) SAVE RESULTS INTO FILES: one file contains the network (.sif), the other one contains the attributes
+storeNetwork();
+store2FileVenn($vennToStore);
+# --------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------
+
+
+
+sub store2FileVenn{
+    my $outVenn = $_[0];
+    # test
+    my $tmpFileName = $out_attributes;
+
+    #my $tmpFileName = "./network.venn";
+    open(FILE , ">$tmpFileName") || die "File can't be written - \"venn - File\"!\n";
+    print FILE join("\n", @{$outVenn}) . "\n";
+    close(FILE);
+}
+
+
+sub vennForCytoscape{
+    my $vennCluster = $_[0];
+    my $specialNum  = $_[1];
+    my $specNformat = 0;
+    my @out = ();
+    # data structure $vennCluster => vennCluster[]{}{} => values
+    #delete $vennCluster->[0]{"no"};
+    my $frameSize = 0;
+    my $values = 0;
+    my $googleURL = "";
+    my $outStr = "";
+
+    for(my $i = 0 ; $i < @{$vennCluster}; $i++){
+        while ( my($key, $value) = each %{$vennCluster->[$i]} ){
+            while ( my($key2, $value2) = each %{$vennCluster->[$i]{$key}} ){
+                $values    = $vennCluster->[$i]{$key}{$key2};
+                $frameSize = getCorrectedFrameSize($values);
+
+                if(defined $specialNum->[$i]{$key}{$key2}){
+                    $specNformat = $key2 . "[" . sprintf("%.3f", $specialNum->[$i]{$key}{$key2}) . "]";
+                }else{
+                    $specNformat = $key2;
+                }
+
+                # old version, this version works pretty well
+                #$frameSize = getFrameSize($values);
+                $googleURL = computeGoogleApiStrRotation($frameSize,$values,$colorMode);
+                $outStr    = $key2 . "\t" . $googleURL . "\t" . $specNformat . "\t" . $values;
+                push(@out, $outStr);
+            }
+        }
+    }
+    return \@out;
+}
+
+
+# unlike computeGoogleApiStr(), this function rotates the colour/value order so that the
+# largest dataset always occupies the first slot of the weighted Venn diagram
+sub computeGoogleApiStrRotation{ + my $frameSize = $_[0]; + my $values = $_[1]; + my $colMode = $_[2]; + my @relVal = (); + my @col = (); + my %sort = (); + my @store = (); + my %ovHash = (); + my @storeOldPos = (); + my @spVal = split(" ", $values); + my $sum = $spVal[0] + $spVal[1] + $spVal[2]; + + # user color-mode + if($colMode == 0){ + $col[0] = "18A3F2"; $col[1] = "FA0800"; $col[2] = "FFF905"; + }elsif($colMode == 1){ + $col[0] = "FF2A00"; $col[1] = "9CFF00"; $col[2] = "00CCFF"; + }elsif($colMode == 2){ + $col[0] = "B4FF00"; $col[1] = "FF00C6"; $col[2] = "00AEFF"; + }elsif($colMode == 3){ + $col[0] = "82FF00"; $col[1] = "7E00FF"; $col[2] = "FF003B"; + }elsif($colMode == 4){ + $col[0] = "1A1A1A"; $col[1] = "8A8A8A"; $col[2] = "C7C7C7"; + } + + $sort{"0"} = $spVal[0]; $sort{"1"} = $spVal[1]; $sort{"2"} = $spVal[2]; + + my $tmp = 0; + foreach(@spVal){ + if($sum != 0){ + $tmp = $_ * 100 / $sum; + }else{ + $tmp = 0; + } + push(@relVal,$tmp); + } + + $ovHash{"01"} = $relVal[3]; $ovHash{"10"} = $relVal[3]; + $ovHash{"02"} = $relVal[4]; $ovHash{"20"} = $relVal[4]; + $ovHash{"21"} = $relVal[5]; $ovHash{"12"} = $relVal[5]; + + my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco="; + # change color position in the google output string corresponding to the highest value + foreach my $k( sort {$sort{$b}<=>$sort{$a}} keys %sort) { + $url .= $col[$k] . ","; + push(@store, $k); + } + chop($url); + + $url .= "&cht=v&chd=t:"; + # sort node values in the right order + for(my $i = 0 ; $i < @relVal - 4 ; $i++){ + #print $i . "\t" . $store[$i] . "\t" . $relVal[$store[$i]] . "\n"; + $url .= sprintf("%.1f", $relVal[$store[$i]]) . ","; + } + # sort intersection values in the right order + my $tStr0 = $store[0] . $store[1]; + my $tStr1 = $store[0] . $store[2]; + my $tStr2 = $store[1] . $store[2]; + $url .= sprintf("%.1f", $ovHash{$tStr0}) . "," . sprintf("%.1f", $ovHash{$tStr1}) . "," . sprintf("%.1f", $ovHash{$tStr2}) . ","; + $url .= "0.0"; + $url .= "&chf=bg,s,e0dede00"; +} + + +# original function without any node rotation. the order of the nodes is always the same +sub computeGoogleApiStr{ + my $frameSize = $_[0]; + my $values = $_[1]; + my @relVal = (); + my @spVal = split(" ", $values); + my $sum = $spVal[0] + $spVal[1] + $spVal[2]; + + foreach(@spVal){ + my $tmp = $_ * 100 / $sum; + push(@relVal,$tmp); + } + my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . + #color + "&chco=FF6342,ADDE63,63C6DE" . + #"&chco=0000FF,0099FF,00FFFF" . + + "&cht=v&chd=t:"; + + for(my $i = 0 ; $i < @relVal - 1 ; $i++){ + $url .= sprintf("%.1f", $relVal[$i]) . 
","; + } + $url .= sprintf("%.1f", $relVal[-1]); + + $url .= "&chf=bg,s,e0dede00"; +} + + +sub getCorrectedFrameSize{ + my $values = $_[0]; + my @spVal = split(" ", $values); + my $sum = $spVal[0] + $spVal[1] + $spVal[2]; + my $frame = lookupPixelSQRT($sum); + + # find maxValue position ([0] - [2]) + my $arrPos = getMaxPos($spVal[0], $spVal[1], $spVal[2]); + my $addOver= -1; + my $addNode= -1; + my $addSum = -1; + + if($arrPos == 0){ + $addNode = $spVal[1] + $spVal[2]; + $addOver = $spVal[3] + $spVal[4]; + }elsif($arrPos == 1){ + $addNode = $spVal[0] + $spVal[2]; + $addOver = $spVal[3] + $spVal[5]; + }elsif($arrPos == 2){ + $addNode = $spVal[0] + $spVal[1]; + $addOver = $spVal[4] + $spVal[5]; + } + # if the 2 of 3 nodes have no overlap to the largest one, than the complete value + # will be used to compute a frame and add this to the existing frame + $addSum = $addNode - $addOver; + + my $addFrame = lookupPixelSQRT($addSum); + my $sumFrame = $frame + $addFrame; + return $sumFrame; +} + + +sub getMaxPos{ + my $pos = -1; + if( ($_[0] >= $_[1]) && ($_[0] >= $_[2]) ){ + $pos = 0; + }elsif( ($_[1] >= $_[0]) && ($_[1] >= $_[2]) ){ + $pos = 1; + }else{ + $pos = 2; + } + return $pos; +} + + +sub getFrameSize{ + my $values = $_[0]; + my @spVal = split(" ", $values); + my $sum = $spVal[0] + $spVal[1] + $spVal[2]; + my $frame = lookupPixel($sum); + return $frame; +} + + + +sub clusterVennBottomUp{ + # transform $modifiedInput into datastructure + # container => [deep]{parent}{child} + my @container = (); + my @containerSpecial = (); + my @nodeValues = (); + my $maxDeep = 0; + my %helperHash = (); + my %specialMatrixAll = (); + + foreach(@{$modifiedInput}){ + my @tmpArr = split('\t', $_); + my @path = split(';' , $tmpArr[0]); + my $deep = @path - 1; + + if(($deep - 1) >= 0){ + $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1]; + $nodeValues[$deep]{$path[-2]}{$path[-1]} = "f"; + }else{ + $container[$deep]{"no"}{$path[-1]} = $tmpArr[1]; + } + } + # start computation from the deepest path to the root node + for(my $i = (@container-1) ; $i >= 0 ; $i--){ + while ( my($key, $value) = each %{$container[$i]} ){ + # update all predecessor nodes + while ( my($keyUp, $valueUp) = each %helperHash ){ + if(exists $container[$i]{$key}{$keyUp}){ + $container[$i]{$key}{$keyUp} = $valueUp; + # compute special value by decompose venn's and add special value + $containerSpecial[$i]{$key}{$keyUp} = vennCongruousness(\@{$specialMatrixAll{$keyUp}}); + } + } + # group all nodes which has the same predecessor id and sum up the values + while ( my($key2, $value2) = each %{$container[$i]{$key}} ){ + if(exists $helperHash{$key}){ + $helperHash{$key} = addValues($helperHash{$key}, $value2); + #push(@{$specialMatrixAll{$key}}, $value2); + #print $key . "\t" . $value2 . "\n"; + }else{ + $helperHash{$key} = $value2; + } + #print $key . "\t" . $value2 . 
"\n"; + push(@{$specialMatrixAll{$key}}, $value2); + } + } + } + return \@container, \@containerSpecial; +} + + +sub vennCongruousness{ + my $inSpecMatrix = $_[0]; + my $numOfSets = @numberOfSets - 1; + my @arrVal = (); my @matrix = (); + my @sum = (); + my $numVenn = 0; + my %actSet = (); + my %actOvp = (); + $actSet{"result"} = 0; + $actOvp{"result"} = 0; + + # (step 1) - sum up rows + foreach (@{$inSpecMatrix}){ + @arrVal = split(" ", $_); + $sum[0] += $arrVal[0]; $sum[1] += $arrVal[1]; $sum[2] += $arrVal[2]; + $sum[3] += $arrVal[3]; $sum[4] += $arrVal[4]; $sum[5] += $arrVal[5]; + + if($arrVal[0] > 0){ + if(!(exists $actSet{1})){ + $actSet{1} = 1; + $actSet{"result"} += 1; + } + } + if($arrVal[1] > 0){ + if(!(exists $actSet{2})){ + $actSet{2} = 1; + $actSet{"result"} += 1; + } + } + if($arrVal[2] > 0){ + if(!(exists $actSet{3})){ + $actSet{3} = 1; + $actSet{"result"} += 1; + } + } + if($arrVal[3] > 0){ + if(!(exists $actOvp{1})){ + $actOvp{1} = 1; + $actOvp{"result"} += 1; + } + } + if($arrVal[4] > 0){ + if(!(exists $actOvp{2})){ + $actOvp{2} = 1; + $actOvp{"result"} += 1; + } + } + if($arrVal[5] > 0){ + if(!(exists $actOvp{3})){ + $actOvp{3} = 1; + $actOvp{"result"} += 1; + } + } + } + + # (step 2) - calc ratios (-1) + my $i = 0; + foreach (@{$inSpecMatrix}){ + @arrVal = split(" ", $_); + for(my $j = 0 ; $j < @arrVal ; $j++){ # eventuell -1 da index von 0 - 6 anstatt 0 - 5 laeuft + # div zero ! + if($arrVal[$j] == 0){ + $matrix[$i][$j] = 0; + }else{ + #print $j . "\t" . $sum[$j] . " \t" . $arrVal[$j] . "\n"; + $matrix[$i][$j] = $sum[$j] / $arrVal[$j]; + } + } + $i++; + } + + $numVenn = $i; + # (step 3) - sum up data set ratios d1-d3 + @sum = (); + for(my $j = 0 ; $j < @matrix; $j++){ + $sum[0] += $matrix[$j][0]; $sum[1] += $matrix[$j][1]; $sum[2] += $matrix[$j][2]; + $sum[3] += $matrix[$j][3]; $sum[4] += $matrix[$j][4]; $sum[5] += $matrix[$j][5]; + } + # (step 4) - calc ratios -> max(d_i, #{V}) / min(d_i, #{V}) + my @condensedM = (); my $max = 0; my $min = 0; + for(my $j = 0 ; $j < @sum ; $j++){ + $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]]; + $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]]; + + if($min == 0){ + $sum[$j] = 0; + }else{ + #$sum[$j] = $max / $min; + $sum[$j] = $sum[$j] / $numVenn; + } + #print "-> " . $j . "\t" . $sum[$j] . "\t" . $max . "\t" . $min . "\n"; + } + # (step 5) - normalize values between zero and one -> [0..1] + for(my $j = 0 ; $j < @sum ; $j++){ + $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]]; + $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]]; + + if($max == 0){ + $sum[$j] = 0; + }else{ + $sum[$j] = $min / $max; + } + #print "=> " . $j . "\t" . $sum[$j] . "\t" . $min . "\t" . $max . "\n"; + } + # (step 6) - combine all decomposed values and create only one value + # case a: only one dataset -> $numOfSets == 1 + if($numOfSets == 1){ + #print "res: " . $sum[0] . "\n"; + return $sum[0]; + }elsif($numOfSets == 2){ + # evtl fallunteruntescheiung + print "sum1: " . $sum[0] . "\t" . "sum2: " . $sum[1] . "\t" . "ovp1-2: " . $sum[3] . "\t" . "sets: " . $actSet{"result"} . "\t" . "ovp: " . $actOvp{"result"} . "\n"; + if($actOvp{"result"} == 0){ + my $t = ((($sum[0] + $sum[1]) / $actSet{"result"}) ); + #print "res2 " . $t . " ***\n"; + return ((($sum[0] + $sum[1]) / $actSet{"result"}) ); + }else{ + print "foobar\n"; + my $t = ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) ); + #print "res2* " . $t . " ***\n"; + return ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) ); + } + }elsif($numOfSets == 3){ + #print $sum[0] . "\t" . 
$sum[1] . "\t" . $sum[2] . "\t" . $sum[3] . "\t" . $sum[4] . "\t" . $sum[5] . "\n"; + #print $actSet{"result"} . "\t" . $actOvp{"result"} . "\n"; + #return ((((($sum[0] + $sum[1] + $sum[2]) / $numOfSets) + (($sum[3] + $sum[4] + $sum[5]) / $numOfSets) ) / 2) ); + + if($actOvp{"result"} == 0){ + my $t = (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}); + #print ">>>>>>> " . $t. "\n"; + return (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}); + }else{ + my $t = ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2)); + #print ">>>>>>> " . $t. "\n"; + return ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2)); + } + }else{ + return -1; + } +} + + +# save version of function clusterVennBottomUp() +#sub clusterVennBottomUp{ +# # transform $modifiedInput into datastructure +# # container => [deep]{parent}{child} +# my @container = (); +# my $maxDeep = 0; +# my %helperHash = (); +# +# foreach(@{$modifiedInput}){ +# my @tmpArr = split('\t', $_); +# my @path = split(';' , $tmpArr[0]); +# my $deep = @path - 1; +# +# if(($deep - 1) >= 0){ +# $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1]; +# }else{ +# $container[$deep]{"no"}{$path[-1]} = $tmpArr[1]; +# } +# } +# # start computation from the deepest path to the root node +# for(my $i = (@container-1) ; $i >= 0 ; $i--){ +# while ( my($key, $value) = each %{$container[$i]} ){ +# # update all predecessor nodes +# while ( my($keyUp, $valueUp) = each %helperHash ){ +# if(exists $container[$i]{$key}{$keyUp}){ +# $container[$i]{$key}{$keyUp} = $valueUp; +# } +# } +# # group all nodes which has the same predecessor id and sum up the values +# while ( my($key2, $value2) = each %{$container[$i]{$key}} ){ +# if(exists $helperHash{$key}){ +# $helperHash{$key} = addValues($helperHash{$key}, $value2); +# }else{ +# $helperHash{$key} = $value2; +# } +# } +# } +# } +# return \@container; +#} + + + +sub addValues{ + my $val1 = $_[0]; + my $val2 = $_[1]; + + my @sV1 = split(" ", $val1); + my @sV2 = split(" ", $val2); + + my $tmp = $sV1[0] + $sV2[0]; + my $out = $tmp; + + for(my $i = 1 ; $i < @sV1 ; $i++){ + $tmp = $sV1[$i] + $sV2[$i]; + $out .= " " . $tmp; + } + return $out; +} + + + +# detect non leaf nodes and remove the values +# works on @input_file !!! +# this version works only with 3 depths! +sub detectNonLeafs{ + my %recursiveValues = (); + my @modifiedFile = (); + my $convertedPath = ""; + + my @additionalNetwork = (); + + # read last line + my @tmpArr1 = split('\t',$input_file[($#input_file)],2); + # -2 path direction from reward instead from the beginning. (-1 leaf,child , -2 parent,inner node) + my $parent1 = getId($tmpArr1[0],-2); + my $child1 = getId($tmpArr1[0],-1); + my $deep1 = getPathDeep($tmpArr1[0]); + my $parent2 = ""; + my $child2 = ""; + my $deep2 = 0; + + + # if "if-statement is true, only root node exists" + my $outStr = ""; + if($parent1 == -1){ + $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); + push(@modifiedFile, $outStr); + }else{ + $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); + push(@modifiedFile, $outStr); + + for(my $i = (@input_file-2) ; $i >= 0 ; $i--){ + @tmpArr1 = split('\t',$input_file[$i],2); + $parent2 = getId($tmpArr1[0],-2); + $child2 = getId($tmpArr1[0],-1); + $deep2 = getPathDeep($tmpArr1[0]); + + #print $parent2 . "\t" . $child2 . "\n"; + + # if eq true -> new leaf + if($parent2 eq $parent1){ + $outStr = convertPath($tmpArr1[0]) . 
"\t" . computeLeafValues($tmpArr1[1]); + push(@modifiedFile, $outStr); + }elsif($parent1 eq $child2){ + $outStr = convertPath($tmpArr1[0]) . "\t" . "undef"; + push(@modifiedFile, $outStr); + + my @check = split('\t', $tmpArr1[1]); + my $tSum = 0; + foreach(@check){ + $tSum += $_; + } + if(($onlyLeafs eq "off") && ($tSum > 0)){ + $outStr = convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t" . computeLeafValues($tmpArr1[1]); + push(@modifiedFile, $outStr); + $outStr = $child2 . " pp " . "not_assigned_" . $child2; + push(@network, $outStr); + } + }else{ + $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); + push(@modifiedFile, $outStr); + } + + if($parent1 == -1){ + push(@modifiedFile, convertPath($tmpArr1[0])); + last; + } + $parent1 = $parent2; + $child1 = $child2; + $deep1 = $deep2; + } + } + # store @additionalNetwork in .sif file!!! at this point, the sif file exists! + # it is stored into @network container. this container is globel defined! + + return \@modifiedFile; +} + + +# helper function for detectNonLeafs +sub getPathDeep{ + my $inPath = $_[0]; + my @deep = split(';', $inPath); + my $size = $#deep; + return $size; +} + +sub convertPath{ + my $inString = $_[0]; + $inString =~ s/"//g; + $inString =~ s/\s+/_/g; + return $inString; +} + +sub getId{ + my $lineToParse = $_[0]; + my $idPos = $_[1]; + my $stringId = ""; + my @path = (); + + $lineToParse =~ s/"//g; + $lineToParse =~ s/\s+/_/g; + @path = split(';',$lineToParse); + my $num = @path; + + if(($num + $idPos) < 0){ + return -1; + }else{ + return $path[$idPos]; + } +} + +sub computeLeafValues{ + my $meganValues = $_[0]; + my @rawValues = split('\t', $meganValues); + my @nodeRelVal = (); + + my $outValues = $rawValues[0] . " " . $rawValues[1] . " " . $rawValues[2]; + + if($rawValues[0] <= $rawValues[1]){ + $outValues .= " " . $rawValues[0]; + }else{ + $outValues .= " " . $rawValues[1]; + } + if($rawValues[0] <= $rawValues[2]){ + $outValues .= " " . $rawValues[0]; + }else{ + $outValues .= " " . $rawValues[2]; + } + if($rawValues[1] <= $rawValues[2]){ + $outValues .= " " . $rawValues[1]; + }else{ + $outValues .= " " . $rawValues[2]; + } + #my ($min, $max) = minmax @rawValues; + my $min = 0; + $outValues .= " " . $min; + + return $outValues; +} +# ----------------------------------------------------------------------------- + + +# compute network (.sif) +sub addToNetwork{ + my $inLine = $_[0]; + my @splitInLine = split('\t',$inLine); + # remove ' " ' from line + $splitInLine[0] =~ s/"//g; + $splitInLine[0] =~ s/\s+/_/g; + my @elements = split(';' ,$splitInLine[0]); + + if(@elements > 1){ + my $outString = $elements[-2] . " pp " . $elements[-1]; + push(@network, $outString); + } +} + + +# store network in .sif file +sub storeNetwork{ + # test + my $tmpFileName = $out_network; + + #my $tmpFileName = "./network.sif"; + open(FILE , ">$tmpFileName") || die "File can't be written - \"sif - File\"!\n"; + print FILE join("\n", @network) . "\n"; + close(FILE); +} + + +# --------------------------------------------------------------------------------------------- +# two different lookup-tables are available! 
+# lookupPixel() => static ; lookupPixelSQRT() => dynamic
+#
+# lookup absolute node-size to pixel (frame-size for venn-diagram)
+sub lookupPixel{
+    my $query = $_[0];
+
+    if($query < 10){
+        return 30;
+    }elsif($query < 100){
+        return 40;
+    }elsif($query < 1000){
+        return 50;
+    }elsif($query < 10000){
+        return 60;
+    }elsif($query < 100000){
+        return 80;
+    }elsif($query < 1000000){
+        return 100;
+    }elsif($query < 10000000){
+        return 140;
+    }elsif($query < 20000000){
+        return 180;
+    }elsif($query < 30000000){
+        return 220;
+    }else{
+        return 250;
+    }
+}
+
+# lookup absolute node-size to pixel (frame-size for venn-diagram) <- this is currently used!
+sub lookupPixelSQRT{
+
+    if ($transFnc == 0) {
+        return int(($_[0] ** (1/(1.6))) * 1.8 + 8);   # 3,000 datapoints in sum
+    }elsif($transFnc == 1){
+        return int(($_[0] ** (1/(2.1))) * 1.8 + 8);   # 30,000 datapoints in sum
+    }elsif($transFnc == 2){
+        return int(($_[0] ** (1/(2.6))) * 1.8 + 8);   # 300,000 datapoints in sum
+    }elsif($transFnc == 3){
+        return int(($_[0] ** (1/(3.1))) * 1.8 + 8);   # 3,000,000 datapoints in sum
+    }elsif($transFnc == 4){
+        return int(($_[0] ** (1/(3.7))) * 1.8 + 8);   # 30,000,000 datapoints in sum
+    }elsif($transFnc == 5){
+        return int(($_[0] ** (1/(4))) * 1.8 + 8);     # 300,000,000 datapoints in sum
+    }elsif($transFnc == 6){
+        return int(($_[0] ** (1/(4.7))) * 1.8 + 8);   # 3,000,000,000 datapoints in sum
+    }
+
+    #return int(($_[0] ** (1/(3.3))) * 1.8 + 30);  # test version for small and large datasets?
+    #return int(($_[0] ** (1/(3.3))) * 1.8 + 5);   # test version for small and large datasets?
+    #return int(($_[0] ** (1/(4))) * 1.8 + 8);     # test version for small and large datasets?
+    #return int(($_[0] ** (1/6)) * 12);            # old version; this version is good for large datasets
+}
+
+
+
diff -r 000000000000 -r 745aede829e9 coVennTree/coVennTree.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coVennTree/coVennTree.xml	Fri Jan 30 09:55:45 2015 -0500
@@ -0,0 +1,143 @@
+
+Comparative rooted tree analysis for files in dsv format
+
+coVennTree
+perl
+
+
+coVennTree.pl
+$infile
+$color_mode
+$trans_func
+$leafs_allInformation
+$outfile_network
+$outfile_attribute
+
+
+
+
+
+
+.. class:: infomark
+
+CoVennTree compares up to three rooted trees at the same time.
+
+CoVennTree (Comparative weighted Venn Tree) is a software tool to analyze and compare up to three datasets. Unlike other
+methods, CoVennTree correlates data on the leaf level and transfers this information to the root node. CoVennTree works with numbers to compute weighted
+Venn diagrams for each node in the graph (rooted tree). Therefore, any kind of input data can be processed as long as the required data structure is taken into account.
+
+
+
+**Input**
+
+*Input example*
+
+
+.. image:: $PATH_TO_IMAGES/example1.png
+   :height: 430
+   :width: 600
+
+
+*dsv-format: The following table represents the graph.*
+
+
+=========== ====== ====== ======
+#Datasets   set1   set2   set3
+=========== ====== ====== ======
+"root;"     0      0      0
+"root;A;"   10000  0      0
+"root;A;C;" 600000 300000 500000
+"root;A;D;" 0      100000 200000
+"root;A;E;" 800000 0      100000
+"root;B;"   10000  20000  50000
+=========== ====== ====== ======
+
+
+-------
+
+
+**Results**
+
+A specific color is assigned to each dataset in five optional color schemes (see parameter "Select color mode for weighted Venn diagrams").
+In this example set1 corresponds to color blue, set2 to red and set3 to yellow.
+In order to cover a wide numerical range, a non-linear transformation function is used.
+
+
+*Data format \*.sif*
+
+[parent_node] [connected_with] [child_node]
+
+
+*Data format \*.venn*
+
+[id] [google_url] [id_vds] [Venn_abs_values]
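+
+*Example (illustrative values, based on the input table above)*
+
+A line of the \*.sif file links a parent node to one of its children, e.g. "A pp C".
+A line of the \*.venn file for leaf node C could look like this (Google chart URL shortened):
+
+C   http://chart.apis.google.com/chart?chs=...&cht=v&chd=t:...   C   600000 300000 500000 300000 500000 300000 0
+
+The last column lists the three absolute counts followed by their pairwise minima and a trailing zero; for inner nodes the third column additionally carries a bracketed congruence score (e.g. "A[0.873]", value illustrative).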
+
+
+*Output example "leaf information and not assigned information"*
+
+By selecting "leaf information + not assigned information", artificial nodes can be inserted.
+An artificial node is inserted whenever an inner node has a value larger than zero.
+
+.. image:: $PATH_TO_IMAGES/venn-graph-off.png
+   :height: 358
+   :width: 425
+
+
+-------
+
+
+*Output example "only leaf information"*
+
+By selecting "only leaf information", only leaf nodes are considered for the computation of the weighted Venn diagrams.
+
+.. image:: $PATH_TO_IMAGES/venn-graph-on.png
+   :height: 358
+   :width: 400
+
+
+
+
diff -r 000000000000 -r 745aede829e9 coVennTree/static/._.DS_Store
Binary file coVennTree/static/._.DS_Store has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/static/images/._example1.png
Binary file coVennTree/static/images/._example1.png has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/static/images/._venn-graph-off.png
Binary file coVennTree/static/images/._venn-graph-off.png has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/static/images/._venn-graph-on.png
Binary file coVennTree/static/images/._venn-graph-on.png has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/static/images/example1.png
Binary file coVennTree/static/images/example1.png has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/static/images/venn-graph-off.png
Binary file coVennTree/static/images/venn-graph-off.png has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/static/images/venn-graph-on.png
Binary file coVennTree/static/images/venn-graph-on.png has changed
diff -r 000000000000 -r 745aede829e9 coVennTree/tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coVennTree/tool_dependencies.xml	Fri Jan 30 09:55:45 2015 -0500
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+http://search.cpan.org/CPAN/authors/id/A/AD/ADAMK/List-MoreUtils-0.33.tar.gz
+
+
+
+
+