changeset 0:745aede829e9 draft default tip

Imported from capsule None
author steffen
date Fri, 30 Jan 2015 09:55:45 -0500
parents
children
files coVennTree/._.DS_Store coVennTree/coVennTree.pl coVennTree/coVennTree.xml coVennTree/static/._.DS_Store coVennTree/static/images/._example1.png coVennTree/static/images/._venn-graph-off.png coVennTree/static/images/._venn-graph-on.png coVennTree/static/images/example1.png coVennTree/static/images/venn-graph-off.png coVennTree/static/images/venn-graph-on.png coVennTree/tool_dependencies.xml
diffstat 11 files changed, 997 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file coVennTree/._.DS_Store has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coVennTree/coVennTree.pl	Fri Jan 30 09:55:45 2015 -0500
@@ -0,0 +1,833 @@
+#!/usr/bin/perl
+use strict;
+use File::Basename;
+use List::MoreUtils qw( minmax );
+
+# --------------------------------------------------------------------------------------------------
+# author:	steffen lott
+# mail: 	steffen.lott@uni-freiburg.de
+# date: 	06-10-2014
+# version: 	1.6
+# 
+# description:
+# 	The tool converts an output from MEGAN into a special network which can be visualized with
+#	Cytoscape. It produces two files: the first one contains the network and the second one
+#	describes the attributes of the network.
+# --------------------------------------------------------------------------------------------------
+
+# Called without any argument: print the version and a short usage summary, then exit.
+# The documented ranges match the values that are handled further down in this script
+# (color modes 0-4 in computeGoogleApiStrRotation, transformation functions 0-6 in
+# lookupPixelSQRT), and the command line lists all six arguments.
+if (@ARGV == 0) {
+	print "CoVennTree-Version 1.6\n";
+	print "COMMAND\n";
+	print "coVennTree.pl argv0 argv1 argv2 argv3 argv4 argv5\n";
+	print "--------------\n";
+	print "argv0 = input file\n";
+	print "argv1 = color mode [0,4]\n";
+	print "argv2 = transformation function [0,6]\n";
+	print "argv3 = only leaf information => 0 ; all information => 1\n";
+	print "argv4 = output file name network\n";
+	print "argv5 = output file name attributes\n";
+	exit;
+}
+
+
+
+
+# Edge list of the network; every entry is one "parent pp child" line of the .sif output.
+# Filled by addToNetwork() and detectNonLeafs(), written by storeNetwork().
+my @network = ();
+
+# argv0: input file
+# important: DSV -> taxon-path, count(s) -> assigned -> tab
+my $megan_file = $ARGV[0];
+
+# argv1: color mode for the venn-diagrams (0,1,2,3,4); 3 when the argument is missing
+my $colorMode = defined $ARGV[1] ? $ARGV[1] : 3;
+
+# argv2: index of the transformation function used by lookupPixelSQRT();
+#        1 when the argument is missing
+my $transFnc = defined $ARGV[2] ? $ARGV[2] : 1;
+
+# argv3: 0 => only leaf information ("on"), 1 => complete tree information ("off").
+# With the complete tree information the reads that are not assigned to a leaf are kept
+# in artificial nodes. A missing argument selects the complete tree information; any
+# value other than 0 or 1 leaves $onlyLeafs undefined.
+my $onlyLeafs;
+if(!defined $ARGV[3]){
+	$onlyLeafs = "off";
+}elsif($ARGV[3] == 0){
+	$onlyLeafs = "on";
+}elsif($ARGV[3] == 1){
+	$onlyLeafs = "off";
+}
+
+# argv4: output file for the network
+my $out_network    = $ARGV[4];
+
+# argv5: output file for the attributes
+my $out_attributes = $ARGV[5];
+
+
+
+
+
+# Read the MEGAN export.
+# The header line starts with '#' and names the datasets:
+#   "#Datasets<TAB>set1<TAB>set2<TAB>..."
+# Files with one, two or three datasets are accepted. Rows of files with fewer than three
+# datasets are filled up with zero counts, so every stored row carries three values.
+my @pairIds = ();
+my $header  = "";
+my @input_file   = ();
+my @numberOfSets = ();
+
+# three-argument open with a lexical handle: the file name is taken literally and is
+# never parsed for an open mode
+open(my $inFile, "<", $megan_file) || die "File not found - \"Path-File\"! ($!)\n";
+
+while(my $line = <$inFile>){
+	chomp($line);
+	if($line =~ /^#/){
+		$header       = $line;
+		@numberOfSets = split("\t", $line);
+		next;
+	}
+
+	# @numberOfSets holds the header fields: the "#Datasets" label plus one name per dataset
+	my $headerFields = @numberOfSets;
+	if($headerFields < 2 || $headerFields > 4){
+		# die instead of print + exit: the message goes to STDERR and the exit status is
+		# non-zero, so a calling workflow can notice the failure
+		die "Error: File doesn't contain any dataset or contain more than three!\n";
+	}
+	if($headerFields == 2){			# only one set is in the file -> add 2x zeros
+		$line .= "\t" . 0 . "\t" . 0;
+	}elsif($headerFields == 3){		# only two sets are in the file -> add 1x zeros
+		$line .= "\t" . 0;
+	}
+
+	addToNetwork($line);
+	push(@input_file, $line);
+}
+close($inFile);
+
+# without any data row there is no tree that could be processed
+if(@input_file == 0){
+	die "Error: File doesn't contain any data rows!\n";
+}
+
+
+# --------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------
+# (1) PREPROCESSING: classify every input line as leaf or inner node. The result is kept in a
+#     file-level variable because clusterVennBottomUp() reads $modifiedInput directly.
+my $modifiedInput  = detectNonLeafs();
+
+# (2) MAIN COMPUTATION: aggregate the values depth by depth, from the deepest level up to the
+#     root (depth = number of path elements - 1)
+my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp();
+
+# (3) VENN END PREPARATION: build one attribute row per node (id, chart URL, label, absolute values)
+my $vennToStore    = vennForCytoscape($vennClusterOut, $specialNumberOut);
+
+# (4) SAVE RESULTS INTO FILES: one file contains the network (.sif), the other one contains the
+#     attributes. At this point @network also holds the edges added by detectNonLeafs().
+storeNetwork();
+store2FileVenn($vennToStore);
+# --------------------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------------------
+
+
+
+
+# Write the attribute table to the file named by the global $out_attributes (argv5).
+#   $_[0]  array reference with one already formatted row per node
+# The rows are joined with newlines and terminated by a final newline.
+# Dies when the file cannot be opened or when closing it reports a write error.
+sub store2FileVenn{
+	my $outVenn     = $_[0];
+	my $tmpFileName = $out_attributes;
+
+	# three-argument open with a lexical handle: the name is used verbatim and is never
+	# parsed for an open mode
+	open(my $fh, ">", $tmpFileName) || die "File can't be written - \"venn - File\"!\n";
+	print $fh join("\n", @{$outVenn}) . "\n";
+	# errors of buffered writes only surface when the handle is closed
+	close($fh) || die "File can't be written - \"venn - File\"!\n";
+}
+
+
+# Build the attribute table for Cytoscape, one row per node:
+#   node id <TAB> chart URL <TAB> label <TAB> absolute Venn values
+# The label is the node id, extended by "[x.xxx]" when a special number exists for the node.
+#   $_[0]  reference to the cluster structure [depth]{parent}{child} => value string
+#   $_[1]  reference to the special numbers, addressed the same way
+# Returns a reference to the list of rows. The chart colors come from the global $colorMode.
+sub vennForCytoscape{
+	my ($vennCluster, $specialNum) = @_;
+	my @rows = ();
+
+	foreach my $depth (0 .. $#{$vennCluster}){
+		foreach my $parent (keys %{$vennCluster->[$depth]}){
+			my $children = $vennCluster->[$depth]{$parent};
+			foreach my $node (keys %{$children}){
+				my $values    = $children->{$node};
+				my $frameSize = getCorrectedFrameSize($values);
+
+				# label: plain id, or id plus the special number with three decimals
+				my $label = $node;
+				if(defined $specialNum->[$depth]{$parent}{$node}){
+					$label = $node . "[" . sprintf("%.3f", $specialNum->[$depth]{$parent}{$node}) . "]";
+				}
+
+				my $googleURL = computeGoogleApiStrRotation($frameSize, $values, $colorMode);
+				push(@rows, $node . "\t" . $googleURL . "\t" . $label . "\t" . $values);
+			}
+		}
+	}
+	return \@rows;
+}
+
+
+# Build the Google chart URL of a three-set Venn diagram. The sets are listed by descending
+# size ("rotation"); the colors and the pairwise overlaps follow that order.
+#   $_[0]  edge length of the square chart in pixels
+#   $_[1]  value string "set1 set2 set3 ovp12 ovp13 ovp23 ovp123" (space separated)
+#   $_[2]  color mode 0-4; any other value leaves the color list empty
+# All values are written as percentages of set1 + set2 + set3 (zeros when that sum is 0),
+# the last data value is always written as 0.0.
+# Sets of equal size keep their original order, so the URL no longer depends on the
+# iteration order of a hash.
+# Returns the URL.
+sub computeGoogleApiStrRotation{
+	my ($frameSize, $values, $colMode) = @_;
+	my @spVal = split(" ", $values);
+	my $sum   = $spVal[0] + $spVal[1] + $spVal[2];
+
+	# one color triple (set1, set2, set3) per color mode
+	my @palettes = (
+		[ "18A3F2", "FA0800", "FFF905" ],
+		[ "FF2A00", "9CFF00", "00CCFF" ],
+		[ "B4FF00", "FF00C6", "00AEFF" ],
+		[ "82FF00", "7E00FF", "FF003B" ],
+		[ "1A1A1A", "8A8A8A", "C7C7C7" ],
+	);
+	my @col = ();
+	for my $mode (0 .. $#palettes){
+		if($colMode == $mode){
+			@col = @{$palettes[$mode]};
+			last;
+		}
+	}
+
+	# percentages relative to the sum of the three sets
+	my @relVal = ();
+	foreach my $abs (@spVal){
+		push(@relVal, ($sum != 0) ? $abs * 100 / $sum : 0);
+	}
+
+	# pairwise overlaps, addressable by both orders of the two set indices
+	my %ovHash = (
+		"01" => $relVal[3], "10" => $relVal[3],
+		"02" => $relVal[4], "20" => $relVal[4],
+		"21" => $relVal[5], "12" => $relVal[5],
+	);
+
+	# set indices by descending size; the index itself breaks ties
+	my @store = sort { $spVal[$b] <=> $spVal[$a] || $a <=> $b } (0, 1, 2);
+
+	my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco=";
+	# colors in the order of the sorted sets
+	$url .= join(",", @col[@store]);
+
+	$url .= "&cht=v&chd=t:";
+	# node values in the order of the sorted sets
+	for my $i (0 .. $#relVal - 4){
+		$url .= sprintf("%.1f", $relVal[$store[$i]]) . ",";
+	}
+	# intersection values in the order of the sorted sets
+	for my $pair ($store[0] . $store[1], $store[0] . $store[2], $store[1] . $store[2]){
+		$url .= sprintf("%.1f", $ovHash{$pair}) . ",";
+	}
+	$url .= "0.0";
+	$url .= "&chf=bg,s,e0dede00";
+	return $url;
+}
+
+
+# Original variant without any node rotation: the order of the sets and the colors is fixed.
+#   $_[0]  edge length of the square chart in pixels
+#   $_[1]  space separated value string; the first three values are the set sizes
+# Every value is written as a percentage of set1 + set2 + set3. A node whose three sets
+# are all 0 yields zeros instead of dying with a division by zero, which matches
+# computeGoogleApiStrRotation().
+# Returns the URL.
+sub computeGoogleApiStr{
+	my ($frameSize, $values) = @_;
+	my @spVal = split(" ", $values);
+	my $sum   = $spVal[0] + $spVal[1] + $spVal[2];
+
+	my @relVal = ();
+	foreach my $abs (@spVal){
+		push(@relVal, ($sum != 0) ? $abs * 100 / $sum : 0);
+	}
+
+	my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize .
+	          # color
+	          "&chco=FF6342,ADDE63,63C6DE" .
+	          "&cht=v&chd=t:";
+
+	# an empty value string still produces a single "0.0" entry
+	$url .= @relVal ? join(",", map { sprintf("%.1f", $_) } @relVal) : "0.0";
+	$url .= "&chf=bg,s,e0dede00";
+	return $url;
+}
+
+
+# Frame size (pixels) of a Venn diagram: the frame for the sum of the three sets plus a frame
+# for the part of the two smaller sets that does not overlap with the largest set.
+#   $_[0]  value string "set1 set2 set3 ovp12 ovp13 ovp23 ..." (space separated)
+# Returns the sum of both frames as computed by lookupPixelSQRT().
+sub getCorrectedFrameSize{
+	my @spVal = split(" ", $_[0]);
+	my $frame = lookupPixelSQRT($spVal[0] + $spVal[1] + $spVal[2]);
+
+	# per position of the largest set: the indices of the two remaining sets, followed by
+	# the indices of the two overlaps the largest set takes part in
+	my @partners = ([1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 4, 5]);
+	my $arrPos   = getMaxPos($spVal[0], $spVal[1], $spVal[2]);
+	my ($nodeA, $nodeB, $ovpA, $ovpB) = @{$partners[$arrPos]};
+
+	# if the two smaller sets have no overlap with the largest one, their complete value
+	# is used to compute the additional frame
+	my $addNode = $spVal[$nodeA] + $spVal[$nodeB];
+	my $addOver = $spVal[$ovpA] + $spVal[$ovpB];
+
+	return $frame + lookupPixelSQRT($addNode - $addOver);
+}
+
+
+# Position (0, 1 or 2) of the largest of three numbers; on ties the lowest position wins.
+sub getMaxPos{
+	my ($first, $second, $third) = @_;
+
+	return 0 if(($first >= $second) && ($first >= $third));
+	return 1 if(($second >= $first) && ($second >= $third));
+	return 2;
+}
+
+
+# Frame size from the static lookup table: sums up the first three values of the space
+# separated value string and passes the sum to lookupPixel().
+sub getFrameSize{
+	my ($set1, $set2, $set3) = split(" ", $_[0]);
+	return lookupPixel($set1 + $set2 + $set3);
+}
+
+
+
+# Aggregate the Venn values of the tree from the deepest level up to the root.
+# Reads the file-level $modifiedInput (lines "path<TAB>value string") and takes no arguments.
+# Returns two array references:
+#   [depth]{parent}{child} => value string; inner nodes carry the sum of their children
+#   [depth]{parent}{child} => result of vennCongruousness(); set for updated inner nodes only
+# NOTE(review): %helperHash and %specialMatrixAll are keyed by the node name alone, not by the
+# full path - presumably node names are expected to be unique within the tree; verify.
+sub clusterVennBottomUp{
+	# transform $modifiedInput into datastructure
+	# container => [deep]{parent}{child}
+	my @container        = ();
+	my @containerSpecial = ();
+	my @nodeValues = ();       # only written below, never read in this sub
+	my $maxDeep    = 0;        # unused
+	my %helperHash = ();       # node name => summed value string of its children
+	my %specialMatrixAll = (); # node name => list of the value strings of its children
+	
+	foreach(@{$modifiedInput}){
+		my @tmpArr = split('\t', $_);
+		my @path   = split(';' , $tmpArr[0]);
+		# depth of the node: number of path elements - 1
+		my $deep   = @path - 1;
+		
+		if(($deep - 1) >= 0){
+			$container[$deep]{$path[-2]}{$path[-1]}  = $tmpArr[1];
+			$nodeValues[$deep]{$path[-2]}{$path[-1]} = "f";
+		}else{
+			# a path with a single element has no parent; it is stored below the key "no"
+			$container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
+		}
+	}
+	# start computation from the deepest path to the root node
+	for(my $i = (@container-1) ; $i >= 0  ; $i--){
+		while ( my($key, $value) = each %{$container[$i]} ){
+			# update all predecessor nodes: a child of $key that already has an accumulated
+			# sum takes over that sum as its own value
+			while ( my($keyUp, $valueUp) = each %helperHash ){
+				if(exists $container[$i]{$key}{$keyUp}){
+					$container[$i]{$key}{$keyUp} = $valueUp;
+					# compute special value by decompose venn's and add special value
+					$containerSpecial[$i]{$key}{$keyUp} = vennCongruousness(\@{$specialMatrixAll{$keyUp}});
+				}
+			}
+			# group all nodes which has the same predecessor id and sum up the values
+			while ( my($key2, $value2) = each %{$container[$i]{$key}} ){
+				if(exists $helperHash{$key}){
+					$helperHash{$key} = addValues($helperHash{$key}, $value2);
+					#push(@{$specialMatrixAll{$key}}, $value2);
+					#print $key . "\t" . $value2 . "\n";
+				}else{
+					$helperHash{$key} = $value2;
+				}
+				#print $key . "\t" . $value2 . "\n";
+				# remember the single child values for vennCongruousness()
+				push(@{$specialMatrixAll{$key}}, $value2);
+			}			
+		}
+	}
+	return \@container, \@containerSpecial;
+}
+
+
+# Compute the special number of an inner node from the Venn values of its children.
+#   $_[0]  array reference; every entry is the value string of one child,
+#          "set1 set2 set3 ovp12 ovp13 ovp23 ..." (space separated)
+# The first six columns (three sets, three pairwise overlaps) are processed independently:
+# for every child the ratio column total / child value is built (0 for a child value of 0),
+# the ratios are averaged over all children, and the result is normalized to [0..1] as
+# min(number of children, mean) / max(number of children, mean). The columns are then
+# combined depending on the number of datasets named in the input header
+# (global @numberOfSets minus the leading label).
+# Returns the combined value, 0 when no child has a value in any set (this case used to
+# die with a division by zero), and -1 when the number of datasets is not 1, 2 or 3.
+# The function no longer prints debug lines to STDOUT.
+sub vennCongruousness{
+	my $inSpecMatrix = $_[0];
+	my $numOfSets    = @numberOfSets - 1;
+
+	my @rows    = map { [ split(" ", $_) ] } @{$inSpecMatrix};
+	my $numVenn = @rows;
+
+	# (step 1) - sum up the columns and remember which columns have a value > 0
+	my @total  = ();
+	my @active = ();
+	foreach my $row (@rows){
+		for my $col (0 .. 5){
+			$total[$col] += $row->[$col];
+			$active[$col] = 1 if($row->[$col] > 0);
+		}
+	}
+	my $activeSets = grep { $active[$_] } (0 .. 2);
+	my $activeOvps = grep { $active[$_] } (3 .. 5);
+
+	# (step 2 + 3) - per column: sum of the ratios total / value; a value of 0 adds 0
+	my @score = ();
+	foreach my $row (@rows){
+		for my $col (0 .. 5){
+			$score[$col] += ($row->[$col] == 0) ? 0 : $total[$col] / $row->[$col];
+		}
+	}
+
+	for my $col (0 .. $#score){
+		# (step 4) - mean ratio over all children
+		my $smaller = ($numVenn > $score[$col]) ? $score[$col] : $numVenn;
+		$score[$col] = ($smaller == 0) ? 0 : $score[$col] / $numVenn;
+
+		# (step 5) - normalize values between zero and one -> [0..1]
+		my $max = ($numVenn < $score[$col]) ? $score[$col] : $numVenn;
+		my $min = ($numVenn > $score[$col]) ? $score[$col] : $numVenn;
+		$score[$col] = ($max == 0) ? 0 : $min / $max;
+	}
+
+	# (step 6) - combine all decomposed values and create only one value
+	if($numOfSets == 1){
+		return $score[0];
+	}
+	if(($numOfSets != 2) && ($numOfSets != 3)){
+		return -1;
+	}
+	# no child has a value in any set: there is nothing to average
+	return 0 if($activeSets == 0);
+
+	my $setMean = ($numOfSets == 2)
+		? ($score[0] + $score[1]) / $activeSets
+		: ($score[0] + $score[1] + $score[2]) / $activeSets;
+	return $setMean if($activeOvps == 0);
+
+	# with two datasets only the overlap of set1 and set2 exists
+	my $ovpMean = ($numOfSets == 2)
+		? $score[3]
+		: ($score[3] + $score[4] + $score[5]) / $activeOvps;
+	return ($setMean + $ovpMean) / 2;
+}
+
+
+# save version of function clusterVennBottomUp()
+#sub clusterVennBottomUp{
+#	# transform $modifiedInput into datastructure
+#	# container => [deep]{parent}{child}
+#	my @container  = ();
+#	my $maxDeep    = 0; 
+#	my %helperHash = ();
+#	
+#	foreach(@{$modifiedInput}){
+#		my @tmpArr = split('\t', $_);
+#		my @path   = split(';' , $tmpArr[0]);
+#		my $deep   = @path - 1;
+#		
+#		if(($deep - 1) >= 0){
+#			$container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1];
+#		}else{
+#			$container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
+#		}
+#	}
+#	# start computation from the deepest path to the root node	
+#	for(my $i = (@container-1) ; $i >= 0  ; $i--){
+#		while ( my($key, $value) = each %{$container[$i]} ){
+#			# update all predecessor nodes
+#			while ( my($keyUp, $valueUp) = each %helperHash ){
+#				if(exists $container[$i]{$key}{$keyUp}){
+#					$container[$i]{$key}{$keyUp} = $valueUp;
+#				}
+#			}
+#			# group all nodes which has the same predecessor id and sum up the values	
+#			while ( my($key2, $value2) = each %{$container[$i]{$key}} ){		
+#				if(exists $helperHash{$key}){
+#					$helperHash{$key} = addValues($helperHash{$key}, $value2);
+#				}else{
+#					$helperHash{$key} = $value2;
+#				}
+#			}
+#		}
+#	}
+#	return \@container;
+#}
+
+
+
+# Add two space separated value strings position by position.
+#   $_[0], $_[1]  value strings, e.g. "1 2 3"
+# The first position is always added; the number of further positions follows the first
+# string. Returns the sums as a space separated string.
+sub addValues{
+	my @left  = split(" ", $_[0]);
+	my @right = split(" ", $_[1]);
+
+	my @total = ($left[0] + $right[0]);
+	foreach my $pos (1 .. $#left){
+		push(@total, $left[$pos] + $right[$pos]);
+	}
+	return join(" ", @total);
+}
+
+
+
+# Detect the non leaf nodes and replace their values.
+# Works on the file-level @input_file and walks over it from the last line to the first.
+# Every line is compared with the line handled just before it (the following line of the file):
+#   - same parent as that line                      -> leaf
+#   - last path element is the parent of that line  -> inner node, its value becomes "undef"
+#   - anything else                                 -> leaf
+# NOTE(review): this presumably relies on the input listing every node directly before its
+# descendants - confirm against the MEGAN export.
+# For an inner node with counts > 0 an artificial leaf "not_assigned_<name>" is added when
+# $onlyLeafs is "off"; its edge is appended to the file-level @network.
+# Returns a reference to the list of "path<TAB>value string" lines.
+# this version works only with 3 depths!
+sub detectNonLeafs{
+	my %recursiveValues = ();   # unused
+	my @modifiedFile    = ();
+	my $convertedPath   = "";   # unused
+	
+	my @additionalNetwork = (); # unused, the additional edges are pushed to @network
+	
+	# read last line: [0] = path, [1] = the tab separated counts
+	my @tmpArr1 = split('\t',$input_file[($#input_file)],2);
+	# the path position is counted from the end instead of from the beginning
+	# (-1 leaf,child , -2 parent,inner node)
+	my $parent1 = getId($tmpArr1[0],-2);
+	my $child1  = getId($tmpArr1[0],-1); 
+	my $deep1   = getPathDeep($tmpArr1[0]);
+	my $parent2 = "";
+	my $child2  = "";  
+	my $deep2   = 0;
+	
+	
+	# if "if-statement is true, only root node exists"
+	# (getId returns -1 when the path has no element at the requested position)
+	my $outStr = "";
+	if($parent1 == -1){
+		$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
+		push(@modifiedFile, $outStr);
+	}else{
+		# the last line of the file is always stored as a leaf
+		$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
+		push(@modifiedFile, $outStr);
+	
+		for(my $i = (@input_file-2) ; $i >= 0 ; $i--){
+			@tmpArr1 = split('\t',$input_file[$i],2);
+			$parent2 = getId($tmpArr1[0],-2);
+			$child2  = getId($tmpArr1[0],-1); 
+			$deep2   = getPathDeep($tmpArr1[0]);
+			
+			#print $parent2 . "\t" . $child2 . "\n";
+			
+			# if eq true -> new leaf
+			if($parent2 eq $parent1){
+				$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
+				push(@modifiedFile, $outStr);
+			}elsif($parent1 eq $child2){
+				# inner node: its own value is dropped here
+				$outStr = convertPath($tmpArr1[0]) . "\t" . "undef";
+				push(@modifiedFile, $outStr);
+				
+				# sum of the counts that are assigned to the inner node itself
+				my @check = split('\t', $tmpArr1[1]);
+				my $tSum  = 0;
+				foreach(@check){
+					$tSum += $_;
+				}
+				if(($onlyLeafs eq "off") && ($tSum > 0)){
+					# keep these counts in an artificial child node and connect it in the network
+					$outStr = convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t" . computeLeafValues($tmpArr1[1]);
+					push(@modifiedFile, $outStr);
+					$outStr = $child2 . " pp " . "not_assigned_" . $child2;
+					push(@network, $outStr);
+				}
+			}else{
+				$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
+				push(@modifiedFile, $outStr);
+			}			
+			
+			# the line handled before this one had no parent: store the current path once
+			# more without a value and stop
+			if($parent1 == -1){
+				push(@modifiedFile, convertPath($tmpArr1[0]));
+				last;	
+			}
+			$parent1 = $parent2;
+			$child1  = $child2;
+			$deep1   = $deep2;
+		}
+	}
+	# the additional edges are not stored in @additionalNetwork but directly in the
+	# @network container, which is defined at file level
+	
+	return \@modifiedFile;
+}
+
+
+# helper function for detectNonLeafs
+# Depth of a ';' separated path: the number of path elements minus one, -1 for an empty path.
+sub getPathDeep{
+	my @levels = split(';', $_[0]);
+	return scalar(@levels) - 1;
+}
+
+# Normalize a path: remove every double quote and replace every run of whitespace by a
+# single underscore. Returns the converted copy; the argument itself is not changed.
+sub convertPath{
+	my ($converted) = @_;
+	for ($converted){
+		s/"//g;
+		s/\s+/_/g;
+	}
+	return $converted;
+}
+
+# Return one element of a ';' separated path.
+#   $_[0]  path; double quotes are removed and whitespace runs become "_" before splitting
+#   $_[1]  position of the element, e.g. -1 for the last and -2 for the second to last one
+# Returns -1 when a negative position reaches beyond the start of the path.
+sub getId{
+	my ($lineToParse, $idPos) = @_;
+
+	(my $clean = $lineToParse) =~ s/"//g;
+	$clean =~ s/\s+/_/g;
+	my @path = split(';', $clean);
+
+	return -1 if((scalar(@path) + $idPos) < 0);
+	return $path[$idPos];
+}
+
+# Build the value string of a leaf from its tab separated counts.
+#   $_[0]  "set1<TAB>set2<TAB>set3"
+# Returns "set1 set2 set3 ovp12 ovp13 ovp23 0": every pairwise overlap is the smaller of the
+# two counts (the first one on ties), the last value is always 0.
+sub computeLeafValues{
+	my @rawValues = split('\t', $_[0]);
+	my @fields    = @rawValues[0, 1, 2];
+
+	# pairwise overlaps in the order 1-2, 1-3, 2-3
+	foreach my $pair ([0, 1], [0, 2], [1, 2]){
+		my ($left, $right) = @rawValues[@{$pair}];
+		push(@fields, ($left <= $right) ? $left : $right);
+	}
+	push(@fields, 0);
+
+	return join(" ", @fields);
+}
+# -----------------------------------------------------------------------------
+
+
+# compute network (.sif)
+# Takes one input line, reads the path from its first tab separated field and appends the
+# edge "parent pp child" of the last two path elements to the file-level @network.
+# Paths with a single element add nothing.
+sub addToNetwork{
+	my ($path) = split('\t', $_[0]);
+	# remove ' " ' from the path and replace whitespace by '_'
+	$path =~ s/"//g;
+	$path =~ s/\s+/_/g;
+
+	my @nodes = split(';', $path);
+	push(@network, $nodes[-2] . " pp " . $nodes[-1]) if(@nodes > 1);
+}
+
+
+# store network in .sif file
+# Writes the file-level @network, one edge per line, to the file named by the global
+# $out_network (argv4). Takes no arguments.
+# Dies when the file cannot be opened or when closing it reports a write error.
+sub storeNetwork{
+	my $tmpFileName = $out_network;
+
+	# three-argument open with a lexical handle: the name is used verbatim and is never
+	# parsed for an open mode
+	open(my $fh, ">", $tmpFileName) || die "File can't be written - \"sif - File\"!\n";
+	print $fh join("\n", @network) . "\n";
+	# errors of buffered writes only surface when the handle is closed
+	close($fh) || die "File can't be written - \"sif - File\"!\n";
+}
+
+
+# ---------------------------------------------------------------------------------------------
+# two different lookup-tables are available!
+# lookupPixel() => static ; lookupPixelSQRT() => dynamic
+#
+# lookup absolute node-size to pixel (frame-size for venn-diagram), static table
+#   $_[0]  absolute node size
+# Returns the pixel size of the first step whose upper limit is larger than the node size,
+# and 250 for everything from 30,000,000 upwards.
+sub lookupPixel{
+	my $query = $_[0];
+
+	# [ exclusive upper limit, pixel ]
+	my @steps = (
+		[       10,  30 ],
+		[      100,  40 ],
+		[     1000,  50 ],
+		[    10000,  60 ],
+		[   100000,  80 ],
+		[  1000000, 100 ],
+		[ 10000000, 140 ],
+		[ 20000000, 180 ],
+		[ 30000000, 220 ],
+	);
+	foreach my $step (@steps){
+		return $step->[1] if($query < $step->[0]);
+	}
+	return 250;
+}
+
+# lookup absolute node-size to pixel (frame-size for venn-diagram) <- this is currently used!
+#   $_[0]  absolute node size
+# Returns int(size ** (1 / d) * 1.8 + 8). The root degree d is selected by the global
+# $transFnc (argv2):
+#   0 => 1.6   3,000 datapoints in sum
+#   1 => 2.1   30,000 datapoints in sum
+#   2 => 2.6   300,000 datapoints in sum
+#   3 => 3.1   3,000,000 datapoints in sum
+#   4 => 3.7   30,000,000 datapoints in sum
+#   5 => 4     300,000,000 datapoints in sum
+#   6 => 4.7   3,000,000,000 datapoints in sum
+# Dies for every other $transFnc. Before, such a value fell through all branches and
+# silently produced a frame size of 0.
+sub lookupPixelSQRT{
+	my @rootDegree = (1.6, 2.1, 2.6, 3.1, 3.7, 4, 4.7);
+
+	foreach my $mode (0 .. $#rootDegree){
+		if($transFnc == $mode){
+			return int(($_[0] ** (1 / $rootDegree[$mode])) * 1.8 + 8);
+		}
+	}
+
+	#return int(($_[0] ** (1/(3.3))) * 1.8 + 30);	# test version for small and large datasets?
+	#return int(($_[0] ** (1/(3.3))) * 1.8 + 5);	# test version for small and large datasets?
+	#return int(($_[0] ** (1/(4))) * 1.8 + 8);	    # test version for small and large datasets?
+	#return int(($_[0] ** (1/6)) * 12);				# old version this version is good for large datasets
+	die "Unknown transformation function \"$transFnc\" - expected a value between 0 and 6!\n";
+}
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coVennTree/coVennTree.xml	Fri Jan 30 09:55:45 2015 -0500
@@ -0,0 +1,143 @@
+<tool id="coVennTree" name="CoVennTree (Comparative weighted Venn Tree) - Rooted Tree" version="1.6.0">
+    <description>Comparative rooted tree analysis for files in dsv format</description>
+    <requirements>
+        <requirement type="package" version="1.6">coVennTree</requirement>
+        <requirement type="package" version="5.18.1">perl</requirement>
+    </requirements>
+    <command interpreter="perl">
+        coVennTree.pl
+        $infile
+        $color_mode
+        $trans_func
+        $leafs_allInformation
+        $outfile_network
+        $outfile_attribute
+    </command>
+    
+    <inputs>
+        <param name="infile" type="data" format="tabular" label="Path File" help="Tabular file containing the paths and values"/>
+        
+      
+        <param name="color_mode" multiple="false" type="select" label="Select color mode for Venn diagrams">
+                <option value="0">(1) Set1: blue       Set2: red       Set3: yellow</option>
+                <option value="1">(2) Set1: red        Set2: green     Set3: blue</option>
+                <option value="2">(3) Set1: green      Set2: magenta   Set3: blue</option>
+                <option value="3">(4) Set1: green      Set2: purple    Set3: red</option>
+                <option value="4">(5) Set1: dark gray  Set2: mid-grey  Set3: light gray</option>
+        </param>
+        
+             
+        <param name="trans_func" multiple="false" type="select" label="Select transformation function">
+                <option value="0">(1) datasets max: 3,000 data points in sum</option>
+                <option value="1">(2) datasets max: 30,000 data points in sum</option>
+                <option value="2">(3) datasets max: 300,000 data points in sum</option>
+                <option value="3">(4) datasets max: 3,000,000 data points in sum</option>
+                <option value="4">(5) datasets max: 30,000,000 data points in sum</option>
+                <option value="5">(6) datasets max: 300,000,000 data points in sum</option>
+                <option value="6">(7) datasets max: 3,000,000,000 data points in sum</option>
+        </param>
+       
+        
+        <param name="leafs_allInformation" multiple="false" type="select" label="Select tree analysis function">
+                <option value="1">(1) leaf + inner node information</option>
+                <option value="0">(2) only leaf information</option>
+        </param>
+        
+    </inputs>
+    
+    <outputs>
+        <data format="tabular" name="outfile_network" label="Network" />
+        <data format="tabular" name="outfile_attribute" label="Attributes" />
+    </outputs>
+    
+    <tests>
+        <test>
+        </test>
+    </tests>
+    
+    <help>
+.. class:: infomark
+
+CoVennTree compares up to three rooted trees at the same time.
+
+CoVennTree (Comparative weighted Venn Tree) is a software to analyze and compare up to three datasets. Unlike other
+methods, CoVennTree correlates data on the leaf level and transfers this information to the root node. CoVennTree works with numbers to compute weighted
+Venn diagrams for each node in the graph (rooted tree). Therefore any kind of input data can be processed as long as the data structure will be taken into account.
+
+
+
+**Input**
+
+*Input example*
+
+
+.. image:: $PATH_TO_IMAGES/example1.png 
+  :height: 430 
+  :width: 600
+
+
+*dsv-format: The following table represents the graph.*
+
+
+===========  ======  ======  ======
+#Datasets    set1    set2    set3
+===========  ======  ======  ======
+"root;"      0       0       0
+"root;A;"    10000   0       0
+"root;A;C;"  600000  300000  500000
+"root;A;D;"  0       100000  200000
+"root;A;E;"  800000  0       100000
+"root;B;"    10000   20000   50000
+===========  ======  ======  ======
+
+
+-------
+
+
+**Results**
+
+A specific color is assigned to each dataset in five optional color schemes (see parameter "Select color mode for weighted Venn diagrams").
+In this example set1 corresponds to color blue, set2 to red and set3 to yellow.
+In order to cover a wide numerical range a non linear transformation function is used.
+
+
+*Data format \*.sif*
+
+[parent_node]   [connected_with]    [child_node]
+
+
+*Data format \*.venn*
+
+[id]    [google_url]    [id_vds]    [Venn_abs_values]
+
+
+*Output example "leaf information and not assigned information"*
+
+By selecting "leaf information + not assigned information" artificial nodes can be inserted.
+Artificial nodes will be inserted if inner nodes have values larger than zero.
+
+.. image:: $PATH_TO_IMAGES/venn-graph-off.png 
+  :height: 358 
+  :width: 425
+  
+  
+-------
+
+
+*Output example "only leaf information"*
+
+By selecting "only leaf information" only leaf nodes are considered for the computation of weighted Venn diagrams.
+
+.. image:: $PATH_TO_IMAGES/venn-graph-on.png 
+  :height: 358
+  :width: 400
+
+
+
+    </help>
+    <citations>
+        <citation type="doi">
+        
+        </citation>
+    </citations>
+</tool>
Binary file coVennTree/static/._.DS_Store has changed
Binary file coVennTree/static/images/._example1.png has changed
Binary file coVennTree/static/images/._venn-graph-off.png has changed
Binary file coVennTree/static/images/._venn-graph-on.png has changed
Binary file coVennTree/static/images/example1.png has changed
Binary file coVennTree/static/images/venn-graph-off.png has changed
Binary file coVennTree/static/images/venn-graph-on.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coVennTree/tool_dependencies.xml	Fri Jan 30 09:55:45 2015 -0500
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="perl" version="5.18.1">
+      <repository changeset_revision="114b6af405fa" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="coVennTree" version="1.6">
+      <install version="1.0">
+          <actions>
+              <action type="setup_perl_environment">
+                  <repository changeset_revision="114b6af405fa" name="package_perl_5_18" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu">
+                      <package name="perl" version="5.18.1" />
+                    </repository>
+                    <!-- allow downloading and installing an Perl package from cpan.org-->
+                    <package>http://search.cpan.org/CPAN/authors/id/A/AD/ADAMK/List-MoreUtils-0.33.tar.gz</package>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        </readme>
+    </package>
+</tool_dependency>