# HG changeset patch # User g2cmnty@test-web1.g2.bx.psu.edu # Date 1308727705 14400 # Node ID 8ea9b4e5a389d3acffb62ccf18959fff4b70cbb5 Uploaded diff -r 000000000000 -r 8ea9b4e5a389 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,31 @@ +prop_venn +--------- + +creates proportional venn diagrams from 2 or 3 files using google charts. +pure python implementation + +contains galaxy tool xml (prop_venn.xml) + +cli interface expects all arguments for the inputs concatenated +with , i.e. file1,file2,file3 ... + +contains an integration test for the cli interface (vennerTest.py), no galaxy unit tests. + + + +Proportional Venn Diagram: +-------------------------- +Creates a proportional Venn diagram from 2 or 3 tab delimited input files. +The 0 based column index decides which column is used for extracting the sets to compare between the files. +Multiple items with the same value per column are counted as one. + +Requirements: +------------- +- 2-3 tab delimited input files. +- 0 based index of the columns to extract for each file + +Outputs: +-------- +- Html page with the proportional Venn diagram and a table for the counts in each section. 
+ + diff -r 000000000000 -r 8ea9b4e5a389 __init__.py diff -r 000000000000 -r 8ea9b4e5a389 prop_venn.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prop_venn.xml Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,75 @@ + + from 2-3 sets +venner.py + #if str( $twoThree['tt']) == 'three': + --files $inputFile1,$inputFile2,$twoThree.inputFile3 + --columns $column1,$column2,$twoThree.column3 + --asNames $asName1,$asName2,$twoThree.asName3 + #else: + --file $inputFile1,$inputFile2 + --columns $column1,$column2 + --asNames $asName1,$asName2 + #end if + --title '$title' + --size $size + --outname $outPath + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Proportional Venn Diagram: +-------------------------- +Creates a proportional Venn diagram from 2 or 3 tab delimited input files. +The 0 based column index decides which column is used for extracting the sets to compare between the files. +Multiple items with the same value per column are counted as one. + +Requirements: +------------- +- 2-3 tab delimited input files. +- 0 based index of the columns to extract for each file + +Outputs: +-------- +- Html page with the proportional Venn diagram and a table for the counts in each section. + +Please turn to *Ido Tamir* in case of problems or suggestions for this tool. 
+ + + + + + + + + diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileA.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/fileA.tab Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,7 @@ +1 A +2 B +3 C +4 D +6 A +7 A +8 E diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileB.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/fileB.tab Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,4 @@ +1 D +2 A +3 E +5 F \ No newline at end of file diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileC.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/fileC.tab Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,6 @@ +4 C +5 D +2 A +3 A +8 F +9 G \ No newline at end of file diff -r 000000000000 -r 8ea9b4e5a389 testFiles/out.2.expected.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/out.2.expected.html Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,22 @@ + + + + Venn diagram + + +

+
+ +
+
+ + + + + + + +
SegmentSize
As5
Bs4
As \ Bs2
Bs \ As1
As ∩ Bs3
+
+ + \ No newline at end of file diff -r 000000000000 -r 8ea9b4e5a389 testFiles/out.3.expected.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/out.3.expected.html Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,27 @@ + + + + Venn diagram + + +

+
+ +
+
+ + + + + + + + + + + + +
SegmentSize
As5
Bs4
Cs5
As \ (Bs ∪ Cs)1
Bs \ (As ∪ Cs)0
Cs \ (As ∪ Bs)1
As ∩ Bs \ Cs 1
As ∩ Cs \ Bs 1
Bs ∩ Cs \ As 1
As ∩ Bs ∩ Cs2
+
+ + \ No newline at end of file diff -r 000000000000 -r 8ea9b4e5a389 venner.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/venner.py Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,251 @@ +#!/usr/bin/env python +''' +Created on Jul 13, 2010 + +Generates Venn diagram from 2 or three input files + +one could generate output file that for each row contain the segment +i.e. A, B, AB, AmB, BmA + +@author: Ido M. Tamir +''' +from mako.template import Template +from optparse import OptionParser +import urllib,re,sys + +class Bunch: + def __init__(self, d): + for k, v in d.items(): + if isinstance(v, dict): + v = Bunch(v) + self.__dict__[k] = v + +class VennFile(): + def __init__(self, filePath, column, name): + self.filePath = filePath + self.column = column + cleanname = re.sub("/","",name) + self.name = urllib.quote(cleanname, safe="%/:=&?~#+!$,;'@()*[]") + self.dict = {} + def read(self): + dict = {} + lineNr = 0 + for line in open( self.filePath, 'rb'): + key = line.split("\t")[self.column].strip() + linesList = dict.get(key, []) + linesList.append(line+"\t"+str(lineNr)) + dict[key] = linesList + lineNr += 1 + self.dict = dict + return self + +class Venn2: + def __init__(self, title, size, one, two): + self.one = one.read() + self.two = two.read() + self.title = title + self.size = size + + def toUrl(self): + one_keys = set(self.one.dict.keys()) + two_keys = set(self.two.dict.keys()) + + keys_one_i_two = one_keys.intersection(two_keys) + + total = len(one_keys) + len(two_keys) + sizes = [len(one_keys), len(two_keys), 0, len(keys_one_i_two)] + sizes = self.relSizes(sizes, total) + names = [self.one.name, self.two.name] + return self.url(total, sizes, names) + + def relSizes(self, sizes, total): + return map(lambda s: str(int(round((s/float(total) * 100)))), sizes) + + def url(self, total, sizes, names): + base = "http://chart.apis.google.com/chart?cht=v&chd=t:" + counts = ",".join(sizes) + titlep = "&chtt="+self.title + size = "&chs="+str(self.size)+"x"+str(self.size) 
+ legend = "&chdl="+"|".join(names) + url = base+counts+titlep+size+legend + return url + + def toHtml(self): + one_keys = set(self.one.dict.keys()) + two_keys = set(self.two.dict.keys()) + + numbers = Bunch({ + "one_keys" : len(set(self.one.dict.keys())), + "two_keys" : len(set(self.two.dict.keys())), + "one_only" : len(one_keys.difference(two_keys)), + "two_only" : len(two_keys.difference(one_keys)), + "one_i_two" : len(one_keys.intersection(two_keys)), + }) + + template = """ + + + Venn diagram ${title} + + +

${ title }

+
+ +
+
+ + + + + + + +
SegmentCount
${ one }${ n.one_keys }
${ two }${ n.two_keys }
${ one } \ ${ two }${ n.one_only }
${ two } \ ${ one }${ n.two_only }
${ one } ∩ ${ two }${ n.one_i_two }
+
+ +""" + result = Template(template).render(one=self.one.name, two=self.two.name, n=numbers, title=self.title, url=self.toUrl()) + return(result) + + + +class Venn3(Venn2): + def __init__(self, title, size, one, two, three): + Venn2.__init__(self, title, size, one, two) + self.three = three.read() + + def toUrl(self): + one_keys = set(self.one.dict.keys()) + two_keys = set(self.two.dict.keys()) + three_keys = set(self.three.dict.keys()) + + keys_one_i_two = one_keys.intersection(two_keys) + keys_one_i_three = one_keys.intersection(three_keys) + keys_two_i_three = two_keys.intersection(three_keys) + keys_one_i_two_i_three = one_keys.intersection(two_keys).intersection(three_keys) + + total = len(one_keys)+len(two_keys)+len(three_keys) + sizes = [len(one_keys), len(two_keys), len(three_keys), len(keys_one_i_two), len(keys_one_i_three), len(keys_two_i_three), len(keys_one_i_two_i_three)] + sizes = self.relSizes(sizes, total) + names = [self.one.name, self.two.name, self.three.name] + return self.url(total, sizes, names) + + def toHtml(self): + one_keys = set(self.one.dict.keys()) + two_keys = set(self.two.dict.keys()) + three_keys = set(self.three.dict.keys()) + + xa = one_keys.intersection(two_keys) + xt = two_keys.intersection(three_keys) + xd = xt.difference(one_keys) + + numbers = Bunch({ + "one_keys" : len(set(self.one.dict.keys())), + "two_keys" : len(set(self.two.dict.keys())), + "three_keys" : len(set(self.three.dict.keys())), + "one_only" : len(one_keys.difference(two_keys.union(three_keys))), + "two_only" : len(two_keys.difference(one_keys.union(three_keys))), + "three_only" : len(three_keys.difference(one_keys.union(two_keys))), + "one_two" : len(one_keys.intersection(two_keys).difference(three_keys)), + "one_three" : len(one_keys.intersection(three_keys).difference(two_keys)), + "two_three" : len(two_keys.intersection(three_keys).difference(one_keys)), + "one_i_two_i_three" : len(one_keys.intersection(two_keys).intersection(three_keys)) + }) + + template = 
""" + + + Venn diagram ${title} + + +

${ title }

+
+ +
+
+ + + + + + + + + + + + +
SegmentCount
${ one }${ n.one_keys }
${ two }${ n.two_keys }
${ three }${ n.three_keys }
${ one } \ (${ two } ∪ ${ three })${ n.one_only }
${ two } \ (${ one } ∪ ${ three})${ n.two_only }
${ three } \ (${ one } ∪ ${ two })${ n.three_only }
${ one } ∩ ${ two } \ ${ three } ${ n.one_two }
${ one } ∩ ${ three } \ ${ two } ${ n.one_three }
${ two } ∩ ${ three } \ ${ one } ${ n.two_three }
${ one } ∩ ${ two } ∩ ${ three }${ n.one_i_two_i_three }
+
+ +""" + result = Template(template).render(one=self.one.name, two=self.two.name, three=self.three.name, n=numbers, title=self.title, url=self.toUrl()) + return(result) + + + + +def main(): + '''main worker func''' + parser = OptionParser() + parser.add_option( "--files", dest="filePaths", help="file paths delimited by ,") + parser.add_option( "--columns", dest="columns", help="0 based columnIndices delimited by ,") + parser.add_option( "--asNames", dest="asNames", help="names of the columns for pretty print") + parser.add_option( "--title", dest="title", help="title of plot") + parser.add_option( "--size", dest="size", help="size plot, default 300") + parser.add_option( "--outname", dest="outfileHtml", help="path of generated html file") + + (o, args) = parser.parse_args() + errors = [] + if o.filePaths is None: + errors.append("please add required paths to files") + if o.columns is None: + errors.append( "please add required columns" ) + if o.asNames is None: + errors.append( "please add required asNames") + if len(errors) > 0: + print("\n".join(errors)) + sys.exit() + filePaths = o.filePaths.split(",") + columns = o.columns.split(",") + columns = map(int, columns) + asNames = o.asNames.split(",") + if len(errors) > 0 and ( len(filePaths) != len(columns) or len(columns) != len(asNames) ): + errors.append( "different length of filePaths, columns or names:" +o.columns+" "+" "+o.names+" "+o.filePaths ) + title = "" + if o.title: + title = o.title + if o.outfileHtml is None: + errors.append( "please add outfile name for html" ) + if len(filePaths) > 3: + errors.append( "can only compare up to three files was:"+str(len(filePaths))) + if len(filePaths) == 1: + errors.append( "just one file to compare does not make sense!") + if len(errors) > 0: + print("\n".join(errors)) + sys.exit() + + size = "300" + if o.size: + size = o.size + + fileCount = len(filePaths) + if fileCount == 2: + venn = Venn2(title, size, VennFile(filePaths[0],columns[0],asNames[0]), 
VennFile(filePaths[1], columns[1], asNames[1])) + else: + venn = Venn3(title, size, VennFile(filePaths[0],columns[0],asNames[0]), VennFile(filePaths[1], columns[1], asNames[1]), VennFile(filePaths[2],columns[2],asNames[2])) + htmlText = venn.toHtml() + html = open(o.outfileHtml, 'w') + try: + html.write(htmlText) + finally: + html.close() + + + + +if __name__ == '__main__': + main() + +#$ python venner.py --files testFiles/fileA.tab,testFiles/fileB.tab --columns 1,1 --outname out.html --asNames As,Bs + diff -r 000000000000 -r 8ea9b4e5a389 vennerTest.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vennerTest.py Wed Jun 22 03:28:25 2011 -0400 @@ -0,0 +1,86 @@ +''' +Created on Jul 13, 2010 + +@author: Ido M. Tamir +''' +import unittest +import venner + +'''By hand: +A: +1 A +2 B +3 C +4 D +6 A +7 A +8 E + +B: +1 D +2 A +3 E +5 F + +C: +4 C +5 D +2 A +3 A +8 F +9 G + +A B C: +A = ABCDE 5 +B = ADEF 4 +AB unique = ABCDEF 6 +C = ACDFG 5 +ABC unique = ABCDEFG 7 + +Venn AB +A I B = ADE 3 +A \ B = BC 2 +B \ A = F 1 +Sum = 6 + +Venn ABC +A \ ( B U C ) = B 1 +B \ ( A U C ) = 0 +C \ ( A U B ) = G 1 +A I B \ C = E 1 +A I C \ B = C 1 +B I C \ A = F 1 +A I B I C = AD 2 +Sum = 7 + +A I B = ADE 3 +A I C = ACD 3 +B I C = ADF 3 + + +''' + + +class Test(unittest.TestCase): + + + def testTwo(self): + venn = venner.Venn2("", 300, venner.VennFile("testFiles/fileA.tab",1,"As"), venner.VennFile("testFiles/fileB.tab", 1, "Bs")) + url = venn.toUrl() + self.assertEquals("""http://chart.apis.google.com/chart?cht=v&chd=t:56,44,0,33&chtt=&chs=300x300&chdl=As|Bs""", url) + actual = venn.toHtml() + expected = open('testFiles/out.2.expected.html', 'r').read() + self.assertEquals(expected, actual) + + def testThree(self): + venn = venner.Venn3("", 300, venner.VennFile("testFiles/fileA.tab",1,"As"), venner.VennFile("testFiles/fileB.tab", 1, "Bs"), venner.VennFile("testFiles/fileC.tab", 1, "Cs")) + url = venn.toUrl() + 
self.assertEquals("""http://chart.apis.google.com/chart?cht=v&chd=t:36,29,36,21,21,21,14&chtt=&chs=300x300&chdl=As|Bs|Cs""",url) + actual = venn.toHtml() + expected = open('testFiles/out.3.expected.html', 'r').read() + self.assertEquals(expected, actual) + + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file