Next changeset 1:cc6707a1e044 (2012-09-24) |
Commit message:
Uploaded |
added:
README.md __init__.py prop_venn.xml testFiles/fileA.tab testFiles/fileB.tab testFiles/fileC.tab testFiles/out.2.expected.html testFiles/out.3.expected.html venner.py vennerTest.py |
b |
diff -r 000000000000 -r 8ea9b4e5a389 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Jun 22 03:28:25 2011 -0400 |
b |
@@ -0,0 +1,31 @@ +prop_venn +--------- + +creates proportional venn diagrams from 2 or 3 files using google charts. +pure python implementation + +contains galaxy tool xml (prop_venn.xml) + +cli interface expects all arguments for the inputs concatenated +with , i.e. file1,file2,file3 ... + +contains an integration test for the cli interface (vennerTest.py), no galaxy unit tests. + + + +Proportional Venn Diagram: +-------------------------- +Creates a proportional Venn diagram from 2 or 3 tab delimited input files. +The 0 based column index decides which column is used for extracting the sets to compare between the files. +Multiple items with the same value per column are counted as one. + +Requirements: +------------- +- 2-3 tab delimited input files. +- 0 based index of the columns to extract for each file + +Outputs: +-------- +- Html page with the proportional Venn diagram and a table for the counts in each section. + + |
b |
diff -r 000000000000 -r 8ea9b4e5a389 prop_venn.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prop_venn.xml Wed Jun 22 03:28:25 2011 -0400 |
[ |
<!--
  Galaxy tool wrapper for venner.py.

  Builds the venner.py command line from two mandatory tabular inputs and an
  optional third one (selected through the "twoThree" conditional). Files,
  column indices and display names are each passed as one comma-separated
  argument, in matching order.
-->
<tool id="prop_venn" name="proportional venn" version="0.4">
<description> from 2-3 sets</description>
<command interpreter="python">venner.py
    #if str( $twoThree['tt']) == 'three':
        --files $inputFile1,$inputFile2,$twoThree.inputFile3
        --columns $column1,$column2,$twoThree.column3
        --asNames $asName1,$asName2,$twoThree.asName3
    #else:
        --files $inputFile1,$inputFile2
        --columns $column1,$column2
        --asNames $asName1,$asName2
    #end if
    --title '$title'
    --size $size
    --outname $outPath
</command>
<inputs>
    <!-- Plot-level settings. "name" is only used as the label of the output dataset. -->
    <param name="title" label="title" type="text" help="title of plot" optional="true" value=""/>
    <param name="name" label="name" type="text" help="name of output file" value="venn of NA"/>
    <!-- "<" must be written as an entity inside an attribute value to keep the document well-formed. -->
    <param name="size" label="size" type="integer" help="size of plot ( &lt; 540)" optional="true" value="300"/>

    <!-- First set. -->
    <param format="tabular" name="inputFile1" label="input file 1" type="data" help="tabular input file" optional="false"/>
    <param name="column1" label="column index" type="integer" help="0 based index of element column" optional="false" value="0"/>
    <param name="asName1" label="as name" type="text" help="nice name for columns on plot" optional="false" value="A"/>

    <!-- Second set; uses the same "tabular" datatype as the other inputs. -->
    <param format="tabular" name="inputFile2" label="input file 2" type="data" help="tabular input file" optional="false"/>
    <param name="column2" label="column index file 2" type="integer" help="0 based index of element column" optional="false" value="0"/>
    <param name="asName2" label="as name file 2" type="text" help="nice name for columns on plot" optional="false" value="B"/>

    <!-- Optional third set, only shown when "three" is selected. -->
    <conditional name="twoThree">
        <param name="tt" type="select" label="two or three">
            <option value="two" selected="true">two</option>
            <option value="three">three</option>
        </param>
        <when value="two">
        </when>
        <when value="three">
            <param format="tabular" name="inputFile3" label="input file 3" type="data" help="tabular input file" optional="false"/>
            <param name="column3" label="column index file 3" type="integer" help="0 based index of element column" optional="false" value="0"/>
            <param name="asName3" label="as name file 3" type="text" help="nice name for columns on plot" optional="false" value="C"/>
        </when>
    </conditional>
</inputs>

<outputs>
    <data name="outPath" format="html" label="${name}"/>
</outputs>

<help>

Proportional Venn Diagram:
--------------------------
Creates a proportional Venn diagram from 2 or 3 tab delimited input files.
The 0 based column index decides which column is used for extracting the sets to compare between the files.
Multiple items with the same value per column are counted as one.

Requirements:
-------------
- 2-3 tab delimited input files.
- 0 based index of the columns to extract for each file

Outputs:
--------
- Html page with the proportional Venn diagram and a table for the counts in each section.

Please turn to *Ido Tamir* in case of problems or suggestions for this tool.



</help>

</tool>
b |
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileA.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/fileA.tab Wed Jun 22 03:28:25 2011 -0400 |
b |
@@ -0,0 +1,7 @@ +1 A +2 B +3 C +4 D +6 A +7 A +8 E |
b |
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileB.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/fileB.tab Wed Jun 22 03:28:25 2011 -0400 |
b |
@@ -0,0 +1,4 @@ +1 D +2 A +3 E +5 F \ No newline at end of file |
b |
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileC.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/fileC.tab Wed Jun 22 03:28:25 2011 -0400 |
b |
@@ -0,0 +1,6 @@ +4 C +5 D +2 A +3 A +8 F +9 G \ No newline at end of file |
b |
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/out.2.expected.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/out.2.expected.html Wed Jun 22 03:28:25 2011 -0400 |
b |
@@ -0,0 +1,22 @@ + +<html> + <head> + <title>Venn diagram </title> + </head> + <body> + <h3></h3> + <div> + <img src="http://chart.apis.google.com/chart?cht=v&chd=t:56,44,0,33&chtt=&chs=300x300&chdl=As|Bs"/> + </div> + <div> + <table> + <tr><th>Segment</th><th>Size</th></tr> + <tr><td>As</td><td>5</td></tr> + <tr><td>Bs</td><td>4</td></tr> + <tr><td>As \ Bs</td><td>2</td></tr> + <tr><td>Bs \ As</td><td>1</td></tr> + <tr><td>As ∩ Bs</td><td>3</td></tr> + </table> + </div> + </body> +</html> \ No newline at end of file |
b |
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/out.3.expected.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testFiles/out.3.expected.html Wed Jun 22 03:28:25 2011 -0400 |
b |
@@ -0,0 +1,27 @@ + +<html> + <head> + <title>Venn diagram </title> + </head> + <body> + <h3></h3> + <div> + <img src="http://chart.apis.google.com/chart?cht=v&chd=t:36,29,36,21,21,21,14&chtt=&chs=300x300&chdl=As|Bs|Cs"/> + </div> + <div> + <table> + <tr><th>Segment</th><th>Size</th></tr> + <tr><td>As</td><td>5</td></tr> + <tr><td>Bs</td><td>4</td></tr> + <tr><td>Cs</td><td>5</td></tr> + <tr><td>As \ (Bs ∪ Cs)</td><td>1</td></tr> + <tr><td>Bs \ (As ∪ Cs)</td><td>0</td></tr> + <tr><td>Cs \ (As ∪ Bs)</td><td>1</td></tr> + <tr><td>As ∩ Bs \ Cs </td><td>1</td></tr> + <tr><td>As ∩ Cs \ Bs </td><td>1</td></tr> + <tr><td>Bs ∩ Cs \ As </td><td>1</td></tr> + <tr><td>As ∩ Bs ∩ Cs</td><td>2</td></tr> + </table> + </div> + </body> +</html> \ No newline at end of file |
b |
diff -r 000000000000 -r 8ea9b4e5a389 venner.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/venner.py Wed Jun 22 03:28:25 2011 -0400 |
[ |
b'@@ -0,0 +1,251 @@\n+#!/usr/bin/env python\n+\'\'\'\n+Created on Jul 13, 2010\n+\n+Generates Venn diagram from 2 or three input files\n+\n+one could generate output file that for each row contain the segment\n+i.e. A, B, AB, AmB, BmA\n+\n+@author: Ido M. Tamir\n+\'\'\'\n+from mako.template import Template\n+from optparse import OptionParser\n+import urllib,re,sys\n+\n+class Bunch:\n+ def __init__(self, d):\n+ for k, v in d.items():\n+ if isinstance(v, dict):\n+ v = Bunch(v)\n+ self.__dict__[k] = v\n+\n+class VennFile():\n+ def __init__(self, filePath, column, name):\n+ self.filePath = filePath\n+ self.column = column\n+ cleanname = re.sub("/","",name)\n+ self.name = urllib.quote(cleanname, safe="%/:=&?~#+!$,;\'@()*[]")\n+ self.dict = {}\n+ def read(self):\n+ dict = {}\n+ lineNr = 0\n+ for line in open( self.filePath, \'rb\'):\n+ key = line.split("\\t")[self.column].strip()\n+ linesList = dict.get(key, [])\n+ linesList.append(line+"\\t"+str(lineNr))\n+ dict[key] = linesList\n+ lineNr += 1\n+ self.dict = dict\n+ return self\n+\n+class Venn2:\n+ def __init__(self, title, size, one, two):\n+ self.one = one.read()\n+ self.two = two.read()\n+ self.title = title\n+ self.size = size\n+ \n+ def toUrl(self):\n+ one_keys = set(self.one.dict.keys())\n+ two_keys = set(self.two.dict.keys())\n+ \n+ keys_one_i_two = one_keys.intersection(two_keys)\n+ \n+ total = len(one_keys) + len(two_keys)\n+ sizes = [len(one_keys), len(two_keys), 0, len(keys_one_i_two)]\n+ sizes = self.relSizes(sizes, total)\n+ names = [self.one.name, self.two.name]\n+ return self.url(total, sizes, names)\n+ \n+ def relSizes(self, sizes, total):\n+ return map(lambda s: str(int(round((s/float(total) * 100)))), sizes) \n+\n+ def url(self, total, sizes, names):\n+ base = "http://chart.apis.google.com/chart?cht=v&chd=t:"\n+ counts = ",".join(sizes)\n+ titlep = "&chtt="+self.title\n+ size = "&chs="+str(self.size)+"x"+str(self.size)\n+ legend = "&chdl="+"|".join(names)\n+ url = base+counts+titlep+size+legend\n+ 
return url\n+ \n+ def toHtml(self):\n+ one_keys = set(self.one.dict.keys())\n+ two_keys = set(self.two.dict.keys())\n+ \n+ numbers = Bunch({\n+ "one_keys" : len(set(self.one.dict.keys())),\n+ "two_keys" : len(set(self.two.dict.keys())),\n+ "one_only" : len(one_keys.difference(two_keys)),\n+ "two_only" : len(two_keys.difference(one_keys)),\n+ "one_i_two" : len(one_keys.intersection(two_keys)),\n+ })\n+ \n+ template = """\n+<html>\n+ <head>\n+ <title>Venn diagram ${title}</title>\n+ </head>\n+ <body>\n+ <h3>${ title }</h3>\n+ <div>\n+ <img src="${ url }"/>\n+ </div>\n+ <div>\n+ <table>\n+ <tr><th>Segment</th><th>Count</th></tr>\n+ <tr><td>${ one }</td><td>${ n.one_keys }</td></tr>\n+ <tr><td>${ two }</td><td>${ n.two_keys }</td></tr>\n+ <tr><td>${ one } \\ ${ two }</td><td>${ n.one_only }</td></tr>\n+ <tr><td>${ two } \\ ${ one }</td><td>${ n.two_only }</td></tr>\n+ <tr><td>${ one } ∩ ${ two }</td><td>${ n.one_i_two }</td></tr>\n+ </table>\n+ </div>\n+ </body>\n+</html>"""\n+ result = Template(template).render(one=self.one.name, two=self.two.name, n=numbers, title=self.title, url=self.toUrl())\n+ return(result)\n+\n+\n+\n+class Venn3(Venn2):\n+ def __init__(self, title, size, one, two, three):\n+ Venn2.__init__(self, title, size, one, two)\n+ self.three = three.read()\n+ \n+ def toUrl(self):\n+ one_keys = set(self.one.dict.keys())\n+ two_keys = set(self.two.dict.keys())\n+ three_keys = set(self.three.dict.keys())\n+\n+ keys_one_i_two = one_keys.intersection(two_keys)\n+ keys_one_i_three'..b' <table>\n+ <tr><th>Segment</th><th>Count</th></tr>\n+ <tr><td>${ one }</td><td>${ n.one_keys }</td></tr>\n+ <tr><td>${ two }</td><td>${ n.two_keys }</td></tr>\n+ <tr><td>${ three }</td><td>${ n.three_keys }</td></tr>\n+ <tr><td>${ one } \\ (${ two } ∪ ${ three })</td><td>${ n.one_only }</td></tr>\n+ <tr><td>${ two } \\ (${ one } ∪ ${ three})</td><td>${ n.two_only }</td></tr>\n+ <tr><td>${ three } \\ (${ one } ∪ ${ two })</td><td>${ n.three_only }</td></tr>\n+ <tr><td>${ one } ∩ 
${ two } \\ ${ three } </td><td>${ n.one_two }</td></tr>\n+ <tr><td>${ one } ∩ ${ three } \\ ${ two } </td><td>${ n.one_three }</td></tr>\n+ <tr><td>${ two } ∩ ${ three } \\ ${ one } </td><td>${ n.two_three }</td></tr>\n+ <tr><td>${ one } ∩ ${ two } ∩ ${ three }</td><td>${ n.one_i_two_i_three }</td></tr>\n+ </table>\n+ </div>\n+ </body>\n+</html>"""\n+ result = Template(template).render(one=self.one.name, two=self.two.name, three=self.three.name, n=numbers, title=self.title, url=self.toUrl())\n+ return(result)\n+\n+\n+\n+\n+def main():\n+ \'\'\'main worker func\'\'\'\n+ parser = OptionParser()\n+ parser.add_option( "--files", dest="filePaths", help="file paths delimited by ,")\n+ parser.add_option( "--columns", dest="columns", help="0 based columnIndices delimited by ,")\n+ parser.add_option( "--asNames", dest="asNames", help="names of the columns for pretty print")\n+ parser.add_option( "--title", dest="title", help="title of plot")\n+ parser.add_option( "--size", dest="size", help="size plot, default 300")\n+ parser.add_option( "--outname", dest="outfileHtml", help="path of generated html file")\n+ \n+ (o, args) = parser.parse_args()\n+ errors = []\n+ if o.filePaths is None:\n+ errors.append("please add required paths to files")\n+ if o.columns is None:\n+ errors.append( "please add required columns" )\n+ if o.asNames is None:\n+ errors.append( "please add required asNames")\n+ if len(errors) > 0:\n+ print("\\n".join(errors))\n+ sys.exit()\n+ filePaths = o.filePaths.split(",")\n+ columns = o.columns.split(",")\n+ columns = map(int, columns)\n+ asNames = o.asNames.split(",")\n+ if len(errors) > 0 and ( len(filePaths) != len(columns) or len(columns) != len(asNames) ):\n+ errors.append( "different length of filePaths, columns or names:" +o.columns+" "+" "+o.names+" "+o.filePaths )\n+ title = ""\n+ if o.title:\n+ title = o.title\n+ if o.outfileHtml is None:\n+ errors.append( "please add outfile name for html" )\n+ if len(filePaths) > 3:\n+ errors.append( "can only 
compare up to three files was:"+str(len(filePaths)))\n+ if len(filePaths) == 1:\n+ errors.append( "just one file to compare does not make sense!")\n+ if len(errors) > 0:\n+ print("\\n".join(errors))\n+ sys.exit()\n+ \n+ size = "300"\n+ if o.size:\n+ size = o.size\n+ \n+ fileCount = len(filePaths)\n+ if fileCount == 2:\n+ venn = Venn2(title, size, VennFile(filePaths[0],columns[0],asNames[0]), VennFile(filePaths[1], columns[1], asNames[1]))\n+ else:\n+ venn = Venn3(title, size, VennFile(filePaths[0],columns[0],asNames[0]), VennFile(filePaths[1], columns[1], asNames[1]), VennFile(filePaths[2],columns[2],asNames[2]))\n+ htmlText = venn.toHtml()\n+ html = open(o.outfileHtml, \'w\')\n+ try:\n+ html.write(htmlText)\n+ finally:\n+ html.close()\n+\n+ \n+ \n+ \n+if __name__ == \'__main__\':\n+ main()\n+\n+#$ python venner.py --files testFiles/fileA.tab,testFiles/fileB.tab --columns 1,1 --outname out.html --asNames As,Bs\n+ \n' |
b |
diff -r 000000000000 -r 8ea9b4e5a389 vennerTest.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vennerTest.py Wed Jun 22 03:28:25 2011 -0400 |
b |
'''
Created on Jul 13, 2010

Integration tests for venner: builds two- and three-set Venn diagrams from
the files in testFiles/ and compares the generated chart URL and HTML page
with the stored expectations.

@author: Ido M. Tamir
'''
import os
import unittest
import venner

# Raw string: the set-difference notation below uses backslashes, which would
# otherwise be read as (invalid) escape sequences.
r'''By hand:
A:
1 A
2 B
3 C
4 D
6 A
7 A
8 E

B:
1 D
2 A
3 E
5 F

C:
4 C
5 D
2 A
3 A
8 F
9 G

A B C:
A = ABCDE 5
B = ADEF 4
AB unique = ABCDEF 6
C = ACDFG 5
ABC unique = ABCDEFG 7

Venn AB
A I B = ADE 3
A \ B = BC 2
B \ A = F 1
Sum = 6

Venn ABC
A \ ( B U C ) = B 1
B \ ( A U C ) = 0
C \ ( A U B ) = G 1
A I B \ C = E 1
A I C \ B = C 1
B I C \ A = F 1
A I B I C = AD 2
Sum = 7

A I B = ADE 3
A I C = ACD 3
B I C = ADF 3


'''

# Fixtures are looked up next to this module so the tests do not depend on
# the directory they are started from.
TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testFiles")


def _fixture(name):
    '''Return the path of the fixture file *name* inside testFiles/.'''
    return os.path.join(TEST_DIR, name)


def _read(name):
    '''Return the full text of the fixture file *name*, closing the file afterwards.'''
    with open(_fixture(name), 'r') as handle:
        return handle.read()


class Test(unittest.TestCase):
    '''Checks the chart URL and the HTML page produced for 2 and 3 input files.'''

    def testTwo(self):
        '''Two sets (column 1 of fileA and fileB) labelled As and Bs.'''
        venn = venner.Venn2("", 300,
                            venner.VennFile(_fixture("fileA.tab"), 1, "As"),
                            venner.VennFile(_fixture("fileB.tab"), 1, "Bs"))
        url = venn.toUrl()
        self.assertEqual("""http://chart.apis.google.com/chart?cht=v&chd=t:56,44,0,33&chtt=&chs=300x300&chdl=As|Bs""", url)
        actual = venn.toHtml()
        expected = _read("out.2.expected.html")
        self.assertEqual(expected, actual)

    def testThree(self):
        '''Three sets (column 1 of fileA, fileB and fileC) labelled As, Bs and Cs.'''
        venn = venner.Venn3("", 300,
                            venner.VennFile(_fixture("fileA.tab"), 1, "As"),
                            venner.VennFile(_fixture("fileB.tab"), 1, "Bs"),
                            venner.VennFile(_fixture("fileC.tab"), 1, "Cs"))
        url = venn.toUrl()
        self.assertEqual("""http://chart.apis.google.com/chart?cht=v&chd=t:36,29,36,21,21,21,14&chtt=&chs=300x300&chdl=As|Bs|Cs""", url)
        actual = venn.toHtml()
        expected = _read("out.3.expected.html")
        self.assertEqual(expected, actual)


if __name__ == "__main__":
    unittest.main()