Repository 'prop_venn'
hg clone https://toolshed.g2.bx.psu.edu/repos/idot/prop_venn

Changeset 0:8ea9b4e5a389 (2011-06-22)
Next changeset 1:cc6707a1e044 (2012-09-24)
Commit message:
Uploaded
added:
README.md
__init__.py
prop_venn.xml
testFiles/fileA.tab
testFiles/fileB.tab
testFiles/fileC.tab
testFiles/out.2.expected.html
testFiles/out.3.expected.html
venner.py
vennerTest.py
b
diff -r 000000000000 -r 8ea9b4e5a389 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,31 @@
+prop_venn
+---------
+
+creates proportional Venn diagrams from 2 or 3 files using Google Charts.
+pure python implementation
+
+contains galaxy tool xml (prop_venn.xml)
+
+The CLI expects the values of each option to be joined with commas,
+e.g. --files file1,file2,file3 (likewise for --columns and --asNames).
+
+contains an integration test for the cli interface (vennerTest.py), no galaxy unit tests.
+
+
+
+Proportional Venn Diagram:
+--------------------------
+Creates a proportional Venn diagram from 2 or 3 tab delimited input files.
+The 0 based column index decides which column is used for extracting the sets to compare between the files.
+Multiple items with the same value per column are counted as one.
+
+Requirements:
+-------------
+- 2-3 tab delimited input files.
+- 0 based index of the columns to extract for each file
+
+Outputs:
+--------
+- Html page with the proportional Venn diagram and a table for the counts in each section.
+
+
b
diff -r 000000000000 -r 8ea9b4e5a389 prop_venn.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/prop_venn.xml Wed Jun 22 03:28:25 2011 -0400
[
@@ -0,0 +1,75 @@
+<tool id="prop_venn" name="proportional venn" version="0.4">
+<description> from 2-3 sets</description>
+<!-- Command line template for venner.py. The third file, column and name are only
+     passed when the "twoThree" conditional is set to 'three'. Both branches use the
+     full "files" option name that venner.py declares. -->
+<command interpreter="python">venner.py
+    #if str( $twoThree['tt'] ) == 'three':
+        --files $inputFile1,$inputFile2,$twoThree.inputFile3
+        --columns $column1,$column2,$twoThree.column3
+        --asNames $asName1,$asName2,$twoThree.asName3
+    #else:
+        --files $inputFile1,$inputFile2
+        --columns $column1,$column2
+        --asNames $asName1,$asName2
+    #end if
+    --title '$title'
+    --size $size
+    --outname $outPath
+</command>
+<!-- Tool form parameters. title, size and the per-file settings are handed to venner.py
+     by the command template; "name" is only used as the label of the output dataset. -->
+<inputs>
+  <param name="title" label="title" type="text" help="title of plot" optional="true" value=""/>
+  <param name="name" label="name" type="text" help="name of output file" value="venn of NA"/>
+  <param name="size" label="size" type="integer" help="size of plot ( &lt; 540)" optional="true" value="300"/>
+
+  <!-- first input: file, 0 based column to extract, display name -->
+  <param format="tabular" name="inputFile1" label="input file 1" type="data" help="tabular input file" optional="false"/>
+  <param name="column1" label="column index" type="integer" help="0 based index of element column" optional="false" value="0"/>
+  <param name="asName1" label="as name" type="text" help="nice name for columns on plot" optional="false" value="A"/>
+
+  <!-- second input: same "tabular" format as inputs 1 and 3 -->
+  <param format="tabular" name="inputFile2" label="input file 2" type="data" help="tabular input file" optional="false"/>
+  <param name="column2" label="column index file 2" type="integer" help="0 based index of element column" optional="false" value="0"/>
+  <param name="asName2" label="as name file 2" type="text" help="nice name for columns on plot" optional="false" value="B"/>
+
+  <!-- optional third input, only shown when "three" is selected -->
+  <conditional name="twoThree">
+    <param name="tt" type="select" label="two or three">
+      <option value="two" selected="true">two</option>
+      <option value="three">three</option>
+    </param>
+    <when value="two">
+    </when>
+    <when value="three">
+      <param format="tabular" name="inputFile3" label="input file 3" type="data" help="tabular input file" optional="false"/>
+      <param name="column3" label="column index file 3" type="integer" help="0 based index of element column" optional="false" value="0"/>
+      <param name="asName3" label="as name file 3" type="text" help="nice name for columns on plot" optional="false" value="C"/>
+    </when>
+  </conditional>
+</inputs>
+
+<!-- Single HTML result written to the path given as outname on the command line;
+     the dataset label is taken from the "name" text parameter. -->
+<outputs>
+  <data name="outPath" format="html" label="${name}"/>
+</outputs>
+
+<help>
+
+Proportional Venn Diagram:
+--------------------------
+Creates a proportional Venn diagram from 2 or 3 tab delimited input files.
+The 0 based column index decides which column is used for extracting the sets to compare between the files.
+Multiple items with the same value per column are counted as one.
+
+Requirements:
+-------------
+- 2-3 tab delimited input files.
+- 0 based index of the columns to extract for each file
+
+Outputs:
+--------
+- Html page with the proportional Venn diagram and a table for the counts in each section.
+
+Please turn to *Ido Tamir* in case of problems or suggestions for this tool.
+
+
+
+</help>
+
+</tool>
+
+
+
b
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileA.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testFiles/fileA.tab Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,7 @@
+1 A
+2 B
+3 C
+4 D
+6 A
+7 A
+8 E
b
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileB.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testFiles/fileB.tab Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,4 @@
+1 D
+2 A
+3 E
+5 F
\ No newline at end of file
b
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/fileC.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testFiles/fileC.tab Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,6 @@
+4 C
+5 D
+2 A
+3 A
+8 F
+9 G
\ No newline at end of file
b
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/out.2.expected.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testFiles/out.2.expected.html Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,22 @@
+
+<html>
+  <head>
+     <title>Venn diagram </title>
+  </head>
+  <body>
+     <h3></h3>
+     <div>
+        <img src="http://chart.apis.google.com/chart?cht=v&chd=t:56,44,0,33&chtt=&chs=300x300&chdl=As|Bs"/>
+     </div>
+     <div>
+        <table>
+           <tr><th>Segment</th><th>Count</th></tr>
+           <tr><td>As</td><td>5</td></tr>
+           <tr><td>Bs</td><td>4</td></tr>
+           <tr><td>As \ Bs</td><td>2</td></tr>
+           <tr><td>Bs \ As</td><td>1</td></tr>
+           <tr><td>As &cap; Bs</td><td>3</td></tr>
+        </table>
+     </div>
+  </body>
+</html>
\ No newline at end of file
b
diff -r 000000000000 -r 8ea9b4e5a389 testFiles/out.3.expected.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testFiles/out.3.expected.html Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,27 @@
+
+<html>
+  <head>
+     <title>Venn diagram </title>
+  </head>
+  <body>
+     <h3></h3>
+     <div>
+        <img src="http://chart.apis.google.com/chart?cht=v&chd=t:36,29,36,21,21,21,14&chtt=&chs=300x300&chdl=As|Bs|Cs"/>
+     </div>
+     <div>
+        <table>
+           <tr><th>Segment</th><th>Count</th></tr>
+           <tr><td>As</td><td>5</td></tr>
+           <tr><td>Bs</td><td>4</td></tr>
+           <tr><td>Cs</td><td>5</td></tr>
+           <tr><td>As \ (Bs &cup; Cs)</td><td>1</td></tr>
+           <tr><td>Bs \ (As &cup; Cs)</td><td>0</td></tr>
+           <tr><td>Cs \ (As &cup; Bs)</td><td>1</td></tr>
+           <tr><td>As &cap; Bs \ Cs </td><td>1</td></tr>
+           <tr><td>As &cap; Cs \ Bs </td><td>1</td></tr>
+           <tr><td>Bs &cap; Cs \ As </td><td>1</td></tr>
+           <tr><td>As &cap; Bs &cap; Cs</td><td>2</td></tr>
+        </table>
+     </div>
+  </body>
+</html>
\ No newline at end of file
b
diff -r 000000000000 -r 8ea9b4e5a389 venner.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/venner.py Wed Jun 22 03:28:25 2011 -0400
[
b'@@ -0,0 +1,251 @@\n+#!/usr/bin/env python\n+\'\'\'\n+Created on Jul 13, 2010\n+\n+Generates Venn diagram from 2 or three input files\n+\n+one could generate output file that for each row contain the segment\n+i.e. A, B, AB, AmB, BmA\n+\n+@author: Ido M. Tamir\n+\'\'\'\n+from mako.template import Template\n+from optparse import OptionParser\n+import urllib,re,sys\n+\n+class Bunch:\n+    def __init__(self, d):\n+        for k, v in d.items():\n+            if isinstance(v, dict):\n+                v = Bunch(v)\n+            self.__dict__[k] = v\n+\n+class VennFile():\n+    def __init__(self, filePath, column, name):\n+        self.filePath = filePath\n+        self.column = column\n+        cleanname = re.sub("/","",name)\n+        self.name = urllib.quote(cleanname, safe="%/:=&?~#+!$,;\'@()*[]")\n+        self.dict = {}\n+    def read(self):\n+        dict = {}\n+        lineNr = 0\n+        for line in open( self.filePath, \'rb\'):\n+            key = line.split("\\t")[self.column].strip()\n+            linesList = dict.get(key, [])\n+            linesList.append(line+"\\t"+str(lineNr))\n+            dict[key] = linesList\n+            lineNr += 1\n+        self.dict = dict\n+        return self\n+\n+class Venn2:\n+    def __init__(self, title, size, one, two):\n+        self.one = one.read()\n+        self.two = two.read()\n+        self.title = title\n+        self.size = size\n+    \n+    def toUrl(self):\n+        one_keys = set(self.one.dict.keys())\n+        two_keys = set(self.two.dict.keys())\n+        \n+        keys_one_i_two = one_keys.intersection(two_keys)\n+       \n+        total = len(one_keys) + len(two_keys)\n+        sizes = [len(one_keys), len(two_keys), 0, len(keys_one_i_two)]\n+        sizes = self.relSizes(sizes, total)\n+        names = [self.one.name, self.two.name]\n+        return self.url(total, sizes, names)\n+  \n+    def relSizes(self, sizes, total):\n+        return map(lambda s: str(int(round((s/float(total) * 100)))), sizes)  
\n+\n+    def url(self, total, sizes, names):\n+        base = "http://chart.apis.google.com/chart?cht=v&chd=t:"\n+        counts = ",".join(sizes)\n+        titlep = "&chtt="+self.title\n+        size = "&chs="+str(self.size)+"x"+str(self.size)\n+        legend = "&chdl="+"|".join(names)\n+        url = base+counts+titlep+size+legend\n+        return url\n+    \n+    def toHtml(self):\n+        one_keys = set(self.one.dict.keys())\n+        two_keys = set(self.two.dict.keys())\n+       \n+        numbers = Bunch({\n+        "one_keys" : len(set(self.one.dict.keys())),\n+        "two_keys" : len(set(self.two.dict.keys())),\n+        "one_only" : len(one_keys.difference(two_keys)),\n+        "two_only" : len(two_keys.difference(one_keys)),\n+        "one_i_two" : len(one_keys.intersection(two_keys)),\n+        })\n+            \n+        template = """\n+<html>\n+  <head>\n+     <title>Venn diagram ${title}</title>\n+  </head>\n+  <body>\n+     <h3>${ title }</h3>\n+     <div>\n+        <img src="${ url }"/>\n+     </div>\n+     <div>\n+        <table>\n+           <tr><th>Segment</th><th>Count</th></tr>\n+           <tr><td>${ one }</td><td>${ n.one_keys }</td></tr>\n+           <tr><td>${ two }</td><td>${ n.two_keys }</td></tr>\n+           <tr><td>${ one } \\ ${ two }</td><td>${ n.one_only }</td></tr>\n+           <tr><td>${ two } \\ ${ one }</td><td>${ n.two_only }</td></tr>\n+           <tr><td>${ one } &cap; ${ two }</td><td>${ n.one_i_two }</td></tr>\n+        </table>\n+     </div>\n+  </body>\n+</html>"""\n+        result = Template(template).render(one=self.one.name, two=self.two.name, n=numbers, title=self.title, url=self.toUrl())\n+        return(result)\n+\n+\n+\n+class Venn3(Venn2):\n+    def __init__(self, title, size, one, two, three):\n+        Venn2.__init__(self, title, size, one, two)\n+        self.three = three.read()\n+           \n+    def toUrl(self):\n+        one_keys = set(self.one.dict.keys())\n+        two_keys = 
set(self.two.dict.keys())\n+        three_keys = set(self.three.dict.keys())\n+\n+        keys_one_i_two = one_keys.intersection(two_keys)\n+        keys_one_i_three'..b' <table>\n+           <tr><th>Segment</th><th>Count</th></tr>\n+           <tr><td>${ one }</td><td>${ n.one_keys }</td></tr>\n+           <tr><td>${ two }</td><td>${ n.two_keys }</td></tr>\n+           <tr><td>${ three }</td><td>${ n.three_keys }</td></tr>\n+           <tr><td>${ one } \\ (${ two } &cup; ${ three })</td><td>${ n.one_only }</td></tr>\n+           <tr><td>${ two } \\ (${ one } &cup; ${ three})</td><td>${ n.two_only }</td></tr>\n+           <tr><td>${ three } \\ (${ one } &cup; ${ two })</td><td>${ n.three_only }</td></tr>\n+           <tr><td>${ one } &cap; ${ two } \\ ${ three } </td><td>${ n.one_two }</td></tr>\n+           <tr><td>${ one } &cap; ${ three } \\ ${ two } </td><td>${ n.one_three }</td></tr>\n+           <tr><td>${ two } &cap; ${ three } \\ ${ one } </td><td>${ n.two_three }</td></tr>\n+           <tr><td>${ one } &cap; ${ two } &cap; ${ three }</td><td>${ n.one_i_two_i_three }</td></tr>\n+        </table>\n+     </div>\n+  </body>\n+</html>"""\n+        result = Template(template).render(one=self.one.name, two=self.two.name, three=self.three.name, n=numbers, title=self.title, url=self.toUrl())\n+        return(result)\n+\n+\n+\n+\n+def main():\n+        \'\'\'main worker func\'\'\'\n+        parser = OptionParser()\n+        parser.add_option( "--files", dest="filePaths", help="file paths delimited by ,")\n+        parser.add_option( "--columns", dest="columns", help="0 based columnIndices delimited by ,")\n+        parser.add_option( "--asNames", dest="asNames", help="names of the columns for pretty print")\n+        parser.add_option( "--title", dest="title", help="title of plot")\n+        parser.add_option( "--size", dest="size", help="size plot, default 300")\n+        parser.add_option( "--outname", dest="outfileHtml", help="path of generated html file")\n+     
   \n+        (o, args) = parser.parse_args()\n+        errors = []\n+        if o.filePaths is None:\n+            errors.append("please add required paths to files")\n+        if o.columns is None:\n+            errors.append( "please add required columns" )\n+        if o.asNames is None:\n+            errors.append( "please add required asNames")\n+        if len(errors) > 0:\n+            print("\\n".join(errors))\n+            sys.exit()\n+        filePaths = o.filePaths.split(",")\n+        columns = o.columns.split(",")\n+        columns = map(int, columns)\n+        asNames = o.asNames.split(",")\n+        if len(errors) > 0 and ( len(filePaths) != len(columns) or len(columns) != len(asNames) ):\n+            errors.append( "different length of filePaths, columns or names:" +o.columns+" "+" "+o.names+" "+o.filePaths )\n+        title = ""\n+        if o.title:\n+            title = o.title\n+        if o.outfileHtml is None:\n+            errors.append( "please add outfile name for html" )\n+        if len(filePaths) > 3:\n+            errors.append( "can only compare up to three files was:"+str(len(filePaths)))\n+        if len(filePaths) == 1:\n+            errors.append( "just one file to compare does not make sense!")\n+        if len(errors) > 0:\n+            print("\\n".join(errors))\n+            sys.exit()\n+        \n+        size = "300"\n+        if o.size:\n+            size = o.size\n+        \n+        fileCount = len(filePaths)\n+        if fileCount == 2:\n+            venn = Venn2(title, size, VennFile(filePaths[0],columns[0],asNames[0]), VennFile(filePaths[1], columns[1], asNames[1]))\n+        else:\n+            venn = Venn3(title, size, VennFile(filePaths[0],columns[0],asNames[0]), VennFile(filePaths[1], columns[1], asNames[1]), VennFile(filePaths[2],columns[2],asNames[2]))\n+        htmlText = venn.toHtml()\n+        html = open(o.outfileHtml, \'w\')\n+        try:\n+            html.write(htmlText)\n+        finally:\n+            
html.close()\n+\n+               \n+        \n+        \n+if __name__ == \'__main__\':\n+      main()\n+\n+#$ python venner.py --files testFiles/fileA.tab,testFiles/fileB.tab --columns 1,1 --outname out.html --asNames As,Bs\n+                                               \n'
b
diff -r 000000000000 -r 8ea9b4e5a389 vennerTest.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vennerTest.py Wed Jun 22 03:28:25 2011 -0400
b
@@ -0,0 +1,86 @@
+'''
+Created on Jul 13, 2010
+
+@author: Ido M. Tamir
+'''
+import unittest
+import venner
+
+'''By hand:
+A:
+1    A
+2    B
+3    C
+4    D
+6    A
+7    A
+8    E
+
+B:
+1    D
+2    A
+3    E
+5    F
+
+C:
+4    C
+5    D
+2    A
+3    A
+8    F
+9    G
+
+A B C:
+A = ABCDE 5
+B = ADEF 4
+AB unique = ABCDEF 6
+C = ACDFG 5
+ABC unique = ABCDEFG 7
+
+Venn AB
+A I B = ADE 3
+A \ B = BC 2
+B \ A = F 1
+Sum = 6
+
+Venn ABC
+A \ ( B U C ) = B 1
+B \ ( A U C ) =   0
+C \ ( A U B ) = G 1
+A I B \ C = E 1
+A I C \ B = C 1
+B I C \ A = F 1
+A I B I C = AD 2
+Sum = 7
+
+A I B = ADE 3
+A I C = ACD 3
+B I C = ADF 3
+
+
+'''
+
+
+class Test(unittest.TestCase):
+    '''Integration tests for venner.Venn2 and venner.Venn3.
+
+    Each test builds a diagram from column 1 of the files in testFiles/ and
+    compares the chart URL and the rendered HTML with the expected values.
+    The fixture paths are relative, so they are resolved against the current
+    working directory.
+    '''
+
+    def _readExpected(self, path):
+        '''Return the content of an expected-output file and close the handle.'''
+        handle = open(path, 'r')
+        try:
+            return handle.read()
+        finally:
+            handle.close()
+
+    def testTwo(self):
+        '''Two sets: fileA against fileB.'''
+        venn = venner.Venn2("", 300, venner.VennFile("testFiles/fileA.tab", 1, "As"), venner.VennFile("testFiles/fileB.tab", 1, "Bs"))
+        url = venn.toUrl()
+        # assertEqual replaces the deprecated assertEquals alias
+        self.assertEqual("""http://chart.apis.google.com/chart?cht=v&chd=t:56,44,0,33&chtt=&chs=300x300&chdl=As|Bs""", url)
+        actual = venn.toHtml()
+        expected = self._readExpected('testFiles/out.2.expected.html')
+        self.assertEqual(expected, actual)
+
+    def testThree(self):
+        '''Three sets: fileA, fileB and fileC.'''
+        venn = venner.Venn3("", 300, venner.VennFile("testFiles/fileA.tab", 1, "As"), venner.VennFile("testFiles/fileB.tab", 1, "Bs"), venner.VennFile("testFiles/fileC.tab", 1, "Cs"))
+        url = venn.toUrl()
+        self.assertEqual("""http://chart.apis.google.com/chart?cht=v&chd=t:36,29,36,21,21,21,14&chtt=&chs=300x300&chdl=As|Bs|Cs""", url)
+        actual = venn.toHtml()
+        expected = self._readExpected('testFiles/out.3.expected.html')
+        self.assertEqual(expected, actual)
+                  
+         
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file