comparison tools/venn_list/venn_list.py @ 8:ee50d9ef9d69 draft

v0.0.11 Python 3 compatible print; capture script version
author peterjc
date Thu, 11 May 2017 06:21:20 -0400
parents ea68a1a4c1d9
children 20d347feb882
comparison
equal deleted inserted replaced
7:ba31415fedc5 8:ee50d9ef9d69
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy) 2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
3 3
4 This script is copyright 2010 by Peter Cock, The James Hutton Institute 4 This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute
5 (formerly SCRI), UK. All rights reserved. 5 (formerly SCRI), UK. All rights reserved.
6 See accompanying text file for licence details (MIT/BSD style).
7 6
8 This is version 0.0.8 of the script. 7 See accompanying text file for licence details (MIT License).
9 """ 8 """
10 9
10 from __future__ import print_function
11 11
12 import sys 12 import sys
13
14 if "-v" in sys.argv or "--version" in sys.argv:
15 print("v0.0.11")
16 sys.exit(0)
13 17
14 try: 18 try:
15 import rpy 19 import rpy
16 except ImportError: 20 except ImportError:
17 sys.exit("Requires the Python library rpy (to call R)") 21 sys.exit("Requires the Python library rpy (to call R)")
35 set_data.append(tuple(sys.argv[7:10])) 39 set_data.append(tuple(sys.argv[7:10]))
36 if len(sys.argv) - 1 >= 13: 40 if len(sys.argv) - 1 >= 13:
37 set_data.append(tuple(sys.argv[10:13])) 41 set_data.append(tuple(sys.argv[10:13]))
38 pdf_file = sys.argv[-1] 42 pdf_file = sys.argv[-1]
39 n = len(set_data) 43 n = len(set_data)
40 print "Doing %i-way Venn Diagram" % n 44 print("Doing %i-way Venn Diagram" % n)
41 45
42 46
43 def load_ids(filename, filetype): 47 def load_ids(filename, filetype):
44 if filetype == "tabular": 48 if filetype == "tabular":
45 for line in open(filename): 49 for line in open(filename):
75 if name in whitelist: 79 if name in whitelist:
76 yield name 80 yield name
77 else: 81 else:
78 sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename)) 82 sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
79 83
84
80 if all_file in ["", "-", '""', '"-"']: 85 if all_file in ["", "-", '""', '"-"']:
81 # Load without white list 86 # Load without white list
82 sets = [set(load_ids(f, t)) for (f, t, c) in set_data] 87 sets = [set(load_ids(f, t)) for (f, t, c) in set_data]
83 # Take union 88 # Take union
84 all_ids = set() 89 all_ids = set()
85 for s in sets: 90 for s in sets:
86 all_ids.update(s) 91 all_ids.update(s)
87 print "Inferred total of %i IDs" % len(all_ids) 92 print("Inferred total of %i IDs" % len(all_ids))
88 else: 93 else:
89 all_ids = set(load_ids(all_file, all_type)) 94 all_ids = set(load_ids(all_file, all_type))
90 print "Total of %i IDs" % len(all_ids) 95 print("Total of %i IDs" % len(all_ids))
91 sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data] 96 sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data]
92 97
93 for s, (f, t, c) in zip(sets, set_data): 98 for s, (f, t, c) in zip(sets, set_data):
94 print "%i in %s" % (len(s), c) 99 print("%i in %s" % (len(s), c))
95 100
96 # Now call R library to draw simple Venn diagram 101 # Now call R library to draw simple Venn diagram
97 try: 102 try:
98 # Create dummy Venn diagram counts object for three groups 103 # Create dummy Venn diagram counts object for three groups
99 cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n)) 104 cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n))
100 rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n)) 105 rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n))
101 rpy.r('colnames(groups) <- %s' % cols) 106 rpy.r('colnames(groups) <- %s' % cols)
102 rpy.r('vc <- vennCounts(groups)') 107 rpy.r('vc <- vennCounts(groups)')
103 # Populate the 2^n classes with real counts 108 # Populate the 2^n classes with real counts
104 # Don't make any assumptions about the class order 109 # Don't make any assumptions about the class order
105 # print rpy.r('vc') 110 # print(rpy.r('vc'))
106 for index, row in enumerate(rpy.r('vc[,%s]' % cols)): 111 for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
107 if isinstance(row, int) or isinstance(row, float): 112 if isinstance(row, int) or isinstance(row, float):
108 # Hack for rpy being too clever for single element row 113 # Hack for rpy being too clever for single element row
109 row = [row] 114 row = [row]
110 names = all_ids 115 names = all_ids
112 if wanted: 117 if wanted:
113 names = names.intersection(s) 118 names = names.intersection(s)
114 else: 119 else:
115 names = names.difference(s) 120 names = names.difference(s)
116 rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names))) 121 rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names)))
117 # print rpy.r('vc') 122 # print(rpy.r('vc'))
118 if n == 1: 123 if n == 1:
119 # Single circle, don't need to add (Total XXX) line 124 # Single circle, don't need to add (Total XXX) line
120 names = [c for (t, f, c) in set_data] 125 names = [c for (t, f, c) in set_data]
121 else: 126 else:
122 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)] 127 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)]
129 """ % (all_label, len(all_ids))) 134 """ % (all_label, len(all_ids)))
130 rpy.r.dev_off() 135 rpy.r.dev_off()
131 except Exception, exc: 136 except Exception, exc:
132 sys.exit("%s" % str(exc)) 137 sys.exit("%s" % str(exc))
133 rpy.r.quit(save="no") 138 rpy.r.quit(save="no")
134 print "Done" 139 print("Done")