Mercurial > repos > peterjc > venn_list
comparison tools/venn_list/venn_list.py @ 8:ee50d9ef9d69 draft
v0.0.11 Python 3 compatible print; capture script version
author | peterjc |
---|---|
date | Thu, 11 May 2017 06:21:20 -0400 |
parents | ea68a1a4c1d9 |
children | 20d347feb882 |
comparison
equal
deleted
inserted
replaced
7:ba31415fedc5 | 8:ee50d9ef9d69 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy) | 2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy) |
3 | 3 |
4 This script is copyright 2010 by Peter Cock, The James Hutton Institute | 4 This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute |
5 (formerly SCRI), UK. All rights reserved. | 5 (formerly SCRI), UK. All rights reserved. |
6 See accompanying text file for licence details (MIT/BSD style). | |
7 | 6 |
8 This is version 0.0.8 of the script. | 7 See accompanying text file for licence details (MIT License). |
9 """ | 8 """ |
10 | 9 |
10 from __future__ import print_function | |
11 | 11 |
12 import sys | 12 import sys |
13 | |
14 if "-v" in sys.argv or "--version" in sys.argv: | |
15 print("v0.0.11") | |
16 sys.exit(0) | |
13 | 17 |
14 try: | 18 try: |
15 import rpy | 19 import rpy |
16 except ImportError: | 20 except ImportError: |
17 sys.exit("Requires the Python library rpy (to call R)") | 21 sys.exit("Requires the Python library rpy (to call R)") |
35 set_data.append(tuple(sys.argv[7:10])) | 39 set_data.append(tuple(sys.argv[7:10])) |
36 if len(sys.argv) - 1 >= 13: | 40 if len(sys.argv) - 1 >= 13: |
37 set_data.append(tuple(sys.argv[10:13])) | 41 set_data.append(tuple(sys.argv[10:13])) |
38 pdf_file = sys.argv[-1] | 42 pdf_file = sys.argv[-1] |
39 n = len(set_data) | 43 n = len(set_data) |
40 print "Doing %i-way Venn Diagram" % n | 44 print("Doing %i-way Venn Diagram" % n) |
41 | 45 |
42 | 46 |
43 def load_ids(filename, filetype): | 47 def load_ids(filename, filetype): |
44 if filetype == "tabular": | 48 if filetype == "tabular": |
45 for line in open(filename): | 49 for line in open(filename): |
75 if name in whitelist: | 79 if name in whitelist: |
76 yield name | 80 yield name |
77 else: | 81 else: |
78 sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename)) | 82 sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename)) |
79 | 83 |
84 | |
80 if all_file in ["", "-", '""', '"-"']: | 85 if all_file in ["", "-", '""', '"-"']: |
81 # Load without white list | 86 # Load without white list |
82 sets = [set(load_ids(f, t)) for (f, t, c) in set_data] | 87 sets = [set(load_ids(f, t)) for (f, t, c) in set_data] |
83 # Take union | 88 # Take union |
84 all_ids = set() | 89 all_ids = set() |
85 for s in sets: | 90 for s in sets: |
86 all_ids.update(s) | 91 all_ids.update(s) |
87 print "Inferred total of %i IDs" % len(all_ids) | 92 print("Inferred total of %i IDs" % len(all_ids)) |
88 else: | 93 else: |
89 all_ids = set(load_ids(all_file, all_type)) | 94 all_ids = set(load_ids(all_file, all_type)) |
90 print "Total of %i IDs" % len(all_ids) | 95 print("Total of %i IDs" % len(all_ids)) |
91 sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data] | 96 sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data] |
92 | 97 |
93 for s, (f, t, c) in zip(sets, set_data): | 98 for s, (f, t, c) in zip(sets, set_data): |
94 print "%i in %s" % (len(s), c) | 99 print("%i in %s" % (len(s), c)) |
95 | 100 |
96 # Now call R library to draw simple Venn diagram | 101 # Now call R library to draw simple Venn diagram |
97 try: | 102 try: |
98 # Create dummy Venn diagram counts object for three groups | 103 # Create dummy Venn diagram counts object for three groups |
99 cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n)) | 104 cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n)) |
100 rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n)) | 105 rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n)) |
101 rpy.r('colnames(groups) <- %s' % cols) | 106 rpy.r('colnames(groups) <- %s' % cols) |
102 rpy.r('vc <- vennCounts(groups)') | 107 rpy.r('vc <- vennCounts(groups)') |
103 # Populate the 2^n classes with real counts | 108 # Populate the 2^n classes with real counts |
104 # Don't make any assumptions about the class order | 109 # Don't make any assumptions about the class order |
105 # print rpy.r('vc') | 110 # print(rpy.r('vc')) |
106 for index, row in enumerate(rpy.r('vc[,%s]' % cols)): | 111 for index, row in enumerate(rpy.r('vc[,%s]' % cols)): |
107 if isinstance(row, int) or isinstance(row, float): | 112 if isinstance(row, int) or isinstance(row, float): |
108 # Hack for rpy being too clever for single element row | 113 # Hack for rpy being too clever for single element row |
109 row = [row] | 114 row = [row] |
110 names = all_ids | 115 names = all_ids |
112 if wanted: | 117 if wanted: |
113 names = names.intersection(s) | 118 names = names.intersection(s) |
114 else: | 119 else: |
115 names = names.difference(s) | 120 names = names.difference(s) |
116 rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names))) | 121 rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names))) |
117 # print rpy.r('vc') | 122 # print(rpy.r('vc')) |
118 if n == 1: | 123 if n == 1: |
119 # Single circle, don't need to add (Total XXX) line | 124 # Single circle, don't need to add (Total XXX) line |
120 names = [c for (t, f, c) in set_data] | 125 names = [c for (t, f, c) in set_data] |
121 else: | 126 else: |
122 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)] | 127 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)] |
129 """ % (all_label, len(all_ids))) | 134 """ % (all_label, len(all_ids))) |
130 rpy.r.dev_off() | 135 rpy.r.dev_off() |
131 except Exception, exc: | 136 except Exception, exc: |
132 sys.exit("%s" % str(exc)) | 137 sys.exit("%s" % str(exc)) |
133 rpy.r.quit(save="no") | 138 rpy.r.quit(save="no") |
134 print "Done" | 139 print("Done") |