Mercurial > repos > proteore > proteore_venn_diagram
comparison venn_diagram.py @ 0:9d4f20618ab4 draft
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
author | proteore |
---|---|
date | Sun, 26 Nov 2017 19:09:40 -0500 |
parents | |
children | 145f347dc0e1 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9d4f20618ab4 |
---|---|
1 #!/usr/bin/env python2.7 | |
2 | |
3 import os | |
4 import sys | |
5 import json | |
6 import operator | |
7 import argparse | |
8 import re | |
9 from itertools import combinations | |
10 | |
# Directory containing this script; write_summary() loads "jvenn_template.html" from it.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
12 | |
13 ################################################################################################################################################## | |
14 # FUNCTIONS | |
15 ################################################################################################################################################## | |
16 | |
def isnumber(format, n):
    """
    Check whether the string n is written as an integer or as a float.

    format -- "int" or "float"; any other value makes the function return False.
    n      -- the string to test.

    "int" accepts an optionally negative integer without leading zero, so "0"
    is rejected. The column-number check in input_to_dict() relies on this.
    "float" accepts an optionally negative decimal number: an integer part
    ("0" or a number without leading zero) optionally followed by a dot and at
    least one digit, e.g. "7", "0.5", "-12.25".

    Returns True when n matches the requested format, False otherwise.
    """
    patterns = {
        "int": r"^-?[1-9][0-9]*$",
        # The integer part may be a lone "0" and the fractional part is optional,
        # so single-digit values and values below 1 are recognised.
        "float": r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$",
    }
    pattern = patterns.get(format)
    if pattern is None:
        return False
    return re.match(pattern, n) is not None
32 | |
def input_to_dict(inputs):
    """
    Parse the inputs and return the identifiers and display name of each list.

    inputs -- list of input descriptions, each one a sequence:
              [0] path of a tabular file, or the identifiers themselves
              [1] display name of the list
              [2] input type: "file" reads [0] as a tab-separated file, any
                  other value splits [0] on whitespace
              [3] (file only) "true" when the first line is a header to skip
              [4] (file only) 1-based column number, optionally prefixed
                  with "c" (e.g. "c1")

    Returns (comp_dict, title_dict), both keyed by the letters "A" to "F"
    assigned in input order: comp_dict maps a letter to its set of identifiers,
    title_dict maps a letter to the display name.

    Raises ValueError when more than six inputs are given or when the column
    number is not a positive integer.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    if len(inputs) > len(labels):
        raise ValueError("At most %d inputs can be compared" % len(labels))

    comp_dict = {}
    title_dict = {}
    for label, entry in zip(labels, inputs):
        source, name, input_type = entry[0], entry[1], entry[2]
        title_dict[label] = name

        if input_type == "file":
            header = entry[3]
            column = entry[4].replace("c", "")
            # A negative number passes isnumber() but would index the row from
            # its end, so it is rejected here as well.
            if not isnumber("int", column) or int(column) < 1:
                raise ValueError("Please fill in the right format of column number")
            index = int(column) - 1

            with open(source, "r") as handle:
                rows = handle.readlines()
            if header == "true":
                rows = rows[1:]

            ids = set()
            for row in rows:
                fields = row.rstrip("\r\n").split("\t")
                if index >= len(fields):
                    # Row too short to hold the requested column (e.g. blank line).
                    continue
                # Cells may hold several IDs separated by ";": keep only the first.
                identifier = fields[index].split(";")[0].strip()
                if identifier:
                    ids.add(identifier)
        else:
            ids = set(source.split())

        comp_dict[label] = ids

    return comp_dict, title_dict
69 | |
def intersect(comp_dict):
    """
    Calculate the intersections of the input sets.

    comp_dict -- dictionary mapping a label to a set of identifiers.

    For every non-empty combination of labels (smallest combinations first,
    labels in sorted order) yields a tuple (group, intersected, difference):
      group       -- tuple of the labels in the combination
      intersected -- sorted list of the identifiers present in every set of the group
      difference  -- sorted list of the identifiers of intersected that are in
                     no set outside the group; equal to intersected when the
                     group contains every set
    """
    labels = sorted(comp_dict)
    for size in range(1, len(labels) + 1):
        for group in combinations(labels, size):
            intersected = set.intersection(*(comp_dict[k] for k in group))
            # Start from the full intersection and remove whatever also occurs
            # in a set that is not part of the group.
            difference = set(intersected)
            for label in labels:
                if label not in group:
                    difference.difference_update(comp_dict[label])
            # Sets have no stable order; sort so the output is reproducible.
            yield group, sorted(intersected), sorted(difference)
85 | |
def diagram(comp_dict, title_dict):
    """
    Create the dictionary that is serialised to JSON for the jvenn diagram plot.

    comp_dict  -- dictionary mapping a label to a set of identifiers
    title_dict -- dictionary mapping the same labels to their display names

    Returns a dictionary with three entries:
      "name"   -- label -> display name
      "data"   -- region (labels joined, e.g. "AB") -> list of identifiers
      "values" -- region -> number of identifiers in that region
    A region holds the identifiers found in all of its sets and in no other set.
    """
    result = {"name": {}, "data": {}, "values": {}}
    for key in comp_dict:
        result["name"][key] = title_dict[key]

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        region = "".join(group)
        # The group made of every set has nothing to be subtracted from it, so
        # its region is the plain intersection. This test must come first so a
        # single input list is not reported as empty.
        members = intersected if len(group) == total else difference
        result["data"][region] = members
        result["values"][region] = len(members)

    return result
109 | |
def write_text_venn(json_result):
    """
    Write the intersections of the inputs to the text output file.

    json_result -- dictionary with a "name" entry (label -> display name) and a
                   "data" entry (region -> list of identifiers), where a region
                   is a string of labels such as "AB".

    Writes "venn_diagram_text_output.txt" in the current working directory:
    one tab-separated column per non-empty region, headed by the display names
    of its labels joined with "_", with one identifier per row. Every cell,
    including the last one of a row, is followed by a tab. Columns are ordered
    by number of labels, then alphabetically. When every region is empty the
    file holds a single empty header line.
    """
    non_empty = dict((k, v) for k, v in json_result["data"].items() if v != [])
    # Fixed column order: dictionary order is not guaranteed.
    regions = sorted(non_empty, key=lambda region: (len(region), region))

    header = "".join(
        "_".join(json_result["name"][label] for label in region) + "\t"
        for region in regions
    )

    row_count = max([len(non_empty[region]) for region in regions] or [0])
    lines = []
    for i in range(row_count):
        cells = []
        for region in regions:
            values = non_empty[region]
            # Shorter columns are padded with empty cells.
            cells.append((values[i] if i < len(values) else "") + "\t")
        lines.append("".join(cells))

    with open("venn_diagram_text_output.txt", "w") as output:
        output.write(header + "\n" + "\n".join(lines))
139 | |
def write_summary(summary_file, inputs):
    """
    Compute the Venn diagram data and paste it as JSON into the HTML template.

    summary_file -- path of the HTML file to write
    inputs       -- input descriptions, passed unchanged to input_to_dict()

    Also writes the text table of the regions through write_text_venn().
    The template "jvenn_template.html" is read from the directory of this
    script; every occurrence of "###JVENN_DATA###" is replaced by the JSON
    string, all other lines are copied unchanged.
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialise once instead of once per template line holding the placeholder.
    # NOTE(review): identifiers come from user files and are pasted into HTML
    # as-is; confirm where the template puts the placeholder before relying on
    # this for untrusted input.
    payload = json.dumps(to_replace)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # Context managers close both files even if copying fails part-way.
    with open(template_path) as FH_summary_tpl, open(summary_file, "w") as FH_summary_out:
        for line in FH_summary_tpl:
            if "###JVENN_DATA###" in line:
                line = line.replace("###JVENN_DATA###", payload)
            FH_summary_out.write(line)
164 | |
def process(args):
    """
    Run the tool for the parsed command-line arguments.

    args -- object with a "summary" attribute (path of the HTML output) and an
            "input" attribute (input descriptions), both handed to write_summary().
    """
    summary_path = args.summary
    input_specs = args.input
    write_summary(summary_path, input_specs)
167 | |
168 | |
169 ################################################################################################################################################## | |
170 # MAIN | |
171 ################################################################################################################################################## | |
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(
        description='Compare up to six lists of identifiers and draw their Venn diagram with jvenn')
    group_input = parser.add_argument_group('Inputs')
    # Each --input occurrence is collected as one list of values, so args.input
    # is a list of lists in command-line order.
    group_input.add_argument(
        '--input', nargs="+", action="append", required=True,
        help="One list to compare, given as: FILE_OR_IDS NAME TYPE [HEADER COLUMN]. "
             "TYPE 'file' reads a tabular file (HEADER is 'true' to skip the first line, "
             "COLUMN is the column number, e.g. c1); any other TYPE reads FILE_OR_IDS as "
             "whitespace-separated identifiers. Repeat the option for each list.")
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument(
        '--summary', default="summary.html",
        help="The HTML file containing the graphs. [Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process(args)