0
|
1 #!/usr/bin/env python2.7
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import json
|
|
6 import operator
|
|
7 import argparse
|
|
8 import re
|
|
9 from itertools import combinations
|
|
10
|
|
11 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
12
|
|
13 ##################################################################################################################################################
|
|
14 # FUNCTIONS
|
|
15 ##################################################################################################################################################
|
|
16
|
|
def isnumber(format, n):
    """
    Check if an element is integer or float

    Parameters:
        format: "int" or "float"; any other value makes the check fail.
        n: the string to test.

    Returns:
        True when ``n`` matches the requested numeric format, False otherwise.

    The "int" format only accepts non-zero integers written without leading
    zeros (an optional leading "-" is allowed), so "0" is rejected. The
    "float" format accepts an optional "-", an integer part ("0" or digits
    without leading zeros) and an optional fractional part, e.g. "5", "0.5",
    "-12.25".
    """
    # Raw strings keep the regex escapes away from Python's own string
    # escaping (non-raw "\-" and "\." are invalid escape sequences).
    float_format = re.compile(r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$")
    int_format = re.compile(r"^[\-]?[1-9][0-9]*$")
    if format == "int":
        return int_format.match(n) is not None
    if format == "float":
        return float_format.match(n) is not None
    # Unknown format name: nothing can match.
    return False
|
|
32
|
|
def input_to_dict(inputs):
    """
    Parse input and return a dictionary of name and data of each lists/files

    Parameters:
        inputs: sequence of input descriptions. Each description is indexed
            as ``[content_or_path, name, type]``; when ``type`` is "file" it
            also carries ``header`` at index 3 ("true" to skip the first
            line) and the column at index 4 (1-based number, optionally
            written with a "c", e.g. "c1"). Any other type is treated as a
            whitespace-separated list of IDs held in the first element.

    Returns:
        A tuple ``(comp_dict, title_dict)`` where ``comp_dict`` maps a label
        ("A", "B", ...) to the set of IDs of the matching input and
        ``title_dict`` maps the same label to the input name.

    Raises:
        ValueError: if the column of a file input is not a positive integer.
        IndexError: if more inputs are given than there are labels (6), or
            if a line of a file has fewer columns than requested.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    comp_dict = {}
    title_dict = {}
    for index, entry in enumerate(inputs):
        source = entry[0]
        name = entry[1]
        input_type = entry[2]
        title = labels[index]
        title_dict[title] = name

        if input_type == "file":
            header = entry[3]
            ncol = entry[4]
            # Read everything up front and close the handle right away.
            with open(source, "r") as handle:
                rows = handle.readlines()

            # Check if column number is in right form
            column = ncol.replace("c", "")
            if not isnumber("int", column) or int(column) < 1:
                # A negative number would silently index from the end of the row.
                raise ValueError("Please fill in the right format of column number")
            col_index = int(column) - 1

            if header == "true":
                rows = rows[1:]
            # A cell may hold several ";"-separated IDs: take only first IDs
            file_content = [row.split("\t")[col_index].split(";")[0].strip() for row in rows]
        else:
            file_content = source.split()

        comp_dict[title] = set(file_content)

    return comp_dict, title_dict
|
|
69
|
|
def intersect(comp_dict):
    """
    Calculate the intersections of input

    Parameters:
        comp_dict: dictionary mapping a label to a set of elements.

    Yields:
        One tuple ``(group, intersected, difference)`` per non-empty
        combination of labels, smallest combinations first:
        ``group`` is the tuple of labels (in sorted order), ``intersected``
        the sorted list of elements shared by every set of the group, and
        ``difference`` the sorted list of those elements found in no set
        outside the group. ``difference`` is an empty list when the group
        contains every label.

    The lists are sorted so that results are reproducible from run to run;
    this assumes the elements are mutually comparable (e.g. all strings).
    """
    names = sorted(comp_dict)
    for size in range(1, len(names) + 1):
        for group in combinations(names, size):
            intersected = set.intersection(*(comp_dict[k] for k in group))
            others = [name for name in names if name not in group]
            if others:
                # Drop everything that also appears in a set outside the group.
                difference = intersected.difference(*(comp_dict[k] for k in others))
            else:
                difference = []
            yield group, sorted(intersected), sorted(difference)
|
|
85
|
|
def diagram(comp_dict, title_dict):
    """
    Create json string for jvenn diagram plot

    Parameters:
        comp_dict: dictionary mapping a label to a set of elements.
        title_dict: dictionary mapping the same labels to display names.

    Returns:
        A dictionary with three entries:
        "name" maps each label to its display name, "data" maps each
        combination of labels (labels joined together, e.g. "AB") to the
        list of elements of that region, and "values" maps the same keys to
        the number of elements.
    """
    result = {}
    result["name"] = {k: title_dict[k] for k in comp_dict}
    result["data"] = {}
    result["values"] = {}

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        key = "".join(group)
        # The group holding every label has no other set to subtract, so its
        # region is the plain intersection. This is tested first so that a
        # single input reports its elements instead of an empty region.
        if len(group) == total:
            members = intersected
        else:
            members = difference
        result["data"][key] = members
        result["values"][key] = len(members)

    return result
|
|
109
|
|
def write_text_venn(json_result):
    """
    Write intersections of input to text output file

    Parameters:
        json_result: dictionary with a "data" entry (region key -> list of
            elements) and a "name" entry (label -> display name); each
            character of a region key is looked up in "name".

    The output goes to "venn_diagram_text_output.txt" in the current
    directory: one tab-terminated column per non-empty region, headed by the
    display names of its labels joined with "_", followed by one row per
    element. Only a newline is written when every region is empty. The
    filtered data, the row count and the final text are also printed.
    """
    # Keep only the regions that contain at least one element.
    result = dict((k, v) for k, v in json_result["data"].items() if v != [])
    print(result)
    keys = list(result)
    # max() rejects an empty sequence, so handle "no region at all" explicitly.
    max_count = max(len(result[k]) for k in keys) if keys else 0
    print(max_count)

    # The header does not depend on the row, so it is built once.
    header = "".join(
        "_".join([json_result["name"][x] for x in key]) + "\t" for key in keys
    )

    lines = []
    for i in range(max_count):
        cells = []
        for key in keys:
            members = result[key]
            # Shorter columns are padded with empty cells.
            cells.append(members[i] + "\t" if len(members) > i else "\t")
        lines.append("".join(cells))

    string = header + "\n" + "\n".join(lines)
    print(string)
    with open("venn_diagram_text_output.txt", "w") as output:
        output.write(string)
|
|
139
|
|
def write_summary(summary_file, inputs):
    """
    Paste json string into template file

    Parameters:
        summary_file: path of the HTML file to write.
        inputs: input descriptions, passed unchanged to input_to_dict().

    The diagram data is computed from ``inputs``, written as text through
    write_text_venn(), then serialised to JSON and substituted for the
    "###JVENN_DATA###" marker while "jvenn_template.html" (looked up next to
    this script) is copied line by line to ``summary_file``.
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialise once, whatever the number of lines holding the marker.
    json_data = json.dumps(to_replace)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # The with statement closes both files even if the copy fails midway.
    with open(template_path) as FH_summary_tpl, open(summary_file, "w") as FH_summary_out:
        for line in FH_summary_tpl:
            if "###JVENN_DATA###" in line:
                line = line.replace("###JVENN_DATA###", json_data)
            FH_summary_out.write(line)
|
|
164
|
|
def process(args):
    """
    Build the summary from the parsed command line arguments

    Reads the ``summary`` and ``input`` attributes of ``args`` and hands
    them to write_summary().
    """
    summary_path = args.summary
    input_specs = args.input
    write_summary(summary_path, input_specs)
|
|
167
|
|
168
|
|
169 ##################################################################################################################################################
|
|
170 # MAIN
|
|
171 ##################################################################################################################################################
|
|
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(description='Compute the intersections of lists/files and build a jvenn Venn diagram')
    group_input = parser.add_argument_group( 'Inputs' )
    # Each --input occurrence is collected as one list of values; see
    # input_to_dict() for how the positions are interpreted.
    group_input.add_argument('--input', nargs="+", action="append", required=True, help="One input (repeat the option for each input, up to 6): <file path or list of IDs> <name> <type>; when <type> is 'file', also <header ('true' to skip the first line)> <column number, e.g. c1>.")
    group_output = parser.add_argument_group( 'Outputs' )
    group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process( args )
|