0
+ − 1 #!/usr/bin/env python2.7
+ − 2
+ − 3 import os
+ − 4 import sys
+ − 5 import json
+ − 6 import operator
+ − 7 import argparse
+ − 8 import re
+ − 9 from itertools import combinations
+ − 10
# Directory containing this script; "jvenn_template.html" is looked up here.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+ − 12
+ − 13 ##################################################################################################################################################
+ − 14 # FUNCTIONS
+ − 15 ##################################################################################################################################################
+ − 16
def isnumber(format, n):
    """
    Check whether the string ``n`` is written as an integer or as a float.

    Parameters:
        format: "int" or "float"; selects the pattern ``n`` is tested against.
        n: the string to test.

    Returns:
        True when ``n`` matches the requested pattern, False otherwise
        (including when ``format`` is neither "int" nor "float").

    The "int" pattern accepts an optional minus sign followed by digits
    without a leading zero, so "0" is rejected. The "float" pattern accepts an
    optional minus sign, an integer part ("0" or digits without a leading
    zero) and an optional fractional part, e.g. "5", "0.5" or "-12.25".
    """
    # Raw strings hand the backslashes to the regex engine untouched instead
    # of letting Python read them as (invalid) string escapes.
    if format == "int":
        pattern = r"^-?[1-9][0-9]*$"
    elif format == "float":
        pattern = r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$"
    else:
        return False
    return re.match(pattern, n) is not None
+ − 32
def input_to_dict(inputs):
    """
    Parse the inputs and return the ID set and display name of each list/file.

    Parameters:
        inputs: a sequence of input descriptions. Each description is a
            sequence whose items are, in order:
              0. the path of a tab-separated file when item 2 is "file",
                 otherwise the whitespace-separated IDs themselves;
              1. the display name of the list;
              2. the input type ("file", or anything else for pasted IDs);
              3. (file only) "true" when the first line is a header to skip;
              4. (file only) the 1-based column holding the IDs, written
                 either "3" or "c3".

    Returns:
        (comp_dict, title_dict): two dictionaries keyed by a one-letter label
        ("A" to "F", assigned in input order). comp_dict maps the label to the
        set of IDs, title_dict maps it to the display name.

    Raises:
        IndexError: more than six inputs are given.
        ValueError: the column number is not a positive integer.
    """
    labels = ["A", "B", "C", "D", "E", "F"]
    if len(inputs) > len(labels):
        raise IndexError(
            "At most %d inputs are supported, got %d" % (len(labels), len(inputs))
        )

    comp_dict = {}
    title_dict = {}
    for label, entry in zip(labels, inputs):
        title_dict[label] = entry[1]

        if entry[2] == "file":
            header = entry[3]
            ncol = entry[4].replace("c", "")

            # The context manager closes the file even if parsing fails later.
            with open(entry[0], "r") as handle:
                rows = handle.readlines()

            # isnumber() lets negative numbers through; those would silently
            # index the columns from the right, so they are rejected here.
            if not isnumber("int", ncol) or int(ncol) < 1:
                raise ValueError("Please fill in the right format of column number")
            column = int(ncol) - 1

            if header == "true":
                rows = rows[1:]
            # A cell may hold several ";"-separated IDs: keep only the first.
            values = [row.split("\t")[column].split(";")[0].strip() for row in rows]
        else:
            values = entry[0].split()

        comp_dict[label] = set(values)

    return comp_dict, title_dict
+ − 69
def intersect(comp_dict):
    """
    Calculate the intersections of the input sets.

    For every non-empty combination of labels (smallest combinations first,
    labels taken in sorted order) yield a tuple
    ``(group, intersected, difference)``:

      * group: the tuple of labels in the combination;
      * intersected: sorted list of the IDs present in every set of the group;
      * difference: sorted list of the IDs of ``intersected`` that appear in
        no set outside the group. When the group contains every label there
        is nothing outside it, so this equals ``intersected``.

    Parameters:
        comp_dict: dictionary mapping each label to its set of IDs. The IDs
            must be mutually orderable because the yielded lists are sorted.
    """
    labels = sorted(comp_dict)
    for size in range(1, len(labels) + 1):
        for group in combinations(labels, size):
            intersected = set.intersection(*(comp_dict[k] for k in group))
            outside = [comp_dict[k] for k in labels if k not in group]
            # difference() called without argument returns a plain copy,
            # which is the expected result for the all-label group.
            difference = intersected.difference(*outside)
            # Sorted so the output does not depend on set iteration order.
            yield group, sorted(intersected), sorted(difference)
+ − 85
def diagram(comp_dict, title_dict):
    """
    Build the dictionary describing the jvenn diagram.

    Parameters:
        comp_dict: dictionary mapping each label to its set of IDs.
        title_dict: dictionary mapping each label to its display name.

    Returns:
        A dictionary with three entries:
          * "name": label -> display name, for every label of comp_dict;
          * "data": joined labels of a group (e.g. "AB") -> list of IDs
            assigned to that region;
          * "values": joined labels of a group -> number of IDs in the region.
        A region holds the IDs shared by the whole group and found in no
        other list.
    """
    result = {
        "name": dict((label, title_dict[label]) for label in comp_dict),
        "data": {},
        "values": {},
    }

    total = len(comp_dict)
    for group, intersected, difference in intersect(comp_dict):
        # The group made of every label has no outside list to subtract, so
        # its region is the full intersection. This is tested first so that a
        # single input list is not reported as empty.
        members = intersected if len(group) == total else difference
        key = "".join(group)
        result["data"][key] = members
        result["values"][key] = len(members)

    return result
+ − 109
def write_text_venn(json_result):
    """
    Write the non-empty regions of the diagram to a tab-separated text file.

    The output file is "venn_diagram_text_output.txt" in the current working
    directory. It has one column per non-empty region: the first line holds
    the display names of the lists forming the region joined with "_", the
    following lines hold the IDs of the region, one per line. Every cell,
    including the last of a line, is followed by a tab. Columns are ordered
    by number of lists in the region, then alphabetically by label.

    Parameters:
        json_result: dictionary with a "data" entry (region key -> list of
            IDs, each key being a string of one-letter labels) and a "name"
            entry (label -> display name).
    """
    regions = dict(
        (key, ids) for key, ids in json_result["data"].items() if ids != []
    )
    print(regions)
    # max() fails on an empty sequence: fall back to 0 rows when every
    # region is empty.
    max_count = max([len(ids) for ids in regions.values()] or [0])
    print(max_count)

    # Fixed column order so the file does not depend on dictionary ordering.
    keys = sorted(regions, key=lambda key: (len(key), key))

    header = "".join(
        "_".join(json_result["name"][label] for label in key) + "\t"
        for key in keys
    )
    lines = []
    for i in range(max_count):
        # Regions shorter than the longest one are padded with empty cells.
        cells = [regions[key][i] if i < len(regions[key]) else "" for key in keys]
        lines.append("".join(cell + "\t" for cell in cells))

    string = header + "\n" + "\n".join(lines)
    # The table is echoed on standard output as well as written to the file.
    print(string)
    with open("venn_diagram_text_output.txt", "w") as output:
        output.write(string)
+ − 139
def write_summary(summary_file, inputs):
    """
    Compute the diagram data and write the text and HTML outputs.

    The inputs are parsed with input_to_dict(), turned into the diagram data
    with diagram() and dumped as text by write_text_venn(). The HTML output is
    a copy of "jvenn_template.html" (read from the directory of this script)
    in which every "###JVENN_DATA###" marker is replaced by the JSON
    configuration.

    Parameters:
        summary_file: path of the HTML file to write.
        inputs: input descriptions, passed unchanged to input_to_dict().
    """
    comp_dict, title_dict = input_to_dict(inputs)
    data = diagram(comp_dict, title_dict)
    write_text_venn(data)

    to_replace = {
        "series": [data],
        "displayStat": "true",
        "displaySwitch": "true",
        "shortNumber": "true",
    }
    # Serialised once: the payload is the same for every line of the template.
    # NOTE(review): the display names come from the caller and are pasted
    # unescaped into the HTML; confirm whether they need escaping.
    payload = json.dumps(to_replace)

    template_path = os.path.join(CURRENT_DIR, "jvenn_template.html")
    # Both handles are closed even when reading or writing fails.
    with open(template_path) as FH_summary_tpl:
        with open(summary_file, "w") as FH_summary_out:
            for line in FH_summary_tpl:
                FH_summary_out.write(line.replace("###JVENN_DATA###", payload))
+ − 164
def process(args):
    """
    Run the whole analysis for the parsed command-line arguments.

    Parameters:
        args: object exposing ``summary`` (path of the HTML output) and
            ``input`` (the input descriptions); both are handed to
            write_summary().
    """
    summary_path, input_specs = args.summary, args.input
    write_summary(summary_path, input_specs)
+ − 167
+ − 168
+ − 169 ##################################################################################################################################################
+ − 170 # MAIN
+ − 171 ##################################################################################################################################################
if __name__ == '__main__':
    # Parse parameters
    parser = argparse.ArgumentParser(
        description='Compute the intersections of up to six lists of IDs and plot them as a jvenn diagram'
    )
    group_input = parser.add_argument_group('Inputs')
    # Each occurrence of --input describes one list; action="append" collects
    # the occurrences into a list of lists.
    group_input.add_argument(
        '--input', nargs="+", action="append", required=True,
        help="One list to compare; repeat the option for each list. "
             "Values: <file path or IDs> <name> <type> [<header> <column>], "
             "where type is 'file' for a tabular file, header is 'true' when "
             "the first line must be skipped and column is the ID column "
             "(e.g. c1)."
    )
    group_output = parser.add_argument_group('Outputs')
    group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]")
    args = parser.parse_args()

    # Process
    process(args)