Mercurial > repos > proteore > proteore_venn_diagram
diff venn_diagram.py @ 11:958eb2ea02aa draft default tip
"planemo upload commit 00cab92333b0338cb86ff78b5bbc1b6d26621012-dirty"
author | proteore |
---|---|
date | Wed, 12 May 2021 16:48:05 +0000 |
parents | e744a43171ff |
children |
line wrap: on
line diff
--- a/venn_diagram.py Tue Jan 14 09:14:27 2020 -0500 +++ b/venn_diagram.py Wed May 12 16:48:05 2021 +0000 @@ -1,19 +1,20 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python -import os -import sys +import argparse +import csv import json -import operator -import argparse -import re, csv +import os +import re from itertools import combinations + CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) -################################################################################################################################################## +######################################################################## # FUNCTIONS -################################################################################################################################################## - +######################################################################## + + def isnumber(format, n): """ Check if an element is integer or float @@ -29,16 +30,17 @@ return True else: return False - + + def input_to_dict(inputs): """ Parse input and return a dictionary of name and data of each lists/files """ comp_dict = {} title_dict = {} - c = ["A", "B", "C", "D", "E", "F"] + c = ["A", "B", "C", "D", "E", "F"] for i in range(len(inputs)): - input_file = inputs[i][0] + input_file = inputs[i][0] name = inputs[i][1] input_type = inputs[i][2] title = c[i] @@ -47,30 +49,35 @@ if input_type == "file": header = inputs[i][3] ncol = inputs[i][4] - with open(input_file,"r") as handle : - file_content = csv.reader(handle,delimiter="\t") - file_content = list(file_content) #csv object to list - + with open(input_file, "r") as handle: + file_content = csv.reader(handle, delimiter="\t") + file_content = list(file_content) # csv object to list + # Check if column number is in right form if isnumber("int", ncol.replace("c", "")): if header == "true": - file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # gets ids from defined column + # gets ids from defined column + file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # noqa 501 + else: - file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] + file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] # noqa 501 else: - raise ValueError("Please fill in the right format of column number") + raise ValueError("Please fill in the right format of column number") # noqa 501 else: ids = set() file_content = inputs[i][0].split() file_content = [x.split(";") for x in file_content] - - file_content = [item.strip() for sublist in file_content for item in sublist if item != ''] #flat list of list of lists, remove empty items + + # flat list of list of lists, remove empty items + file_content = [item.strip() for sublist in file_content for item in sublist if item != ''] # noqa 501 ids.update(file_content) - if 'NA' in ids : ids.remove('NA') + if 'NA' in ids: + ids.remove('NA') comp_dict[title] = ids - + return comp_dict, title_dict - + + def intersect(comp_dict): """ Calculate the intersections of input @@ -83,8 +90,9 @@ difference = [] intersected = set.intersection(*(comp_dict[k] for k in group)) if len(others) > 0: - difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) - yield group, list(intersected), list(difference) + difference = intersected.difference(set.union(*(comp_dict[k] for k in others))) # noqa 501 + yield group, list(intersected), list(difference) + def diagram(comp_dict, title_dict): """ @@ -94,54 +102,59 @@ result["name"] = {} for k in comp_dict.keys(): result["name"][k] = title_dict[k] - + result["data"] = {} - result["values"] = {} + result["values"] = {} for group, intersected, difference in intersect(comp_dict): if len(group) == 1: - result["data"]["".join(group)] = difference + result["data"]["".join(group)] = sorted(difference) result["values"]["".join(group)] = len(difference) elif len(group) > 1 and len(group) < len(comp_dict): - result["data"]["".join(group)] = difference - result["values"]["".join(group)] = len(difference) + result["data"]["".join(group)] = sorted(difference) + result["values"]["".join(group)] = len(difference) elif len(group) == len(comp_dict): - result["data"]["".join(group)] = intersected + result["data"]["".join(group)] = sorted(intersected) result["values"]["".join(group)] = len(intersected) return result -#Write intersections of input to text output file +# Write intersections of input to text output file + + def write_text_venn(json_result): lines = [] - result = dict((k, v) for k, v in json_result["data"].iteritems() if v != []) - for key in result : - if 'NA' in result[key] : result[key].remove("NA") - list_names = dict((k, v) for k, v in json_result["name"].iteritems() if v != []) + result = dict((k, v) for k, v in json_result["data"].items() if v != []) # noqa 501 + for key in result: + if 'NA' in result[key]: + result[key].remove("NA") + + list_names = dict((k, v) for k, v in json_result["name"].items() if v != []) # noqa 501 nb_lines_max = max(len(v) for v in result.values()) - #get list names associated to each column + # get list names associated to each column column_dict = {} - for key in result : - if key in list_names : + for key in result: + if key in list_names: column_dict[key] = list_names[key] - else : - keys= list(key) + else: + keys = list(key) column_dict[key] = "_".join([list_names[k] for k in keys]) - #construct tsv - for key in result : + # construct tsv + for key in result: line = result[key] - if len(line) < nb_lines_max : - line.extend(['NA']*(nb_lines_max-len(line))) - line = [column_dict[key]] + line #add header - lines.append(line) - #transpose tsv - lines=zip(*lines) - + if len(line) < nb_lines_max: + line.extend(['']*(nb_lines_max-len(line))) + line = [column_dict[key]] + line # add header + lines.append(line) + # transpose tsv + lines = zip(*lines) + with open("venn_diagram_text_output.tsv", "w") as output: tsv_output = csv.writer(output, delimiter='\t') tsv_output.writerows(lines) + def write_summary(summary_file, inputs): """ Paste json string into template file @@ -151,37 +164,41 @@ write_text_venn(data) to_replace = { - "series": [data], - "displayStat": "true", - "displaySwitch": "true", + "series": [data], + "displayStat": "true", + "displaySwitch": "true", "shortNumber": "true", } FH_summary_tpl = open(os.path.join(CURRENT_DIR, "jvenn_template.html")) - FH_summary_out = open(summary_file, "w" ) + FH_summary_out = open(summary_file, "w") for line in FH_summary_tpl: if "###JVENN_DATA###" in line: line = line.replace("###JVENN_DATA###", json.dumps(to_replace)) FH_summary_out.write(line) - + FH_summary_out.close() FH_summary_tpl.close() - + + def process(args): write_summary(args.summary, args.input) -################################################################################################################################################## +##################################################################### # MAIN -################################################################################################################################################## +##################################################################### if __name__ == '__main__': # Parse parameters parser = argparse.ArgumentParser(description='Filters an abundance file') - group_input = parser.add_argument_group( 'Inputs' ) - group_input.add_argument('--input', nargs="+", action="append", required=True, help="The input tabular file.") - group_output = parser.add_argument_group( 'Outputs' ) - group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]") + group_input = parser.add_argument_group('Inputs') + group_input.add_argument('--input', nargs="+", action="append", + required=True, help="The input tabular file.") + group_output = parser.add_argument_group('Outputs') + group_output.add_argument('--summary', default="summary.html", + help="The HTML file containing the graphs. \ + [Default: %(default)s]") args = parser.parse_args() # Process - process( args ) + process(args)