diff venn_diagram.py @ 0:6f22d2726103 draft default tip

planemo upload
author lnguyen
date Fri, 15 Sep 2017 09:32:19 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/venn_diagram.py	Fri Sep 15 09:32:19 2017 -0400
@@ -0,0 +1,182 @@
+#!/usr/bin/env python2.7
+
+import os
+import sys
+import json
+import operator
+import argparse
+import re
+from itertools import combinations
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+##################################################################################################################################################
+# FUNCTIONS
+##################################################################################################################################################
+ 
+def isnumber(format, n):
+    """
+    Check if an element is integer or float
+    """
+    float_format = re.compile("^[\-]?[1-9][0-9]*\.?[0-9]+$")
+    int_format = re.compile("^[\-]?[1-9][0-9]*$")
+    test = ""
+    if format == "int":
+        test = re.match(int_format, n)
+    elif format == "float":
+        test = re.match(float_format, n)
+    if test:
+        return True
+    else:
+        return False
+        
+def input_to_dict(inputs):
+    """
+    Parse input and return a dictionary of name and data of each lists/files
+    """
+    comp_dict = {}
+    title_dict = {}
+    c = ["A", "B", "C", "D", "E", "F"]  
+    for i in range(len(inputs)):
+        input_file = inputs[i][0]        
+        name = inputs[i][1]
+        input_type = inputs[i][2]
+        title = c[i]
+        title_dict[title] = name
+        ids = set()
+        if input_type == "file":
+            header = inputs[i][3]
+            ncol = inputs[i][4]
+            file_content = open(input_file, "r").readlines()
+            
+            # Check if column number is in right form
+            if isnumber("int", ncol.replace("c", "")):
+                if header == "true":
+                    file_content = [x.strip() for x in [line.split("\t")[int(ncol.replace("c", ""))-1].split(";")[0] for line in file_content[1:]]]     # take only first IDs
+                else:
+                    file_content = [x.strip() for x in [line.split("\t")[int(ncol.replace("c", ""))-1].split(";")[0] for line in file_content]]     # take only first IDs
+                #print(file_content[1:13])
+            else:
+                raise ValueError("Please fill in the right format of column number")        
+        else:
+            ids = set()
+            file_content = inputs[i][0].split()
+            
+        ids.update(file_content)
+        comp_dict[title] = ids
+ 
+    return comp_dict, title_dict
+    
+def intersect(comp_dict):
+    """
+    Calculate the intersections of input
+    """
+    names = set(comp_dict)
+    for i in range(1, len(comp_dict) + 1):
+        for group in combinations(sorted(comp_dict), i):
+            others = set()
+            [others.add(name) for name in names if name not in group]
+            difference = []
+            intersected = set.intersection(*(comp_dict[k] for k in group))
+            n = "".join(group)
+            if len(others) > 0:
+                difference = intersected.difference(set.union(*(comp_dict[k] for k in others)))
+            yield group, list(intersected), list(difference)    
+
+def diagram(comp_dict, title_dict):
+    """
+    Create json string for jvenn diagram plot
+    """
+    result = {}
+    result["name"] = {}
+    for k in comp_dict.keys():
+        result["name"][k] = title_dict[k]
+        
+    result["data"] = {}
+    result["values"] = {}    
+    for group, intersected, difference in intersect(comp_dict):
+        if len(group) == 1:
+            result["data"]["".join(group)] = difference
+            result["values"]["".join(group)] = len(difference)
+        elif len(group) > 1 and len(group) < len(comp_dict):
+	        result["data"]["".join(group)] = difference
+	        result["values"]["".join(group)] = len(difference)               
+        elif len(group) == len(comp_dict):
+            result["data"]["".join(group)] = intersected
+            result["values"]["".join(group)] = len(intersected)
+
+    return result
+
+def write_text_venn(json_result):
+    """
+    Write intersections of input to text output file
+    """
+    output = open("venn_diagram_text_output.txt", "w")
+    string = ""
+    lines = []
+    result = dict((k, v) for k, v in json_result["data"].iteritems() if v != [])
+    print(result)
+    max_count = max(len(v) for v in result.values())
+    print(max_count)
+    for i in range(max_count):
+        lines.append("")
+        
+    for i in range(max_count):
+        header = ""
+        for d in range(len(result.keys())):
+            data = result.keys()[d]
+            name = "_".join([json_result["name"][x] for x in data])
+            header += name + "\t"
+            if len(result[data]) > i:
+                lines[i] += result[data][i] + "\t"
+            else:
+                lines[i] += "\t"
+    string += header + "\n"
+    string += "\n".join(lines)
+    print(string)
+    output.write(string)
+    output.close()
+
+def write_summary( summary_file, inputs):
+    """
+    Paste json string into template file
+    """
+    a, b = input_to_dict(inputs)
+    data = diagram(a, b)
+    write_text_venn(data)
+
+    to_replace = {
+    	"series": [data],
+    	"displayStat": "true",
+    	"displaySwitch": "true",
+        "shortNumber": "true",
+    }
+
+    FH_summary_tpl = open(os.path.join(CURRENT_DIR, "jvenn_template.html"))
+    FH_summary_out = open(summary_file, "w" )
+    for line in FH_summary_tpl:
+        if "###JVENN_DATA###" in line:
+            line = line.replace("###JVENN_DATA###", json.dumps(to_replace))
+        FH_summary_out.write(line)
+    
+    FH_summary_out.close()
+    FH_summary_tpl.close()
+   
+def process(args):
+    write_summary(args.summary, args.input)
+
+
+##################################################################################################################################################
+# MAIN
+##################################################################################################################################################
+if __name__ == '__main__':
+    # Parse parameters
+    parser = argparse.ArgumentParser(description='Filters an abundance file')
+    group_input = parser.add_argument_group( 'Inputs' )
+    group_input.add_argument('--input', nargs="+", action="append", required=True, help="The input tabular file.")
+    group_output = parser.add_argument_group( 'Outputs' )
+    group_output.add_argument('--summary', default="summary.html", help="The HTML file containing the graphs. [Default: %(default)s]")
+    args = parser.parse_args()
+
+    # Process
+    process( args )