annotate tools/plotting/venn_list.py @ 3:6aae6bc0802d draft

Uploaded v0.0.6, basic unit test, MIT licence, RST README, citation information, development moved to GitHub
author peterjc
date Wed, 18 Sep 2013 06:19:51 -0400
parents baf7031d470e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
3
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
4 This script is copyright 2010 by Peter Cock, The James Hutton Institute
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
5 (formerly SCRI), UK. All rights reserved.
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
6 See accompanying text file for licence details (MIT/BSD style).
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
7
3
6aae6bc0802d Uploaded v0.0.6, basic unit test, MIT licence, RST README, citation information, development moved to GitHub
peterjc
parents: 0
diff changeset
8 This is version 0.0.4 of the script.
0
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
9 """
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
10
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
11
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
12 import sys
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
13 import rpy
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
14
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
15 def stop_err(msg, error_level=1):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
16 """Print error message to stdout and quit with given error level."""
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
17 sys.stderr.write("%s\n" % msg)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
18 sys.exit(error_level)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
19
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
20 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
21 import rpy
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
22 except ImportError:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
23 stop_err("Requires the Python library rpy (to call R)")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
24
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
25 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
26 rpy.r.library("limma")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
27 except:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
28 stop_err("Requires the R library limma (for vennDiagram function)")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
29
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
30
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
31 if len(sys.argv)-1 not in [7, 10, 13]:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
32 stop_err("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
33
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
34 all_file, all_type, all_label = sys.argv[1:4]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
35 set_data = []
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
36 if len(sys.argv)-1 >= 7:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
37 set_data.append(tuple(sys.argv[4:7]))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
38 if len(sys.argv)-1 >= 10:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
39 set_data.append(tuple(sys.argv[7:10]))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
40 if len(sys.argv)-1 >= 13:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
41 set_data.append(tuple(sys.argv[10:13]))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
42 pdf_file = sys.argv[-1]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
43 n = len(set_data)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
44 print "Doing %i-way Venn Diagram" % n
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
45
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
46 def load_ids(filename, filetype):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
47 if filetype=="tabular":
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
48 for line in open(filename):
3
6aae6bc0802d Uploaded v0.0.6, basic unit test, MIT licence, RST README, citation information, development moved to GitHub
peterjc
parents: 0
diff changeset
49 line = line.rstrip("\n")
6aae6bc0802d Uploaded v0.0.6, basic unit test, MIT licence, RST README, citation information, development moved to GitHub
peterjc
parents: 0
diff changeset
50 if line and not line.startswith("#"):
6aae6bc0802d Uploaded v0.0.6, basic unit test, MIT licence, RST README, citation information, development moved to GitHub
peterjc
parents: 0
diff changeset
51 yield line.split("\t",1)[0]
0
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
52 elif filetype=="fasta":
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
53 for line in open(filename):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
54 if line.startswith(">"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
55 yield line[1:].rstrip("\n").split(None,1)[0]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
56 elif filetype.startswith("fastq"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
57 #Use the Galaxy library not Biopython to cope with CS
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
58 from galaxy_utils.sequence.fastq import fastqReader
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
59 handle = open(filename, "rU")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
60 for record in fastqReader(handle):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
61 #The [1:] is because the fastaReader leaves the @ on the identifer.
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
62 yield record.identifier.split()[0][1:]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
63 handle.close()
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
64 elif filetype=="sff":
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
65 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
66 from Bio.SeqIO import index
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
67 except ImportError:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
68 stop_err("Require Biopython 1.54 or later (to read SFF files)")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
69 #This will read the SFF index block if present (very fast)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
70 for name in index(filename, "sff"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
71 yield name
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
72 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
73 stop_err("Unexpected file type %s" % filetype)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
74
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
75 def load_ids_whitelist(filename, filetype, whitelist):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
76 for name in load_ids(filename, filetype):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
77 if name in whitelist:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
78 yield name
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
79 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
80 stop_err("Unexpected ID %s in %s file %s" % (name, filetype, filename))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
81
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
82 if all_file in ["", "-", '""', '"-"']:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
83 #Load without white list
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
84 sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
85 #Take union
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
86 all = set()
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
87 for s in sets:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
88 all.update(s)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
89 print "Inferred total of %i IDs" % len(all)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
90 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
91 all = set(load_ids(all_file, all_type))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
92 print "Total of %i IDs" % len(all)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
93 sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
94
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
95 for s, (f,t,c) in zip(sets, set_data):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
96 print "%i in %s" % (len(s), c)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
97
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
98 #Now call R library to draw simple Venn diagram
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
99 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
100 #Create dummy Venn diagram counts object for three groups
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
101 cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
102 rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
103 rpy.r('colnames(groups) <- %s' % cols)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
104 rpy.r('vc <- vennCounts(groups)')
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
105 #Populate the 2^n classes with real counts
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
106 #Don't make any assumptions about the class order
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
107 #print rpy.r('vc')
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
108 for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
109 if isinstance(row, int) or isinstance(row, float):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
110 #Hack for rpy being too clever for single element row
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
111 row = [row]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
112 names = all
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
113 for wanted, s in zip(row, sets):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
114 if wanted:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
115 names = names.intersection(s)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
116 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
117 names = names.difference(s)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
118 rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
119 #print rpy.r('vc')
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
120 if n == 1:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
121 #Single circle, don't need to add (Total XXX) line
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
122 names = [c for (t,f,c) in set_data]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
123 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
124 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
125 rpy.r.assign("names", names)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
126 rpy.r.assign("colors", ["red","green","blue"][:n])
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
127 rpy.r.pdf(pdf_file, 8, 8)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
128 rpy.r("""vennDiagram(vc, include="both", names=names,
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
129 main="%s", sub="(Total %i)",
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
130 circle.col=colors)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
131 """ % (all_label, len(all)))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
132 rpy.r.dev_off()
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
133 except Exception, exc:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
134 stop_err( "%s" %str( exc ) )
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
135 rpy.r.quit( save="no" )
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
136 print "Done"