annotate tools/plotting/venn_list.py @ 0:baf7031d470e

Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
author peterjc
date Tue, 07 Jun 2011 18:08:11 -0400
parents
children 6aae6bc0802d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
2 """Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
3
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
4 This script is copyright 2010 by Peter Cock, The James Hutton Institute
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
5 (formerly SCRI), UK. All rights reserved.
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
6 See accompanying text file for licence details (MIT/BSD style).
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
7
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
8 This is version 0.0.3 of the script.
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
9 """
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
10
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
11
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
12 import sys
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
13 import rpy
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
14
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
15 def stop_err(msg, error_level=1):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
16 """Print error message to stdout and quit with given error level."""
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
17 sys.stderr.write("%s\n" % msg)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
18 sys.exit(error_level)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
19
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
20 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
21 import rpy
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
22 except ImportError:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
23 stop_err("Requires the Python library rpy (to call R)")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
24
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
25 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
26 rpy.r.library("limma")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
27 except:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
28 stop_err("Requires the R library limma (for vennDiagram function)")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
29
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
30
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
31 if len(sys.argv)-1 not in [7, 10, 13]:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
32 stop_err("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
33
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
34 all_file, all_type, all_label = sys.argv[1:4]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
35 set_data = []
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
36 if len(sys.argv)-1 >= 7:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
37 set_data.append(tuple(sys.argv[4:7]))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
38 if len(sys.argv)-1 >= 10:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
39 set_data.append(tuple(sys.argv[7:10]))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
40 if len(sys.argv)-1 >= 13:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
41 set_data.append(tuple(sys.argv[10:13]))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
42 pdf_file = sys.argv[-1]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
43 n = len(set_data)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
44 print "Doing %i-way Venn Diagram" % n
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
45
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
46 def load_ids(filename, filetype):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
47 if filetype=="tabular":
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
48 for line in open(filename):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
49 if not line.startswith("#"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
50 yield line.rstrip("\n").split("\t",1)[0]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
51 elif filetype=="fasta":
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
52 for line in open(filename):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
53 if line.startswith(">"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
54 yield line[1:].rstrip("\n").split(None,1)[0]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
55 elif filetype.startswith("fastq"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
56 #Use the Galaxy library not Biopython to cope with CS
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
57 from galaxy_utils.sequence.fastq import fastqReader
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
58 handle = open(filename, "rU")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
59 for record in fastqReader(handle):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
60 #The [1:] is because the fastaReader leaves the @ on the identifer.
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
61 yield record.identifier.split()[0][1:]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
62 handle.close()
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
63 elif filetype=="sff":
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
64 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
65 from Bio.SeqIO import index
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
66 except ImportError:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
67 stop_err("Require Biopython 1.54 or later (to read SFF files)")
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
68 #This will read the SFF index block if present (very fast)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
69 for name in index(filename, "sff"):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
70 yield name
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
71 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
72 stop_err("Unexpected file type %s" % filetype)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
73
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
74 def load_ids_whitelist(filename, filetype, whitelist):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
75 for name in load_ids(filename, filetype):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
76 if name in whitelist:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
77 yield name
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
78 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
79 stop_err("Unexpected ID %s in %s file %s" % (name, filetype, filename))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
80
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
81 if all_file in ["", "-", '""', '"-"']:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
82 #Load without white list
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
83 sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
84 #Take union
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
85 all = set()
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
86 for s in sets:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
87 all.update(s)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
88 print "Inferred total of %i IDs" % len(all)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
89 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
90 all = set(load_ids(all_file, all_type))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
91 print "Total of %i IDs" % len(all)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
92 sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
93
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
94 for s, (f,t,c) in zip(sets, set_data):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
95 print "%i in %s" % (len(s), c)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
96
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
97 #Now call R library to draw simple Venn diagram
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
98 try:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
99 #Create dummy Venn diagram counts object for three groups
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
100 cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
101 rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
102 rpy.r('colnames(groups) <- %s' % cols)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
103 rpy.r('vc <- vennCounts(groups)')
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
104 #Populate the 2^n classes with real counts
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
105 #Don't make any assumptions about the class order
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
106 #print rpy.r('vc')
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
107 for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
108 if isinstance(row, int) or isinstance(row, float):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
109 #Hack for rpy being too clever for single element row
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
110 row = [row]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
111 names = all
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
112 for wanted, s in zip(row, sets):
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
113 if wanted:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
114 names = names.intersection(s)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
115 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
116 names = names.difference(s)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
117 rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
118 #print rpy.r('vc')
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
119 if n == 1:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
120 #Single circle, don't need to add (Total XXX) line
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
121 names = [c for (t,f,c) in set_data]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
122 else:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
123 names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
124 rpy.r.assign("names", names)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
125 rpy.r.assign("colors", ["red","green","blue"][:n])
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
126 rpy.r.pdf(pdf_file, 8, 8)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
127 rpy.r("""vennDiagram(vc, include="both", names=names,
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
128 main="%s", sub="(Total %i)",
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
129 circle.col=colors)
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
130 """ % (all_label, len(all)))
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
131 rpy.r.dev_off()
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
132 except Exception, exc:
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
133 stop_err( "%s" %str( exc ) )
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
134 rpy.r.quit( save="no" )
baf7031d470e Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
135 print "Done"