venn_list: tools/venn_list/venn

comparison tools/venn_list/venn_list.py @ 11:679b6323db03 draft

v0.1.0 now using matplotlib_venn instead of limma R package via rpy. Contribution from Frederic Sapet.

author	peterjc
date	Wed, 29 May 2019 04:45:19 -0400
parents	20d347feb882
children

comparison

equal deleted inserted replaced

-:20d347feb882
+:679b6323db03
 #!/usr/bin/env python
-"""Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
+"""
+Plot up to 3-way Venn Diagram.
+It uses the Python libraries matplotlib and matplotlib_venn.
 This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute
 (formerly SCRI), UK. All rights reserved.
 See accompanying text file for licence details (MIT License).
 """
 from __future__ import print_function
 import sys
+from shutil import move
 if "-v" in sys.argv or "--version" in sys.argv:
-print("v0.0.11")
+print("v0.1.0")
 sys.exit(0)
 try:
-import rpy
+import matplotlib
 except ImportError:
-sys.exit("Requires the Python library rpy (to call R)")
+sys.exit("Requires the Python library matplotlib")
-except RuntimeError, e:
-sys.exit("The Python library rpy is not availble for the current R version\n\n%s" % e)
-try:
+matplotlib.use("Agg")
-rpy.r.library("limma")
+from matplotlib import pyplot as plt
-except Exception:
-sys.exit("Requires the R library limma (for vennDiagram function)")
 if len(sys.argv) - 1 not in [7, 10, 13]:
-sys.exit("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv) - 1))
+sys.exit(
+"Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i"
+% (len(sys.argv) - 1)
+)
 all_file, all_type, all_label = sys.argv[1:4]
 set_data = []
 if len(sys.argv) - 1 >= 7:
 set_data.append(tuple(sys.argv[4:7]))
 n = len(set_data)
 print("Doing %i-way Venn Diagram" % n)
 def load_ids(filename, filetype):
+"""Load ids from files."""
 if filetype == "tabular":
 for line in open(filename):
 line = line.rstrip("\n")
 if line and not line.startswith("#"):
 yield line.split("\t", 1)[0]
 if line.startswith(">"):
 yield line[1:].rstrip("\n").split(None, 1)[0]
 elif filetype.startswith("fastq"):
 # Use the Galaxy library not Biopython to cope with CS
 from galaxy_utils.sequence.fastq import fastqReader
 handle = open(filename, "rU")
 for record in fastqReader(handle):
 # The [1:] is because the fastaReader leaves the @ on the identifer.
 yield record.identifier.split()[0][1:]
 handle.close()
 try:
 from Bio.SeqIO import index
 except ImportError:
 sys.exit("Require Biopython 1.54 or later (to read SFF files)")
 # This will read the SFF index block if present (very fast)
-for name in index(filename, "sff"):
+for this_name in index(filename, "sff"):
-yield name
+yield this_name
 else:
 sys.exit("Unexpected file type %s" % filetype)
 def load_ids_whitelist(filename, filetype, whitelist):
-for name in load_ids(filename, filetype):
+"""Check if ids are in whitelist."""
-if name in whitelist:
+for single_name in load_ids(filename, filetype):
-yield name
+if single_name in whitelist:
+yield single_name
 else:
-sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
+sys.exit(
+"Unexpected ID %s in %s file %s" % (single_name, filetype, filename)
+)
 if all_file in ["", "-", '""', '"-"']:
 # Load without white list
 sets = [set(load_ids(f, t)) for (f, t, c) in set_data]
 sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data]
 for s, (f, t, c) in zip(sets, set_data):
 print("%i in %s" % (len(s), c))
-# Now call R library to draw simple Venn diagram
+names = [one_name for one_file, one_type, one_name in set_data]
+lengths = [len(one_set) for one_set in sets]
+if len(sets) == 3:
+try:
+from matplotlib_venn import venn3_unweighted
+except ImportError:
+sys.exit("Requires the Python library matplotlib_venn")
+venn3_unweighted(
+sets,
+[
+"{} (Total {})".format(name, length)
+for (name, length) in zip(names, lengths)
+],
+)
+if len(sets) == 2:
+try:
+from matplotlib_venn import venn2_unweighted
+except ImportError:
+sys.exit("Requires the Python library matplotlib_venn")
+venn2_unweighted(
+sets,
+[
+"{} (Total {})".format(name, length)
+for (name, length) in zip(names, lengths)
+],
+)
+# not sure what I am doing here,
+# matplotlib_venn does not want to create a single Venn circle
+# stick to the old behavior (rpy and Limma) as much as possible
+if len(sets) == 1:
+try:
+from matplotlib_venn import venn2
+except ImportError:
+sys.exit("Requires the Python library matplotlib_venn")
+venn2((sets[0], set()), [set_data[0][2], ""])
+plt.title(all_label)
+plt.savefig(pdf_file + ".pdf")
+# Format "dat" is not supported.
 try:
-# Create dummy Venn diagram counts object for three groups
+move(pdf_file + ".pdf", pdf_file)
-cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n))
+except (OSError, IOError) as error:
-rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n))
+sys.exit("Fail to rename file {}".format(str(error)))
-rpy.r('colnames(groups) <- %s' % cols)
-rpy.r('vc <- vennCounts(groups)')
+plt.close()
-# Populate the 2^n classes with real counts
-# Don't make any assumptions about the class order
-# print(rpy.r('vc'))
-for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
-if isinstance(row, int) or isinstance(row, float):
-# Hack for rpy being too clever for single element row
-row = [row]
-names = all_ids
-for wanted, s in zip(row, sets):
-if wanted:
-names = names.intersection(s)
-else:
-names = names.difference(s)
-rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names)))
-# print(rpy.r('vc'))
-if n == 1:
-# Single circle, don't need to add (Total XXX) line
-names = [c for (t, f, c) in set_data]
-else:
-names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)]
-rpy.r.assign("names", names)
-rpy.r.assign("colors", ["red", "green", "blue"][:n])
-rpy.r.pdf(pdf_file, 8, 8)
-rpy.r("""vennDiagram(vc, include="both", names=names,
-main="%s", sub="(Total %i)",
-circle.col=colors)
-""" % (all_label, len(all_ids)))
-rpy.r.dev_off()
-except Exception, exc:
-sys.exit("%s" % str(exc))
-rpy.r.quit(save="no")
 print("Done")

Mercurial > repos > peterjc > venn_list

comparison tools/venn_list/venn_list.py @ 11:679b6323db03 draft