# HG changeset patch
# User peterjc
# Date 1559119519 14400
# Node ID 679b6323db0303d280e296ec78830511dc6a1311
# Parent 20d347feb8828df349dcb0934007f393ea7a3046
v0.1.0 now using matplotlib_venn instead of limma R package via rpy. Contribution from Frederic Sapet.
diff -r 20d347feb882 -r 679b6323db03 tools/venn_list/README.rst
--- a/tools/venn_list/README.rst Thu May 11 07:02:57 2017 -0400
+++ b/tools/venn_list/README.rst Wed May 29 04:45:19 2019 -0400
@@ -33,8 +33,8 @@
The suggested location is in the Galaxy folder ``tools/plotting`` next to other
graph drawing tools, or a dedicated ``tools/venn_list`` directory.
-You will also need to install Biopython 1.54 or later, and the R/Bioconductor
-package limma. You should already have rpy installed for other Galaxy tools.
+You will also need to install Biopython 1.54 or later, matplotlib, and
+matplotlib_venn - all available via PyPI or Conda,
You will also need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the
tool. The suggested location is in the "Graph/Display Data" section. Simply add
@@ -79,6 +79,10 @@
- Single quote command line arguments (internal change only)
- Python 3 compatible print function.
- Capture python script version when run from Galaxy.
+v0.0.12 - Internal Python style change.
+ - Python 3 compatible exception handling.
+v0.1.0 - Now uses Python library matplotlib_venn to draw the diagram
+ instead of R's limma via rpy). Contribution from Frederic Sapet.
======= ======================================================================
@@ -107,7 +111,7 @@
$ planemo shed_upload --tar_only tools/venn_list/
...
- $ tar -tzf shed_upload.tar.gz
+ $ tar -tzf shed_upload.tar.gz
tools/venn_list/README.rst
tools/venn_list/tool_dependencies.xml
tools/venn_list/venn_list.py
diff -r 20d347feb882 -r 679b6323db03 tools/venn_list/tool_dependencies.xml
--- a/tools/venn_list/tool_dependencies.xml Thu May 11 07:02:57 2017 -0400
+++ b/tools/venn_list/tool_dependencies.xml Wed May 29 04:45:19 2019 -0400
@@ -1,15 +1,15 @@
-
+
-
+
-
+
-
+
-
+
-
+
\ No newline at end of file
diff -r 20d347feb882 -r 679b6323db03 tools/venn_list/venn_list.py
--- a/tools/venn_list/venn_list.py Thu May 11 07:02:57 2017 -0400
+++ b/tools/venn_list/venn_list.py Wed May 29 04:45:19 2019 -0400
@@ -1,5 +1,9 @@
#!/usr/bin/env python
-"""Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
+
+"""
+Plot up to 3-way Venn Diagram.
+
+It uses the Python libraries matplotlib and matplotlib_venn.
This script is copyright 2010-2017 by Peter Cock, The James Hutton Institute
(formerly SCRI), UK. All rights reserved.
@@ -10,26 +14,25 @@
from __future__ import print_function
import sys
+from shutil import move
if "-v" in sys.argv or "--version" in sys.argv:
- print("v0.0.11")
+ print("v0.1.0")
sys.exit(0)
try:
- import rpy
+ import matplotlib
except ImportError:
- sys.exit("Requires the Python library rpy (to call R)")
-except RuntimeError, e:
- sys.exit("The Python library rpy is not availble for the current R version\n\n%s" % e)
+ sys.exit("Requires the Python library matplotlib")
-try:
- rpy.r.library("limma")
-except Exception:
- sys.exit("Requires the R library limma (for vennDiagram function)")
-
+matplotlib.use("Agg")
+from matplotlib import pyplot as plt
if len(sys.argv) - 1 not in [7, 10, 13]:
- sys.exit("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv) - 1))
+ sys.exit(
+ "Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i"
+ % (len(sys.argv) - 1)
+ )
all_file, all_type, all_label = sys.argv[1:4]
set_data = []
@@ -45,6 +48,7 @@
def load_ids(filename, filetype):
+ """Load ids from files."""
if filetype == "tabular":
for line in open(filename):
line = line.rstrip("\n")
@@ -57,6 +61,7 @@
elif filetype.startswith("fastq"):
# Use the Galaxy library not Biopython to cope with CS
from galaxy_utils.sequence.fastq import fastqReader
+
handle = open(filename, "rU")
for record in fastqReader(handle):
# The [1:] is because the fastaReader leaves the @ on the identifer.
@@ -68,18 +73,21 @@
except ImportError:
sys.exit("Require Biopython 1.54 or later (to read SFF files)")
# This will read the SFF index block if present (very fast)
- for name in index(filename, "sff"):
- yield name
+ for this_name in index(filename, "sff"):
+ yield this_name
else:
sys.exit("Unexpected file type %s" % filetype)
def load_ids_whitelist(filename, filetype, whitelist):
- for name in load_ids(filename, filetype):
- if name in whitelist:
- yield name
+ """Check if ids are in whitelist."""
+ for single_name in load_ids(filename, filetype):
+ if single_name in whitelist:
+ yield single_name
else:
- sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
+ sys.exit(
+ "Unexpected ID %s in %s file %s" % (single_name, filetype, filename)
+ )
if all_file in ["", "-", '""', '"-"']:
@@ -98,42 +106,54 @@
for s, (f, t, c) in zip(sets, set_data):
print("%i in %s" % (len(s), c))
-# Now call R library to draw simple Venn diagram
+names = [one_name for one_file, one_type, one_name in set_data]
+lengths = [len(one_set) for one_set in sets]
+
+if len(sets) == 3:
+ try:
+ from matplotlib_venn import venn3_unweighted
+ except ImportError:
+ sys.exit("Requires the Python library matplotlib_venn")
+ venn3_unweighted(
+ sets,
+ [
+ "{} (Total {})".format(name, length)
+ for (name, length) in zip(names, lengths)
+ ],
+ )
+
+if len(sets) == 2:
+ try:
+ from matplotlib_venn import venn2_unweighted
+ except ImportError:
+ sys.exit("Requires the Python library matplotlib_venn")
+ venn2_unweighted(
+ sets,
+ [
+ "{} (Total {})".format(name, length)
+ for (name, length) in zip(names, lengths)
+ ],
+ )
+
+# not sure what I am doing here,
+# matplotlib_venn does not want to create a single Venn circle
+# stick to the old behavior (rpy and Limma) as much as possible
+if len(sets) == 1:
+ try:
+ from matplotlib_venn import venn2
+ except ImportError:
+ sys.exit("Requires the Python library matplotlib_venn")
+ venn2((sets[0], set()), [set_data[0][2], ""])
+
+plt.title(all_label)
+plt.savefig(pdf_file + ".pdf")
+
+# Format "dat" is not supported.
try:
- # Create dummy Venn diagram counts object for three groups
- cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n))
- rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n))
- rpy.r('colnames(groups) <- %s' % cols)
- rpy.r('vc <- vennCounts(groups)')
- # Populate the 2^n classes with real counts
- # Don't make any assumptions about the class order
- # print(rpy.r('vc'))
- for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
- if isinstance(row, int) or isinstance(row, float):
- # Hack for rpy being too clever for single element row
- row = [row]
- names = all_ids
- for wanted, s in zip(row, sets):
- if wanted:
- names = names.intersection(s)
- else:
- names = names.difference(s)
- rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names)))
- # print(rpy.r('vc'))
- if n == 1:
- # Single circle, don't need to add (Total XXX) line
- names = [c for (t, f, c) in set_data]
- else:
- names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)]
- rpy.r.assign("names", names)
- rpy.r.assign("colors", ["red", "green", "blue"][:n])
- rpy.r.pdf(pdf_file, 8, 8)
- rpy.r("""vennDiagram(vc, include="both", names=names,
- main="%s", sub="(Total %i)",
- circle.col=colors)
- """ % (all_label, len(all_ids)))
- rpy.r.dev_off()
-except Exception, exc:
- sys.exit("%s" % str(exc))
-rpy.r.quit(save="no")
+ move(pdf_file + ".pdf", pdf_file)
+except (OSError, IOError) as error:
+ sys.exit("Fail to rename file {}".format(str(error)))
+
+plt.close()
+
print("Done")
diff -r 20d347feb882 -r 679b6323db03 tools/venn_list/venn_list.xml
--- a/tools/venn_list/venn_list.xml Thu May 11 07:02:57 2017 -0400
+++ b/tools/venn_list/venn_list.xml Wed May 29 04:45:19 2019 -0400
@@ -1,10 +1,10 @@
-
+
from lists
- galaxy_sequence_utils
- rpy
- limma
+ galaxy_sequence_utils
biopython
+ matplotlib
+ matplotlib-venn
python $__tool_directory__/venn_list.py --version
@@ -126,10 +126,9 @@
**Citations**
-The Venn Diagrams are drawn using Gordon Smyth's limma package from
-R/Bioconductor, http://www.bioconductor.org/
+The Venn Diagrams are drawn using Konstantin Tretyakov's matplotlib-venn
+package, https://pypi.org/project/matplotlib-venn/
-The R library is called from Python via rpy, http://rpy.sourceforge.net/
If you use this Galaxy tool in work leading to a scientific publication please
cite:
@@ -137,14 +136,14 @@
Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
Galaxy tools and workflows for sequence analysis with applications
in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
+https://doi.org/10.7717/peerj.167
This tool uses Biopython to read and write SFF files, so you may also wish to
cite the Biopython application note (and Galaxy too of course):
Cock et al 2009. Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.