changeset 0:cd52c931b325

Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author peterjc
date Tue, 07 Jun 2011 16:28:31 -0400
parents
children 0f159cf346c8
files tools/ncbi_blast_plus/blast2go.py tools/ncbi_blast_plus/blast2go.txt tools/ncbi_blast_plus/blast2go.xml
diffstat 3 files changed, 273 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/blast2go.py	Tue Jun 07 16:28:31 2011 -0400
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+"""Galaxy wrapper for Blast2GO for pipelines, b2g4pipe v2.3.5.
+
+This script takes exactly three command line arguments:
+ * Input BLAST XML filename
+ * Blast2GO properties filename (settings file)
+ * Output tabular filename
+
+It then calls the Java command line tool, and moves the output file to
+the location Galaxy is expecting.
+"""
+import sys
+import os
+import subprocess
+
+#You may need to edit this to match your local setup,
+blast2go_jar = "/opt/b2g4pipe/blast2go.jar"
+
+
+def stop_err(msg, error_level=1):
+   """Write the error message to stderr and quit with the given error level."""
+   sys.stderr.write("%s\n" % msg)
+   sys.exit(error_level)
+
+if len(sys.argv) != 4:  #script name plus the three arguments
+   stop_err("Require three arguments: XML filename, properties filename, output tabular filename")
+
+xml_file, prop_file, tabular_file = sys.argv[1:]
+
+if not os.path.isfile(xml_file):  #fail early, before Java is launched
+   stop_err("Input BLAST XML file not found: %s" % xml_file)
+
+if not os.path.isfile(prop_file):
+   stop_err("Blast2GO configuration file not found: %s" % prop_file)
+
+def run(cmd):
+    """Run cmd (argument list, no shell, so killing this script also kills the
+    child) and echo its output; exit via stop_err on any failure."""
+    try:
+        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    except Exception, err:
+        stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+    #Use .communicate as can get deadlocks with .wait(),
+    stdout, stderr = child.communicate()
+    return_code = child.returncode
+    if return_code:
+        if stderr and stdout:
+            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, " ".join(cmd), stdout, stderr))
+        else:
+            stop_err("Return code %i from command:\n%s\n%s" % (return_code, " ".join(cmd), stderr))
+    #For early diagnostics,
+    else:
+       print stdout
+       print stderr
+
+if not os.path.isfile(blast2go_jar):
+   stop_err("Blast2GO JAR file not found: %s" % blast2go_jar)
+
+#We will have write access wherever the output should be,
+#so we'll ask Blast2GO to use that as the stem for its output
+#(it will append .annot to the filename)
+cmd = ["java", "-jar", blast2go_jar,
+       "-in", xml_file,
+       "-prop", prop_file,
+       "-out", tabular_file,
+       "-a"]
+run(cmd)
+
+out_file = tabular_file + ".annot"
+if not os.path.isfile(out_file):
+   stop_err("ERROR - No output annotation file from Blast2GO")
+
+#Move the output file where Galaxy expects it to be:
+os.rename(out_file, tabular_file)
+
+print "Done"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/blast2go.txt	Tue Jun 07 16:28:31 2011 -0400
@@ -0,0 +1,122 @@
+Galaxy wrapper for Blast2GO for pipelines, b2g4pipe
+===================================================
+
+This wrapper is copyright 2011 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below.
+
+This is a wrapper for the command line Java tool b2g4pipe v2.3.5,
+Blast2GO for pipelines. See:
+
+S. Götz et al.
+High-throughput functional annotation and data mining with the Blast2GO suite.
+Nucleic Acids Res. 36(10):3420–3435, 2008.
+http://dx.doi.org/10.1093/nar/gkn176
+
+A. Conesa and S. Götz.
+Blast2GO: A Comprehensive Suite for Functional Analysis in Plant Genomics.
+Int. J. Plant Genomics. 619832, 2008.
+http://dx.doi.org/10.1155/2008/619832
+
+A. Conesa et al.
+Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research.
+Bioinformatics 21:3674-3676, 2005.
+http://dx.doi.org/10.1093/bioinformatics/bti610
+
+http://www.blast2go.org/
+
+
+
+Installation
+============
+
+You can change the path by editing the definition near the start of the Python
+script blast2go.py, but by default it expects the underlying tool to be here:
+
+/opt/b2g4pipe/blast2go.jar
+
+To install the wrapper copy or move the following files under the Galaxy tools
+folder, e.g. in the tools/ncbi_blast_plus folder:
+
+* blast2go.xml (the Galaxy tool definition)
+* blast2go.py (the Python wrapper script)
+* blast2go.txt (this README file)
+
+You will also need to modify the tool_conf.xml file to tell Galaxy to offer the
+tool. We suggest putting it next to the NCBI BLAST+ wrappers. Just add the line:
+
+<tool file="ncbi_blast_plus/blast2go.xml" />
+
+As part of setting up b2g4pipe you will need to set up one or more Blast2GO
+property files which tell the tool which database to use etc. The example
+b2gPipe.properties provided with b2g4pipe v2.3.5 is out of date; the latest
+server IP address and database name are given on the Blast2GO website.
+These files can be anywhere accessible to the Galaxy Unix user; we put them
+under /opt/b2g4pipe with the JAR file etc.
+
+You must tell Galaxy about these Blast2GO property files so that they can be
+offered to the user. Create the file tool-data/blast2go.loc under the Galaxy
+folder. This must be plain text, tab separated, with three columns:
+
+(1) ID for the setup, e.g. Spain_2010_May
+(2) Description for the setup, e.g. Database in Spain (May 2010)
+(3) Properties filename for the setup, e.g. /opt/b2g4pipe/Spain_2010_May.properties
+
+Avoid including "Blast2GO" in the description (column 2) as this will be
+included in the automatically assigned output dataset name. The blast2go.loc
+file allows you to customise the database setup. If for example you have a local
+Blast2GO server running (which we recommend for speed), and you want this to be
+the default setting, include it as the first line in your blast2go.loc file.
+
+Consult the Blast2GO documentation for details about the property files and
+setting up a local MySQL Blast2GO database.
+
+
+History
+=======
+
+v0.0.1 - Initial public release
+
+
+Developers
+==========
+
+This script and related tools are being developed on the following hg branch:
+http://bitbucket.org/peterjc/galaxy-central/src/tools
+
+For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use
+the following command from the Galaxy root folder:
+
+$ tar -czf blast2go.tar.gz tools/ncbi_blast_plus/blast2go.xml tools/ncbi_blast_plus/blast2go.py tools/ncbi_blast_plus/blast2go.txt
+
+Check this worked:
+
+$ tar -tzf blast2go.tar.gz
+tools/ncbi_blast_plus/blast2go.xml
+tools/ncbi_blast_plus/blast2go.py
+tools/ncbi_blast_plus/blast2go.txt
+
+
+Licence (MIT/BSD style)
+=======================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
+NOTE: This is the licence for the Galaxy Wrapper only. Blast2GO and
+associated data files are available and licenced separately.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/blast2go.xml	Tue Jun 07 16:28:31 2011 -0400
@@ -0,0 +1,75 @@
+<tool id="blast2go" name="Blast2GO" version="0.0.1">
+    <description>Maps BLAST results to GO annotation terms</description>
+    <command interpreter="python">
+        blast2go.py $xml ${prop.fields.path} $tab
+    </command>
+    <inputs>
+        <param name="xml" type="data" format="blastxml" label="BLAST XML results" help="You must have run BLAST against a protein database such as the NCBI non-redundant (NR) database. Use BLASTX for nucleotide queries, BLASTP for protein queries." />
+        <param name="prop" type="select" label="Blast2GO settings" help="One or more configurations can be setup, such as using the Blast2GO team's server in Spain, or a local database.">
+             <options from_file="blast2go.loc"> <!-- tab separated columns: ID, description, properties filename -->
+                 <column name="value" index="0"/>
+                 <column name="name" index="1"/>
+                 <column name="path" index="2"/>
+             </options>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="tab" format="tabular" label="Blast2GO ${prop.fields.name}" />
+    </outputs>
+    <requirements>
+    </requirements>
+    <tests>
+    </tests>
+    <help>
+.. class:: warningmark
+
+**Note**. Blast2GO may take a substantial amount of time, especially if
+running against the public server in Spain. For large input datasets it
+is advisable to allow overnight processing, or consider subdividing.
+
+-----
+
+**What it does**
+
+This runs b2g4Pipe, the command line (no GUI) version of Blast2GO designed
+for use in pipelines.
+
+It takes as input BLAST XML results against a protein database, typically
+the NCBI non-redundant (NR) database. The BLAST matches are used to assign
+Gene Ontology (GO) annotation terms to each query sequence.
+
+The output from this tool is a tabular file containing three columns, with
+the order taken from query order in the original BLAST XML file:
+
+====== ====================================
+Column Description
+------ ------------------------------------
+     1 ID and description of query sequence
+     2 GO term
+     3 GO description
+====== ====================================
+
+Note that if no GO terms are assigned to a sequence (e.g. if it had no
+BLAST matches), then it will not be present in the output file.
+
+**References**
+
+S. Götz et al.
+High-throughput functional annotation and data mining with the Blast2GO suite.
+Nucleic Acids Res. 36(10):3420–3435, 2008.
+http://dx.doi.org/10.1093/nar/gkn176
+
+A. Conesa and S. Götz.
+Blast2GO: A Comprehensive Suite for Functional Analysis in Plant Genomics.
+Int. J. Plant Genomics. 619832, 2008.
+http://dx.doi.org/10.1155/2008/619832
+
+A. Conesa et al.
+Blast2GO: A universal tool for annotation, visualization and analysis in functional genomics research.
+Bioinformatics 21:3674-3676, 2005.
+http://dx.doi.org/10.1093/bioinformatics/bti610
+
+http://www.blast2go.org/
+
+    </help>
+</tool>