diff tools/blastxml_to_top_descr/blastxml_to_top_descr.xml @ 11:98f8431dab44 draft

Uploaded v0.1.0, now also handles extended tabular BLAST output.
author peterjc
date Fri, 13 Jun 2014 07:07:35 -0400
parents
children fe1ed74793c9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.xml	Fri Jun 13 07:07:35 2014 -0400
@@ -0,0 +1,111 @@
+<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.1.0">
+    <description>Make a table from BLAST output</description>
+    <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command>
+    <!-- Column options are only passed for tabular input; the dataset is addressed through its "input" conditional -->
+    <command interpreter="python">
+blastxml_to_top_descr.py
+-f "$input.in_format"
+#if $input.in_format == "tabular":
+    --qseqid $input.qseqid
+    --sseqid $input.sseqid
+    --salltitles $input.salltitles
+#end if
+-o "${tabular_file}" -t ${topN}
+"${input.in_file}"
+    </command>
+    <stdio>
+        <!-- Every non-zero exit status, negative or positive, is reported as an error -->
+        <exit_code range=":-1"/>
+        <exit_code range="1:"/>
+    </stdio>
+    <inputs>
+        <!-- The chosen input format decides which dataset type is accepted and whether column choices are shown -->
+        <conditional name="input">
+            <param name="in_format" type="select" label="Input format">
+                <option value="blastxml" selected="true">BLAST XML</option>
+                <option value="tabular">Tabular</option>
+            </param>
+            <when value="blastxml">
+                <param name="in_file" type="data" format="blastxml" label="BLAST results as XML"/>
+            </when>
+            <when value="tabular">
+                <param name="in_file" type="data" format="tabular" label="BLAST results as tabular"/>
+                <!-- Column pickers for tabular input; each default matches the position named in its help text -->
+                <param name="qseqid" type="data_column" data_ref="in_file" multiple="False" numerical="False"
+                       default_value="1" value="1" label="Column containing query ID (qseqid)"
+                       help="This is column 1 in standard BLAST tabular output"/>
+                <param name="sseqid" type="data_column" data_ref="in_file" multiple="False" numerical="False"
+                       default_value="2" value="2" label="Column containing match ID (sseqid)"
+                       help="This is column 2 in standard BLAST tabular output"/>
+                <param name="salltitles" type="data_column" data_ref="in_file" multiple="False" numerical="False"
+                       default_value="25" value="25" label="Column containing descriptions (salltitles)"
+                       help="This is column 25 in the default extended BLAST tabular output"/>
+            </when>
+        </conditional>
+        <!-- Number of hit descriptions to report, limited to the range 1 to 100 -->
+        <param name="topN" type="integer" min="1" max="100" optional="false" label="Number of descriptions" value="3"/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="tabular_file" label="Top $topN descriptions from $input.in_file.name"/>
+    </outputs>
+    <!-- Empty: this wrapper declares no requirements -->
+    <requirements/>
+    <tests>
+        <test><!-- BLAST XML input, top three descriptions -->
+            <param name="topN" value="3"/>
+            <param name="in_format" value="blastxml"/>
+            <param name="in_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml"/>
+            <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_top3.tabular" ftype="tabular"/>
+        </test>
+        <test><!-- Tabular input; no column parameters are set in this test -->
+            <param name="topN" value="3"/>
+            <param name="in_format" value="tabular"/>
+            <param name="in_file" value="blastp_four_human_vs_rhodopsin_converted_ext.tabular" ftype="tabular"/>
+            <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_top3_positive.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
+formats including text, tabular and a more detailed XML format. You can
+do a lot of things with tabular files in Galaxy (sorting, filtering, joins,
+etc), however until BLAST+ 2.2.28 the tabular output never included the
+hit descriptions (titles) found in the other output formats.
+
+This tool turns a BLAST XML file into a simple tabular file containing
+one row per query sequence, containing the query identifier and then
+the three (by default) top hit descriptions (i.e. the first three). If
+a query doesn't have that many hits, then these entries are left blank.
+
+This tool can also be used with the tabular output from BLAST+ instead,
+as long as the relevant columns are included. The default settings will
+work with the default 25 column extended output from the BLAST+ tools
+wrapped in Galaxy. Note if a query has *no* hits, it does not appear in
+the BLAST tabular output.
+
+**Example Usage**
+
+One simple usage would be to take a transcriptome assembly or set of
+gene predictions, run a BLAST search against the NCBI NR database, and
+then use this tool to make a table of the top three BLAST hits. This
+can give you a 'quick and dirty' crude annotation, potentially enough
+to spot some problems (e.g. bacterial contamination could be very
+obvious).
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blastxml_to_top_descr
+
+    </help>
+</tool>