Mercurial > repos > peterjc > blastxml_to_top_descr
diff tools/blastxml_to_top_descr/blastxml_to_top_descr.xml @ 11:98f8431dab44 draft
Uploaded v0.1.0, now also handles extended tabular BLAST output.
author | peterjc |
---|---|
date | Fri, 13 Jun 2014 07:07:35 -0400 |
parents | |
children | fe1ed74793c9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.xml Fri Jun 13 07:07:35 2014 -0400 @@ -0,0 +1,111 @@ +<tool id="blastxml_to_top_descr" name="BLAST top hit descriptions" version="0.1.0"> + <description>Make a table from BLAST output</description> + <version_command interpreter="python">blastxml_to_top_descr.py --version</version_command> + <command interpreter="python"> +blastxml_to_top_descr.py +-f "$input.in_format" +#if $input.in_format == "tabular": + --qseqid $input.qseqid + --sseqid $input.sseqid + --salltitles $input.salltitles +#end if +-o "${tabular_file}" +-t ${topN} +"${in_file}" + </command> + <stdio> + <!-- Assume anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <inputs> + <conditional name="input"> + <param name="in_format" type="select" label="Input format"> + <option value="blastxml" select="True">BLAST XML</option> + <option value="tabular">Tabular</option> + </param> + <when value="blastxml"> + <param name="in_file" type="data" format="blastxml" label="BLAST results as XML"/> + </when> + <when value="tabular"> + <param name="in_file" type="data" format="tabular" label="BLAST results as tabular"/> + <param name="qseqid" type="data_column" data_ref="in_file" + multiple="False" numerical="False" default_value="1" value="1" + label="Column containing query ID (qseqid)" + help="This is column 1 in standard BLAST tabular output" /> + <param name="sseqid" type="data_column" data_ref="in_file" + multiple="False" numerical="False" default_value="2" value="2" + label="Column containing match ID (sseqid)" + help="This is column 2 in standard BLAST tabular output"/> + <param name="salltitles" type="data_column" data_ref="in_file" + multiple="False" numerical="False" default_value="25" value="25" + label="Column containing containing descriptions (salltitles)" + help="This is column 25 in the default extended BLAST tabular output"/> + </when> + </conditional> + <param name="topN" type="integer" min="1" max="100" optional="false" label="Number of descriptions" value="3"/> + </inputs> + <outputs> + <data name="tabular_file" format="tabular" label="Top $topN descriptions from $input.in_file.name" /> + </outputs> + <requirements> + </requirements> + <tests> + <test> + <param name="in_format" value="blastxml" /> + <param name="in_file" value="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" /> + <param name="topN" value="3" /> + <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_top3.tabular" ftype="tabular" /> + </test> + <test> + <param name="in_format" value="tabular" /> + <param name="in_file" value="blastp_four_human_vs_rhodopsin_converted_ext.tabular" ftype="tabular" /> + <param name="topN" value="3" /> + <output name="tabular_file" file="blastp_four_human_vs_rhodopsin_top3_positive.tabular" ftype="tabular" /> + </test> + </tests> + <help> + +**What it does** + +NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of +formats including text, tabular and a more detailed XML format. You can +do a lot of things with tabular files in Galaxy (sorting, filtering, joins, +etc), however until BLAST+ 2.2.28 the tabular output never included the +hit descriptions (titles) found in the other output formats. + +This tool turns a BLAST XML file into a simple tabular file containing +one row per query sequence, containing the query identifier and then +the three (by default) top hit descriptions (i.e. the first three). If +a query doesn't have that many hits, then these entries are left blank. + +This tool can also be used with the tabular output from BLAST+ instead, +provided the relevant columns are provided. The default settings will +work with the default 25 column extended output from the BLAST+ tools +wrapped in Galaxy. Note if a query has *no* hits, it does not appear in +the BLAST tabular output. + +**Example Usage** + +One simple usage would be to take a transcriptome assembly or set of +gene predictions, run a BLAST search against the NCBI NR database, and +then use this tool to make a table of the top three BLAST hits. This +can give you a 'quick and dirty' crude annotation, potentially enough +to spot some problems (e.g. bacterial contaimination could be very +obvious). + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blastxml_to_top_descr + + </help> +</tool>