diff tools/ncbi_blast_plus/ncbi_macros.xml @ 13:623f727cdff1 draft

Uploaded v0.1.00, uses BLAST+ 2.2.29, allows custom column selection for tabular output - including taxonomy fields.
author peterjc
date Fri, 14 Mar 2014 07:40:46 -0400
parents 4c4a0da938ff
children 2fe07f50a41e
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_macros.xml	Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml	Fri Mar 14 07:40:46 2014 -0400
@@ -1,30 +1,101 @@
 <macros>
     <xml name="output_change_format">
         <change_format>
-            <when input="out_format" value="0" format="txt"/>
-            <when input="out_format" value="0 -html" format="html"/>
-            <when input="out_format" value="2" format="txt"/>
-            <when input="out_format" value="2 -html" format="html"/>
-            <when input="out_format" value="4" format="txt"/>
-            <when input="out_format" value="4 -html" format="html"/>
-            <when input="out_format" value="5" format="blastxml"/>
+            <when input="output.out_format" value="0" format="txt"/> <!-- Each <when> pairs one out_format value with a format; the "output." prefix reflects that out_format sits inside the conditional named "output". Values not listed here (6, ext, cols) get no override. -->
+            <when input="output.out_format" value="0 -html" format="html"/>
+            <when input="output.out_format" value="2" format="txt"/>
+            <when input="output.out_format" value="2 -html" format="html"/>
+            <when input="output.out_format" value="4" format="txt"/>
+            <when input="output.out_format" value="4 -html" format="html"/>
+            <when input="output.out_format" value="5" format="blastxml"/>
         </change_format>
     </xml>
     <xml name="input_out_format">
-        <param name="out_format" type="select" label="Output format">
-            <option value="6">Tabular (standard 12 columns)</option>
-            <option value="ext" selected="True">Tabular (extended 25 columns)</option>
-            <option value="5">BLAST XML</option>
-            <option value="0">Pairwise text</option>
-            <option value="0 -html">Pairwise HTML</option>
-            <option value="2">Query-anchored text</option>
-            <option value="2 -html">Query-anchored HTML</option>
-            <option value="4">Flat query-anchored text</option>
-            <option value="4 -html">Flat query-anchored HTML</option>
-            <!--
-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
-            -->
-        </param>
+        <conditional name="output"> <!-- out_format picks the branch; only the "cols" branch carries extra parameters, every other <when> is empty -->
+            <param name="out_format" type="select" label="Output format">
+                <option value="6">Tabular (standard 12 columns)</option>
+                <option value="ext" selected="true">Tabular (extended 25 columns)</option>
+                <option value="cols">Tabular (select which columns)</option>
+                <option value="5">BLAST XML</option>
+                <option value="0">Pairwise text</option>
+                <option value="0 -html">Pairwise HTML</option>
+                <option value="2">Query-anchored text</option>
+                <option value="2 -html">Query-anchored HTML</option>
+                <option value="4">Flat query-anchored text</option>
+                <option value="4 -html">Flat query-anchored HTML</option>
+                <!--
+                <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+                -->
+            </param>
+            <when value="6"/>
+            <when value="ext"/>
+            <when value="cols"> <!-- five checkbox groups; the @BLAST_OUTPUT@ token concatenates whatever is ticked into one -outfmt column list -->
+                <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">
+                    <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>
+                    <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>
+                    <option selected="true" value="pident">pident = Percentage of identical matches</option>
+                    <option selected="true" value="length">length = Alignment length</option>
+                    <option selected="true" value="mismatch">mismatch = Number of mismatches</option>
+                    <option selected="true" value="gapopen">gapopen = Number of gap openings</option>
+                    <option selected="true" value="qstart">qstart = Start of alignment in query</option>
+                    <option selected="true" value="qend">qend = End of alignment in query</option>
+                    <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option>
+                    <option selected="true" value="send">send = End of alignment in subject (database hit)</option>
+                    <option selected="true" value="evalue">evalue = Expectation value (E-value)</option>
+                    <option selected="true" value="bitscore">bitscore = Bit score</option>
+                </param>
+                <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns">
+                    <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option>
+                    <option value="score">score = Raw score</option>
+                    <option value="nident">nident = Number of identical matches</option>
+                    <option value="positive">positive = Number of positive-scoring matches</option>
+                    <option value="gaps">gaps = Total number of gaps</option>
+                    <option value="ppos">ppos = Percentage of positive-scoring matches</option>
+                    <option value="qframe">qframe = Query frame</option>
+                    <option value="sframe">sframe = Subject frame</option>
+                    <option value="qseq">qseq = Aligned part of query sequence</option>
+                    <option value="sseq">sseq = Aligned part of subject sequence</option>
+                    <option value="qlen">qlen = Query sequence length</option>
+                    <option value="slen">slen = Subject sequence length</option>
+                    <option value="salltitles">salltitles = All subject title(s), separated by a '&lt;&gt;'</option>
+                </param>
+                <param name="ids_cols" type="select" multiple="true" display="checkboxes" label="Other identifier columns">
+                    <option value="qgi">qgi = Query GI</option>
+                    <option value="qacc">qacc = Query accession</option>
+                    <option value="qaccver">qaccver = Query accession.version</option>
+                    <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> <!-- NOTE(review): also offered under "Extended columns"; ticking both repeats the column in the -outfmt list - confirm that is intended -->
+                    <option value="sgi">sgi = Subject GI</option>
+                    <option value="sallgi">sallgi = All subject GIs</option>
+                    <option value="sacc">sacc = Subject accession</option>
+                    <option value="saccver">saccver = Subject accession.version</option>
+                    <option value="sallacc">sallacc = All subject accessions</option>
+                    <option value="stitle">stitle = Subject Title</option>
+                </param>
+                <param name="misc_cols" type="select" multiple="true" display="checkboxes" label="Miscellaneous columns">
+                    <option value="sstrand">sstrand = Subject Strand</option>
+                    <!-- Is it really worth including 'frames' given have 'qframe' and 'sframe'? -->
+                    <option value="frames">frames = Query and subject frames separated by a '/'</option>
+                    <option value="btop">btop = Blast traceback operations (BTOP)</option>
+                    <option value="qcovs">qcovs = Query Coverage Per Subject</option>
+                    <option value="qcovhsp">qcovhsp = Query Coverage Per HSP</option>
+                </param>
+                <param name="tax_cols" type="select" multiple="true" display="checkboxes" label="Taxonomy columns">
+                    <option value="staxids">staxids = unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option>
+                    <!-- TODO, how to handle the taxonomy data file dependency? If missing these give N/A -->
+                    <option value="sscinames">sscinames = unique Subject Scientific Name(s), separated by a ';'</option>
+                    <option value="scomnames">scomnames = unique Subject Common Name(s), separated by a ';'</option>
+                    <option value="sblastnames">sblastnames = unique Subject Blast Name(s), separated by a ';' (in alphabetical order)</option>
+                    <option value="sskingdoms">sskingdoms = unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order)</option>
+                </param>
+            </when>
+            <when value="5"/>
+            <when value="0"/>
+            <when value="0 -html"/>
+            <when value="2"/>
+            <when value="2 -html"/>
+            <when value="4"/>
+            <when value="4 -html"/>
+        </conditional>
     </xml>
     <xml name="input_scoring_matrix">
         <param name="matrix" type="select" label="Scoring matrix">
@@ -240,7 +311,7 @@
     <xml name="requirements">
         <requirements>
             <requirement type="binary">@BINARY@</requirement>
-            <requirement type="package" version="2.2.28">blast+</requirement>
+            <requirement type="package" version="2.2.29">blast+</requirement> <!-- blast+ package version requested by every tool that expands this macro; @BINARY@ is a placeholder, presumably a token each tool defines - confirm -->
         </requirements>
         <version_command>@BINARY@ -version</version_command>
     </xml>
@@ -268,10 +339,15 @@
     </token>
     <token name="@BLAST_OUTPUT@">-out "$output1"
 ##Set the extended list here so when we add things, saved workflows are not affected
-#if str($out_format)=="ext":
+#if str($output.out_format)=="ext":
     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles"
+#elif str($output.out_format)=="cols":
+##Pick your own columns. Galaxy gives us each group comma separated (an empty group apparently as "None"); BLAST+ wants one space separated list, so blank the "None"s, split on whitespace and re-join with single spaces:
+##TODO - Can we catch the user picking no columns and raise an error here?
+#set cols = " ".join((str($output.std_cols)+","+str($output.ext_cols)+","+str($output.ids_cols)+","+str($output.misc_cols)+","+str($output.tax_cols)).replace("None", "").replace(",", " ").split())
+    -outfmt "6 $cols"
 #else:
-    -outfmt $out_format
+    -outfmt $output.out_format
 #end if
     </token>
     <token name="@ADVANCED_OPTIONS@">$adv_opts.filter_query
@@ -330,7 +406,7 @@
 ====== ========= ============================================
 
 The BLAST+ tools can optionally output additional columns of information,
-but this takes longer to calculate. Most (but not all) of these columns are
+but this takes longer to calculate. Many commonly used extra columns are
 included by selecting the extended tabular output. The extra columns are
 included *after* the standard 12 columns. This is so that you can write
 workflow filtering steps that accept either the 12 or 25 column tabular
@@ -339,7 +415,7 @@
 ====== ============= ===========================================
 Column NCBI name     Description
 ------ ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by ';'
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
     14 score         Raw score
     15 nident        Number of identical matches
     16 positive      Number of positive-scoring matches
@@ -351,10 +427,14 @@
     22 sseq          Aligned part of subject sequence
     23 qlen          Query sequence length
     24 slen          Subject sequence length
-    25 salltitles    All subject title(s), separated by '&lt;&gt;'
+    25 salltitles    All subject title(s), separated by a '&lt;&gt;'
 ====== ============= ===========================================
 
-The third option is BLAST XML output, which is designed to be parsed by
+The third option is to customise the tabular output by selecting which
+columns you want, from the standard set of 12, the default set of 25,
+or any of the additional columns BLAST+ offers (including species name).
+
+The fourth option is BLAST XML output, which is designed to be parsed by
 another program, and is understood by some Galaxy tools.
 
 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).