diff tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 11:4c4a0da938ff draft

Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25. Supports $GALAXY_SLOTS. Includes more tests and heavy use of macros.
author peterjc
date Thu, 05 Dec 2013 06:55:59 -0500
parents 70e7dcbf6573
children 623f727cdff1
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Thu Dec 05 06:55:59 2013 -0500
@@ -1,11 +1,17 @@
-<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.5">
+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22">
     <description>Make BLAST database</description>
-    <requirements>
-        <requirement type="binary">makeblastdb</requirement>
-        <requirement type="package" version="2.2.26+">blast+</requirement>
-    </requirements>
-    <version_command>makeblastdb -version</version_command>
-    <command>
+    <macros>
+        <token name="@BINARY@">makeblastdb</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command interpreter="python">check_no_duplicates.py
+##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
+##and abort (via the ampersand ampersand trick) if any are found.
+#for $i in $in
+"${i.file}"
+#end for
+&amp;&amp;
 makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
 $parse_seqids
 $hash_index
@@ -24,54 +30,55 @@
 -title "BLAST Database"
 #end if
 -dbtype $dbtype 
-## #set $sep = '-mask_data '
-## #for $i in $mask_data
-## $sep${i.file}
-## #set $set = ', '
-## #end for 
+#set $mask_string = ''
+#set $sep = '-mask_data '
+#for $i in $mask_data
+#set $mask_string += $sep + str($i.mask_data_file)
+#set $sep = ','
+#end for
+$mask_string
+## #set $gi_mask_string = ''
 ## #set $sep = '-gi_mask -gi_mask_name '
 ## #for $i in $gi_mask
-## $sep${i.file}
-## #set $set = ', '
-## #end for 
+## #set $gi_mask_string += $sep + str($i.gi_mask_file)
+## #set $sep = ','
+## #end for
+## $gi_mask_string
 ## #if $tax.select == 'id':
 ## -taxid $tax.id
 ## #else if $tax.select == 'map':
 ## -taxid_map $tax.map
 ## #end if
+## --------------------------------------------------------------------
+## Capture the stdout log information to the primary file (plain text):
+&gt;&gt; "$outfile"
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-        <!-- In case the return code has not been set propery check stderr too -->
-        <regex match="Error:" />
-        <regex match="Exception:" />
-    </stdio>
+    <expand macro="stdio" />
     <inputs>
         <param name="dbtype" type="select" display="radio" label="Molecule type of input">
             <option value="prot">protein</option>
             <option value="nucl">nucleotide</option>
         </param>
         <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)
+             NOTE Double check the new database would be self contained first
         <repeat name="in" title="BLAST or FASTA Database" min="1">
             <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />
         </repeat>
         -->
+        <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->
         <repeat name="in" title="FASTA file" min="1">
             <param name="file" type="data" format="fasta" />
         </repeat>
         <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
         <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
-        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
-
+        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
         <!-- SEQUENCE MASKING OPTIONS -->
+        <repeat name="mask_data" title="Masking data file">
+            <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
+        </repeat>
         <!-- TODO
-        <repeat name="mask_data" title="Provide one or more files containing masking data">
-            <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
-        </repeat>
         <repeat name="gi_mask" title="Create GI indexed masking data">
-            <param name="file" type="data" format="asnb" label="Masking data output file" />
+            <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" />
         </repeat>
         -->
 
@@ -104,6 +111,25 @@
         </data>
     </outputs>
     <tests>
+        <!-- Note the (two line) PIN file is not reproducible run to run.
+        -->
+        <test>
+            <param name="dbtype" value="prot" />
+            <param name="file" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="title" value="Just 4 human proteins" />
+            <param name="parse_seqids" value="" />
+            <param name="hash_index" value="true" />
+            <output name="outfile" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">
+                <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
+                <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />
+                <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />
+                <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />
+                <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />
+                <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />
+            </output>
+        </test>
     </tests>
     <help>
 **What it does**
@@ -127,17 +153,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:
 
-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>