diff tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 22:6f386c5dc4fb draft

v0.2.01 add -max_hsps, -use_sw_tback; lists args; internal updates
author peterjc
date Mon, 18 Sep 2017 06:21:27 -0400
parents 3034ce97dd33
children 31e517610e1f
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Wed Apr 19 05:27:19 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml	Mon Sep 18 06:21:27 2017 -0400
@@ -5,23 +5,24 @@
         <import>ncbi_macros.xml</import>
     </macros>
     <expand macro="preamble" />
-    <command interpreter="python">check_no_duplicates.py
+    <command detect_errors="aggressive" strict="true">
+python $__tool_directory__/check_no_duplicates.py
 ##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
 ##and abort (via the ampersand ampersand trick) if any are found.
-#for i in $input_file#"${i}" #end for#
+#for i in $input_file#'${i}' #end for#
 &amp;&amp;
-makeblastdb -out "${os.path.join($outfile.files_path,'blastdb')}"
+makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}'
 $parse_seqids
 $hash_index
 ## Single call to -in with multiple filenames space separated with outer quotes
 ## (presumably any filenames with spaces would be a problem). Note this gives
 ## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:
--in "#for i in $input_file#${i} #end for#"
+-in '#for i in $input_file#${i} #end for#'
 #if $title:
--title "$title"
+-title '$title'
 #else:
 ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
--title "BLAST Database"
+-title 'BLAST Database'
 #end if
 -dbtype $dbtype
 ## --------------------------------------------------------------------
@@ -31,7 +32,7 @@
 ## See Trello issue https://trello.com/c/lp5YmA1O
 #if ' '.join( map(str, $mask_data_file) ) != 'None':
 #for i in $mask_data_file:
--mask_data "${i}"
+-mask_data '${i}'
 #end for
 #end if
 ## --------------------------------------------------------------------
@@ -48,7 +49,7 @@
 &gt; "$outfile"
     </command>
     <inputs>
-        <param name="dbtype" type="select" display="radio" label="Molecule type of input">
+        <param argument="-dbtype" type="select" display="radio" label="Molecule type of input">
             <option value="prot">protein</option>
             <option value="nucl">nucleotide</option>
         </param>
@@ -56,13 +57,13 @@
              NOTE Double check the new database would be self contained first
         -->
         <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->
-        <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />
-        <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
-        <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
-        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
+        <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />
+        <param argument="-title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
+        <param argument="-parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="false" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
+        <param argument="-hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
         <!-- SEQUENCE MASKING OPTIONS -->
         <!-- Note this is an optional parameter - default should be NO files -->
-        <param name="mask_data_file" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
+        <param name="mask_data_file" argument="-mask_data" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
         <!-- TODO - Option to create GI indexed masking data? via -gi_mask and -gi_mask_name? -->
         <!-- TAXONOMY OPTIONS -->
         <conditional name="tax">
@@ -76,11 +77,11 @@
             <when value="">
             </when>
             <when value="id">
-                <param name="taxid" type="integer" value="" label="NCBI taxonomy ID" help="Integer &gt;=0, e.g. 9606 for Homo sapiens" min="0" />
+                <param argument="-taxid" type="integer" min="0" value="" label="NCBI taxonomy ID" help="Integer &gt;=0, e.g. 9606 for Homo sapiens" />
             </when>
             <!-- TODO: File format?
             <when value="map">
-                <param name="taxmap" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" />
+                <param name="taxmap" argument="-taxid_map" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" />
             </when>
             -->
         </conditional>
@@ -107,7 +108,7 @@
             <param name="title" value="Just 4 human proteins" />
             <param name="parse_seqids" value="" />
             <param name="hash_index" value="true" />
-            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">
+            <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">
                 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
                 <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
                 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
@@ -126,7 +127,7 @@
             <param name="hash_index" value="true" />
             <param name="taxselect" value="id" />
             <param name="taxid" value="9606" />
-            <output name="out_file" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp">
+            <output name="outfile" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp">
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" />
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" />
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" />
@@ -144,7 +145,7 @@
             <param name="parse_seqids" value="" />
             <param name="hash_index" value="true" />
             <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />
-            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">
+            <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">
                 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
                 <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
                 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
@@ -163,7 +164,7 @@
             <param name="hash_index" value="true" />
             <param name="taxselect" value="id" />
             <param name="taxid" value="9606" />
-            <output name="out_file" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn">
+            <output name="outfile" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn">
                 <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" />
                 <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" />
                 <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" />