Mercurial > repos > devteam > ncbi_blast_plus
diff tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 11:4c4a0da938ff draft
Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25.
Supports $GALAXY_SLOTS.
Includes more tests and heavy use of macros.
author | peterjc |
---|---|
date | Thu, 05 Dec 2013 06:55:59 -0500 |
parents | 70e7dcbf6573 |
children | 623f727cdff1 |
line wrap: on
line diff
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,11 +1,17 @@ -<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.5"> +<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22"> <description>Make BLAST database</description> - <requirements> - <requirement type="binary">makeblastdb</requirement> - <requirement type="package" version="2.2.26+">blast+</requirement> - </requirements> - <version_command>makeblastdb -version</version_command> - <command> + <macros> + <token name="@BINARY@">makeblastdb</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command interpreter="python">check_no_duplicates.py +##First check for duplicates (since BLAST+ 2.2.28 fails to do so) +##and abort (via the ampersand ampersand trick) if any are found. +#for $i in $in +"${i.file}" +#end for +&& makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids $hash_index @@ -24,54 +30,55 @@ -title "BLAST Database" #end if -dbtype $dbtype -## #set $sep = '-mask_data ' -## #for $i in $mask_data -## $sep${i.file} -## #set $set = ', ' -## #end for +#set $mask_string = '' +#set $sep = '-mask_data ' +#for $i in $mask_data +#set $mask_string += $sep + str($i.file) +#set $sep = ',' +#end for +$mask_string +## #set $gi_mask_string = '' ## #set $sep = '-gi_mask -gi_mask_name ' ## #for $i in $gi_mask -## $sep${i.file} -## #set $set = ', ' -## #end for +## #set $gi_mask_string += $sep + str($i.file) +## #set $sep = ',' +## #end for +## $gi_mask_string ## #if $tax.select == 'id': ## -taxid $tax.id ## #else if $tax.select == 'map': ## -taxid_map $tax.map ## #end if +## -------------------------------------------------------------------- +## Capture the stdout log information to the primary file (plain text): +>> "$outfile" </command> - <stdio> - <!-- Anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <!-- In case the return code has not been set propery check stderr too --> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> + <expand macro="stdio" /> <inputs> <param name="dbtype" type="select" display="radio" label="Molecule type of input"> <option value="prot">protein</option> <option value="nucl">nucleotide</option> </param> <!-- TODO Allow merging of existing BLAST databases (conditional on the database type) + NOTE Double check the new database would be self contained first <repeat name="in" title="BLAST or FASTA Database" min="1"> <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" /> </repeat> --> + <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? --> <repeat name="in" title="FASTA file" min="1"> <param name="file" type="data" format="fasta" /> </repeat> <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> - <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> - + <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> <!-- SEQUENCE MASKING OPTIONS --> + <repeat name="mask_data" title="Masking data file"> + <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> + </repeat> <!-- TODO - <repeat name="mask_data" title="Provide one or more files containing masking data"> - <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> - </repeat> <repeat name="gi_mask" title="Create GI indexed masking data"> - <param name="file" type="data" format="asnb" label="Masking data output file" /> + <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" /> </repeat> --> @@ -104,6 +111,25 @@ </data> </outputs> <tests> + <!-- Note the (two line) PIN file is not reproducible run to run. + --> + <test> + <param name="dbtype" value="prot" /> + <param name="file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="title" value="Just 4 human proteins" /> + <param name="parse_seqids" value="" /> + <param name="hash_index" value="true" /> + <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6"> + <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> + <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> + <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" /> + <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" /> + <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" /> + <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> + <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> + </output> + </test> </tests> <help> **What it does** @@ -127,17 +153,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ </help> </tool>