comparison tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 11:4c4a0da938ff draft

Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25. Supports $GALAXY_SLOTS. Includes more tests and heavy use of macros.
author peterjc
date Thu, 05 Dec 2013 06:55:59 -0500
parents 70e7dcbf6573
children 623f727cdff1
comparison
equal deleted inserted replaced
10:70e7dcbf6573 11:4c4a0da938ff
1 <tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.5"> 1 <tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22">
2 <description>Make BLAST database</description> 2 <description>Make BLAST database</description>
3 <requirements> 3 <macros>
4 <requirement type="binary">makeblastdb</requirement> 4 <token name="@BINARY@">makeblastdb</token>
5 <requirement type="package" version="2.2.26+">blast+</requirement> 5 <import>ncbi_macros.xml</import>
6 </requirements> 6 </macros>
7 <version_command>makeblastdb -version</version_command> 7 <expand macro="requirements" />
8 <command> 8 <command interpreter="python">check_no_duplicates.py
9 ##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
10 ##and abort (via the ampersand ampersand trick) if any are found.
11 #for $i in $in
12 "${i.file}"
13 #end for
14 &amp;&amp;
9 makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" 15 makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
10 $parse_seqids 16 $parse_seqids
11 $hash_index 17 $hash_index
12 ## Single call to -in with multiple filenames space separated with outer quotes 18 ## Single call to -in with multiple filenames space separated with outer quotes
13 ## (presumably any filenames with spaces would be a problem). Note this gives 19 ## (presumably any filenames with spaces would be a problem). Note this gives
22 #else: 28 #else:
23 ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful 29 ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
24 -title "BLAST Database" 30 -title "BLAST Database"
25 #end if 31 #end if
26 -dbtype $dbtype 32 -dbtype $dbtype
27 ## #set $sep = '-mask_data ' 33 #set $mask_string = ''
28 ## #for $i in $mask_data 34 #set $sep = '-mask_data '
29 ## $sep${i.file} 35 #for $i in $mask_data
30 ## #set $set = ', ' 36 #set $mask_string += $sep + str($i.file)
31 ## #end for 37 #set $sep = ','
38 #end for
39 $mask_string
40 ## #set $gi_mask_string = ''
32 ## #set $sep = '-gi_mask -gi_mask_name ' 41 ## #set $sep = '-gi_mask -gi_mask_name '
33 ## #for $i in $gi_mask 42 ## #for $i in $gi_mask
34 ## $sep${i.file} 43 ## #set $gi_mask_string += $sep + str($i.file)
35 ## #set $set = ', ' 44 ## #set $sep = ','
36 ## #end for 45 ## #end for
46 ## $gi_mask_string
37 ## #if $tax.select == 'id': 47 ## #if $tax.select == 'id':
38 ## -taxid $tax.id 48 ## -taxid $tax.id
39 ## #else if $tax.select == 'map': 49 ## #else if $tax.select == 'map':
40 ## -taxid_map $tax.map 50 ## -taxid_map $tax.map
41 ## #end if 51 ## #end if
52 ## --------------------------------------------------------------------
53 ## Capture the stdout log information to the primary file (plain text):
54 &gt;&gt; "$outfile"
42 </command> 55 </command>
43 <stdio> 56 <expand macro="stdio" />
44 <!-- Anything other than zero is an error -->
45 <exit_code range="1:" />
46 <exit_code range=":-1" />
47 <!-- In case the return code has not been set properly, check stderr too -->
48 <regex match="Error:" />
49 <regex match="Exception:" />
50 </stdio>
51 <inputs> 57 <inputs>
52 <param name="dbtype" type="select" display="radio" label="Molecule type of input"> 58 <param name="dbtype" type="select" display="radio" label="Molecule type of input">
53 <option value="prot">protein</option> 59 <option value="prot">protein</option>
54 <option value="nucl">nucleotide</option> 60 <option value="nucl">nucleotide</option>
55 </param> 61 </param>
56 <!-- TODO Allow merging of existing BLAST databases (conditional on the database type) 62 <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)
63 NOTE Double check the new database would be self contained first
57 <repeat name="in" title="BLAST or FASTA Database" min="1"> 64 <repeat name="in" title="BLAST or FASTA Database" min="1">
58 <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" /> 65 <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />
59 </repeat> 66 </repeat>
60 --> 67 -->
68 <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->
61 <repeat name="in" title="FASTA file" min="1"> 69 <repeat name="in" title="FASTA file" min="1">
62 <param name="file" type="data" format="fasta" /> 70 <param name="file" type="data" format="fasta" />
63 </repeat> 71 </repeat>
64 <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> 72 <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
65 <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> 73 <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
66 <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> 74 <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
67
68 <!-- SEQUENCE MASKING OPTIONS --> 75 <!-- SEQUENCE MASKING OPTIONS -->
76 <repeat name="mask_data" title="Masking data file">
77 <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
78 </repeat>
69 <!-- TODO 79 <!-- TODO
70 <repeat name="mask_data" title="Provide one or more files containing masking data">
71 <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
72 </repeat>
73 <repeat name="gi_mask" title="Create GI indexed masking data"> 80 <repeat name="gi_mask" title="Create GI indexed masking data">
74 <param name="file" type="data" format="asnb" label="Masking data output file" /> 81 <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" />
75 </repeat> 82 </repeat>
76 --> 83 -->
77 84
78 <!-- TAXONOMY OPTIONS --> 85 <!-- TAXONOMY OPTIONS -->
79 <!-- TODO 86 <!-- TODO
102 <when input="dbtype" value="prot" format="blastdbp" /> 109 <when input="dbtype" value="prot" format="blastdbp" />
103 </change_format> 110 </change_format>
104 </data> 111 </data>
105 </outputs> 112 </outputs>
106 <tests> 113 <tests>
114 <!-- Note the (two-line) PIN file is not reproducible from run to run.
115 -->
116 <test>
117 <param name="dbtype" value="prot" />
118 <param name="file" value="four_human_proteins.fasta" ftype="fasta" />
119 <param name="title" value="Just 4 human proteins" />
120 <param name="parse_seqids" value="" />
121 <param name="hash_index" value="true" />
122 <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">
123 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
124 <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
125 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
126 <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />
127 <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />
128 <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />
129 <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />
130 <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />
131 </output>
132 </test>
107 </tests> 133 </tests>
108 <help> 134 <help>
109 **What it does** 135 **What it does**
110 136
111 Make BLAST database from one or more FASTA files and/or BLAST databases. 137 Make BLAST database from one or more FASTA files and/or BLAST databases.
125 **References** 151 **References**
126 152
127 If you use this Galaxy tool in work leading to a scientific publication please 153 If you use this Galaxy tool in work leading to a scientific publication please
128 cite the following papers: 154 cite the following papers:
129 155
130 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). 156 @REFERENCES@
131 Galaxy tools and workflows for sequence analysis with applications
132 in molecular plant pathology. PeerJ 1:e167
133 http://dx.doi.org/10.7717/peerj.167
134
135 Christiam Camacho et al. (2009).
136 BLAST+: architecture and applications.
137 BMC Bioinformatics. 15;10:421.
138 http://dx.doi.org/10.1186/1471-2105-10-421
139
140 This wrapper is available to install into other Galaxy Instances via the Galaxy
141 Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
142 </help> 157 </help>
143 </tool> 158 </tool>