Mercurial > repos > devteam > ncbi_blast_plus
comparison tools/ncbi_blast_plus/ncbi_makeblastdb.xml @ 34:b6893f57f8d8 draft
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/ncbi_blast_plus commit 028e3e806ba6df913403a2a083a354dfa713755f
author | peterjc |
---|---|
date | Thu, 22 Feb 2024 14:47:01 +0000 |
parents | a52d2d93e595 |
children | |
comparison legend:
- equal
- deleted
- inserted
- replaced
33:d999e774072a | 34:b6893f57f8d8 |
---|---|
6 </macros> | 6 </macros> |
7 <expand macro="preamble"> | 7 <expand macro="preamble"> |
8 <requirement type="package" version="3.9">python</requirement> | 8 <requirement type="package" version="3.9">python</requirement> |
9 </expand> | 9 </expand> |
10 <command detect_errors="aggressive" strict="true"><![CDATA[ | 10 <command detect_errors="aggressive" strict="true"><![CDATA[ |
11 #set $inputs = [] | |
12 #set $input_compression = [] | |
13 #for r in $input.selection: | |
14 #if $input.type == "protein": | |
15 #silent $inputs.append($r.input_file) | |
16 #silent $input_compression.append($r.input_file.is_of_type('fasta.gz')) | |
17 #elif $r.nuc_choice.source == "history": | |
18 #silent $inputs.append($r.nuc_choice.input_file) | |
19 #silent $input_compression.append($r.nuc_choice.input_file.is_of_type('fasta.gz')) | |
20 #else: | |
21 #silent $inputs.append($r.nuc_choice.input_file.fields.path) | |
22 #silent $input_compression.append(False) | |
23 #end if | |
24 #end for | |
25 | |
11 python $__tool_directory__/check_no_duplicates.py | 26 python $__tool_directory__/check_no_duplicates.py |
12 ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) | 27 ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) |
13 ##and abort (via the ampersand ampersand trick) if any are found. | 28 ##and abort (via the ampersand ampersand trick) if any are found. |
14 #for i in $input_file#'${i}' #end for# | 29 #for i in $inputs#'$i' #end for# |
15 && | 30 && |
16 ##makeblastdb does not like input redirects of the sort | 31 ##makeblastdb does not like input redirects of the sort |
17 ##makeblastdb -in <(gunzip -c gzipped_fasta_file) | 32 ##makeblastdb -in <(gunzip -c gzipped_fasta_file) |
18 ##therefore we're cramming everything | 33 ##therefore we're cramming everything |
19 ##into a single cat command below | 34 ##into a single cat command below |
20 cat | 35 cat |
21 #for i in $input_file: | 36 #for i, is_gzipped in zip($inputs, $input_compression): |
22 #if $i.is_of_type('fasta.gz') and $i.ext != "fasta": | 37 #if $is_gzipped: |
23 <(gunzip -c ${i}) | 38 <(gunzip -c '$i') |
24 #else: | 39 #else: |
25 ${i} | 40 '$i' |
26 #end if | 41 #end if |
27 #end for | 42 #end for |
28 | makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' | 43 | makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' |
29 -blastdb_version 4 | 44 -blastdb_version 4 |
30 $parse_seqids | 45 $parse_seqids |
34 -title '${title}' | 49 -title '${title}' |
35 #else: | 50 #else: |
36 ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful | 51 ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful |
37 -title 'BLAST Database' | 52 -title 'BLAST Database' |
38 #end if | 53 #end if |
39 -dbtype $dbtype | 54 -dbtype |
55 #if $input.type == "protein": | |
56 prot | |
57 #else: | |
58 nucl | |
59 #end if | |
40 ## -------------------------------------------------------------------- | 60 ## -------------------------------------------------------------------- |
41 ## Masking | 61 ## Masking |
42 ## -------------------------------------------------------------------- | 62 ## -------------------------------------------------------------------- |
43 ## HACK: If no mask files, evaluates as a list with just None in it: | 63 ## HACK: If no mask files, evaluates as a list with just None in it: |
44 ## See Trello issue https://trello.com/c/lp5YmA1O | 64 ## See Trello issue https://trello.com/c/lp5YmA1O |
58 ## -------------------------------------------------------------------- | 78 ## -------------------------------------------------------------------- |
59 ## Capture the stdout log information to the primary file (plain text): | 79 ## Capture the stdout log information to the primary file (plain text): |
60 > '$outfile' | 80 > '$outfile' |
61 ]]></command> | 81 ]]></command> |
62 <inputs> | 82 <inputs> |
63 <param argument="-dbtype" type="select" display="radio" label="Molecule type of input"> | 83 <conditional name="input"> |
64 <option value="prot">protein</option> | 84 <param argument="-dbtype" name="type" type="select" label="Molecule type of input"> |
65 <option value="nucl">nucleotide</option> | 85 <option value="protein">protein</option> |
66 </param> | 86 <option value="nucleotide">nucleotide</option> |
67 <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)? | 87 </param> |
68 NOTE Double check the new database would be self contained first | 88 <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)? |
69 --> | 89 NOTE Double check the new database would be self contained first |
70 <!-- Note this is a mandatory parameter - default should be most recent FASTA file --> | 90 --> |
71 <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta,fasta.gz" label="Input FASTA files(s)" help="One or more FASTA files" /> | 91 <when value="protein"> |
92 <repeat name="selection" title="Select input" min="1" default="1"> | |
93 <!-- Note this is a mandatory parameter - default should be most recent FASTA file --> | |
94 <param name="input_file" argument="-in" type="data" format="fasta,fasta.gz" label="FASTA input" help="FASTA file with one or more sequences to add to the database" /> | |
95 </repeat> | |
96 </when> | |
97 <when value="nucleotide"> | |
98 <repeat name="selection" title="Select input" min="1" default="1"> | |
99 <conditional name="nuc_choice"> | |
100 <param name="source" type="select" label="Input is a"> | |
101 <option value="history">Dataset in history</option> | |
102 <option value="cached">Genome on server</option> | |
103 </param> | |
104 <when value="history"> | |
105 <param name="input_file" argument="-in" type="data" format="fasta,fasta.gz" label="FASTA input" help="FASTA file with one or more sequences to add to the database" /> | |
106 </when> | |
107 <when value="cached"> | |
108 <param name="input_file" type="select" label="Installed genome"> | |
109 <options from_data_table="all_fasta"/> | |
110 </param> | |
111 </when> | |
112 </conditional> | |
113 </repeat> | |
114 </when> | |
115 </conditional> | |
72 <param argument="-title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> | 116 <param argument="-title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> |
73 <param argument="-parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="false" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> | 117 <param argument="-parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="false" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> |
74 <param argument="-hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> | 118 <param argument="-hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> |
75 <!-- SEQUENCE MASKING OPTIONS --> | 119 <!-- SEQUENCE MASKING OPTIONS --> |
76 <!-- Note this is an optional parameter - default should be NO files --> | 120 <!-- Note this is an optional parameter - default should be NO files --> |
93 </when> | 137 </when> |
94 <!-- TODO: File format? | 138 <!-- TODO: File format? |
95 <when value="map"> | 139 <when value="map"> |
96 <param name="taxmap" argument="-taxid_map" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> | 140 <param name="taxmap" argument="-taxid_map" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> |
97 </when> | 141 </when> |
142 | |
98 --> | 143 --> |
99 </conditional> | 144 </conditional> |
100 </inputs> | 145 </inputs> |
101 <outputs> | 146 <outputs> |
102 <!-- If we only accepted one FASTA file, we could use its human name here... --> | 147 <!-- If we only accepted one FASTA file, we could use its human name here... --> |
103 <data name="outfile" format="data" label="${dbtype.value_label} BLAST database from ${on_string}"> | 148 <data name="outfile" format="data" label="${input.type} BLAST database from ${on_string}"> |
104 <change_format> | 149 <change_format> |
105 <when input="dbtype" value="nucl" format="blastdbn" /> | 150 <when input="input.type" value="nucleotide" format="blastdbn" /> |
106 <when input="dbtype" value="prot" format="blastdbp" /> | 151 <when input="input.type" value="protein" format="blastdbp" /> |
107 </change_format> | 152 </change_format> |
108 </data> | 153 </data> |
109 </outputs> | 154 </outputs> |
110 <tests> | 155 <tests> |
111 <!-- Note the (two line) PIN file is not reproducible run to run. | 156 <!-- Note the (two line) PIN file is not reproducible run to run. |
113 Likewise there is a datestamp in the log file as well, so use contains comparison | 158 Likewise there is a datestamp in the log file as well, so use contains comparison |
114 With and without the masking makes no difference. | 159 With and without the masking makes no difference. |
115 With and without the taxid the only real difference is in the *.phr file. | 160 With and without the taxid the only real difference is in the *.phr file. |
116 --> | 161 --> |
117 <test> | 162 <test> |
118 <param name="dbtype" value="prot" /> | 163 <conditional name="input"> |
119 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | 164 <param name="type" value="protein"/> |
165 <repeat name="selection"> | |
166 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | |
167 </repeat> | |
168 </conditional> | |
120 <param name="title" value="Just 4 human proteins" /> | 169 <param name="title" value="Just 4 human proteins" /> |
121 <param name="parse_seqids" value="" /> | 170 <param name="parse_seqids" value="" /> |
122 <param name="hash_index" value="true" /> | 171 <param name="hash_index" value="true" /> |
123 <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> | 172 <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> |
124 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> | 173 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> |
130 <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> | 179 <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> |
131 <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> | 180 <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> |
132 </output> | 181 </output> |
133 </test> | 182 </test> |
134 <test> | 183 <test> |
135 <param name="dbtype" value="prot" /> | 184 <conditional name="input"> |
136 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | 185 <param name="type" value="protein"/> |
186 <repeat name="selection"> | |
187 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | |
188 </repeat> | |
189 </conditional> | |
137 <param name="title" value="Just 4 human proteins" /> | 190 <param name="title" value="Just 4 human proteins" /> |
138 <param name="parse_seqids" value="" /> | 191 <param name="parse_seqids" value="" /> |
139 <param name="hash_index" value="true" /> | 192 <param name="hash_index" value="true" /> |
140 <param name="taxselect" value="id" /> | 193 <param name="taxselect" value="id" /> |
141 <param name="taxid" value="9606" /> | 194 <param name="taxid" value="9606" /> |
149 <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" /> | 202 <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" /> |
150 <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" /> | 203 <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" /> |
151 </output> | 204 </output> |
152 </test> | 205 </test> |
153 <test> | 206 <test> |
154 <param name="dbtype" value="prot" /> | 207 <conditional name="input"> |
155 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | 208 <param name="type" value="protein"/> |
209 <repeat name="selection"> | |
210 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | |
211 </repeat> | |
212 </conditional> | |
156 <param name="title" value="Just 4 human proteins" /> | 213 <param name="title" value="Just 4 human proteins" /> |
157 <param name="parse_seqids" value="" /> | 214 <param name="parse_seqids" value="" /> |
158 <param name="hash_index" value="true" /> | 215 <param name="hash_index" value="true" /> |
159 <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" /> | 216 <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" /> |
160 <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> | 217 <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> |
167 <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> | 224 <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> |
168 <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> | 225 <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> |
169 </output> | 226 </output> |
170 </test> | 227 </test> |
171 <test> | 228 <test> |
172 <param name="dbtype" value="nucl" /> | 229 <conditional name="input"> |
173 <param name="input_file" value="three_human_mRNA.fasta.gz" ftype="fasta.gz" /> | 230 <param name="type" value="nucleotide"/> |
231 <repeat name="selection"> | |
232 <conditional name="nuc_choice"> | |
233 <param name="source" value="history"/> | |
234 <param name="input_file" value="three_human_mRNA.fasta.gz" ftype="fasta.gz" /> | |
235 </conditional> | |
236 </repeat> | |
237 </conditional> | |
174 <param name="title" value="Just 3 human mRNA sequences" /> | 238 <param name="title" value="Just 3 human mRNA sequences" /> |
175 <param name="parse_seqids" value="" /> | 239 <param name="parse_seqids" value="" /> |
176 <param name="hash_index" value="true" /> | 240 <param name="hash_index" value="true" /> |
177 <param name="taxselect" value="id" /> | 241 <param name="taxselect" value="id" /> |
178 <param name="taxid" value="9606" /> | 242 <param name="taxid" value="9606" /> |
185 <extra_files type="file" value="three_human_mRNA.fasta.nhi" name="blastdb.nhi" /> | 249 <extra_files type="file" value="three_human_mRNA.fasta.nhi" name="blastdb.nhi" /> |
186 <extra_files type="file" value="three_human_mRNA.fasta.nsd" name="blastdb.nsd" /> | 250 <extra_files type="file" value="three_human_mRNA.fasta.nsd" name="blastdb.nsd" /> |
187 <extra_files type="file" value="three_human_mRNA.fasta.nsi" name="blastdb.nsi" /> | 251 <extra_files type="file" value="three_human_mRNA.fasta.nsi" name="blastdb.nsi" /> |
188 </output> | 252 </output> |
189 </test> | 253 </test> |
254 <test> | |
255 <conditional name="input"> | |
256 <param name="type" value="nucleotide"/> | |
257 <repeat name="selection"> | |
258 <conditional name="nuc_choice"> | |
259 <param name="source" value="cached"/> | |
260 <param name="input_file" value="three_human_mRNA" /> | |
261 </conditional> | |
262 </repeat> | |
263 </conditional> | |
264 <param name="title" value="Just 3 human mRNA sequences" /> | |
265 <param name="parse_seqids" value="" /> | |
266 <param name="hash_index" value="true" /> | |
267 <param name="taxselect" value="id" /> | |
268 <param name="taxid" value="9606" /> | |
269 <output name="outfile" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn"> | |
270 <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" /> | |
271 <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" compare="sim_size" delta="8" /> | |
272 <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" /> | |
273 <extra_files type="file" value="three_human_mRNA.fasta.nog" name="blastdb.nog" /> | |
274 <extra_files type="file" value="three_human_mRNA.fasta.nhd" name="blastdb.nhd" /> | |
275 <extra_files type="file" value="three_human_mRNA.fasta.nhi" name="blastdb.nhi" /> | |
276 <extra_files type="file" value="three_human_mRNA.fasta.nsd" name="blastdb.nsd" /> | |
277 <extra_files type="file" value="three_human_mRNA.fasta.nsi" name="blastdb.nsi" /> | |
278 </output> | |
279 </test> | |
190 </tests> | 280 </tests> |
191 <help> | 281 <help> |
192 **What it does** | 282 **What it does** |
193 | 283 |
194 Make BLAST database from one or more FASTA files and/or BLAST databases. | 284 Make BLAST database from one or more FASTA files and/or BLAST databases. |