Mercurial > repos > p.lucas > diamond_using_binary
comparison diamond.xml @ 8:0908e94a4c84 draft
Uploaded
author | p.lucas |
---|---|
date | Tue, 28 May 2024 13:04:19 +0000 |
parents | |
children | bc3dcdb96817 |
comparison
equal
deleted
inserted
replaced
7:b92784764ecd | 8:0908e94a4c84 |
---|---|
1 <tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01"> | |
2 <description>alignment tool for short sequences against a protein database</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="stdio" /> | |
7 <expand macro="version_command" /> | |
8 <command detect_errors="aggressive"> | |
9 <![CDATA[ | |
10 ## Link the selected database to a fixed local name; both sources are quoted so a path with spaces or shell metacharacters stays a single argument | |
11 #if $ref_db_source.db_source == "history": | |
12 ln -s '$ref_db_source.reference_database' ./database.dmnd | |
13 #else: | |
14 ln -s '${ref_db_source.index.fields.db_path}' ./database.dmnd | |
15 #end if | |
16 | |
17 && | |
18 | |
19 /usr/bin/diamond | |
20 $method_cond.method_select | |
21 --quiet | |
22 --threads "\${GALAXY_SLOTS:-12}" | |
23 --db ./database | |
24 --query '$query' | |
25 #if $method_cond.method_select == "blastx" | |
26 --query-gencode '$method_cond.query_gencode' | |
27 --strand '$method_cond.query_strand' | |
28 --min-orf $method_cond.min_orf | |
29 #if $method_cond.frameshift_cond.frameshift_select == 'yes' | |
30 --frameshift $method_cond.frameshift_cond.frameshift | |
31 $method_cond.frameshift_cond.range_culling | |
32 #end if | |
33 #else if $method_cond.method_select == "blastp" | |
34 $method_cond.no_self_hits | |
35 #end if | |
36 | |
37 @OUTPUT_ARGS@ | |
38 | |
39 #if $output_section.output.outfmt != '100' | |
40 --compress '0' | |
41 #end if | |
42 $sens_cond.sensitivity | |
43 $iterate | |
44 --algo $algo | |
45 #if $global_ranking | |
46 --global-ranking $global_ranking | |
47 #end if | |
48 #if str($gapopen) != "": | |
49 --gapopen '$gapopen' | |
50 #end if | |
51 #if str($gapextend) != "": | |
52 --gapextend '$gapextend' | |
53 #end if | |
54 --matrix '$matrix' | |
55 --comp-based-stats '$method_cond.comp_based_stats' | |
56 --masking '$masking' | |
57 | |
58 @HITFILTER_ARGS@ | |
59 | |
60 #if str($filter_score.filter_score_select) == 'evalue': | |
61 --evalue '$filter_score.evalue' | |
62 #else: | |
63 --min-score '$filter_score.min_score' | |
64 #end if | |
65 | |
66 --id '$id' | |
67 --query-cover '$query_cover' | |
68 --subject-cover '$subject_cover' | |
69 --block-size '$sens_cond.block_size' | |
70 #if $output_section.output_unal | |
71 #if "--un" in $output_section.output_unal | |
72 --un '$unalqueries' | |
73 #if $query.ext.startswith("fasta"): | |
74 --unfmt fasta | |
75 #else | |
76 --unfmt fastq | |
77 #end if | |
78 #end if | |
79 #if "--al" in $output_section.output_unal | |
80 --al '$alqueries' | |
81 #if $query.ext.startswith("fasta"): | |
82 --alfmt fasta | |
83 #else | |
84 --alfmt fastq | |
85 #end if | |
86 #end if | |
87 #end if | |
88 #if $output_section.max_hsps | |
89 --max-hsps $output_section.max_hsps | |
90 #end if | |
91 #if $tax_cond.tax_select == 'file': | |
92 --taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'` | |
93 #else if $tax_cond.tax_select == 'list': | |
94 --taxonlist '$tax_cond.taxonlist' | |
95 #end if | |
96 #if $advanced_section.seed_cut | |
97 --seed-cut $advanced_section.seed_cut | |
98 #end if | |
99 $advanced_section.freq_masking | |
100 --motif-masking $advanced_section.motif_masking | |
101 ]]> | |
102 </command> | |
103 <inputs> | |
104 <conditional name="method_cond"> | |
105 <param name="method_select" type="select" label="Alignment mode" help="(blastp/blastx)"> | |
106 <option value="blastp">Amino acid query sequences (blastp)</option> | |
107 <option value="blastx">DNA query sequences (blastx)</option> | |
108 </param> | |
109 <when value="blastx"> | |
110 <param argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help=""> | |
111 <option value="1">Standard Code</option> | |
112 <option value="2">Vertebrate Mitochondrial Code</option> | |
113 <option value="3">Yeast Mitochondrial Code</option> | |
114 <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
115 <option value="5">Invertebrate Mitochondrial Code</option> | |
116 <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | |
117 <option value="9">Echinoderm and Flatworm Mitochondrial Code</option> | |
118 <option value="10">Euplotid Nuclear Code</option> | |
119 <option value="11">Bacterial, Archaeal and Plant Plastid Code</option> | |
120 <option value="12">Alternative Yeast Nuclear Code</option> | |
121 <option value="13">Ascidian Mitochondrial Code</option> | |
122 <option value="14">Alternative Flatworm Mitochondrial Code</option> | |
123 <option value="16">Chlorophycean Mitochondrial Code</option> | |
124 <option value="21">Trematode Mitochondrial Code</option> | |
125 <option value="22">Scenedesmus obliquus Mitochondrial Code</option> | |
126 <option value="23">Thraustochytrium Mitochondrial Code</option> | |
127 <option value="24">Pterobranchia Mitochondrial Code</option> | |
128 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> | |
129 <option value="26">Pachysolen tannophilus Nuclear Code</option> | |
130 </param> | |
131 <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" /> | |
132 | |
133 <param name="query_strand" argument="--strand" type="select" label="query strands to search" help=""> | |
134 <option value="both" selected="True">Both</option> | |
135 <option value="plus">Plus</option> | |
136 <option value="minus">Minus</option> | |
137 </param> | |
138 <conditional name="frameshift_cond"> | |
139 <param name="frameshift_select" type="select" label="Allow for frameshifts?" help=""> | |
140 <option value="yes">yes</option> | |
141 <option value="no" selected="true">no</option> | |
142 </param> | |
143 <when value="yes"> | |
144 <param argument="--range-culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/> | |
145 <param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively." /> | |
146 </when> | |
147 <when value="no"/> | |
148 </conditional> | |
149 | |
150 <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> | |
151 <option value="0">Disable</option> | |
152 <option value="1" selected="True">Default mode (Hauser, 2016)</option> | |
153 </param> | |
154 </when> | |
155 <when value="blastp"> | |
156 <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" | |
157 label="Suppress reporting of identical self-hits between sequences" | |
158 help="The FASTA sequence identifiers as well as the sequences of query and target need to be identical for a hit to be deleted"/> | |
159 | |
160 <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> | |
161 <option value="0">Disable</option> | |
162 <option value="1" selected="True">Default mode (Hauser, 2016)</option> | |
163 <option value="2">Compositional matrix adjust conditioned on sequence properties, simplified (Yu, 2005)</option> | |
164 <option value="3">Compositional matrix adjust conditioned on sequence properties (Yu, 2005)</option> | |
165 <option value="4">Compositional matrix adjust unconditionally (Yu, 2005)</option> | |
166 </param> | |
167 </when> | |
168 </conditional> | |
169 <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format" /> | |
170 <conditional name="ref_db_source"> <!-- Chooses where the DIAMOND database comes from; the command template symlinks the chosen file to ./database.dmnd --> | |
171 <param name="db_source" type="select" label="Will you select a reference database from your history or use a built-in index?" help="Built-ins were indexed using default options"> | |
172 <option value="indexed">Use a built-in index</option> | |
173 <option value="history">Use one from the history</option> | |
174 </param> | |
175 <when value="indexed"> <!-- entries are read from the diamond_database data table; the command template uses the db_path field of the selected entry --> | |
176 <param name="index" type="select" label="Select a reference database" help="If your database of interest is not listed, contact your Galaxy admin"> | |
177 <options from_data_table="diamond_database"> | |
178 <filter type="sort_by" column="2"/> | |
179 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
180 </options> | |
181 </param> | |
182 </when> | |
183 <when value="history"> <!-- a dataset of format dmnd taken from the user's history --> | |
184 <param name="reference_database" argument="--db" type="data" format="dmnd" label="Select the reference database" /> | |
185 </when> | |
186 </conditional> | |
187 <conditional name="tax_cond"> <!-- Optional taxonomic restriction: none, a typed list of taxon ids, or a file holding one id per line --> | |
188 <param name="tax_select" type="select" label="Restrict search taxonomically?" help="Any taxonomic rank can be used, and only reference sequences matching one of the specified taxon ids will be searched against."> | |
189 <option value="no" selected="True">No</option> | |
190 <option value="list">List of taxids entered manually</option> | |
191 <option value="file">List of taxids from single column tabular file</option> | |
192 </param> | |
193 <when value="no"/> | |
194 <when value="list"> <!-- pattern is anchored at both ends: the regex validator matches from the start only, so an unanchored character class would accept any input --> | |
195 <param name="taxonlist" argument="--taxonlist" type="text" value="" label="Comma separated list of taxon ids" help=""> | |
196 <validator type="regex" message="Taxonlist needs to be a comma separated list of integers">^[0-9]+(,[0-9]+)*$</validator> | |
197 </param> | |
198 </when> | |
199 <when value="file"> | |
200 <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Single column tabular file with taxon ids" help="One taxon id per line; empty lines and lines starting with # are ignored" /> | |
201 </when> | |
202 </conditional> | |
203 <conditional name="sens_cond"> <!-- Sensitivity preset: the selected option value is inserted verbatim into the command line, and every branch expands a block size macro (block_size_hi_sens for the very and ultra sensitive presets, block_size_low_sens for all others) --> | |
204 <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time."> | |
205 <option value="--fast">Fast (--fast)</option> | |
206 <option value="" selected="True">Default</option> | |
207 <option value="--mid-sensitive">Mid Sensitive (--mid-sensitive)</option> | |
208 <option value="--sensitive">Sensitive (--sensitive)</option> | |
209 <option value="--more-sensitive">More Sensitive (--more-sensitive)</option> | |
210 <option value="--very-sensitive">Very Sensitive (--very-sensitive)</option> | |
211 <option value="--ultra-sensitive">Ultra Sensitive (--ultra-sensitive)</option> | |
212 </param> | |
213 <when value="--fast"> | |
214 <expand macro="block_size_low_sens"/> | |
215 </when> | |
216 <when value=""> <!-- empty value: no sensitivity flag is added to the command line --> | |
217 <expand macro="block_size_low_sens"/> | |
218 </when> | |
219 <when value="--mid-sensitive"> | |
220 <expand macro="block_size_low_sens"/> | |
221 </when> | |
222 <when value="--sensitive"> | |
223 <expand macro="block_size_low_sens"/> | |
224 </when> | |
225 <when value="--more-sensitive"> | |
226 <expand macro="block_size_low_sens"/> | |
227 </when> | |
228 <when value="--very-sensitive"> | |
229 <expand macro="block_size_hi_sens"/> | |
230 </when> | |
231 <when value="--ultra-sensitive"> | |
232 <expand macro="block_size_hi_sens"/> | |
233 </when> | |
234 </conditional> | |
235 <param argument="--matrix" type="select" label="Scoring matrix" help="In parentheses are the supported values for (gap open)/(gap extend). In brackets are default gap penalties"> | |
236 <option value="BLOSUM45">BLOSUM45 ((10-13)/3; (12-16)/2; (16-19)/1) [14/2]</option> | |
237 <option value="BLOSUM50">BLOSUM50 ((9-13)/3; (12-16)/2; (15-19)/1) [13/2]</option> | |
238 <option value="BLOSUM62" selected="True">BLOSUM62 ((6-11)/2; (9-13)/1) [11/1]</option> | |
239 <option value="BLOSUM80">BLOSUM80 ((6-9)/2; 13/2; 25/2; (9-11)/1) [10/1]</option> | |
240 <option value="BLOSUM90">BLOSUM90 ((6-9)/2; (9-11)/1) [10/1]</option> | |
241 <option value="PAM250">PAM250 ((11-15)/3; (13-17)/2; (17-21)/1) [14/2]</option> | |
242 <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1) [10/1]</option> | |
243 <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1) [9/1]</option> | |
244 </param> | |
245 <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="Leave empty for default (see scoring matrix)" /> | |
246 <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="Leave empty for default (see scoring matrix)" /> | |
247 <param argument="--masking" type="select" label="Masking algorithm" help="DIAMOND by default applies the tantan repeat masking algorithm to the query and target sequences as described in (Frith, 2011). | |
248 This masking procedure increases the specificity of alignments and serves to filter out spurious hits. Note that when using --comp-based-stats (2,3,4), tantan masking is disabled by default."> | |
249 <option value="0">Disabled</option> | |
250 <option value="1" selected="true">Tantan</option> | |
251 <option value="seg">SEG</option> | |
252 </param> | |
253 <conditional name="filter_score"> <!-- Exactly one score filter reaches the command line: the e-value threshold when "evalue" is selected, otherwise the minimum bit score --> | |
254 <param name="filter_score_select" type="select" label="Method to filter?" help="(--evalue/--min-score)"> | |
255 <option value="evalue" selected="True">Maximum e-value to report alignments</option> | |
256 <option value="min-score">Minimum bit score to report alignments</option> | |
257 </param> | |
258 <when value="evalue"> | |
259 <param argument="--evalue" type="float" value="0.001" label="Maximum expected value to keep an alignment" /> | |
260 </when> | |
261 <when value="min-score"> | |
262 <param name="min_score" argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)" /> | |
263 </when> | |
264 </conditional> | |
265 <!-- Boolean flag: its truevalue is inserted into the command line when checked, nothing otherwise --> <param argument="--iterate" type="boolean" truevalue="--iterate" falsevalue="" checked="false" | |
266 label="Run multiple rounds of searches with increasing sensitivity" help="The query dataset will first be searched at a lower sensitivity setting, only searching those query sequences at | |
267 the target sensitivity that fail to produce a significant alignment at a lower sensitivity." /> | |
268 <!-- Seed search algorithm; the selected option value is passed unchanged to diamond --> <param argument="--algo" type="select" label="Algorithm for seed search" help="Double-indexed is the main algorithm of the program, designed for large input files but less efficient for small | |
269 query files. Query-indexed mode improves performance for small query files. This mode will be automatically triggered based on the input. Contiguous-seed mode further improves performance | |
270 for small query files. The modes differ slightly in their sensitivity, so results are not guaranteed to be 100% identical for different settings of this option."> | |
271 <option value="0">Double-indexed (0)</option> | |
272 <option value="1">Query-indexed (1)</option> | |
273 <option value="ctg">Contiguous-seed mode (ctg)</option> | |
274 </param> | |
275 <expand macro="hit_filter_macro" /> | |
276 <param argument="--global-ranking" type="integer" min="0" value="" optional="true" | |
277 label="Limit on the number of Smith Waterman extensions" help="Target sequences will be ranked according to their ungapped extension scores at seed hits, and gapped extensions will only | |
278 be computed for the best N targets for each query. Note that this option increases memory use." /> | |
279 <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="Report only alignments above the given percentage of sequence identity" /> | |
280 <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="Report only alignments above the given percentage of query cover" /> | |
281 <param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="Report only alignments above the given percentage of subject cover"/> | |
282 <section name="output_section" title="Output options"> | |
283 <param argument="--max-hsps" type="integer" min="0" optional="true" label="Maximum number of HSPs" | |
284 help="The maximum number of HSPs (High-Scoring Segment Pairs) per target sequence to report for each query. The default policy is to report only the highest-scoring | |
285 HSP for each target, while disregarding alternative, lower-scoring HSPs that are contained in the same target." /> | |
286 <expand macro="output_type_macro"> | |
287 <!-- Taxonomy features are not supported for the DAA format (i.e. | |
288 can't be used in diamond view) --> | |
289 <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option> | |
290 <option value="sskingdoms">Subject super kingdoms</option> | |
291 <option value="skingdoms">Subject kingdoms</option> | |
292 <option value="sphylums">Subject phylums</option> | |
293 </expand> | |
294 <param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help=""> <!-- option values are matched by substring tests in the command template and in the output filters, so only the visible labels may change --> | |
295 <option value="--un">Output unaligned queries (--un)</option> | |
296 <option value="--al">Output aligned queries (--al)</option> | |
297 </param> | |
298 </section> | |
299 <section name="advanced_section" title="Advanced options" expanded="false"> <!-- Seed and masking controls: seed_cut is passed only when set, freq_masking inserts its flag when checked, motif_masking is always passed --> | |
300 <param argument="--seed-cut" type="float" min="0" optional="true" label="Set a complexity cutoff for indexed seeds"/> | |
301 <param argument="--freq-masking" type="boolean" truevalue="--freq-masking" falsevalue="" checked="false" label="Enable masking seeds based on frequency" help="This option is incompatible with --seed-cut" /> | |
302 <param argument="--motif-masking" type="select" label="Softmask abundant motifs" help="Enable or disable motif masking"> | |
303 <option value="0">Disabled</option> | |
304 <option value="1">Enabled</option> | |
305 </param> | |
306 </section> | |
307 </inputs> | |
308 <outputs> <!-- Main result datasets come from output_macro; the two query datasets below are optional and inherit the datatype of the query input --> | |
309 <expand macro="output_macro" /> | |
310 <data format_source="query" name="unalqueries" label="${tool.name} on ${on_string}: unaligned queries"> <!-- kept only when the unaligned queries option is selected --> | |
311 <filter>output_section['output_unal'] and "--un" in output_section['output_unal']</filter> | |
312 </data> | |
313 <data format_source="query" name="alqueries" label="${tool.name} on ${on_string}: aligned queries"> <!-- kept only when the aligned queries option is selected, mirroring the check in the command template --> | |
314 <filter>output_section['output_unal'] and "--al" in output_section['output_unal']</filter> | |
315 </data> | |
316 </outputs> | |
317 <tests> | |
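318 <!--Test 01: blastp against a history database, tabular output with taxonomy fields, aligned and unaligned queries written to separate datasets--> | |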
319 <test expect_num_outputs="3"> | |
320 <conditional name="method_cond"> | |
321 <param name="method_select" value="blastp" /> | |
322 </conditional> | |
323 <param name="query" value="protein.fasta" ftype="fasta"/> | |
324 <conditional name="ref_db_source"> | |
325 <param name="db_source" value="history"/> | |
326 <param name="reference_database" value="db-wtax.dmnd"/> | |
327 </conditional> | |
328 <section name="output_section"> | |
329 <conditional name="output"> | |
330 <param name="outfmt" value="6"/> | |
331 <!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 --> | |
332 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/> | |
333 <param name="unal" value="true"/> | |
334 </conditional> | |
335 </section> | |
336 <conditional name="sens_cond"> | |
337 <param name="sensitivity" value=""/> | |
338 </conditional> | |
339 <param name="matrix" value="BLOSUM62"/> | |
340 <param name="comp_based_stats" value="1"/> | |
341 <param name="masking" value="1"/> | |
342 <conditional name="hit_filter"> | |
343 <param name="hit_filter_select" value="max"/> | |
344 <param name="max_target_seqs" value="25" /> | |
345 </conditional> | |
346 <conditional name="filter_score"> | |
347 <param name="filter_score_select" value="evalue"/> | |
348 <param name="evalue" value="0.001" /> | |
349 </conditional> | |
350 <param name="id" value="0"/> | |
351 <param name="query_cover" value="0"/> | |
352 <conditional name="sens_cond"> | |
353 <param name="block_size" value="2"/> | |
354 </conditional> | |
355 <param name="output_unal" value="--al,--un"/> | |
356 <output name="unalqueries"> | |
357 <assert_contents> | |
358 <has_line line=">shuffled sequence that should go to unaligned"/> | |
359 </assert_contents> | |
360 </output> | |
361 <output name="alqueries"> | |
362 <assert_contents> | |
363 <has_line line=">sequence more text"/> | |
364 </assert_contents> | |
365 </output> | |
366 <output name="blast_tabular" file="diamond_results.tabular"/> | |
367 </test> | |
368 <!--Test 02: blastp restricted to a manually entered taxon id list--> | |
369 <test expect_num_outputs="1"> | |
370 <conditional name="method_cond"> | |
371 <param name="method_select" value="blastp" /> | |
372 </conditional> | |
373 <param name="query" value="protein.fasta" ftype="fasta"/> | |
374 <conditional name="ref_db_source"> | |
375 <param name="db_source" value="history"/> | |
376 <param name="reference_database" value="db-wtax.dmnd"/> | |
377 </conditional> | |
378 <conditional name="tax_cond"> | |
379 <param name="tax_select" value="list"/> | |
380 <param name="taxonlist" value="2" /> | |
381 </conditional> | |
382 <section name="output_section"> | |
383 <conditional name="output"> | |
384 <param name="outfmt" value="6"/> | |
385 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> | |
386 </conditional> | |
387 </section> | |
388 <conditional name="sens_cond"> | |
389 <param name="sensitivity" value=""/> | |
390 </conditional> | |
391 <param name="matrix" value="BLOSUM62"/> | |
392 <param name="comp_based_stats" value="1"/> | |
393 <param name="masking" value="1"/> | |
394 <conditional name="hit_filter"> | |
395 <param name="hit_filter_select" value="max"/> | |
396 <param name="max_target_seqs" value="25" /> | |
397 </conditional> | |
398 <conditional name="filter_score"> | |
399 <param name="filter_score_select" value="evalue"/> | |
400 <param name="evalue" value="0.001" /> | |
401 </conditional> | |
402 <param name="id" value="0"/> | |
403 <param name="query_cover" value="0"/> | |
404 <conditional name="sens_cond"> | |
405 <param name="block_size" value="2"/> | |
406 </conditional> | |
407 <output name="blast_tabular" file="diamond_results.wtax.tabular"/> | |
408 </test> | |
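409 <!--Test 03: blastx with frameshift alignment against a history database, outfmt 0 compared with diamond_results.pairwise--> | |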
410 <test expect_num_outputs="1"> | |
411 <conditional name="method_cond"> | |
412 <param name="method_select" value="blastx" /> | |
413 <conditional name="frameshift_cond"> | |
414 <param name="frameshift_select" value="yes"/> | |
415 </conditional> | |
416 </conditional> | |
417 <param name="query" value="nucleotide.fasta" ftype="fasta"/> | |
418 <conditional name="ref_db_source"> | |
419 <param name="db_source" value="history"/> | |
420 <param name="reference_database" value="db.dmnd"/> | |
421 </conditional> | |
422 <section name="output_section"> | |
423 <conditional name="output"> | |
424 <param name="outfmt" value="0"/> | |
425 </conditional> | |
426 </section> | |
427 <conditional name="sens_cond"> | |
428 <param name="sensitivity" value=""/> | |
429 </conditional> | |
430 <param name="matrix" value="BLOSUM62"/> | |
431 <param name="comp_based_stats" value="1"/> | |
432 <param name="masking" value="1"/> | |
433 <conditional name="hit_filter"> | |
434 <param name="hit_filter_select" value="top"/> | |
435 <param name="top" value="10" /> | |
436 </conditional> | |
437 <conditional name="filter_score"> | |
438 <param name="filter_score_select" value="min-score"/> | |
439 <param name="min_score" value="1" /> | |
440 </conditional> | |
441 <param name="id" value="0"/> | |
442 <param name="query_cover" value="0"/> | |
443 <conditional name="sens_cond"> | |
444 <param name="block_size" value="2"/> | |
445 </conditional> | |
446 <output name="blast_tabular" file="diamond_results.pairwise"/> | |
447 </test> | |
448 <!--Test 04: blastp with outfmt 100, output size-compared with diamond_results.daa--> | |
449 <test expect_num_outputs="1"> | |
450 <conditional name="method_cond"> | |
451 <param name="method_select" value="blastp" /> | |
452 </conditional> | |
453 <param name="query" value="protein.fasta" ftype="fasta"/> | |
454 <conditional name="ref_db_source"> | |
455 <param name="db_source" value="history"/> | |
456 <param name="reference_database" value="db-wtax.dmnd"/> | |
457 </conditional> | |
458 <section name="output_section"> | |
459 <conditional name="output"> | |
460 <param name="outfmt" value="100"/> | |
461 <param name="salltitles" value="false"/> | |
462 <param name="sallseqid" value="false"/> | |
463 </conditional> | |
464 </section> | |
465 <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/> | |
466 </test> | |
467 <!--Test 05: blastx with frameshift alignment against the built-in index testDb, outfmt 0--> | |
468 <test expect_num_outputs="1"> | |
469 <conditional name="method_cond"> | |
470 <param name="method_select" value="blastx" /> | |
471 <conditional name="frameshift_cond"> | |
472 <param name="frameshift_select" value="yes"/> | |
473 </conditional> | |
474 </conditional> | |
475 <param name="query" value="nucleotide.fasta" ftype="fasta"/> | |
476 <conditional name="ref_db_source"> | |
477 <param name="db_source" value="indexed"/> | |
478 <param name="index" value="testDb"/> | |
479 </conditional> | |
480 <section name="output_section"> | |
481 <conditional name="output"> | |
482 <param name="outfmt" value="0"/> | |
483 </conditional> | |
484 </section> | |
485 <conditional name="sens_cond"> | |
486 <param name="sensitivity" value=""/> | |
487 </conditional> | |
488 <param name="matrix" value="BLOSUM62"/> | |
489 <param name="comp_based_stats" value="1"/> | |
490 <param name="masking" value="1"/> | |
491 <conditional name="hit_filter"> | |
492 <param name="hit_filter_select" value="top"/> | |
493 <param name="top" value="10" /> | |
494 </conditional> | |
495 <conditional name="filter_score"> | |
496 <param name="filter_score_select" value="min-score"/> | |
497 <param name="min_score" value="1" /> | |
498 </conditional> | |
499 <param name="id" value="0"/> | |
500 <param name="query_cover" value="0"/> | |
501 <conditional name="sens_cond"> | |
502 <param name="block_size" value="2"/> | |
503 </conditional> | |
504 <output name="blast_tabular" file="diamond_results.pairwise"/> | |
505 </test> | |
506 <!-- Test 06 iterate option--> | |
507 <test expect_num_outputs="1"> | |
508 <conditional name="method_cond"> | |
509 <param name="method_select" value="blastx" /> | |
510 </conditional> | |
511 <param name="query" value="nucleotide.fasta" ftype="fasta"/> | |
512 <conditional name="ref_db_source"> | |
513 <param name="db_source" value="indexed"/> | |
514 <param name="index" value="testDb"/> | |
515 </conditional> | |
516 <param name="iterate" value="true"/> | |
517 <section name="output_section"> | |
518 <conditional name="output"> | |
519 <param name="outfmt" value="6"/> | |
520 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> | |
521 </conditional> | |
522 </section> | |
523 <output name="blast_tabular" file="diamond_results_iterate.tabular"/> | |
524 </test> | |
525 <!--Test 07 algo option--> | |
526 <test expect_num_outputs="1"> | |
527 <conditional name="method_cond"> | |
528 <param name="method_select" value="blastx" /> | |
529 </conditional> | |
530 <param name="query" value="nucleotide.fasta" ftype="fasta"/> | |
531 <conditional name="ref_db_source"> | |
532 <param name="db_source" value="indexed"/> | |
533 <param name="index" value="testDb"/> | |
534 </conditional> | |
535 <param name="algo" value="1"/> | |
536 <section name="output_section"> | |
537 <conditional name="output"> | |
538 <param name="outfmt" value="6"/> | |
539 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> | |
540 </conditional> | |
541 </section> | |
542 <output name="blast_tabular" file="diamond_results_algorithm.tabular"/> | |
543 </test> | |
544 <!--Test 08 global-ranking option--> | |
545 <test expect_num_outputs="1"> | |
546 <conditional name="method_cond"> | |
547 <param name="method_select" value="blastx" /> | |
548 </conditional> | |
549 <param name="query" value="nucleotide.fasta" ftype="fasta"/> | |
550 <conditional name="ref_db_source"> | |
551 <param name="db_source" value="indexed"/> | |
552 <param name="index" value="testDb"/> | |
553 </conditional> | |
554 <param name="global_ranking" value="10"/> | |
555 <section name="output_section"> | |
556 <conditional name="output"> | |
557 <param name="outfmt" value="6"/> | |
558 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> | |
559 </conditional> | |
560 </section> | |
561 <output name="blast_tabular" file="diamond_results_global_ranking.tabular"/> | |
562 </test> | |
563 <!--Test 09 max-hsps option--> | |
564 <test expect_num_outputs="1"> | |
565 <conditional name="method_cond"> | |
566 <param name="method_select" value="blastx" /> | |
567 </conditional> | |
568 <param name="query" value="nucleotide.fasta" ftype="fasta"/> | |
569 <conditional name="ref_db_source"> | |
570 <param name="db_source" value="indexed"/> | |
571 <param name="index" value="testDb"/> | |
572 </conditional> | |
573 <param name="max_hsps" value="10"/> | |
574 <section name="output_section"> | |
575 <conditional name="output"> | |
576 <param name="outfmt" value="6"/> | |
577 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> | |
578 </conditional> | |
579 </section> | |
580 <output name="blast_tabular" file="diamond_results_max_hsps.tabular"/> | |
581 </test> | |
582 <!--Test 10 seed-cut option--> | |
<test expect_num_outputs="1">
    <!-- Runs blastx on nucleotide.fasta against the indexed "testDb" database with
         seed_cut set to 100. Output is requested in tabular format (outfmt 6) with
         the twelve listed fields, and blast_tabular is compared with
         diamond_results_seed_cut.tabular. -->
    <param name="query" ftype="fasta" value="nucleotide.fasta"/>
    <conditional name="method_cond">
        <param name="method_select" value="blastx"/>
    </conditional>
    <conditional name="ref_db_source">
        <param name="db_source" value="indexed"/>
        <param name="index" value="testDb"/>
    </conditional>
    <param name="seed_cut" value="100"/>
    <section name="output_section">
        <conditional name="output">
            <param name="outfmt" value="6"/>
            <param name="fields"
                   value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
        </conditional>
    </section>
    <output name="blast_tabular" file="diamond_results_seed_cut.tabular"/>
</test>
601 <!--Test 11 freq-masking option--> | |
<test expect_num_outputs="1">
    <!-- Runs blastx on nucleotide.fasta against the indexed "testDb" database with
         freq_masking set to true. Output is requested in tabular format (outfmt 6)
         with the twelve listed fields, and blast_tabular is compared with
         diamond_results_freq_masking.tabular. -->
    <param name="query" ftype="fasta" value="nucleotide.fasta"/>
    <conditional name="method_cond">
        <param name="method_select" value="blastx"/>
    </conditional>
    <conditional name="ref_db_source">
        <param name="db_source" value="indexed"/>
        <param name="index" value="testDb"/>
    </conditional>
    <param name="freq_masking" value="true"/>
    <section name="output_section">
        <conditional name="output">
            <param name="outfmt" value="6"/>
            <param name="fields"
                   value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
        </conditional>
    </section>
    <output name="blast_tabular" file="diamond_results_freq_masking.tabular"/>
</test>
620 <!--Test 12 motif-masking option--> | |
<test expect_num_outputs="1">
    <!-- Runs blastx on nucleotide.fasta against the indexed "testDb" database with
         motif_masking (inside advanced_section) set to 1. Output is requested in
         tabular format (outfmt 6) with the twelve listed fields, and blast_tabular
         is compared with diamond_results_motif_masking.tabular. -->
    <param name="query" ftype="fasta" value="nucleotide.fasta"/>
    <conditional name="method_cond">
        <param name="method_select" value="blastx"/>
    </conditional>
    <conditional name="ref_db_source">
        <param name="db_source" value="indexed"/>
        <param name="index" value="testDb"/>
    </conditional>
    <section name="advanced_section">
        <param name="motif_masking" value="1"/>
    </section>
    <section name="output_section">
        <conditional name="output">
            <param name="outfmt" value="6"/>
            <param name="fields"
                   value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
        </conditional>
    </section>
    <output name="blast_tabular" file="diamond_results_motif_masking.tabular"/>
</test>
641 </tests> | |
642 <help> | |
643 <![CDATA[ | |
644 | |
645 **What it does** | |
646 | |
DIAMOND_ is a new alignment tool for aligning short DNA sequencing reads to a protein reference database such as NCBI-NR.
On Illumina reads of length 100-150bp, in fast mode, DIAMOND is about 20,000 times faster than BLASTX, while reporting
about 80-90% of all matches that BLASTX finds, with an e-value of at most 1e-5. In sensitive mode, DIAMOND is about 2,500
times faster than BLASTX, finding more than 94% of all matches.
651 | |
652 The DIAMOND algorithm is designed for the alignment of large datasets. The algorithm is not efficient for a small number of query sequences or only a single one of them, and speed will be low. BLAST is recommended for small datasets. | |
653 | |
654 .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/ | |
655 | |
656 **Input** | |
657 | |
658 Input data is a large protein or nucleotide sequence file. | |
659 | |
660 | |
661 **Output** | |
662 | |
663 Diamond gives you a tabular output file with 12 columns: | |
664 | |
====== ============================================
Column Description
====== ============================================
1      Query Seq-id (ID of your sequence)
2      Subject Seq-id (ID of the database hit)
3      Percentage of identical matches
4      Alignment length
5      Number of mismatches
6      Number of gap openings
7      Start of alignment in query
8      End of alignment in query
9      Start of alignment in subject (database hit)
10     End of alignment in subject (database hit)
11     Expectation value (E-value)
12     Bit score
====== ============================================
678 | |
679 | |
The supported values for the gap open and gap extend parameters depend on the selected scoring matrix:
681 | |
======== ============================================
Matrix   Supported values for (gap open)/(gap extend)
======== ============================================
BLOSUM45 (10-13)/3; (12-16)/2; (16-19)/1
BLOSUM50 (9-13)/3; (12-16)/2; (15-19)/1
BLOSUM62 (6-11)/2; (9-13)/1
BLOSUM80 (6-9)/2; 13/2; 25/2; (9-11)/1
BLOSUM90 (6-9)/2; (9-11)/1
PAM250   (11-15)/3; (13-17)/2; (17-21)/1
PAM70    (6-8)/2; (9-11)/1
PAM30    (5-7)/2; (8-10)/1
======== ============================================
694 | |
695 | |
696 ]]> | |
697 </help> | |
698 <expand macro="citations" /> | |
699 </tool> |