comparison diamond.xml @ 8:0908e94a4c84 draft

Uploaded
author p.lucas
date Tue, 28 May 2024 13:04:19 +0000
parents
children bc3dcdb96817
comparison
equal deleted inserted replaced
7:b92784764ecd 8:0908e94a4c84
1 <tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
2 <description>alignment tool for short sequences against a protein database</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="stdio" />
7 <expand macro="version_command" />
8 <!-- Builds the diamond command line. Optional numeric flags are tested with str(...) != "" so that an explicit 0 is still passed on. --> <command detect_errors="aggressive">
9 <![CDATA[
10 ## Link the chosen database to a fixed name so that "--db ./database" works for both sources; paths are quoted for the shell.
11 #if $ref_db_source.db_source == "history":
12 ln -s '$ref_db_source.reference_database' ./database.dmnd
13 #else:
14 ln -s '${ref_db_source.index.fields.db_path}' ./database.dmnd
15 #end if
16
17 &&
18
19 /usr/bin/diamond
20 $method_cond.method_select
21 --quiet
22 --threads "\${GALAXY_SLOTS:-12}"
23 --db ./database
24 --query '$query'
25 #if $method_cond.method_select == "blastx"
26 --query-gencode '$method_cond.query_gencode'
27 --strand '$method_cond.query_strand'
28 --min-orf $method_cond.min_orf
29 #if $method_cond.frameshift_cond.frameshift_select == 'yes'
30 --frameshift $method_cond.frameshift_cond.frameshift
31 $method_cond.frameshift_cond.range_culling
32 #end if
33 #else if $method_cond.method_select == "blastp"
34 $method_cond.no_self_hits
35 #end if
36
37 @OUTPUT_ARGS@
38 ## Every output format except DAA (outfmt 100) is explicitly written uncompressed.
39 #if $output_section.output.outfmt != '100'
40 --compress '0'
41 #end if
42 $sens_cond.sensitivity
43 $iterate
44 --algo $algo
45 #if str($global_ranking) != "":
46 --global-ranking $global_ranking
47 #end if
48 #if str($gapopen) != "":
49 --gapopen '$gapopen'
50 #end if
51 #if str($gapextend) != "":
52 --gapextend '$gapextend'
53 #end if
54 --matrix '$matrix'
55 --comp-based-stats '$method_cond.comp_based_stats'
56 --masking '$masking'
57
58 @HITFILTER_ARGS@
59 ## Exactly one of the two score filters is passed, depending on the selected filter method.
60 #if str($filter_score.filter_score_select) == 'evalue':
61 --evalue '$filter_score.evalue'
62 #else:
63 --min-score '$filter_score.min_score'
64 #end if
65
66 --id '$id'
67 --query-cover '$query_cover'
68 --subject-cover '$subject_cover'
69 --block-size '$sens_cond.block_size'
70 #if $output_section.output_unal
71 #if "--un" in $output_section.output_unal
72 --un '$unalqueries'
73 #if $query.ext.startswith("fasta"):
74 --unfmt fasta
75 #else
76 --unfmt fastq
77 #end if
78 #end if
79 #if "--al" in $output_section.output_unal
80 --al '$alqueries'
81 #if $query.ext.startswith("fasta"):
82 --alfmt fasta
83 #else
84 --alfmt fastq
85 #end if
86 #end if
87 #end if
88 #if str($output_section.max_hsps) != "":
89 --max-hsps $output_section.max_hsps
90 #end if
91 #if $tax_cond.tax_select == 'file':
92 --taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'`
93 #else if $tax_cond.tax_select == 'list':
94 --taxonlist '$tax_cond.taxonlist'
95 #end if
96 #if str($advanced_section.seed_cut) != "":
97 --seed-cut $advanced_section.seed_cut
98 #end if
99 $advanced_section.freq_masking
100 --motif-masking $advanced_section.motif_masking
101 ]]>
102 </command>
103 <inputs>
104 <conditional name="method_cond">
105 <param name="method_select" type="select" label="Alignment mode" help="(blastp/blastx)">
106 <option value="blastp">Amino acid query sequences (blastp)</option>
107 <option value="blastx">DNA query sequences (blastx)</option>
108 </param>
109 <when value="blastx">
110 <param argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help="">
111 <option value="1">Standard Code</option>
112 <option value="2">Vertebrate Mitochondrial Code</option>
113 <option value="3">Yeast Mitochondrial Code</option>
114 <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
115 <option value="5">Invertebrate Mitochondrial Code</option>
116 <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
117 <option value="9">Echinoderm and Flatworm Mitochondrial Code</option>
118 <option value="10">Euplotid Nuclear Code</option>
119 <option value="11">Bacterial, Archaeal and Plant Plastid Code</option>
120 <option value="12">Alternative Yeast Nuclear Code</option>
121 <option value="13">Ascidian Mitochondrial Code</option>
122 <option value="14">Alternative Flatworm Mitochondrial Code</option>
123 <option value="16">Chlorophycean Mitochondrial Code</option>
124 <option value="21">Trematode Mitochondrial Code</option>
125 <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
126 <option value="23">Thraustochytrium Mitochondrial Code</option>
127 <option value="24">Pterobranchia Mitochondrial Code</option>
128 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
129 <option value="26">Pachysolen tannophilus Nuclear Code</option>
130 </param>
131 <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" />
132
133 <param name="query_strand" argument="--strand" type="select" label="query strands to search" help="">
134 <option value="both" selected="True">Both</option>
135 <option value="plus">Plus</option>
136 <option value="minus">Minus</option>
137 </param>
138 <conditional name="frameshift_cond">
139 <param name="frameshift_select" type="select" label="Allow for frameshifts?" help="">
140 <option value="yes">yes</option>
141 <option value="no" selected="true">no</option>
142 </param>
143 <when value="yes">
144 <param argument="--range-culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/>
145 <param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively." />
146 </when>
147 <when value="no"/>
148 </conditional>
149
150 <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST">
151 <option value="0">Disable</option>
152 <option value="1" selected="True">Default mode (Hauser, 2016)</option>
153 </param>
154 </when>
155 <when value="blastp">
156 <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true"
157 label="Suppress reporting of identical self-hits between sequences"
158 help="The FASTA sequence identifiers as well as the sequences of query and target need to be identical for a hit to be deleted"/>
159
160 <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST">
161 <option value="0">Disable</option>
162 <option value="1" selected="True">Default mode (Hauser, 2016)</option>
163 <option value="2">Compositional matrix adjust conditioned on sequence properties, simplified (Yu, 2005)</option>
164 <option value="3">Compositional matrix adjust conditioned on sequence properties (Yu, 2005)</option>
165 <option value="4">Compositional matrix adjust unconditionally (Yu, 2005)</option>
166 </param>
167 </when>
168 </conditional>
169 <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format" />
170 <!-- Source of the DIAMOND database: an entry of the diamond_database data table or a dmnd dataset from the history. The command section links the chosen file to ./database.dmnd. --> <conditional name="ref_db_source">
171 <param name="db_source" type="select" label="Will you select a reference database from your history or use a built-in index?" help="Built-ins were indexed using default options">
172 <option value="indexed">Use a built-in index</option>
173 <option value="history">Use one from the history</option>
174 </param>
175 <when value="indexed">
176 <param name="index" type="select" label="Select a reference database" help="If your database of interest is not listed, contact your Galaxy admin">
177 <options from_data_table="diamond_database">
178 <filter type="sort_by" column="2"/>
179 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
180 </options>
181 </param>
182 </when>
183 <when value="history">
184 <param name="reference_database" argument="--db" type="data" format="dmnd" label="Select the reference database" />
185 </when>
186 </conditional>
187 <!-- Optional taxonomic restriction: none, a manually entered taxon id list, or taxon ids read from a file. The list validator is anchored at both ends because Galaxy applies it with a prefix match. --> <conditional name="tax_cond">
188 <param name="tax_select" type="select" label="Restrict search taxonomically?" help="Any taxonomic rank can be used, and only reference sequences matching one of the specified taxon ids will be searched against.">
189 <option value="no" selected="True">No</option>
190 <option value="list">List of taxids entered manually</option>
191 <option value="file">List of taxids from single column tabular file</option>
192 </param>
193 <when value="no"/>
194 <when value="list">
195 <param name="taxonlist" argument="--taxonlist" type="text" value="" label="Comma separated list of taxon ids" help="">
196 <validator type="regex" message="Taxonlist needs to be a comma separated list of integers">^[0-9]+(,[0-9]+)*$</validator>
197 </param>
198 </when>
199 <when value="file">
200 <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Single column tabular file with taxon ids" help="One taxon id per line; empty lines and lines starting with # are ignored" />
201 </when>
202 </conditional>
203 <!-- Sensitivity preset; the selected option value is inserted into the command line as is. Each branch only expands a block size macro: the two most sensitive modes use the hi_sens variant, all others the low_sens variant. NOTE(review): the macros presumably define the block_size parameter read by the command section; they live in macros.xml and are not visible here. --> <conditional name="sens_cond">
204 <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time.">
205 <option value="--fast">Fast (--fast)</option>
206 <option value="" selected="True">Default</option>
207 <option value="--mid-sensitive">Mid Sensitive (--mid-sensitive)</option>
208 <option value="--sensitive">Sensitive (--sensitive)</option>
209 <option value="--more-sensitive">More Sensitive (--more-sensitive)</option>
210 <option value="--very-sensitive">Very Sensitive (--very-sensitive)</option>
211 <option value="--ultra-sensitive">Ultra Sensitive (--ultra-sensitive)</option>
212 </param>
213 <when value="--fast">
214 <expand macro="block_size_low_sens"/>
215 </when>
216 <when value="">
217 <expand macro="block_size_low_sens"/>
218 </when>
219 <when value="--mid-sensitive">
220 <expand macro="block_size_low_sens"/>
221 </when>
222 <when value="--sensitive">
223 <expand macro="block_size_low_sens"/>
224 </when>
225 <when value="--more-sensitive">
226 <expand macro="block_size_low_sens"/>
227 </when>
228 <when value="--very-sensitive">
229 <expand macro="block_size_hi_sens"/>
230 </when>
231 <when value="--ultra-sensitive">
232 <expand macro="block_size_hi_sens"/>
233 </when>
234 </conditional>
235 <param argument="--matrix" type="select" label="Scoring matrix" help="In parentheses are the supported values for (gap open)/(gap extend). In brackets are default gap penalties">
236 <option value="BLOSUM45">BLOSUM45 ((10-13)/3; (12-16)/2; (16-19)/1) [14/2]</option>
237 <option value="BLOSUM50">BLOSUM50 ((9-13)/3; (12-16)/2; (15-19)/1) [13/2]</option>
238 <option value="BLOSUM62" selected="True">BLOSUM62 ((6-11)/2; (9-13)/1) [11/1]</option>
239 <option value="BLOSUM80">BLOSUM80 ((6-9)/2; 13/2; 25/2; (9-11)/1) [10/1]</option>
240 <option value="BLOSUM90">BLOSUM90 ((6-9)/2; (9-11)/1) [10/1]</option>
241 <option value="PAM250">PAM250 ((11-15)/3; (13-17)/2; (17-21)/1) [14/2]</option>
242 <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1) [10/1]</option>
243 <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1) [9/1]</option>
244 </param>
245 <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="Leave empty for default (see scoring matrix)" />
246 <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="Leave empty for default (see scoring matrix)" />
247 <param argument="--masking" type="select" label="Masking algorithm" help="DIAMOND by default applies the tantan repeat masking algorithm to the query and target sequences as described in (Frith, 2011).
248 This masking procedure increases the specificity of alignments and serves to filter out spurious hits. Note that when using --comp-based-stats (2,3,4), tantan masking is disabled by default.">
249 <option value="0">Disabled</option>
250 <option value="1" selected="true">Tantan</option>
251 <option value="seg">SEG</option>
252 </param>
253 <!-- Alignment score filter: the selected branch decides whether the command section passes an e-value threshold or a minimum bit score. --> <conditional name="filter_score">
254 <param name="filter_score_select" type="select" label="Method to filter?" help="(--evalue/--min-score)">
255 <option value="evalue" selected="True">Maximum e-value to report alignments</option>
256 <option value="min-score">Minimum bit score to report alignments</option>
257 </param>
258 <when value="evalue">
259 <param argument="--evalue" type="float" value="0.001" label="Maximum expected value to keep an alignment" />
260 </when>
261 <when value="min-score">
262 <param name="min_score" argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)" />
263 </when>
264 </conditional>
265 <!-- Boolean flag; when checked its truevalue is inserted into the command line as is. --> <param argument="--iterate" type="boolean" truevalue="--iterate" falsevalue="" checked="false"
266 label="Run multiple rounds of searches with increasing sensitivity" help="The query dataset will first be searched at a lower sensitivity setting, only searching those query sequences at
267 the target sensitivity that fail to produce a significant alignment at a lower sensitivity." />
268 <!-- Seed search algorithm; the selected value is always passed to the algo flag in the command section. --> <param argument="--algo" type="select" label="Algorithm for seed search" help="Double-indexed is the main algorithm of the program, designed for large input files but less efficient for small
269 query files. Query-indexed mode improves performance for small query files. This mode will be automatically triggered based on the input. Contiguous-seed mode further improves performance
270 for small query files. The modes differ slightly in their sensitivity, so results are not guaranteed to be 100% identical for different settings of this option.">
271 <option value="0">Double-indexed (0)</option>
272 <option value="1">Query-indexed (1)</option>
273 <option value="ctg">Contiguous-seed mode (ctg)</option>
274 </param>
275 <expand macro="hit_filter_macro" />
276 <param argument="--global-ranking" type="integer" min="0" value="" optional="true"
277 label="Limit on the number of Smith Waterman extensions" help="Target sequences will be ranked according to their ungapped extension scores at seed hits, and gapped extensions will only
278 be computed for the best N targets for each query. Note that this option increases memory use." />
279 <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="Report only alignments above the given percentage of sequence identity" />
280 <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="Report only alignments above the given percentage of query cover" />
281 <param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="Report only alignments above the given percentage of subject cover"/>
282 <section name="output_section" title="Output options">
283 <param argument="--max-hsps" type="integer" min="0" optional="true" label="Maximum number of HSPs"
284 help="The maximum number of HSPs (High-Scoring Segment Pairs) per target sequence to report for each query. The default policy is to report only the highest-scoring
285 HSP for each target, while disregarding alternative, lower-scoring HSPs that are contained in the same target." />
286 <expand macro="output_type_macro">
287 <!-- Taxonomy features are not supported for the DAA format (i.e.
288 can't be used in diamond view) -->
289 <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option>
290 <option value="sskingdoms">Subject super kingdoms</option>
291 <option value="skingdoms">Subject kingdoms</option>
292 <option value="sphylums">Subject phylums</option>
293 </expand>
294 <!-- Optional multi-select; the command section and the output filters check which of the two values were chosen. --> <param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help="">
295 <option value="--un">Output unaligned queries (--un)</option>
296 <option value="--al">Output aligned queries (--al)</option>
297 </param>
298 </section>
299 <!-- Seed complexity and masking settings, collapsed by default. The motif masking value is always passed to diamond; the other two only when set. --> <section name="advanced_section" title="Advanced options" expanded="false">
300 <param argument="--seed-cut" type="float" min="0" optional="true" label="Set a complexity cutoff for indexed seeds"/>
301 <param argument="--freq-masking" type="boolean" truevalue="--freq-masking" falsevalue="" checked="false" label="Enable masking seeds based on frequency" help="This option is incompatible with --seed-cut" />
302 <param argument="--motif-masking" type="select" label="Softmask abundant motifs" help="Enable or disable motif masking">
303 <option value="0">Disabled</option>
304 <option value="1">Enabled</option>
305 </param>
306 </section>
307 </inputs>
308 <outputs>
309 <expand macro="output_macro" />
310 <!-- Created only when output_unal contains the unaligned-queries value, matching the un branch of the command section. --> <data format_source="query" name="unalqueries" label="${tool.name} on ${on_string}: unaligned queries">
311 <filter>output_section['output_unal'] and "--un" in output_section['output_unal']</filter>
312 </data>
313 <!-- Created only when output_unal contains the aligned-queries value, matching the al branch of the command section. --> <data format_source="query" name="alqueries" label="${tool.name} on ${on_string}: aligned queries">
314 <filter>output_section['output_unal'] and "--al" in output_section['output_unal']</filter>
315 </data>
316 </outputs>
317 <tests>
318 <!--Test 01 blastp against a history database, with aligned and unaligned query outputs-->
319 <test expect_num_outputs="3">
320 <conditional name="method_cond">
321 <param name="method_select" value="blastp" />
322 </conditional>
323 <param name="query" value="protein.fasta" ftype="fasta"/>
324 <conditional name="ref_db_source">
325 <param name="db_source" value="history"/>
326 <param name="reference_database" value="db-wtax.dmnd"/>
327 </conditional>
328 <section name="output_section">
329 <conditional name="output">
330 <param name="outfmt" value="6"/>
331 <!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 -->
332 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/>
333 <param name="unal" value="true"/>
334 </conditional>
335 </section>
336 <conditional name="sens_cond">
337 <param name="sensitivity" value=""/>
338 </conditional>
339 <param name="matrix" value="BLOSUM62"/>
340 <param name="comp_based_stats" value="1"/>
341 <param name="masking" value="1"/>
342 <conditional name="hit_filter">
343 <param name="hit_filter_select" value="max"/>
344 <param name="max_target_seqs" value="25" />
345 </conditional>
346 <conditional name="filter_score">
347 <param name="filter_score_select" value="evalue"/>
348 <param name="evalue" value="0.001" />
349 </conditional>
350 <param name="id" value="0"/>
351 <param name="query_cover" value="0"/>
352 <conditional name="sens_cond">
353 <param name="block_size" value="2"/>
354 </conditional>
355 <param name="output_unal" value="--al,--un"/>
356 <output name="unalqueries">
357 <assert_contents>
358 <has_line line=">shuffled sequence that should go to unaligned"/>
359 </assert_contents>
360 </output>
361 <output name="alqueries">
362 <assert_contents>
363 <has_line line=">sequence more text"/>
364 </assert_contents>
365 </output>
366 <output name="blast_tabular" file="diamond_results.tabular"/>
367 </test>
368 <!--Test 02 blastp restricted to a manually entered taxon id list-->
369 <test expect_num_outputs="1">
370 <conditional name="method_cond">
371 <param name="method_select" value="blastp" />
372 </conditional>
373 <param name="query" value="protein.fasta" ftype="fasta"/>
374 <conditional name="ref_db_source">
375 <param name="db_source" value="history"/>
376 <param name="reference_database" value="db-wtax.dmnd"/>
377 </conditional>
378 <conditional name="tax_cond">
379 <param name="tax_select" value="list"/>
380 <param name="taxonlist" value="2" />
381 </conditional>
382 <section name="output_section">
383 <conditional name="output">
384 <param name="outfmt" value="6"/>
385 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
386 </conditional>
387 </section>
388 <conditional name="sens_cond">
389 <param name="sensitivity" value=""/>
390 </conditional>
391 <param name="matrix" value="BLOSUM62"/>
392 <param name="comp_based_stats" value="1"/>
393 <param name="masking" value="1"/>
394 <conditional name="hit_filter">
395 <param name="hit_filter_select" value="max"/>
396 <param name="max_target_seqs" value="25" />
397 </conditional>
398 <conditional name="filter_score">
399 <param name="filter_score_select" value="evalue"/>
400 <param name="evalue" value="0.001" />
401 </conditional>
402 <param name="id" value="0"/>
403 <param name="query_cover" value="0"/>
404 <conditional name="sens_cond">
405 <param name="block_size" value="2"/>
406 </conditional>
407 <output name="blast_tabular" file="diamond_results.wtax.tabular"/>
408 </test>
409 <!--Test 03 blastx with frameshift alignment and pairwise output, history database-->
410 <test expect_num_outputs="1">
411 <conditional name="method_cond">
412 <param name="method_select" value="blastx" />
413 <conditional name="frameshift_cond">
414 <param name="frameshift_select" value="yes"/>
415 </conditional>
416 </conditional>
417 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
418 <conditional name="ref_db_source">
419 <param name="db_source" value="history"/>
420 <param name="reference_database" value="db.dmnd"/>
421 </conditional>
422 <section name="output_section">
423 <conditional name="output">
424 <param name="outfmt" value="0"/>
425 </conditional>
426 </section>
427 <conditional name="sens_cond">
428 <param name="sensitivity" value=""/>
429 </conditional>
430 <param name="matrix" value="BLOSUM62"/>
431 <param name="comp_based_stats" value="1"/>
432 <param name="masking" value="1"/>
433 <conditional name="hit_filter">
434 <param name="hit_filter_select" value="top"/>
435 <param name="top" value="10" />
436 </conditional>
437 <conditional name="filter_score">
438 <param name="filter_score_select" value="min-score"/>
439 <param name="min_score" value="1" />
440 </conditional>
441 <param name="id" value="0"/>
442 <param name="query_cover" value="0"/>
443 <conditional name="sens_cond">
444 <param name="block_size" value="2"/>
445 </conditional>
446 <output name="blast_tabular" file="diamond_results.pairwise"/>
447 </test>
448 <!--Test 04 blastp with DAA output (outfmt 100)-->
449 <test expect_num_outputs="1">
450 <conditional name="method_cond">
451 <param name="method_select" value="blastp" />
452 </conditional>
453 <param name="query" value="protein.fasta" ftype="fasta"/>
454 <conditional name="ref_db_source">
455 <param name="db_source" value="history"/>
456 <param name="reference_database" value="db-wtax.dmnd"/>
457 </conditional>
458 <section name="output_section">
459 <conditional name="output">
460 <param name="outfmt" value="100"/>
461 <param name="salltitles" value="false"/>
462 <param name="sallseqid" value="false"/>
463 </conditional>
464 </section>
465 <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/>
466 </test>
467 <!--Test 05 blastx with frameshift alignment and pairwise output, built-in index-->
468 <test expect_num_outputs="1">
469 <conditional name="method_cond">
470 <param name="method_select" value="blastx" />
471 <conditional name="frameshift_cond">
472 <param name="frameshift_select" value="yes"/>
473 </conditional>
474 </conditional>
475 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
476 <conditional name="ref_db_source">
477 <param name="db_source" value="indexed"/>
478 <param name="index" value="testDb"/>
479 </conditional>
480 <section name="output_section">
481 <conditional name="output">
482 <param name="outfmt" value="0"/>
483 </conditional>
484 </section>
485 <conditional name="sens_cond">
486 <param name="sensitivity" value=""/>
487 </conditional>
488 <param name="matrix" value="BLOSUM62"/>
489 <param name="comp_based_stats" value="1"/>
490 <param name="masking" value="1"/>
491 <conditional name="hit_filter">
492 <param name="hit_filter_select" value="top"/>
493 <param name="top" value="10" />
494 </conditional>
495 <conditional name="filter_score">
496 <param name="filter_score_select" value="min-score"/>
497 <param name="min_score" value="1" />
498 </conditional>
499 <param name="id" value="0"/>
500 <param name="query_cover" value="0"/>
501 <conditional name="sens_cond">
502 <param name="block_size" value="2"/>
503 </conditional>
504 <output name="blast_tabular" file="diamond_results.pairwise"/>
505 </test>
506 <!-- Test 06 iterate option-->
507 <test expect_num_outputs="1">
508 <conditional name="method_cond">
509 <param name="method_select" value="blastx" />
510 </conditional>
511 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
512 <conditional name="ref_db_source">
513 <param name="db_source" value="indexed"/>
514 <param name="index" value="testDb"/>
515 </conditional>
516 <param name="iterate" value="true"/>
517 <section name="output_section">
518 <conditional name="output">
519 <param name="outfmt" value="6"/>
520 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
521 </conditional>
522 </section>
523 <output name="blast_tabular" file="diamond_results_iterate.tabular"/>
524 </test>
525 <!--Test 07 algo option-->
526 <test expect_num_outputs="1">
527 <conditional name="method_cond">
528 <param name="method_select" value="blastx" />
529 </conditional>
530 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
531 <conditional name="ref_db_source">
532 <param name="db_source" value="indexed"/>
533 <param name="index" value="testDb"/>
534 </conditional>
535 <param name="algo" value="1"/>
536 <section name="output_section">
537 <conditional name="output">
538 <param name="outfmt" value="6"/>
539 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
540 </conditional>
541 </section>
542 <output name="blast_tabular" file="diamond_results_algorithm.tabular"/>
543 </test>
544 <!--Test 08 global-ranking option-->
545 <test expect_num_outputs="1">
546 <conditional name="method_cond">
547 <param name="method_select" value="blastx" />
548 </conditional>
549 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
550 <conditional name="ref_db_source">
551 <param name="db_source" value="indexed"/>
552 <param name="index" value="testDb"/>
553 </conditional>
554 <param name="global_ranking" value="10"/>
555 <section name="output_section">
556 <conditional name="output">
557 <param name="outfmt" value="6"/>
558 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
559 </conditional>
560 </section>
561 <output name="blast_tabular" file="diamond_results_global_ranking.tabular"/>
562 </test>
563 <!--Test 09 max-hsps option-->
564 <test expect_num_outputs="1">
565 <conditional name="method_cond">
566 <param name="method_select" value="blastx" />
567 </conditional>
568 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
569 <conditional name="ref_db_source">
570 <param name="db_source" value="indexed"/>
571 <param name="index" value="testDb"/>
572 </conditional>
573 <param name="max_hsps" value="10"/>
574 <section name="output_section">
575 <conditional name="output">
576 <param name="outfmt" value="6"/>
577 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
578 </conditional>
579 </section>
580 <output name="blast_tabular" file="diamond_results_max_hsps.tabular"/>
581 </test>
582 <!--Test 10 seed-cut option-->
583 <test expect_num_outputs="1">
584 <conditional name="method_cond">
585 <param name="method_select" value="blastx" />
586 </conditional>
587 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
588 <conditional name="ref_db_source">
589 <param name="db_source" value="indexed"/>
590 <param name="index" value="testDb"/>
591 </conditional>
592 <param name="seed_cut" value="100"/>
593 <section name="output_section">
594 <conditional name="output">
595 <param name="outfmt" value="6"/>
596 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
597 </conditional>
598 </section>
599 <output name="blast_tabular" file="diamond_results_seed_cut.tabular"/>
600 </test>
601 <!--Test 11 freq-masking option-->
602 <test expect_num_outputs="1">
603 <conditional name="method_cond">
604 <param name="method_select" value="blastx" />
605 </conditional>
606 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
607 <conditional name="ref_db_source">
608 <param name="db_source" value="indexed"/>
609 <param name="index" value="testDb"/>
610 </conditional>
611 <param name="freq_masking" value="true"/>
612 <section name="output_section">
613 <conditional name="output">
614 <param name="outfmt" value="6"/>
615 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
616 </conditional>
617 </section>
618 <output name="blast_tabular" file="diamond_results_freq_masking.tabular"/>
619 </test>
620 <!--Test 12 motif-masking option-->
621 <test expect_num_outputs="1">
622 <conditional name="method_cond">
623 <param name="method_select" value="blastx" />
624 </conditional>
625 <param name="query" value="nucleotide.fasta" ftype="fasta"/>
626 <conditional name="ref_db_source">
627 <param name="db_source" value="indexed"/>
628 <param name="index" value="testDb"/>
629 </conditional>
630 <section name="advanced_section">
631 <param name="motif_masking" value="1"/>
632 </section>
633 <section name="output_section">
634 <conditional name="output">
635 <param name="outfmt" value="6"/>
636 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
637 </conditional>
638 </section>
639 <output name="blast_tabular" file="diamond_results_motif_masking.tabular"/>
640 </test>
641 </tests>
642 <help>
643 <![CDATA[
644
645 **What it does**
646
647 DIAMOND_ is a new alignment tool for aligning short DNA sequencing reads to a protein reference database such as NCBI-NR.
648 On Illumina reads of length 100-150bp, in fast mode, DIAMOND is about 20,000 times faster than BLASTX, while reporting
649 about 80-90% of all matches that BLASTX finds, with an e-value of at most 1e-5. In sensitive mode, DIAMOND is about 2,500
650 times faster than BLASTX, finding more than 94% of all matches.
651
652 The DIAMOND algorithm is designed for the alignment of large datasets. The algorithm is not efficient for a small number of query sequences or only a single one of them, and speed will be low. BLAST is recommended for small datasets.
653
654 .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/
655
656 **Input**
657
658 Input data is a large protein or nucleotide sequence file.
659
660
661 **Output**
662
663 Diamond gives you a tabular output file with 12 columns:
664
665 Column Description
666 1 Query Seq-id (ID of your sequence)
667 2 Subject Seq-id (ID of the database hit)
668 3 Percentage of identical matches
669 4 Alignment length
670 5 Number of mismatches
671 6 Number of gap openings
672 7 Start of alignment in query
673 8 End of alignment in query
674 9 Start of alignment in subject (database hit)
675 10 End of alignment in subject (database hit)
676 11 Expectation value (E-value)
677 12 Bit score
678
679
680 Supported values for gap open and gap extend parameters depending on the selected scoring matrix.
681
682 ======== ============================================
683 Matrix Supported values for (gap open)/(gap extend)
684 ======== ============================================
685 BLOSUM45 (10-13)/3; (12-16)/2; (16-19)/1
686 BLOSUM50 (9-13)/3; (12-16)/2; (15-19)/1
687 BLOSUM62 (6-11)/2; (9-13)/1
688 BLOSUM80 (6-9)/2; 13/2; 25/2; (9-11)/1
689 BLOSUM90 (6-9)/2; (9-11)/1
690 PAM250 (11-15)/3; (13-17)/2; (17-21)/1
691 PAM70 (6-8)/2; (9-11)/1
692 PAM30 (5-7)/2; (8-10)/1
693 ======== ============================================
694
695
696 ]]>
697 </help>
698 <expand macro="citations" />
699 </tool>