Mercurial > repos > bgruening > diamond

--- a/diamond.xml	Sat Nov 27 09:48:10 2021 +0000
+++ b/diamond.xml	Fri Apr 22 13:51:34 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@.1" profile="19.01">
+<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
     <description>alignment tool for short sequences against a protein database</description>
     <macros>
         <import>macros.xml</import>
@@ -37,9 +37,15 @@

         @OUTPUT_ARGS@

-        --compress '0'
+        #if $output_section.output.outfmt != '100'
+            --compress '0'
+        #end if
         $sens_cond.sensitivity
-
+        $iterate
+        --algo $algo
+        #if $global_ranking
+            --global-ranking $global_ranking
+        #end if
         #if str($gapopen) != "":
           --gapopen '$gapopen'
         #end if
@@ -62,8 +68,8 @@
         --query-cover '$query_cover'
         --subject-cover '$subject_cover'
         --block-size '$sens_cond.block_size'
-        #if $output_unal
-            #if "--un" in $output_unal
+        #if $output_section.output_unal
+            #if "--un" in $output_section.output_unal
                 --un '$unalqueries'
                 #if $query.ext.startswith("fasta"):
                     --unfmt fasta
@@ -71,7 +77,7 @@
                     --unfmt fastq
                 #end if
             #end if
-            #if "--al" in $output_unal
+            #if "--al" in $output_section.output_unal
                 --al '$alqueries'
                 #if $query.ext.startswith("fasta"):
                     --alfmt fasta
@@ -80,33 +86,41 @@
                 #end if
             #end if
         #end if
+        #if str($output_section.max_hsps) != "":
+            --max-hsps $output_section.max_hsps
+        #end if
         #if $tax_cond.tax_select == 'file':
             --taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'`
         #else if  $tax_cond.tax_select == 'list':
             --taxonlist '$tax_cond.taxonlist'
         #end if
+        #if str($advanced_section.seed_cut) != "":
+            --seed-cut $advanced_section.seed_cut
+        #end if
+        $advanced_section.freq_masking
+        --motif-masking $advanced_section.motif_masking
 ]]>
     </command>
     <inputs>
         <conditional name="method_cond">
-            <param name="method_select" type="select" label="What do you want to align?" help="(blastp/blastx)">
-                <option value="blastp">Align amino acid query sequences (blastp)</option>
-                <option value="blastx">Align DNA query sequences (blastx)</option>
+            <param name="method_select" type="select" label="Alignment mode" help="(blastp/blastx)">
+                <option value="blastp">Amino acid query sequences (blastp)</option>
+                <option value="blastx">DNA query sequences (blastx)</option>
             </param>
             <when value="blastx">
                 <param argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help="">
-                    <option value="1">The Standard Code</option>
-                    <option value="2">The Vertebrate Mitochondrial Code</option>
-                    <option value="3">The Yeast Mitochondrial Code</option>
-                    <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
-                    <option value="5">The Invertebrate Mitochondrial Code</option>
-                    <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
-                    <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
-                    <option value="10">The Euplotid Nuclear Code</option>
-                    <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
-                    <option value="12">The Alternative Yeast Nuclear Code</option>
-                    <option value="13">The Ascidian Mitochondrial Code</option>
-                    <option value="14">The Alternative Flatworm Mitochondrial Code</option>
+                    <option value="1">Standard Code</option>
+                    <option value="2">Vertebrate Mitochondrial Code</option>
+                    <option value="3">Yeast Mitochondrial Code</option>
+                    <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+                    <option value="5">Invertebrate Mitochondrial Code</option>
+                    <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
+                    <option value="9">Echinoderm and Flatworm Mitochondrial Code</option>
+                    <option value="10">Euplotid Nuclear Code</option>
+                    <option value="11">Bacterial, Archaeal and Plant Plastid Code</option>
+                    <option value="12">Alternative Yeast Nuclear Code</option>
+                    <option value="13">Ascidian Mitochondrial Code</option>
+                    <option value="14">Alternative Flatworm Mitochondrial Code</option>
                     <option value="16">Chlorophycean Mitochondrial Code</option>
                     <option value="21">Trematode Mitochondrial Code</option>
                     <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
@@ -116,7 +130,7 @@
                     <option value="26">Pachysolen tannophilus Nuclear Code</option>
                 </param>
                 <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" />
-
+
                 <param name="query_strand" argument="--strand" type="select" label="query strands to search" help="">
                     <option value="both" selected="True">Both</option>
                     <option value="plus">Plus</option>
@@ -140,7 +154,9 @@
                 </param>
             </when>
             <when value="blastp">
-                <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/>
+                <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true"
+                    label="Suppress reporting of identical self-hits between sequences"
+                    help="The FASTA sequence identifiers as well as the sequences of query and target need to be identical for a hit to be deleted"/>

                 <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST">
                     <option value="0">Disable</option>
@@ -169,16 +185,25 @@
                 <param name="reference_database" argument="--db" type="data" format="dmnd" label="Select the reference database" />
             </when>
         </conditional>
-        <expand macro="output_type_macro">
-            <!-- Taxonomy features are not supported for the DAA format (i.e.
-                 can't be used in diamond view) -->
-            <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option>
-            <option value="sskingdoms">Subject super kingdoms</option>
-            <option value="skingdoms">Subject kingdoms</option>
-            <option value="sphylums">Subject phylums</option>
-        </expand>
+        <conditional name="tax_cond">
+            <param name="tax_select" type="select" label="Restrict search taxonomically?" help="Any taxonomic rank can be used, and only reference sequences matching one of the specified taxon ids will be searched against.">
+                <option value="no" selected="True">No</option>
+                <option value="list">List of taxids entered manually</option>
+                <option value="file">List of taxids from single column tabular file</option>
+            </param>
+            <when value="no"/>
+            <when value="list">
+                <param name="taxonlist" argument="--taxonlist" type="text" value="" label="Comma separated list of taxon ids" help="">
+                    <validator type="regex" message="Taxonlist needs to be a comma separated list of integers">^[0-9,]*$</validator>
+                </param>
+            </when>
+            <when value="file">
+                <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Single column tabular file with taxon ids" help="One taxon id per line; empty lines and lines starting with # are ignored" />
+            </when>
+        </conditional>
         <conditional name="sens_cond">
             <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time.">
+                <option value="--fast">Fast (--fast)</option>
                 <option value="" selected="True">Default</option>
                 <option value="--mid-sensitive">Mid Sensitive (--mid-sensitive)</option>
                 <option value="--sensitive">Sensitive (--sensitive)</option>
@@ -186,6 +211,9 @@
                 <option value="--very-sensitive">Very Sensitive (--very-sensitive)</option>
                 <option value="--ultra-sensitive">Ultra Sensitive (--ultra-sensitive)</option>
             </param>
+            <when value="--fast">
+                <expand macro="block_size_low_sens"/>
+            </when>
             <when value="">
                 <expand macro="block_size_low_sens"/>
             </when>
@@ -215,26 +243,14 @@
             <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1) [10/1]</option>
             <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1) [9/1]</option>
         </param>
-        <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="leave empty for default (see scoring matrix)" />
-        <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="leave empty for default (see scoring matrix)" />
-        <param argument="--masking" type="boolean" truevalue="1" falsevalue="0" checked="true" label="enable masking of low complexity regions?" help="Masked residues appear in the output as X"/>
-        <conditional name="tax_cond">
-            <param name="tax_select" type="select" label="Restrict search taxonomically?" help="Any taxonomic rank can be used, and only reference sequences matching one of the specified taxon ids will be searched against">
-                <option value="no" selected="True">No</option>
-                <option value="list">list of taxids entered manually</option>
-                <option value="file">list of taxids from single column tabular file</option>
-            </param>
-            <when value="no"/>
-            <when value="list">
-                <param name="taxonlist" argument="--taxonlist" type="text" value="" label="comma separated list of taxon ids" help="">
-                    <validator type="regex" message="taxonlist needs to be a comma separated list of integers">[0-9,]*</validator>
-                    <validator type="expression" message="taxon ids 0 and 1 are not allowed">not ("0" in value.split(",") or "1" in value.split(","))</validator>
-                </param>
-            </when>
-            <when value="file">
-                <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Keep alignments within the given percentage range of the top alignment score for a quer" help="" />
-            </when>
-        </conditional>
+        <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="Leave empty for default (see scoring matrix)" />
+        <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="Leave empty for default (see scoring matrix)" />
+        <param argument="--masking" type="select" label="Masking algorithm" help="DIAMOND by default applies the tantan repeat masking algorithm to the query and target sequences as described in (Frith, 2011).
+            This masking procedure increases the specificity of alignments and serves to filter out spurious hits. Note that when using --comp-based-stats (2,3,4), tantan masking is disabled by default.">
+            <option value="0">Disabled</option>
+            <option value="1" selected="true">Tantan</option>
+            <option value="seg">SEG</option>
+        </param>
         <conditional name="filter_score">
             <param name="filter_score_select" type="select" label="Method to filter?" help="(--evalue/--min-score)">
                 <option value="evalue" selected="True">Maximum e-value to report alignments</option>
@@ -247,25 +263,60 @@
                 <param name="min_score" argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)" />
             </when>
         </conditional>
+        <param argument="--iterate" type="boolean" truevalue="--iterate" falsevalue="" checked="false"
+            label="Run multiple rounds of searches with increasing sensitivity" help="The query dataset will first be searched at a lower sensitivity setting, only searching those query sequences at
+                the target sensitivity that fail to produce a significant alignment at a lower sensitivity." />
+        <param argument="--algo" type="select" label="Algorithm for seed search" help="Double-indexed is the main algorithm of the program, designed for large input files but less efficient for small
+            query files. Query-indexed mode improves performance for small query files. Contiguous-seed mode further improves performance
+            for small query files. The modes differ slightly in their sensitivity, so results are not guaranteed to be 100% identical for different settings of this option.">
+            <option value="0">Double-indexed (0)</option>
+            <option value="1">Query-indexed (1)</option>
+            <option value="ctg">Contiguous-seed mode (ctg)</option>
+        </param>
         <expand macro="hit_filter_macro" />
-        <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="" />
-        <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" />
-        <param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" />
-        <param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help="">
-            <option value="--un">Output unaligned queries (--un)</option>
-            <option value="--al">Output alaligned queries (--al)</option>
-        </param>
+        <param argument="--global-ranking" type="integer" min="0" value="" optional="true"
+            label="Limit on the number of Smith Waterman extensions" help="Target sequences will be ranked according to their ungapped extension scores at seed hits, and gapped extensions will only
+                be computed for the best N targets for each query. Note that this option increases memory use." />
+        <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="Report only alignments above the given percentage of sequence identity" />
+        <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="Report only alignments above the given percentage of query cover" />
+        <param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="Report only alignments above the given percentage of subject cover"/>
+        <section name="output_section" title="Output options">
+            <param argument="--max-hsps" type="integer" min="0" optional="true" label="Maximum number of HSPs"
+                help="The maximum number of HSPs (High-Scoring Segment Pairs) per target sequence to report for each query. The default policy is to report only the highest-scoring
+                    HSP for each target, while disregarding alternative, lower-scoring HSPs that are contained in the same target." />
+            <expand macro="output_type_macro">
+                <!-- Taxonomy features are not supported for the DAA format (i.e.
+                        can't be used in diamond view) -->
+                <option value="staxids">unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option>
+                <option value="sskingdoms">Subject super kingdoms</option>
+                <option value="skingdoms">Subject kingdoms</option>
+                <option value="sphylums">Subject phylums</option>
+            </expand>
+            <param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help="">
+                <option value="--un">Output unaligned queries (--un)</option>
+                <option value="--al">Output aligned queries (--al)</option>
+            </param>
+        </section>
+        <section name="advanced_section" title="Advanced options" expanded="false">
+            <param argument="--seed-cut" type="float" min="0" optional="true" label="Set a complexity cutoff for indexed seeds"/>
+            <param argument="--freq-masking" type="boolean" truevalue="--freq-masking" falsevalue="" checked="false" label="Enable masking seeds based on frequency" help="This option is incompatible with --seed-cut" />
+            <param argument="--motif-masking" type="select" label="Softmask abundant motifs" help="Enable or disable motif masking">
+                <option value="0">Disabled</option>
+                <option value="1">Enabled</option>
+            </param>
+        </section>
     </inputs>
     <outputs>
         <expand macro="output_macro" />
         <data format_source="query" name="unalqueries" label="${tool.name} on ${on_string}: unaligned queries">
-            <filter>output_unal and "--un" in output_unal</filter>
+            <filter>output_section['output_unal'] and "--un" in output_section['output_unal']</filter>
         </data>
         <data format_source="query" name="alqueries" label="${tool.name} on ${on_string}: aligned queries">
-            <filter>output_unal and "--un" in output_unal</filter>
+            <filter>output_section['output_unal'] and "--al" in output_section['output_unal']</filter>
         </data>
     </outputs>
     <tests>
+        <!--Test 01-->
         <test expect_num_outputs="3">
             <conditional name="method_cond">
                 <param name="method_select" value="blastp" />
@@ -275,12 +326,14 @@
                 <param name="db_source" value="history"/>
                 <param name="reference_database" value="db-wtax.dmnd"/>
             </conditional>
-            <conditional name="output">
-                <param name="outfmt" value="6"/>
-                <!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 -->
-                <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/>
-                <param name="unal" value="true"/>
-            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 -->
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/>
+                    <param name="unal" value="true"/>
+                </conditional>
+            </section>
             <conditional name="sens_cond">
                 <param name="sensitivity" value=""/>
             </conditional>
@@ -313,6 +366,7 @@
             </output>
             <output name="blast_tabular" file="diamond_results.tabular"/>
         </test>
+        <!--Test 02-->
         <test expect_num_outputs="1">
             <conditional name="method_cond">
                 <param name="method_select" value="blastp" />
@@ -326,10 +380,12 @@
                 <param name="tax_select" value="list"/>
                 <param name="taxonlist" value="2" />
             </conditional>
-            <conditional name="output">
-                <param name="outfmt" value="6"/>
-                <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
-            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
             <conditional name="sens_cond">
                 <param name="sensitivity" value=""/>
             </conditional>
@@ -351,6 +407,7 @@
             </conditional>
             <output name="blast_tabular" file="diamond_results.wtax.tabular"/>
         </test>
+        <!--Test 03-->
         <test expect_num_outputs="1">
             <conditional name="method_cond">
                 <param name="method_select" value="blastx" />
@@ -363,9 +420,69 @@
                 <param name="db_source" value="history"/>
                 <param name="reference_database" value="db.dmnd"/>
             </conditional>
-            <conditional name="output">
-                <param name="outfmt" value="0"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="0"/>
+                </conditional>
+            </section>
+            <conditional name="sens_cond">
+                <param name="sensitivity" value=""/>
+            </conditional>
+            <param name="matrix" value="BLOSUM62"/>
+            <param name="comp_based_stats" value="1"/>
+            <param name="masking" value="1"/>
+            <conditional name="hit_filter">
+                <param name="hit_filter_select" value="top"/>
+                <param name="top" value="10" />
+            </conditional>
+            <conditional name="filter_score">
+                <param name="filter_score_select" value="min-score"/>
+                <param name="min_score" value="1" />
+            </conditional>
+            <param name="id" value="0"/>
+            <param name="query_cover" value="0"/>
+            <conditional name="sens_cond">
+                <param name="block_size" value="2"/>
+            </conditional>
+            <output name="blast_tabular" file="diamond_results.pairwise"/>
+        </test>
+        <!--Test 04-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastp" />
             </conditional>
+            <param name="query" value="protein.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="history"/>
+                <param name="reference_database" value="db-wtax.dmnd"/>
+            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="100"/>
+                    <param name="salltitles" value="false"/>
+                    <param name="sallseqid" value="false"/>
+                </conditional>
+            </section>
+            <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/>
+        </test>
+        <!--Test 05-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
+                <conditional name="frameshift_cond">
+                    <param name="frameshift_select" value="yes"/>
+                </conditional>
+            </conditional>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
+            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="0"/>
+                </conditional>
+            </section>
             <conditional name="sens_cond">
                 <param name="sensitivity" value=""/>
             </conditional>
@@ -387,57 +504,140 @@
             </conditional>
             <output name="blast_tabular" file="diamond_results.pairwise"/>
         </test>
+        <!-- Test 06 iterate option-->
         <test expect_num_outputs="1">
             <conditional name="method_cond">
-                <param name="method_select" value="blastp" />
+                <param name="method_select" value="blastx" />
             </conditional>
-            <param name="query" value="protein.fasta" ftype="fasta"/>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
             <conditional name="ref_db_source">
-                <param name="db_source" value="history"/>
-                <param name="reference_database" value="db-wtax.dmnd"/>
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
             </conditional>
-            <conditional name="output">
-                <param name="outfmt" value="100"/>
-                <param name="salltitles" value="false"/>
-                <param name="sallseqid" value="false"/>
-            </conditional>
-            <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/>
+            <param name="iterate" value="true"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_iterate.tabular"/>
         </test>
+        <!--Test 07 algo option-->
         <test expect_num_outputs="1">
             <conditional name="method_cond">
                 <param name="method_select" value="blastx" />
-                <conditional name="frameshift_cond">
-                    <param name="frameshift_select" value="yes"/>
+            </conditional>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
+            </conditional>
+            <param name="algo" value="1"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
                 </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_algorithm.tabular"/>
+        </test>
+        <!--Test 08 global-ranking option-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
+            </conditional>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
+            </conditional>
+            <param name="global_ranking" value="10"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_global_ranking.tabular"/>
+        </test>
+        <!--Test 09 max-hsps option-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
             </conditional>
             <param name="query" value="nucleotide.fasta" ftype="fasta"/>
             <conditional name="ref_db_source">
                 <param name="db_source" value="indexed"/>
                 <param name="index" value="testDb"/>
             </conditional>
-            <conditional name="output">
-                <param name="outfmt" value="0"/>
+            <param name="max_hsps" value="10"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_max_hsps.tabular"/>
+        </test>
+        <!--Test 10 seed-cut option-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
             </conditional>
-            <conditional name="sens_cond">
-                <param name="sensitivity" value=""/>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
+            </conditional>
+            <param name="seed_cut" value="100"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_seed_cut.tabular"/>
+        </test>
+        <!--Test 11 freq-masking option-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
             </conditional>
-            <param name="matrix" value="BLOSUM62"/>
-            <param name="comp_based_stats" value="1"/>
-            <param name="masking" value="1"/>
-            <conditional name="hit_filter">
-                <param name="hit_filter_select" value="top"/>
-                <param name="top" value="10" />
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
             </conditional>
-            <conditional name="filter_score">
-                <param name="filter_score_select" value="min-score"/>
-                <param name="min_score" value="1" />
+            <param name="freq_masking" value="true"/>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_freq_masking.tabular"/>
+        </test>
+        <!--Test 12 motif-masking option-->
+        <test expect_num_outputs="1">
+            <conditional name="method_cond">
+                <param name="method_select" value="blastx" />
             </conditional>
-            <param name="id" value="0"/>
-            <param name="query_cover" value="0"/>
-            <conditional name="sens_cond">
-                <param name="block_size" value="2"/>
+            <param name="query" value="nucleotide.fasta" ftype="fasta"/>
+            <conditional name="ref_db_source">
+                <param name="db_source" value="indexed"/>
+                <param name="index" value="testDb"/>
             </conditional>
-            <output name="blast_tabular" file="diamond_results.pairwise"/>
+            <section name="advanced_section">
+                <param name="motif_masking" value="1"/>
+            </section>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+                </conditional>
+            </section>
+            <output name="blast_tabular" file="diamond_results_motif_masking.tabular"/>
         </test>
     </tests>
     <help>
--- a/diamond_makedb.xml	Sat Nov 27 09:48:10 2021 +0000
+++ b/diamond_makedb.xml	Fri Apr 22 13:51:34 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@" profile="19.01">
+<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
     <description>Build database from a FASTA file</description>
     <macros>
         <import>macros.xml</import>
@@ -32,9 +32,12 @@
           <option value="no" selected="true">No</option>
         </param>
         <when value="yes">
-          <param argument="--taxonmap" type="data" format="tabular" label="protein accession to taxid mapping file" help="" />
-          <param argument="--taxonnodes" type="data" format="tabular" label="taxonomy nodes.dmp from NCBI" help="" />
-          <param argument="--taxonnames" type="data" format="tabular" label="taxonomy names.dmp from NCBI" help="" />
+          <param argument="--taxonmap" type="data" format="tabular"
+            label="Protein accession to taxid mapping file"
+            help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional, but it must be supplied in order to provide taxonomy features.
+              A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored." />
+          <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional, but it must be supplied in order to provide taxonomy features." />
+          <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional, but it must be supplied in order to provide taxonomy features." />
         </when>
         <when value="no"/>
       </conditional>
@@ -76,7 +79,7 @@
 .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/


-- taxonmap: Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. The file can be downloaded from NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.FULL.gz
+- taxonmap: Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. The file can be downloaded from NCBI: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz

 - taxonnames: Path to the names.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
--- a/diamond_view.xml	Sat Nov 27 09:48:10 2021 +0000
+++ b/diamond_view.xml	Fri Apr 22 13:51:34 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@.1" profile="19.01">
+<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
     <description>generate formatted output from DAA files</description>
     <macros>
         <import>macros.xml</import>
@@ -21,7 +21,9 @@
     </command>
     <inputs>
         <param argument="--daa" type="data" format="daa" label="input file in DAA format" />
-        <expand macro="output_type_macro" />
+        <section name="output_section" title="Output options">
+            <expand macro="output_type_macro" />
+        </section>
         <expand macro="hit_filter_macro" />
         <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand" help=""/>
     </inputs>
@@ -31,9 +33,11 @@
     <tests>
         <test expect_num_outputs="1">
             <param name="daa" ftype="daa" value="diamond_results.daa" />
-            <conditional name="output">
-                <param name="outfmt" value="5"/>
-            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="5"/>
+                </conditional>
+            </section>
             <conditional name="hit_filter">
                 <param name="hit_filter_select" value="max"/>
                 <param name="max_target_seqs" value="1" />
@@ -42,17 +46,21 @@
         </test>
         <test expect_num_outputs="1">
             <param name="daa" ftype="daa" value="diamond_results.daa" />
-            <conditional name="output">
-                <param name="outfmt" value="6"/>
-                <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp"/>
-            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="6"/>
+                    <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp"/>
+                </conditional>
+            </section>
             <output name="blast_tabular" file="diamond_view_results.tabular"/>
         </test>
         <test expect_num_outputs="1">
             <param name="daa" ftype="daa" value="diamond_results.daa" />
-            <conditional name="output">
-                <param name="outfmt" value="101"/>
-            </conditional>
+            <section name="output_section">
+                <conditional name="output">
+                    <param name="outfmt" value="101"/>
+                </conditional>
+            </section>
             <conditional name="hit_filter">
                 <param name="hit_filter_select" value="top"/>
                 <param name="max_target_seqs" value="1" />
--- a/macros.xml	Sat Nov 27 09:48:10 2021 +0000
+++ b/macros.xml	Fri Apr 22 13:51:34 2022 +0000
@@ -1,6 +1,6 @@
 <macros>
-    <token name="@TOOL_VERSION@">2.0.8</token>
-
+    <token name="@TOOL_VERSION@">2.0.15</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
           <requirement type="package" version="@TOOL_VERSION@">diamond</requirement>
@@ -19,7 +19,7 @@

     <xml name="output_type_macro">
         <conditional name="output">
-            <param argument="--outfmt" type="select" label="Format of output file " help="">
+            <param argument="--outfmt" type="select" label="Format of output file" help="">
                 <option value="0">BLAST pairwise</option>
                 <option value="5">BLAST XML</option>
                 <option value="6">BLAST tabular</option>
@@ -90,21 +90,26 @@
                 <option value="top">Percentage of top alignment score</option>
             </param>
             <when value="max">
-                <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" help="Setting this to 0 will report all alignments that were found." />
+                <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for"
+                    help="Setting this to 0 will report all alignments that were found." />
             </when>
             <when value="top">
-                <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" help="For example, setting this to 10 will report all align-
-ments whose score is at most 10% lower than the best alignment score for a query." />
+                <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query"
+                    help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query." />
             </when>
         </conditional>
     </xml>

     <xml name="block_size_low_sens">
-        <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" />
+        <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time"
+            help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary
+                disk space, but also improve performance" />
     </xml>

     <xml name="block_size_hi_sens">
-        <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" help="" />
+        <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time"
+            help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary
+                disk space, but also improve performance" />
     </xml>

     <xml name="citations">
@@ -116,48 +121,48 @@

     <xml name="output_macro">
         <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "0"</filter>
+            <filter>output_section["output"]["outfmt"] == "0"</filter>
         </data>
         <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "5"</filter>
+            <filter>output_section["output"]["outfmt"] == "5"</filter>
         </data>
         <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "6"</filter>
+            <filter>output_section["output"]["outfmt"] == "6"</filter>
         </data>
         <!-- for daa diamond appends the .daa extension -> hence from_work_dir -->
         <data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa">
-            <filter>output["outfmt"] == "100"</filter>
+            <filter>output_section["output"]["outfmt"] == "100"</filter>
         </data>
         <data format="sam" name="sam_output" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "101"</filter>
+            <filter>output_section["output"]["outfmt"] == "101"</filter>
         </data>
         <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}">
-            <filter>output["outfmt"] == "102"</filter>
+            <filter>output_section["output"]["outfmt"] == "102"</filter>
         </data>
     </xml>

     <token name="@OUTPUT_ARGS@">
-        #if $output.outfmt == "0"
+        #if $output_section.output.outfmt == "0"
             --outfmt '0'
             --out '$blast_pairw'
-        #else if $output.outfmt == "5"
+        #else if $output_section.output.outfmt == "5"
             --outfmt '5'
             --out '$blast_xml'
-        #else if $output.outfmt == "6"
-            --outfmt '6' #echo ' '.join(str($output.fields).split(','))
+        #else if $output_section.output.outfmt == "6"
+            --outfmt '6' #echo ' '.join(str($output_section.output.fields).split(','))
             --out '$blast_tabular'
-            --unal $output.unal
-        #else if $output.outfmt == "100"
+            --unal $output_section.output.unal
+        #else if $output_section.output.outfmt == "100"
             --outfmt '100'
-            $output.salltitles
-            $output.sallseqid
+            $output_section.output.salltitles
+            $output_section.output.sallseqid
             --out output.daa
-        #else if $output.outfmt == "101"
+        #else if $output_section.output.outfmt == "101"
             --outfmt '101'
-            $output.salltitles
-            $output.sallseqid
+            $output_section.output.salltitles
+            $output_section.output.sallseqid
             --out '$sam_output'
-        #else if $output.outfmt == "102"
+        #else if $output_section.output.outfmt == "102"
             --outfmt '102'
             --out '$tax_output'
         #end if
Binary file test-data/db-wtax.dmnd has changed
Binary file test-data/db.dmnd has changed
Binary file test-data/diamond_results.daa has changed
--- a/test-data/diamond_results.sam	Sat Nov 27 09:48:10 2021 +0000
+++ b/test-data/diamond_results.sam	Fri Apr 22 13:51:34 2022 +0000
@@ -1,5 +1,5 @@
 @HD	VN:1.5	SO:query
-@PG	PN:DIAMOND	VN:2.0.8	CL:diamond view --threads 1 --daa input.daa --outfmt 101 --out /tmp/tmpz1aqzru3/files/3/f/6/dataset_3f6f43ac-3af2-4ec2-93be-9ced0e692b43.dat --top 0 --forwardonly --compress 0
+@PG	PN:DIAMOND	VN:2.0.15	CL:diamond view --threads 1 --daa input.daa --outfmt 101 --salltitles --sallseqid --out /tmp/tmpuqw24dac/files/e/4/b/dataset_e4b47568-a2e4-4ec1-ac5f-f266085686a4.dat --top 0 --forwardonly --compress 0
 @mm	BlastP
 @CO	BlastP-like alignments
 @CO	Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate
--- a/test-data/diamond_results.xml	Sat Nov 27 09:48:10 2021 +0000
+++ b/test-data/diamond_results.xml	Fri Apr 22 13:51:34 2022 +0000
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastp</BlastOutput_program>
-  <BlastOutput_version>diamond 2.0.8</BlastOutput_version>
+  <BlastOutput_version>diamond 2.0.15</BlastOutput_version>
   <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), &quot;Fast and sensitive protein alignment using DIAMOND&quot;, Nature Methods 12:59-60.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
@@ -28,7 +28,7 @@
   <Hit_num>1</Hit_num>
   <Hit_id>gi|5524211|gb|AAD44166.1|</Hit_id>
   <Hit_def></Hit_def>
-  <Hit_accession>AAD44166.1</Hit_accession>
+  <Hit_accession>AAD44166</Hit_accession>
   <Hit_len>284</Hit_len>
   <Hit_hsps>
     <Hsp>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_algorithm.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -0,0 +1,2 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	849	1	284	1.44e-205	550
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	849	1	284	5.77e-150	409
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_freq_masking.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -0,0 +1,2 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	849	1	284	1.44e-205	550
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	849	1	284	5.77e-150	409
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_global_ranking.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -0,0 +1,2 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	849	1	284	1.44e-205	550
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	849	1	284	5.77e-150	409
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_iterate.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -0,0 +1,2 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	849	1	284	1.44e-205	550
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	849	1	284	5.77e-150	409
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_max_hsps.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -0,0 +1,2 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	849	1	284	1.44e-205	550
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	849	1	284	5.77e-150	409
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_motif_masking.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -0,0 +1,2 @@
+sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	849	1	284	1.44e-205	550
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	849	1	284	5.77e-150	409
--- a/test-data/diamond_view_results.tabular	Sat Nov 27 09:48:10 2021 +0000
+++ b/test-data/diamond_view_results.tabular	Fri Apr 22 13:51:34 2022 +0000
@@ -1,2 +1,2 @@
 sequence	gi|5524211|gb|AAD44166.1|	99.6	284	0	1	1	283	1	284	1.44e-205	550	94M1D189M	100
-sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	283	1	284	5.77e-150	409	83M1D200M	100
+sequence	gi|5524212|gb|AAD44167.1|	79.6	284	57	1	1	283	1	284	5.77e-150	409	105M1D178M	100