Mercurial > repos > bgruening > diamond
changeset 12:60f307965815 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diamond commit 62db819c1db857d3fba94dc4e290ee0f50f7928d
author | iuc |
---|---|
date | Mon, 03 Feb 2025 16:01:01 +0000 |
parents | e8ac2b53f262 |
children | |
files | diamond.xml diamond_makedb.xml diamond_view.xml macros.xml test-data/db-wtax.dmnd test-data/db.dmnd test-data/diamond_log.txt test-data/diamond_results.daa test-data/diamond_results.sam test-data/diamond_results.tabular test-data/diamond_results.xml test-data/diamond_results_log_test.tabular test-data/diamond_results_soft_masking.tabular test-data/diamond_results_soft_masking_memory.tabular test-data/diamond_results_swipe.tabular |
diffstat | 15 files changed, 491 insertions(+), 172 deletions(-) [+] |
line wrap: on
line diff
--- a/diamond.xml Thu Nov 02 11:14:39 2023 +0000 +++ b/diamond.xml Mon Feb 03 16:01:01 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01"> +<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2" license="GPL-3.0"> <description>alignment tool for short sequences against a protein database</description> <macros> <import>macros.xml</import> @@ -6,9 +6,9 @@ <xrefs> <xref type="bio.tools">diamond</xref> </xrefs> - <expand macro="requirements" /> - <expand macro="stdio" /> - <expand macro="version_command" /> + <expand macro="requirements"/> + <expand macro="stdio"/> + <expand macro="version_command"/> <command detect_errors="aggressive"> <![CDATA[ @@ -45,6 +45,7 @@ #end if $sens_cond.sensitivity $iterate + $swipe --algo $algo #if $global_ranking --global-ranking $global_ranking @@ -102,6 +103,11 @@ #end if $advanced_section.freq_masking --motif-masking $advanced_section.motif_masking + --soft-masking $advanced_section.soft_masking + --index-chunks "\${DIAMOND_INDEX_CHUNKS:-4}" + --file-buffer-size "\${DIAMOND_FILE_BUFFER_SIZE:-67108864}" + $log + ]]> </command> <inputs> @@ -131,9 +137,14 @@ <option value="24">Pterobranchia Mitochondrial Code</option> <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> <option value="26">Pachysolen tannophilus Nuclear Code</option> + <option value="27">Karyorelict Nuclear Code</option> + <option value="28">Condylostoma Nuclear Code</option> + <option value="29">Mesodinium Nuclear Code</option> + <option value="30">Peritrich Nuclear Code</option> + <option value="31">Blastocrithidia Nuclear Code</option> + <option value="33">Cephalodiscidae Mitochondrial UAA-Tyr Code</option> </param> - <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length 
below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" /> - + <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature"/> <param name="query_strand" argument="--strand" type="select" label="query strands to search" help=""> <option value="both" selected="True">Both</option> <option value="plus">Plus</option> @@ -146,21 +157,17 @@ </param> <when value="yes"> <param argument="--range-culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/> - <param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. 
In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively." /> + <param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively."/> </when> <when value="no"/> </conditional> - <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> <option value="0">Disable</option> <option value="1" selected="True">Default mode (Hauser, 2016)</option> </param> </when> <when value="blastp"> - <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" - label="Suppress reporting of identical self-hits between sequences" - help="The FASTA sequence identifiers as well as the sequences of query and target need to be identical for a hit to be deleted"/> - + <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="Suppress reporting of identical self-hits between sequences" help="The FASTA sequence identifiers as well as the sequences of query and target need to be identical for a hit to be deleted"/> <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> <option 
value="0">Disable</option> <option value="1" selected="True">Default mode (Hauser, 2016)</option> @@ -170,7 +177,7 @@ </param> </when> </conditional> - <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format" /> + <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format"/> <conditional name="ref_db_source"> <param name="db_source" type="select" label="Will you select a reference database from your history or use a built-in index?" help="Built-ins were indexed using default options"> <option value="indexed">Use a built-in index</option> @@ -185,7 +192,7 @@ </param> </when> <when value="history"> - <param name="reference_database" argument="--db" type="data" format="dmnd" label="Select the reference database" /> + <param name="reference_database" argument="--db" type="data" format="dmnd" label="Select the reference database"/> </when> </conditional> <conditional name="tax_cond"> @@ -196,16 +203,17 @@ </param> <when value="no"/> <when value="list"> - <param name="taxonlist" argument="--taxonlist" type="text" value="" label="Comma separated list of taxon ids" help=""> + <param argument="--taxonlist" type="text" value="" label="Comma separated list of taxon ids" help=""> <validator type="regex" message="Taxonlist needs to be a comma separated list of integers">[0-9,]*</validator> </param> </when> <when value="file"> - <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Keep alignments within the given percentage range of the top alignment score for a quer" help="" /> + <param name="taxonlistfile" argument="--taxonlist" type="data" format="tabular" label="Keep alignments within the given percentage range of the top alignment score for a quer" help=""/> </when> </conditional> <conditional name="sens_cond"> - <param name='sensitivity' type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. 
The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time."> + <param name="sensitivity" type="select" label="Sensitivity Mode" help="Choose one of the sensitivity modes. The default mode is mainly designed for short read alignment, i.e. finding significant matches of >50 bits on 30-40aa fragments. The sensitive mode is a lot more sensitive than the default and generally recommended for aligning longer sequences. The more sensitive mode provides even more sensitivity. More sensitivity may increase computation time."> + <option value="--faster">Faster (--faster)</option> <option value="--fast">Fast (--fast)</option> <option value="" selected="True">Default</option> <option value="--mid-sensitive">Mid Sensitive (--mid-sensitive)</option> @@ -214,6 +222,9 @@ <option value="--very-sensitive">Very Sensitive (--very-sensitive)</option> <option value="--ultra-sensitive">Ultra Sensitive (--ultra-sensitive)</option> </param> + <when value="--faster"> + <expand macro="block_size_low_sens"/> + </when> <when value="--fast"> <expand macro="block_size_low_sens"/> </when> @@ -246,10 +257,9 @@ <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1) [10/1]</option> <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1) [9/1]</option> </param> - <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="Leave empty for default (see scoring matrix)" /> - <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="Leave empty for default (see scoring matrix)" /> - <param argument="--masking" type="select" label="Masking algorithm" help="DIAMOND by default applies the tantan repeat masking algorithm to the query and target 
sequences as described in (Frith, 2011). - This masking procedure increases the specificity of alignments and serves to filter out spurious hits. Note that when using --comp-based-stats (2,3,4), tantan masking is disabled by default."> + <param argument="--gapopen" type="integer" optional="True" value="" label="Gap open penalty" help="Leave empty for default (see scoring matrix)"/> + <param argument="--gapextend" type="integer" optional="True" value="" label="Gap extension penalty" help="Leave empty for default (see scoring matrix)"/> + <param argument="--masking" type="select" label="Masking algorithm" help="DIAMOND by default applies the tantan repeat masking algorithm to the query and target sequences as described in (Frith, 2011). This masking procedure increases the specificity of alignments and serves to filter out spurious hits. Note that when using --comp-based-stats (2,3,4), tantan masking is disabled by default."> <option value="0">Disabled</option> <option value="1" selected="true">Tantan</option> <option value="seg">SEG</option> @@ -260,33 +270,26 @@ <option value="min-score">Minimum bit score to report alignments</option> </param> <when value="evalue"> - <param argument="--evalue" type="float" value="0.001" label="Maximum expected value to keep an alignment" /> + <param argument="--evalue" type="float" value="0.001" label="Maximum expected value to keep an alignment"/> </when> <when value="min-score"> - <param name="min_score" argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)" /> + <param argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)"/> </when> </conditional> - <param argument="--iterate" type="boolean" truevalue="--iterate" falsevalue="" checked="false" - label="Run multiple rounds of searches with increasing sensitivity" help="he query dataset will first be searched at a lower sensitivity setting, only searching those 
query sequences at - the target sensitivity that fail to produce a significant alignment at a lower sensitivity." /> - <param argument="--algo" type="select" label="Algorithm for seed search" help="Double-indexed is the main algorithm of the program, designed for large input files but less efficient for small - query files. Query-indexed and improves performance for small query files. This mode will be automatically triggered based on the input. Contiguous-seed mode and further improves performance - for small query files. The modes differ slightly in their sensitivity, so results are not guaranteed to be 100% identical for different settings of this option."> + <param argument="--swipe" type="boolean" truevalue="--swipe" falsevalue="" checked="false" label="Run Exhaustive alignment against all database sequences" help="Smith Waterman alignments of all queries will be computed against all targets."/> + <param argument="--iterate" type="boolean" truevalue="--iterate" falsevalue="" checked="false" label="Run multiple rounds of searches with increasing sensitivity" help="The query dataset will first be searched at a lower sensitivity setting, only searching those query sequences at the target sensitivity that fail to produce a significant alignment at a lower sensitivity."/> + <param argument="--algo" type="select" label="Algorithm for seed search" help="Double-indexed is the main algorithm of the program, designed for large input files but less efficient for small query files. Query-indexed mode improves performance for small query files. This mode will be automatically triggered based on the input. Contiguous-seed mode further improves performance for small query files.
The modes differ slightly in their sensitivity, so results are not guaranteed to be 100% identical for different settings of this option."> <option value="0">Double-indexed (0)</option> <option value="1">Query-indexed (1)</option> <option value="ctg">Contiguous-seed mode (ctg)</option> </param> - <expand macro="hit_filter_macro" /> - <param argument="--global-ranking" type="integer" min="0" value="" optional="true" - label="Limit on the number of Smith Waterman extensions" help="Target sequences will be ranked according to their ungapped extension scores at seed hits, and gapped extensions will only - be computed for the best N targets for each query. Note that this option increases memory use." /> - <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="Report only alignments above the given percentage of sequence identity" /> - <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="Report only alignments above the given percentage of query cover" /> + <expand macro="hit_filter_macro"/> + <param argument="--global-ranking" type="integer" min="0" value="" optional="true" label="Limit on the number of Smith Waterman extensions" help="Target sequences will be ranked according to their ungapped extension scores at seed hits, and gapped extensions will only be computed for the best N targets for each query.
Note that this option increases memory use."/> + <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="Report only alignments above the given percentage of sequence identity"/> + <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="Report only alignments above the given percentage of query cover"/> <param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="Report only alignments above the given percentage of subject cover"/> <section name="output_section" title="Output options"> - <param argument="--max-hsps" type="integer" min="0" optional="true" label="Maximum number of HSPs" - help="The maximum number of HSPs (High-Scoring Segment Pairs) per target sequence to report for each query. The default policy is to report only the highest-scoring - HSP for each target, while disregarding alternative, lower-scoring HSPs that are contained in the same target." /> + <param argument="--max-hsps" type="integer" min="0" optional="true" label="Maximum number of HSPs" help="The maximum number of HSPs (High-Scoring Segment Pairs) per target sequence to report for each query. The default policy is to report only the highest-scoring HSP for each target, while disregarding alternative, lower-scoring HSPs that are contained in the same target."/> <expand macro="output_type_macro"> <!-- Taxonomy features are not supported for the DAA format (i.e. 
can't be used in diamond view) --> @@ -297,12 +300,19 @@ </expand> <param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help=""> <option value="--un">Output unaligned queries (--un)</option> - <option value="--al">Output alaligned queries (--al)</option> + <option value="--al">Output aligned queries (--al)</option> </param> + <param argument="--log" type="boolean" truevalue="--log" falsevalue="" label="Output log file"/> </section> + <section name="advanced_section" title="Advanced options" expanded="false"> <param argument="--seed-cut" type="float" min="0" optional="true" label="Set a complexity cutoff for indexed seeds"/> - <param argument="--freq-masking" type="boolean" truevalue="--freq-masking" falsevalue="" checked="false" label="Enable masking seeds based on frequency" help="This option is incompatible with --sed-cut" /> + <param argument="--freq-masking" type="boolean" truevalue="--freq-masking" falsevalue="" checked="false" label="Enable masking seeds based on frequency" help="This option is incompatible with --seed-cut"/> + <param argument="--soft-masking" type="select" label="Soft Masking" help="Select type of soft masking"> + <option value="0" selected="True">Disabled</option> + <option value="seg">seg</option> + <option value="tantan">tantan</option> + </param> <param argument="--motif-masking" type="select" label="Softmask abundant motifs" help="Enable or disable motif masking"> <option value="0">Disabled</option> <option value="1">Enabled</option> @@ -310,19 +320,22 @@ </section> </inputs> <outputs> - <expand macro="output_macro" /> + <expand macro="output_macro"/> <data format_source="query" name="unalqueries" label="${tool.name} on ${on_string}: unaligned queries"> <filter>output_section['output_unal'] and "--un" in output_section['output_unal']</filter> </data> <data format_source="query" name="alqueries" label="${tool.name} on ${on_string}: aligned queries"> -
<filter>output_section['output_unal'] and "--un" in output_section['output_unal']</filter> + <filter>output_section['output_unal'] and "--al" in output_section['output_unal']</filter> + </data> + <data name="log_file" format="txt" label="Diamond log file" from_work_dir="diamond.log"> + <filter>output_section['log']</filter> </data> </outputs> <tests> <!--Test 01--> <test expect_num_outputs="3"> <conditional name="method_cond"> - <param name="method_select" value="blastp" /> + <param name="method_select" value="blastp"/> </conditional> <param name="query" value="protein.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -332,8 +345,7 @@ <section name="output_section"> <conditional name="output"> <param name="outfmt" value="6"/> - <!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 --> - <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums,cigar"/> <param name="unal" value="true"/> </conditional> </section> @@ -345,11 +357,11 @@ <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="max"/> - <param name="max_target_seqs" value="25" /> + <param name="max_target_seqs" value="25"/> </conditional> <conditional name="filter_score"> <param name="filter_score_select" value="evalue"/> - <param name="evalue" value="0.001" /> + <param name="evalue" value="0.001"/> </conditional> <param name="id" value="0"/> <param name="query_cover" value="0"/> @@ -359,12 +371,12 @@ <param name="output_unal" value="--al,--un"/> <output name="unalqueries"> <assert_contents> - <has_line line=">shuffled sequence that should go to unaligned"/> + <has_line line=">shuffled sequence that should go to unaligned"/> </assert_contents> </output> <output 
name="alqueries"> <assert_contents> - <has_line line=">sequence more text"/> + <has_line line=">sequence more text"/> </assert_contents> </output> <output name="blast_tabular" file="diamond_results.tabular"/> @@ -372,7 +384,7 @@ <!--Test 02--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastp" /> + <param name="method_select" value="blastp"/> </conditional> <param name="query" value="protein.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -381,7 +393,7 @@ </conditional> <conditional name="tax_cond"> <param name="tax_select" value="list"/> - <param name="taxonlist" value="2" /> + <param name="taxonlist" value="2"/> </conditional> <section name="output_section"> <conditional name="output"> @@ -397,11 +409,11 @@ <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="max"/> - <param name="max_target_seqs" value="25" /> + <param name="max_target_seqs" value="25"/> </conditional> <conditional name="filter_score"> <param name="filter_score_select" value="evalue"/> - <param name="evalue" value="0.001" /> + <param name="evalue" value="0.001"/> </conditional> <param name="id" value="0"/> <param name="query_cover" value="0"/> @@ -413,7 +425,7 @@ <!--Test 03--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> <conditional name="frameshift_cond"> <param name="frameshift_select" value="yes"/> </conditional> @@ -436,11 +448,11 @@ <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="top"/> - <param name="top" value="10" /> + <param name="top" value="10"/> </conditional> <conditional name="filter_score"> <param name="filter_score_select" value="min-score"/> - <param name="min_score" value="1" /> + <param name="min_score" value="1"/> </conditional> <param name="id" value="0"/> <param name="query_cover" 
value="0"/> @@ -452,7 +464,7 @@ <!--Test 04--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastp" /> + <param name="method_select" value="blastp"/> </conditional> <param name="query" value="protein.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -471,7 +483,7 @@ <!--Test 05--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> <conditional name="frameshift_cond"> <param name="frameshift_select" value="yes"/> </conditional> @@ -494,11 +506,11 @@ <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="top"/> - <param name="top" value="10" /> + <param name="top" value="10"/> </conditional> <conditional name="filter_score"> <param name="filter_score_select" value="min-score"/> - <param name="min_score" value="1" /> + <param name="min_score" value="1"/> </conditional> <param name="id" value="0"/> <param name="query_cover" value="0"/> @@ -510,7 +522,7 @@ <!-- Test 06 iterate option--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -526,10 +538,29 @@ </section> <output name="blast_tabular" file="diamond_results_iterate.tabular"/> </test> - <!--Test 07 algo option--> + <!-- Test 07 swipe option--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> + </conditional> + <param name="query" value="nucleotide.fasta" ftype="fasta"/> + <conditional name="ref_db_source"> + <param name="db_source" value="indexed"/> + <param name="index" value="testDb"/> + </conditional> + <param name="swipe" value="true"/> + <section 
name="output_section"> + <conditional name="output"> + <param name="outfmt" value="6"/> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> + </conditional> + </section> + <output name="blast_tabular" file="diamond_results_swipe.tabular"/> + </test> + <!--Test 08 algo option--> + <test expect_num_outputs="1"> + <conditional name="method_cond"> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -545,10 +576,10 @@ </section> <output name="blast_tabular" file="diamond_results_algorithm.tabular"/> </test> - <!--Test 08 global-ranking option--> + <!-- Test 09 global-ranking option --> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -564,10 +595,10 @@ </section> <output name="blast_tabular" file="diamond_results_global_ranking.tabular"/> </test> - <!--Test 09 max-hsps option--> + <!--Test 10 max-hsps option--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -583,10 +614,10 @@ </section> <output name="blast_tabular" file="diamond_results_max_hsps.tabular"/> </test> - <!--Test 10 seed-cut option--> + <!--Test 11 seed-cut option--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -602,10 +633,10 @@ </section> <output name="blast_tabular" 
file="diamond_results_seed_cut.tabular"/> </test> - <!--Test 11 freq-masking option--> + <!--Test 12 freq-masking option--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -621,10 +652,10 @@ </section> <output name="blast_tabular" file="diamond_results_freq_masking.tabular"/> </test> - <!--Test 12 motif-masking option--> + <!--Test 13 motif-masking option--> <test expect_num_outputs="1"> <conditional name="method_cond"> - <param name="method_select" value="blastx" /> + <param name="method_select" value="blastx"/> </conditional> <param name="query" value="nucleotide.fasta" ftype="fasta"/> <conditional name="ref_db_source"> @@ -642,6 +673,54 @@ </section> <output name="blast_tabular" file="diamond_results_motif_masking.tabular"/> </test> + <!--Test 14 soft-masking option--> + <test expect_num_outputs="1"> + <conditional name="method_cond"> + <param name="method_select" value="blastx"/> + </conditional> + <param name="query" value="nucleotide.fasta" ftype="fasta"/> + <conditional name="ref_db_source"> + <param name="db_source" value="indexed"/> + <param name="index" value="testDb"/> + </conditional> + <section name="advanced_section"> + <param name="soft_masking" value="0"/> + </section> + <section name="output_section"> + <conditional name="output"> + <param name="outfmt" value="6"/> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> + </conditional> + </section> + <output name="blast_tabular" file="diamond_results_soft_masking.tabular"/> + </test> + <!--Test 15 Log option--> + <test expect_num_outputs="2"> + <conditional name="method_cond"> + <param name="method_select" value="blastx"/> + </conditional> + <param name="query" value="nucleotide.fasta" ftype="fasta"/> + <conditional 
name="ref_db_source"> + <param name="db_source" value="indexed"/> + <param name="index" value="testDb"/> + </conditional> + <section name="output_section"> + <conditional name="output"> + <param name="outfmt" value="6"/> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/> + </conditional> + <param name="log" value="true"/> + </section> + <output name="blast_tabular" file="diamond_results_log_test.tabular"/> + <output name="log_file"> + <assert_contents> + <has_n_lines n="261"/> + <has_text text="diamond blastx --quiet"/> + <has_text text="--log"/> + <has_line line="Sequences = 6, letters = 1694, average length = 282"/> + </assert_contents> + </output> + </test> </tests> <help> <![CDATA[ @@ -699,5 +778,5 @@ ]]> </help> - <expand macro="citations" /> + <expand macro="citations"/> </tool>
--- a/diamond_makedb.xml Thu Nov 02 11:14:39 2023 +0000 +++ b/diamond_makedb.xml Mon Feb 03 16:01:01 2025 +0000 @@ -1,18 +1,16 @@ -<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01"> +<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2" license="GPL-3.0"> <description>Build database from a FASTA file</description> <macros> <import>macros.xml</import> </macros> - - <expand macro="requirements" /> - <expand macro="stdio" /> - <expand macro="version_command" /> - + <expand macro="requirements"/> + <expand macro="stdio"/> + <expand macro="version_command"/> <command detect_errors="aggressive"> - <!-- DB has two files, *.dmnd and *.tx --> + <!-- DB has two files, *.dmnd and *.tx --> <![CDATA[ diamond makedb - --threads "\${GALAXY_SLOTS:-12}" + --threads \${GALAXY_SLOTS:-12} --in '$infile' --db ./database @@ -23,30 +21,24 @@ #end if ]]> </command> - <inputs> - <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" /> - <conditional name="tax_cond"> - <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - <when value="yes"> - <param argument="--taxonmap" type="data" format="tabular" - label="Protein accession to taxid mapping file" - help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. - A custom file following the same format may be supplied here. 
Note that the first line of this file is assumed to contain headings and will be ignored" /> - <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" /> - <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" /> - </when> - <when value="no"/> - </conditional> + <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format"/> + <conditional name="tax_cond"> + <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner"> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + <when value="yes"> + <param argument="--taxonmap" type="data" format="tabular" label="Protein accession to taxid mapping file" help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. A custom file following the same format may be supplied here. 
Note that the first line of this file is assumed to contain headings and will be ignored"/> + <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/> + <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/> + </when> + <when value="no"/> + </conditional> </inputs> - <outputs> <data format="dmnd" name="outfile" from_work_dir="database.dmnd" label="${tool.name} on ${on_string}"/> </outputs> - <tests> <test> <param name="infile" value="db.fasta" ftype="fasta"/> @@ -56,14 +48,13 @@ <param name="infile" value="db.fasta" ftype="fasta"/> <conditional name="tax_cond"> <param name="tax_select" value="yes"/> - <param name="taxonmap" ftype="tabular" value="prot.accession2taxid" /> - <param name="taxonnodes" ftype="tabular" value="nodes.dmp" /> - <param name="taxonnames" ftype="tabular" value="names.dmp" /> + <param name="taxonmap" ftype="tabular" value="prot.accession2taxid"/> + <param name="taxonnodes" ftype="tabular" value="nodes.dmp"/> + <param name="taxonnames" ftype="tabular" value="names.dmp"/> </conditional> <output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/> </test> </tests> - <help> <![CDATA[ @@ -86,6 +77,5 @@ - taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip ]]> </help> - - <expand macro="citations" /> + <expand macro="citations"/> </tool>
--- a/diamond_view.xml Thu Nov 02 11:14:39 2023 +0000 +++ b/diamond_view.xml Mon Feb 03 16:01:01 2025 +0000 @@ -1,11 +1,11 @@ -<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01"> +<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2" license="GPL-3.0"> <description>generate formatted output from DAA files</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements" /> - <expand macro="stdio" /> - <expand macro="version_command" /> + <expand macro="requirements"/> + <expand macro="stdio"/> + <expand macro="version_command"/> <command detect_errors="aggressive"><![CDATA[ ## need to link because diamont tries to open dataset_xxx.dat.daa ln -s '$daa' input.daa && @@ -16,23 +16,23 @@ @OUTPUT_ARGS@ @HITFILTER_ARGS@ $forwardonly - --compress '0' + --verbose ]]> </command> <inputs> - <param argument="--daa" type="data" format="daa" label="input file in DAA format" /> + <param argument="--daa" type="data" format="daa" label="input file in DAA format"/> <section name="output_section" title="Output options"> - <expand macro="output_type_macro" /> + <expand macro="output_type_macro"/> </section> - <expand macro="hit_filter_macro" /> - <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand" help=""/> + <expand macro="hit_filter_macro"/> + <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand"/> </inputs> <outputs> - <expand macro="output_macro" /> + <expand macro="output_macro"/> </outputs> <tests> <test expect_num_outputs="1"> - <param name="daa" ftype="daa" value="diamond_results.daa" /> + <param name="daa" ftype="daa" value="diamond_results.daa"/> <section name="output_section"> <conditional name="output"> <param name="outfmt" value="5"/> @@ 
-40,12 +40,12 @@ </section> <conditional name="hit_filter"> <param name="hit_filter_select" value="max"/> - <param name="max_target_seqs" value="1" /> + <param name="max_target_seqs" value="1"/> </conditional> <output name="blast_tabular" file="diamond_results.xml"/> </test> <test expect_num_outputs="1"> - <param name="daa" ftype="daa" value="diamond_results.daa" /> + <param name="daa" ftype="daa" value="diamond_results.daa"/> <section name="output_section"> <conditional name="output"> <param name="outfmt" value="6"/> @@ -55,7 +55,7 @@ <output name="blast_tabular" file="diamond_view_results.tabular"/> </test> <test expect_num_outputs="1"> - <param name="daa" ftype="daa" value="diamond_results.daa" /> + <param name="daa" ftype="daa" value="diamond_results.daa"/> <section name="output_section"> <conditional name="output"> <param name="outfmt" value="101"/> @@ -63,13 +63,12 @@ </section> <conditional name="hit_filter"> <param name="hit_filter_select" value="top"/> - <param name="max_target_seqs" value="1" /> + <param name="max_target_seqs" value="1"/> </conditional> - <param name="forwardonly" value="--forwardonly" /> + <param name="forwardonly" value="--forwardonly"/> <output name="blast_tabular" file="diamond_results.sam" lines_diff="2"/> </test> </tests> - <help> <![CDATA[ @@ -103,5 +102,5 @@ 12 Bit score ]]> </help> - <expand macro="citations" /> + <expand macro="citations"/> </tool>
--- a/macros.xml Thu Nov 02 11:14:39 2023 +0000 +++ b/macros.xml Mon Feb 03 16:01:01 2025 +0000 @@ -1,22 +1,19 @@ <macros> - <token name="@TOOL_VERSION@">2.0.15</token> + <token name="@TOOL_VERSION@">2.1.11</token> <token name="@VERSION_SUFFIX@">0</token> <xml name="requirements"> <requirements> - <requirement type="package" version="@TOOL_VERSION@">diamond</requirement> + <requirement type="package" version="@TOOL_VERSION@">diamond</requirement> </requirements> </xml> - <xml name="stdio"> <stdio> - <regex match="Failed to allocate" source="stderr" level="fatal_oom" /> + <regex match="Failed to allocate" source="stderr" level="fatal_oom"/> </stdio> </xml> - <xml name="version_command"> <version_command>diamond version | cut -d" " -f 3</version_command> </xml> - <xml name="output_type_macro"> <conditional name="output"> <param argument="--outfmt" type="select" label="Format of output file" help=""> @@ -26,6 +23,7 @@ <option value="100">DAA</option> <option value="101">SAM</option> <option value="102">Taxonomic classification</option> + <option value="104">JSON (flat)</option> </param> <when value="0"/> <when value="5"/> @@ -69,20 +67,17 @@ <option value="cigar">Cigar</option> <yield/> </param> - <param argument="--unal" type="boolean" label="Report unaligned queries" truevalue="1" falsevalue="0" checked="false"/> </when> <when value="100"> - <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/> - <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/> </when> <when value="101"> - <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" 
help=""/> - <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/> </when> - <when value="102"/> + <when value="102"> + <param argument="--include-lineage" type="boolean" truevalue="--include-lineage" falsevalue="" checked="false" label="Include lineage in the taxonomic classification format"/> + </when> + <when value="104"/> </conditional> </xml> - <xml name="hit_filter_macro"> <conditional name="hit_filter"> <param name="hit_filter_select" type="select" label="Method to restrict the number of hits?"> @@ -90,57 +85,50 @@ <option value="top">Percentage of top alignment score</option> </param> <when value="max"> - <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" - help="Setting this to 0 will report all alignments that were found." /> + <param argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" help="Setting this to 0 will report all alignments that were found."/> </when> <when value="top"> - <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" - help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query." 
/> + <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query."/> </when> </conditional> </xml> - <xml name="block_size_low_sens"> - <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" - help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary - disk space, but also improve performance" /> + <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" + help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/> </xml> - <xml name="block_size_hi_sens"> - <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" - help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary - disk space, but also improve performance" /> + <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" + help="This is the main parameter for controlling the program’s memory and disk space usage. 
Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/> </xml> - <xml name="citations"> <citations> - <citation type="doi">10.1038/nmeth.3176</citation> + <citation type="doi">10.1038/s41592-021-01101-x</citation> </citations> </xml> - - <xml name="output_macro"> - <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}"> + <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}: Blast pairwise"> <filter>output_section["output"]["outfmt"] == "0"</filter> </data> - <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}"> + <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}: Blast XML"> <filter>output_section["output"]["outfmt"] == "5"</filter> </data> - <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}"> + <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}: Blast Tabular"> <filter>output_section["output"]["outfmt"] == "6"</filter> </data> <!-- for daa diamond appends the .daa extension -> hence from_work_dir --> - <data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa"> + <data format="daa" name="daa_output" label="${tool.name} on ${on_string}: DAA" from_work_dir="output.daa"> <filter>output_section["output"]["outfmt"] == "100"</filter> </data> - <data format="sam" name="sam_output" label="${tool.name} on ${on_string}"> + <data format="sam" name="sam_output" label="${tool.name} on ${on_string}: SAM"> <filter>output_section["output"]["outfmt"] == "101"</filter> </data> - <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}"> + <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}: Taxonomic classification"> <filter>output_section["output"]["outfmt"] == "102"</filter> </data> + <data format="json" name="json_output" label="${tool.name} on ${on_string}: Json flat"> + 
<filter>output_section["output"]["outfmt"] == "104"</filter> + </data> </xml> - <token name="@OUTPUT_ARGS@"> #if $output_section.output.outfmt == "0" --outfmt '0' @@ -151,23 +139,18 @@ #else if $output_section.output.outfmt == "6" --outfmt '6' #echo ' '.join(str($output_section.output.fields).split(',')) --out '$blast_tabular' - --unal $output_section.output.unal #else if $output_section.output.outfmt == "100" --outfmt '100' - $output_section.output.salltitles - $output_section.output.sallseqid --out output.daa #else if $output_section.output.outfmt == "101" --outfmt '101' - $output_section.output.salltitles - $output_section.output.sallseqid --out '$sam_output' #else if $output_section.output.outfmt == "102" --outfmt '102' --out '$tax_output' + $output_section.output.include_lineage #end if </token> - <token name="@HITFILTER_ARGS@"> #if str($hit_filter.hit_filter_select) == 'max': --max-target-seqs '$hit_filter.max_target_seqs'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/diamond_log.txt Mon Feb 03 16:01:01 2025 +0000 @@ -0,0 +1,261 @@ +diamond blastx --quiet --threads 1 --db ./database --query /tmp/tmpb7mnrgbk/files/0/b/a/dataset_0bafd694-5276-4809-9258-1d272c89c442.dat --query-gencode 1 --strand both --min-orf 1 --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore --out /tmp/tmpb7mnrgbk/job_working_directory/000/34/outputs/dataset_9356bfe1-71ff-4289-b524-b07b2006b561.dat --compress 0 --algo 0 --matrix BLOSUM62 --comp-based-stats 1 --masking 1 --max-target-seqs 25 --evalue 0.001 --id 0 --query-cover 0 --subject-cover 0 --block-size 2.0 --motif-masking 0 --soft-masking 0 --index-chunks 4 --file-buffer-size 67108864 --log +#CPU threads: 1 +Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1) +CPU features detected: ssse3 popcnt sse4.1 avx2 +L3 cache size: 12582912 +MAX_SHAPE_LEN=19 SEQ_MASK STRICT_BAND +Temporary directory: /tmp/tmpb7mnrgbk/job_working_directory/000/34/outputs +#Target sequences to report alignments for: 25 +DP fields: 510 +Opening the database... [0s] +Database: ./database (type: Diamond database, sequences: 2, letters: 568) +Block size = 2000000000 +Current RSS: 6.5 MB, Peak RSS: 6.5 MB +Opening the input file... [0s] +Opening the output file... [0s] +Current RSS: 6.6 MB, Peak RSS: 6.6 MB +Loading query sequences... Sequences = 6, letters = 1694, average length = 282 + [0s] +Sequences = 6, letters = 1694, average length = 282 +Masking queries... [0s] +Current RSS: 7.1 MB, Peak RSS: 7.1 MB +Seed partition bits = 8 +Algorithm: Double-indexed +Shape configuration: 111101110111,111011010010111 +Building query histograms... [0s] +Current RSS: 7.1 MB, Peak RSS: 7.1 MB +Seeking in database... [0s] +Loading reference sequences... Sequences = 2, letters = 568, average length = 284 + [0s] +Current RSS: 7.1 MB, Peak RSS: 7.1 MB +Masking reference... 
[0s] +Masked letters: 0 +Initializing temporary storage... Async_buffer() 1 + [0s] +Building reference histograms... [0s] +Allocating buffers... [0s] +Current RSS: 7.4 MB, Peak RSS: 7.4 MB +Processing query block 1, reference block 1/1, shape 1/2, index chunk 1/4. +Building reference seed array... [0s] +Current RSS: 7.4 MB, Peak RSS: 7.4 MB +Building query seed array... [0s] +Current RSS: 7.4 MB, Peak RSS: 7.4 MB +Indexed query seeds = 301/1694 (17.77%), reference seeds = 87/568 (15.32%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 7.2 MB, Peak RSS: 7.4 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/53 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 7.4 MB, Peak RSS: 7.4 MB +Searching alignments... [0s] +Current RSS: 8.6 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 7.6 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 1/2, index chunk 2/4. +Building reference seed array... [0s] +Current RSS: 7.6 MB, Peak RSS: 9.5 MB +Building query seed array... [0s] +Current RSS: 7.6 MB, Peak RSS: 9.5 MB +Indexed query seeds = 286/1694 (16.88%), reference seeds = 120/568 (21.13%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 7.6 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/71 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 7.6 MB, Peak RSS: 9.5 MB +Searching alignments... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 1/2, index chunk 3/4. +Building reference seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Building query seed array... 
[0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Indexed query seeds = 296/1694 (17.47%), reference seeds = 98/568 (17.25%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/54 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Searching alignments... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 1/2, index chunk 4/4. +Building reference seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Building query seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Indexed query seeds = 307/1694 (18.12%), reference seeds = 109/568 (19.19%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/61 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Searching alignments... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 2/2, index chunk 1/4. +Building reference seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Building query seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Indexed query seeds = 301/1694 (17.77%), reference seeds = 92/568 (16.20%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/51 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Searching alignments... 
[0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 2/2, index chunk 2/4. +Building reference seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Building query seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Indexed query seeds = 283/1694 (16.71%), reference seeds = 95/568 (16.73%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/55 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Searching alignments... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 2/2, index chunk 3/4. +Building reference seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Building query seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Indexed query seeds = 266/1694 (15.70%), reference seeds = 93/568 (16.37%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/56 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Searching alignments... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Processing query block 1, reference block 1/1, shape 2/2, index chunk 4/4. +Building reference seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Building query seed array... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Indexed query seeds = 305/1694 (18.00%), reference seeds = 113/568 (19.89%) +Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%) +Computing hash join... 
[0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Masking low complexity seeds... [0s] +Masked seeds: 0/68 (0.00%) +Masked positions (query): 0/1694 (0.00%) +Masked positions (target): 0/568 (0.00%) +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Searching alignments... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating memory... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Deallocating buffers... [0s] +Clearing query masking... [0s] +Current RSS: 9.4 MB, Peak RSS: 9.5 MB +Computing alignments... Async_buffer.load() 21(2.93367e-07 GB, 2.38419e-07 GB on disk) +Loading trace points... [0s] +Sorting trace points... [0s] +Computing partition... [0s] +Computing alignments... [0s] +Deallocating buffers... [0s] +Loading trace points... [0s] + [0.001s] +Deallocating reference... [0s] +Loading reference sequences... Current RSS: 10.1 MB, Peak RSS: 10.1 MB + [0s] +Deallocating buffers... [0s] +Current RSS: 10.1 MB, Peak RSS: 10.1 MB +Deallocating queries... [0s] +Current RSS: 10.1 MB, Peak RSS: 10.1 MB +Loading query sequences... [0s] +Closing the input file... [0s] +Closing the output file... [0s] +Closing the database... [0s] +Cleaning up... 
[0s] +Current RSS: 10.1 MB, Peak RSS: 10.1 MB +Total time = 0.017s +Hits (filter stage 0) = 774 +Hits (filter stage 1) = 774 (100 %) +Hits (filter stage 2) = 774 (100 %) +Hits (filter stage 3) = 21 (2.71318 %) +Target hits (stage 0) = 2 +Target hits (stage 1) = 0 +Target hits (stage 2) = 2 +Target hits (stage 3) = 2 (0 (0%) with CBS) +Target hits (stage 4) = 2 +Target hits (stage 5) = 2 +Target hits (stage 6) = 2 +Swipe realignments = 0 +Matrix adjusts = 0 +Extensions (8 bit) = 0 +Extensions (16 bit) = 4 +Extensions (32 bit) = 0 +Overflows (8 bit) = 0 +Wasted (16 bit) = 0 +Effort (Extension) = 8 +Effort (Cells) = 0 +Cells (8 bit) = 0 +Cells (16 bit) = 0 +SWIPE tasks = 2 +SWIPE tasks (async) = 0 +Trivial aln = 0 +Hard queries = 0 +Gapped filter (targets) = 0 +Gapped filter (hits) stage 1 = 0 +Gapped filter (hits) stage 2 = 0 +Time (Load seed hit targets) = 5e-06s (CPU) +Time (Sort targets by score) = 0s (CPU) +Time (Gapped filter) = 0s (CPU) +Time (Matrix adjust) = 0s (CPU) +Time (Chaining) = 3.4e-05s (CPU) +Time (DP target sorting) = 0s (CPU) +Time (Query profiles) = 0s (CPU) +Time (Smith Waterman) = 0.000352s (CPU) +Time (Anchored SWIPE Alloc) = 0s (CPU) +Time (Anchored SWIPE Sort) = 0s (CPU) +Time (Anchored SWIPE Add) = 0s (CPU) +Time (Anchored SWIPE Output) = 0s (CPU) +Time (Anchored SWIPE) = 0s (CPU) +Time (Smith Waterman TB) = 0s (CPU) +Time (Smith Waterman-32) = 0s (CPU) +Time (Traceback) = 1.6e-05s (CPU) +Time (Target parallel) = 0s (wall) +Time (Load seed hits) = 0.000797s (wall) +Time (Sort seed hits) = 2.8e-05s (wall) +Time (Extension) = 0.000568s (wall) +Temporary disk space used (search): 2.38419e-07 GB +Reported 2 pairwise alignments, 2 HSPs. +1 queries aligned. +Current RSS: 10.1 MB, Peak RSS: 10.1 MB
--- a/test-data/diamond_results.sam Thu Nov 02 11:14:39 2023 +0000 +++ b/test-data/diamond_results.sam Mon Feb 03 16:01:01 2025 +0000 @@ -1,5 +1,5 @@ @HD VN:1.5 SO:query -@PG PN:DIAMOND VN:2.0.15 CL:diamond view --threads 1 --daa input.daa --outfmt 101 --salltitles --sallseqid --out /tmp/tmpuqw24dac/files/e/4/b/dataset_e4b47568-a2e4-4ec1-ac5f-f266085686a4.dat --top 0 --forwardonly --compress 0 +@PG PN:DIAMOND VN:2.1.11 CL:diamond view --threads 1 --daa input.daa --outfmt 101 --out /tmp/tmp61jyo35f/job_working_directory/000/6/outputs/dataset_bb83a399-fa7d-414f-afff-28cb61b3cd8d.dat --top 0 --forwardonly --verbose @mm BlastP @CO BlastP-like alignments @CO Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate
--- a/test-data/diamond_results.tabular Thu Nov 02 11:14:39 2023 +0000 +++ b/test-data/diamond_results.tabular Mon Feb 03 16:01:01 2025 +0000 @@ -1,3 +1,2 @@ -sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0 -sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0 -shuffled * -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 * * * +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0 94M1D189M +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0 105M1D178M
--- a/test-data/diamond_results.xml Thu Nov 02 11:14:39 2023 +0000 +++ b/test-data/diamond_results.xml Mon Feb 03 16:01:01 2025 +0000 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastp</BlastOutput_program> - <BlastOutput_version>diamond 2.0.15</BlastOutput_version> + <BlastOutput_version>diamond 2.1.11</BlastOutput_version> <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), "Fast and sensitive protein alignment using DIAMOND", Nature Methods 12:59-60.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/diamond_results_log_test.tabular Mon Feb 03 16:01:01 2025 +0000 @@ -0,0 +1,2 @@ +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550 +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/diamond_results_soft_masking.tabular Mon Feb 03 16:01:01 2025 +0000 @@ -0,0 +1,2 @@ +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550 +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/diamond_results_soft_masking_memory.tabular Mon Feb 03 16:01:01 2025 +0000 @@ -0,0 +1,2 @@ +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550 +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409