Galaxy |

Changeset 12:60f307965815 (2025-02-03)

Previous changeset 11:e8ac2b53f262 (2023-11-02) Next changeset 13:20d92dc4c6cb (2025-06-10)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diamond commit 62db819c1db857d3fba94dc4e290ee0f50f7928d

modified:
diamond.xml
diamond_makedb.xml
diamond_view.xml
macros.xml
test-data/db-wtax.dmnd
test-data/db.dmnd
test-data/diamond_results.daa
test-data/diamond_results.sam
test-data/diamond_results.tabular
test-data/diamond_results.xml

added:
test-data/diamond_log.txt
test-data/diamond_results_log_test.tabular
test-data/diamond_results_soft_masking.tabular
test-data/diamond_results_soft_masking_memory.tabular
test-data/diamond_results_swipe.tabular

diff -r e8ac2b53f262 -r 60f307965815 diamond.xml
--- a/diamond.xml Thu Nov 02 11:14:39 2023 +0000
+++ b/diamond.xml Mon Feb 03 16:01:01 2025 +0000

[

b'@@ -1,4 +1,4 @@\n-<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">\n+<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2" license="GPL-3.0">\n <description>alignment tool for short sequences against a protein database</description>\n <macros>\n <import>macros.xml</import>\n@@ -6,9 +6,9 @@\n <xrefs>\n <xref type="bio.tools">diamond</xref>\n </xrefs>\n- <expand macro="requirements" />\n- <expand macro="stdio" />\n- <expand macro="version_command" />\n+ <expand macro="requirements"/>\n+ <expand macro="stdio"/>\n+ <expand macro="version_command"/>\n <command detect_errors="aggressive">\n <![CDATA[\n \n@@ -45,6 +45,7 @@\n #end if\n $sens_cond.sensitivity\n $iterate\n+ $swipe\n --algo $algo\n #if $global_ranking\n --global-ranking $global_ranking\n@@ -102,6 +103,11 @@\n #end if\n $advanced_section.freq_masking\n --motif-masking $advanced_section.motif_masking\n+ --soft-masking $advanced_section.soft_masking\n+ --index-chunks "\\${DIAMOND_INDEX_CHUNKS:-4}"\n+ --file-buffer-size "\\${DIAMOND_FILE_BUFFER_SIZE:-67108864}"\n+ $log\n+ \n ]]>\n </command>\n <inputs>\n@@ -131,9 +137,14 @@\n <option value="24">Pterobranchia Mitochondrial Code</option>\n <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>\n <option value="26">Pachysolen tannophilus Nuclear Code</option>\n+ <option value="27">Karyorelict Nuclear Code</option>\n+ <option value="28">Condylostoma Nuclear Code</option>\n+ <option value="29">Mesodinium Nuclear Code</option>\n+ <option value="30">Peritrich Nuclear Code</option>\n+ <option value="31">Blastocrithidia Nuclear Code</option>\n+ <option value="33">Cephalodiscidae Mitochondrial UAA-Tyr Code</option>\n </param>\n- <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" />\n- \n+ <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature"/>\n <param name="query_strand" argument="--strand" type="select" label="query strands to search" help="">\n <option value="both" selected="True">Both</option>\n <option value="plus">Plus</option>\n@@ -146,21 +157,17 @@\n </param>\n <when value="yes">\n <param argument="--range-culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its'..b' <param name="query" value="nucleotide.fasta" ftype="fasta"/>\n <conditional name="ref_db_source">\n@@ -602,10 +633,10 @@\n </section>\n <output name="blast_tabular" file="diamond_results_seed_cut.tabular"/>\n </test>\n- \n+ \n <test expect_num_outputs="1">\n <conditional name="method_cond">\n- <param name="method_select" value="blastx" />\n+ <param name="method_select" value="blastx"/>\n </conditional>\n <param name="query" value="nucleotide.fasta" ftype="fasta"/>\n <conditional name="ref_db_source">\n@@ -621,10 +652,10 @@\n </section>\n <output name="blast_tabular" file="diamond_results_freq_masking.tabular"/>\n </test>\n- \n+ \n <test expect_num_outputs="1">\n <conditional name="method_cond">\n- <param name="method_select" value="blastx" />\n+ <param name="method_select" value="blastx"/>\n </conditional>\n <param name="query" value="nucleotide.fasta" ftype="fasta"/>\n <conditional name="ref_db_source">\n@@ -642,6 +673,54 @@\n </section>\n <output name="blast_tabular" file="diamond_results_motif_masking.tabular"/>\n </test>\n+ \n+ <test expect_num_outputs="1">\n+ <conditional name="method_cond">\n+ <param name="method_select" value="blastx"/>\n+ </conditional>\n+ <param name="query" value="nucleotide.fasta" ftype="fasta"/>\n+ <conditional name="ref_db_source">\n+ <param name="db_source" value="indexed"/>\n+ <param name="index" value="testDb"/>\n+ </conditional>\n+ <section name="advanced_section">\n+ <param name="soft_masking" value="0"/>\n+ </section>\n+ <section name="output_section">\n+ <conditional name="output">\n+ <param name="outfmt" value="6"/>\n+ <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>\n+ </conditional>\n+ </section>\n+ <output name="blast_tabular" file="diamond_results_soft_masking.tabular"/>\n+ </test>\n+ \n+ <test expect_num_outputs="2">\n+ <conditional name="method_cond">\n+ <param name="method_select" value="blastx"/>\n+ </conditional>\n+ <param name="query" value="nucleotide.fasta" ftype="fasta"/>\n+ <conditional name="ref_db_source">\n+ <param name="db_source" value="indexed"/>\n+ <param name="index" value="testDb"/>\n+ </conditional>\n+ <section name="output_section">\n+ <conditional name="output">\n+ <param name="outfmt" value="6"/>\n+ <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>\n+ </conditional>\n+ <param name="log" value="true"/>\n+ </section>\n+ <output name="blast_tabular" file="diamond_results_log_test.tabular"/>\n+ <output name="log_file">\n+ <assert_contents>\n+ <has_n_lines n="261"/>\n+ <has_text text="diamond blastx --quiet"/>\n+ <has_text text="--log"/>\n+ <has_line line="Sequences = 6, letters = 1694, average length = 282"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n </tests>\n <help>\n <![CDATA[\n@@ -699,5 +778,5 @@\n \n ]]>\n </help>\n- <expand macro="citations" />\n+ <expand macro="citations"/>\n </tool>\n'

diff -r e8ac2b53f262 -r 60f307965815 diamond_makedb.xml
--- a/diamond_makedb.xml Thu Nov 02 11:14:39 2023 +0000
+++ b/diamond_makedb.xml Mon Feb 03 16:01:01 2025 +0000

[

@@ -1,18 +1,16 @@
-<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
+<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2" license="GPL-3.0">
     <description>Build database from a FASTA file</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-
-    <expand macro="requirements" />
-    <expand macro="stdio" />
-    <expand macro="version_command" />
-
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
     <command detect_errors="aggressive">
-    
+        
     <![CDATA[
     diamond makedb
-        --threads "\${GALAXY_SLOTS:-12}"
+        --threads \${GALAXY_SLOTS:-12}
         --in '$infile'
         --db ./database

@@ -23,30 +21,24 @@
       #end if
     ]]>
     </command>
-
     <inputs>
-      <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" />
-      <conditional name="tax_cond">
-        <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
-          <option value="yes">Yes</option>
-          <option value="no" selected="true">No</option>
-        </param>
-        <when value="yes">
-          <param argument="--taxonmap" type="data" format="tabular"
-            label="Protein accession to taxid mapping file"
-            help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features.
-              A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored" />
-          <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
-          <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
-        </when>
-        <when value="no"/>
-      </conditional>
+        <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format"/>
+        <conditional name="tax_cond">
+            <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param argument="--taxonmap" type="data" format="tabular" label="Protein accession to taxid mapping file" help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features.                A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored"/>
+                <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/>
+                <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/>
+            </when>
+            <when value="no"/>
+        </conditional>
     </inputs>
-
     <outputs>
         <data format="dmnd" name="outfile" from_work_dir="database.dmnd" label="${tool.name} on ${on_string}"/>
     </outputs>
-
     <tests>
         <test>
             <param name="infile" value="db.fasta" ftype="fasta"/>
@@ -56,14 +48,13 @@
             <param name="infile" value="db.fasta" ftype="fasta"/>
             <conditional name="tax_cond">
                 <param name="tax_select" value="yes"/>
-                <param name="taxonmap" ftype="tabular" value="prot.accession2taxid" />
-                <param name="taxonnodes" ftype="tabular" value="nodes.dmp" />
-                <param name="taxonnames" ftype="tabular" value="names.dmp" />
+                <param name="taxonmap" ftype="tabular" value="prot.accession2taxid"/>
+                <param name="taxonnodes" ftype="tabular" value="nodes.dmp"/>
+                <param name="taxonnames" ftype="tabular" value="names.dmp"/>
             </conditional>
             <output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/>
         </test>
     </tests>
-
     <help>
<![CDATA[

@@ -86,6 +77,5 @@
- taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
]]>
     </help>
-
-    <expand macro="citations" />
+    <expand macro="citations"/>
</tool>

diff -r e8ac2b53f262 -r 60f307965815 diamond_view.xml
--- a/diamond_view.xml Thu Nov 02 11:14:39 2023 +0000
+++ b/diamond_view.xml Mon Feb 03 16:01:01 2025 +0000

[

@@ -1,11 +1,11 @@
-<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
+<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2" license="GPL-3.0">
     <description>generate formatted output from DAA files</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements" />
-    <expand macro="stdio" />
-    <expand macro="version_command" />
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
     <command detect_errors="aggressive"><![CDATA[
     ## need to link because diamont tries to open dataset_xxx.dat.daa
     ln -s '$daa' input.daa &&
@@ -16,23 +16,23 @@
         @OUTPUT_ARGS@
         @HITFILTER_ARGS@
         $forwardonly
-        --compress '0'
+        --verbose
     ]]>
     </command>
     <inputs>
-        <param argument="--daa" type="data" format="daa" label="input file in DAA format" />
+        <param argument="--daa" type="data" format="daa" label="input file in DAA format"/>
         <section name="output_section" title="Output options">
-            <expand macro="output_type_macro" />
+            <expand macro="output_type_macro"/>
         </section>
-        <expand macro="hit_filter_macro" />
-        <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand" help=""/>
+        <expand macro="hit_filter_macro"/>
+        <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand"/>
     </inputs>
     <outputs>
-        <expand macro="output_macro" />
+        <expand macro="output_macro"/>
     </outputs>
     <tests>
         <test expect_num_outputs="1">
-            <param name="daa" ftype="daa" value="diamond_results.daa" />
+            <param name="daa" ftype="daa" value="diamond_results.daa"/>
             <section name="output_section">
                 <conditional name="output">
                     <param name="outfmt" value="5"/>
@@ -40,12 +40,12 @@
             </section>
             <conditional name="hit_filter">
                 <param name="hit_filter_select" value="max"/>
-                <param name="max_target_seqs" value="1" />
+                <param name="max_target_seqs" value="1"/>
             </conditional>
             <output name="blast_tabular" file="diamond_results.xml"/>
         </test>
         <test expect_num_outputs="1">
-            <param name="daa" ftype="daa" value="diamond_results.daa" />
+            <param name="daa" ftype="daa" value="diamond_results.daa"/>
             <section name="output_section">
                 <conditional name="output">
                     <param name="outfmt" value="6"/>
@@ -55,7 +55,7 @@
             <output name="blast_tabular" file="diamond_view_results.tabular"/>
         </test>
         <test expect_num_outputs="1">
-            <param name="daa" ftype="daa" value="diamond_results.daa" />
+            <param name="daa" ftype="daa" value="diamond_results.daa"/>
             <section name="output_section">
                 <conditional name="output">
                     <param name="outfmt" value="101"/>
@@ -63,13 +63,12 @@
             </section>
             <conditional name="hit_filter">
                 <param name="hit_filter_select" value="top"/>
-                <param name="max_target_seqs" value="1" />
+                <param name="max_target_seqs" value="1"/>
             </conditional>
-            <param name="forwardonly" value="--forwardonly" />
+            <param name="forwardonly" value="--forwardonly"/>
             <output name="blast_tabular" file="diamond_results.sam" lines_diff="2"/>
         </test>
     </tests>
-
     <help>
<![CDATA[

@@ -103,5 +102,5 @@
12     Bit score
]]>
     </help>
-    <expand macro="citations" />
+    <expand macro="citations"/>
</tool>

diff -r e8ac2b53f262 -r 60f307965815 macros.xml
--- a/macros.xml Thu Nov 02 11:14:39 2023 +0000
+++ b/macros.xml Mon Feb 03 16:01:01 2025 +0000

[

b'@@ -1,22 +1,19 @@\n <macros>\n- <token name="@TOOL_VERSION@">2.0.15</token>\n+ <token name="@TOOL_VERSION@">2.1.11</token>\n <token name="@VERSION_SUFFIX@">0</token>\n <xml name="requirements">\n <requirements>\n- <requirement type="package" version="@TOOL_VERSION@">diamond</requirement>\n+ <requirement type="package" version="@TOOL_VERSION@">diamond</requirement>\n </requirements>\n </xml>\n-\n <xml name="stdio">\n <stdio>\n- <regex match="Failed to allocate" source="stderr" level="fatal_oom" />\n+ <regex match="Failed to allocate" source="stderr" level="fatal_oom"/>\n </stdio>\n </xml>\n-\n <xml name="version_command">\n <version_command>diamond version | cut -d" " -f 3</version_command>\n </xml>\n-\n <xml name="output_type_macro">\n <conditional name="output">\n <param argument="--outfmt" type="select" label="Format of output file" help="">\n@@ -26,6 +23,7 @@\n <option value="100">DAA</option>\n <option value="101">SAM</option>\n <option value="102">Taxonomic classification</option>\n+ <option value="104">JSON (flat)</option>\n </param>\n <when value="0"/>\n <when value="5"/>\n@@ -69,20 +67,17 @@\n <option value="cigar">Cigar</option>\n <yield/>\n </param>\n- <param argument="--unal" type="boolean" label="Report unaligned queries" truevalue="1" falsevalue="0" checked="false"/>\n </when>\n <when value="100">\n- <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>\n- <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>\n </when>\n <when value="101">\n- <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>\n- <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>\n </when>\n- <when value="102"/>\n+ <when value="102">\n+ <param argument="--include-lineage" type="boolean" truevalue="--include-lineage" falsevalue="" checked="false" label="Include lineage in the taxonomic classification format"/>\n+ </when>\n+ <when value="104"/>\n </conditional>\n </xml>\n-\n <xml name="hit_filter_macro">\n <conditional name="hit_filter">\n <param name="hit_filter_select" type="select" label="Method to restrict the number of hits?">\n@@ -90,57 +85,50 @@\n <option value="top">Percentage of top alignment score</option>\n </param>\n <when value="max">\n- <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" \n- help="Setting this to 0 will report all alignments that were found." />\n+ <param argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" help="Setting this to 0 will report all alignments that were found."/>\n </when>\n <when value="top">\n- <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" \n- help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query." />\n+ <param arg'..b'e" \n- help="This is the main parameter for controlling the program\xe2\x80\x99s memory and disk space usage. Bigger numbers will increase the use of memory and temporary \n- disk space, but also improve performance" />\n+ <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time"\n+ help="This is the main parameter for controlling the program\xe2\x80\x99s memory and disk space usage. Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/>\n </xml>\n-\n <xml name="citations">\n <citations>\n- <citation type="doi">10.1038/nmeth.3176</citation>\n+ <citation type="doi">10.1038/s41592-021-01101-x</citation>\n </citations>\n </xml>\n-\n-\n <xml name="output_macro">\n- <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}">\n+ <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}: Blast pairwise">\n <filter>output_section["output"]["outfmt"] == "0"</filter>\n </data>\n- <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">\n+ <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}: Blast XML">\n <filter>output_section["output"]["outfmt"] == "5"</filter>\n </data>\n- <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">\n+ <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}: Blast Tabular">\n <filter>output_section["output"]["outfmt"] == "6"</filter>\n </data>\n \n- <data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa">\n+ <data format="daa" name="daa_output" label="${tool.name} on ${on_string}: DAA" from_work_dir="output.daa">\n <filter>output_section["output"]["outfmt"] == "100"</filter>\n </data>\n- <data format="sam" name="sam_output" label="${tool.name} on ${on_string}">\n+ <data format="sam" name="sam_output" label="${tool.name} on ${on_string}: SAM">\n <filter>output_section["output"]["outfmt"] == "101"</filter>\n </data>\n- <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}">\n+ <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}: Taxonomic classification">\n <filter>output_section["output"]["outfmt"] == "102"</filter>\n </data>\n+ <data format="json" name="json_output" label="${tool.name} on ${on_string}: Json flat">\n+ <filter>output_section["output"]["outfmt"] == "104"</filter>\n+ </data>\n </xml>\n-\n <token name="@OUTPUT_ARGS@">\n #if $output_section.output.outfmt == "0"\n --outfmt \'0\'\n@@ -151,23 +139,18 @@\n #else if $output_section.output.outfmt == "6"\n --outfmt \'6\' #echo \' \'.join(str($output_section.output.fields).split(\',\'))\n --out \'$blast_tabular\'\n- --unal $output_section.output.unal\n #else if $output_section.output.outfmt == "100"\n --outfmt \'100\'\n- $output_section.output.salltitles\n- $output_section.output.sallseqid\n --out output.daa\n #else if $output_section.output.outfmt == "101"\n --outfmt \'101\'\n- $output_section.output.salltitles\n- $output_section.output.sallseqid\n --out \'$sam_output\'\n #else if $output_section.output.outfmt == "102"\n --outfmt \'102\'\n --out \'$tax_output\'\n+ $output_section.output.include_lineage\n #end if\n </token>\n-\n <token name="@HITFILTER_ARGS@">\n #if str($hit_filter.hit_filter_select) == \'max\':\n --max-target-seqs \'$hit_filter.max_target_seqs\'\n'

diff -r e8ac2b53f262 -r 60f307965815 test-data/db-wtax.dmnd

Binary file test-data/db-wtax.dmnd has changed

diff -r e8ac2b53f262 -r 60f307965815 test-data/db.dmnd

Binary file test-data/db.dmnd has changed

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_log.txt Mon Feb 03 16:01:01 2025 +0000

[

b'@@ -0,0 +1,261 @@\n+diamond blastx --quiet --threads 1 --db ./database --query /tmp/tmpb7mnrgbk/files/0/b/a/dataset_0bafd694-5276-4809-9258-1d272c89c442.dat --query-gencode 1 --strand both --min-orf 1 --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore --out /tmp/tmpb7mnrgbk/job_working_directory/000/34/outputs/dataset_9356bfe1-71ff-4289-b524-b07b2006b561.dat --compress 0 --algo 0 --matrix BLOSUM62 --comp-based-stats 1 --masking 1 --max-target-seqs 25 --evalue 0.001 --id 0 --query-cover 0 --subject-cover 0 --block-size 2.0 --motif-masking 0 --soft-masking 0 --index-chunks 4 --file-buffer-size 67108864 --log\n+#CPU threads: 1\n+Scoring parameters: (Matrix=BLOSUM62 Lambda=0.267 K=0.041 Penalties=11/1)\n+CPU features detected: ssse3 popcnt sse4.1 avx2\n+L3 cache size: 12582912\n+MAX_SHAPE_LEN=19 SEQ_MASK STRICT_BAND\n+Temporary directory: /tmp/tmpb7mnrgbk/job_working_directory/000/34/outputs\n+#Target sequences to report alignments for: 25\n+DP fields: 510\n+Opening the database... [0s]\n+Database: ./database (type: Diamond database, sequences: 2, letters: 568)\n+Block size = 2000000000\n+Current RSS: 6.5 MB, Peak RSS: 6.5 MB\n+Opening the input file... [0s]\n+Opening the output file... [0s]\n+Current RSS: 6.6 MB, Peak RSS: 6.6 MB\n+Loading query sequences... Sequences = 6, letters = 1694, average length = 282\n+ [0s]\n+Sequences = 6, letters = 1694, average length = 282\n+Masking queries... [0s]\n+Current RSS: 7.1 MB, Peak RSS: 7.1 MB\n+Seed partition bits = 8\n+Algorithm: Double-indexed\n+Shape configuration: 111101110111,111011010010111\n+Building query histograms... [0s]\n+Current RSS: 7.1 MB, Peak RSS: 7.1 MB\n+Seeking in database... [0s]\n+Loading reference sequences... Sequences = 2, letters = 568, average length = 284\n+ [0s]\n+Current RSS: 7.1 MB, Peak RSS: 7.1 MB\n+Masking reference... [0s]\n+Masked letters: 0\n+Initializing temporary storage... Async_buffer() 1\n+ [0s]\n+Building reference histograms... [0s]\n+Allocating buffers... [0s]\n+Current RSS: 7.4 MB, Peak RSS: 7.4 MB\n+Processing query block 1, reference block 1/1, shape 1/2, index chunk 1/4.\n+Building reference seed array... [0s]\n+Current RSS: 7.4 MB, Peak RSS: 7.4 MB\n+Building query seed array... [0s]\n+Current RSS: 7.4 MB, Peak RSS: 7.4 MB\n+Indexed query seeds = 301/1694 (17.77%), reference seeds = 87/568 (15.32%)\n+Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%)\n+Computing hash join... [0s]\n+Current RSS: 7.2 MB, Peak RSS: 7.4 MB\n+Masking low complexity seeds... [0s]\n+Masked seeds: 0/53 (0.00%)\n+Masked positions (query): 0/1694 (0.00%)\n+Masked positions (target): 0/568 (0.00%)\n+Current RSS: 7.4 MB, Peak RSS: 7.4 MB\n+Searching alignments... [0s]\n+Current RSS: 8.6 MB, Peak RSS: 9.5 MB\n+Deallocating memory... [0s]\n+Current RSS: 7.6 MB, Peak RSS: 9.5 MB\n+Processing query block 1, reference block 1/1, shape 1/2, index chunk 2/4.\n+Building reference seed array... [0s]\n+Current RSS: 7.6 MB, Peak RSS: 9.5 MB\n+Building query seed array... [0s]\n+Current RSS: 7.6 MB, Peak RSS: 9.5 MB\n+Indexed query seeds = 286/1694 (16.88%), reference seeds = 120/568 (21.13%)\n+Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%)\n+Computing hash join... [0s]\n+Current RSS: 7.6 MB, Peak RSS: 9.5 MB\n+Masking low complexity seeds... [0s]\n+Masked seeds: 0/71 (0.00%)\n+Masked positions (query): 0/1694 (0.00%)\n+Masked positions (target): 0/568 (0.00%)\n+Current RSS: 7.6 MB, Peak RSS: 9.5 MB\n+Searching alignments... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Deallocating memory... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Processing query block 1, reference block 1/1, shape 1/2, index chunk 3/4.\n+Building reference seed array... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Building query seed array... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Indexed query seeds = 296/1694 (17.47%), reference seeds = 98/568 (17.25%)\n+Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%)\n+Computing hash join... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Masking low complexi'..b' = 93/568 (16.37%)\n+Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%)\n+Computing hash join... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Masking low complexity seeds... [0s]\n+Masked seeds: 0/56 (0.00%)\n+Masked positions (query): 0/1694 (0.00%)\n+Masked positions (target): 0/568 (0.00%)\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Searching alignments... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Deallocating memory... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Processing query block 1, reference block 1/1, shape 2/2, index chunk 4/4.\n+Building reference seed array... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Building query seed array... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Indexed query seeds = 305/1694 (18.00%), reference seeds = 113/568 (19.89%)\n+Soft masked letters = 0/1694 (0.00%), 0/568 (0.00%)\n+Computing hash join... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Masking low complexity seeds... [0s]\n+Masked seeds: 0/68 (0.00%)\n+Masked positions (query): 0/1694 (0.00%)\n+Masked positions (target): 0/568 (0.00%)\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Searching alignments... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Deallocating memory... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Deallocating buffers... [0s]\n+Clearing query masking... [0s]\n+Current RSS: 9.4 MB, Peak RSS: 9.5 MB\n+Computing alignments... Async_buffer.load() 21(2.93367e-07 GB, 2.38419e-07 GB on disk)\n+Loading trace points... [0s]\n+Sorting trace points... [0s]\n+Computing partition... [0s]\n+Computing alignments... [0s]\n+Deallocating buffers... [0s]\n+Loading trace points... [0s]\n+ [0.001s]\n+Deallocating reference... [0s]\n+Loading reference sequences... Current RSS: 10.1 MB, Peak RSS: 10.1 MB\n+ [0s]\n+Deallocating buffers... [0s]\n+Current RSS: 10.1 MB, Peak RSS: 10.1 MB\n+Deallocating queries... [0s]\n+Current RSS: 10.1 MB, Peak RSS: 10.1 MB\n+Loading query sequences... [0s]\n+Closing the input file... [0s]\n+Closing the output file... [0s]\n+Closing the database... [0s]\n+Cleaning up... [0s]\n+Current RSS: 10.1 MB, Peak RSS: 10.1 MB\n+Total time = 0.017s\n+Hits (filter stage 0) = 774\n+Hits (filter stage 1) = 774 (100 %)\n+Hits (filter stage 2) = 774 (100 %)\n+Hits (filter stage 3) = 21 (2.71318 %)\n+Target hits (stage 0) = 2\n+Target hits (stage 1) = 0\n+Target hits (stage 2) = 2\n+Target hits (stage 3) = 2 (0 (0%) with CBS)\n+Target hits (stage 4) = 2\n+Target hits (stage 5) = 2\n+Target hits (stage 6) = 2\n+Swipe realignments = 0\n+Matrix adjusts = 0\n+Extensions (8 bit) = 0\n+Extensions (16 bit) = 4\n+Extensions (32 bit) = 0\n+Overflows (8 bit) = 0\n+Wasted (16 bit) = 0\n+Effort (Extension) = 8\n+Effort (Cells) = 0\n+Cells (8 bit) = 0\n+Cells (16 bit) = 0\n+SWIPE tasks = 2\n+SWIPE tasks (async) = 0\n+Trivial aln = 0\n+Hard queries = 0\n+Gapped filter (targets) = 0\n+Gapped filter (hits) stage 1 = 0\n+Gapped filter (hits) stage 2 = 0\n+Time (Load seed hit targets) = 5e-06s (CPU)\n+Time (Sort targets by score) = 0s (CPU)\n+Time (Gapped filter) = 0s (CPU)\n+Time (Matrix adjust) = 0s (CPU)\n+Time (Chaining) = 3.4e-05s (CPU)\n+Time (DP target sorting) = 0s (CPU)\n+Time (Query profiles) = 0s (CPU)\n+Time (Smith Waterman) = 0.000352s (CPU)\n+Time (Anchored SWIPE Alloc) = 0s (CPU)\n+Time (Anchored SWIPE Sort) = 0s (CPU)\n+Time (Anchored SWIPE Add) = 0s (CPU)\n+Time (Anchored SWIPE Output) = 0s (CPU)\n+Time (Anchored SWIPE) = 0s (CPU)\n+Time (Smith Waterman TB) = 0s (CPU)\n+Time (Smith Waterman-32) = 0s (CPU)\n+Time (Traceback) = 1.6e-05s (CPU)\n+Time (Target parallel) = 0s (wall)\n+Time (Load seed hits) = 0.000797s (wall)\n+Time (Sort seed hits) = 2.8e-05s (wall)\n+Time (Extension) = 0.000568s (wall)\n+Temporary disk space used (search): 2.38419e-07 GB\n+Reported 2 pairwise alignments, 2 HSPs.\n+1 queries aligned.\n+Current RSS: 10.1 MB, Peak RSS: 10.1 MB\n'

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results.daa

Binary file test-data/diamond_results.daa has changed

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results.sam
--- a/test-data/diamond_results.sam Thu Nov 02 11:14:39 2023 +0000
+++ b/test-data/diamond_results.sam Mon Feb 03 16:01:01 2025 +0000

@@ -1,5 +1,5 @@
@HD VN:1.5 SO:query
-@PG PN:DIAMOND VN:2.0.15 CL:diamond view --threads 1 --daa input.daa --outfmt 101 --salltitles --sallseqid --out /tmp/tmpuqw24dac/files/e/4/b/dataset_e4b47568-a2e4-4ec1-ac5f-f266085686a4.dat --top 0 --forwardonly --compress 0
+@PG PN:DIAMOND VN:2.1.11 CL:diamond view --threads 1 --daa input.daa --outfmt 101 --out /tmp/tmp61jyo35f/job_working_directory/000/6/outputs/dataset_bb83a399-fa7d-414f-afff-28cb61b3cd8d.dat --top 0 --forwardonly --verbose
@mm BlastP
@CO BlastP-like alignments
@CO Reporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results.tabular
--- a/test-data/diamond_results.tabular Thu Nov 02 11:14:39 2023 +0000
+++ b/test-data/diamond_results.tabular Mon Feb 03 16:01:01 2025 +0000

@@ -1,3 +1,2 @@
-sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0
-sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0
-shuffled * -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 * * *
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0 94M1D189M
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0 105M1D178M

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results.xml
--- a/test-data/diamond_results.xml Thu Nov 02 11:14:39 2023 +0000
+++ b/test-data/diamond_results.xml Mon Feb 03 16:01:01 2025 +0000

@@ -2,7 +2,7 @@
<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
<BlastOutput>
   <BlastOutput_program>blastp</BlastOutput_program>
-  <BlastOutput_version>diamond 2.0.15</BlastOutput_version>
+  <BlastOutput_version>diamond 2.1.11</BlastOutput_version>
   <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), "Fast and sensitive protein alignment using DIAMOND", Nature Methods 12:59-60.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results_log_test.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_log_test.tabular Mon Feb 03 16:01:01 2025 +0000

@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results_soft_masking.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_soft_masking.tabular Mon Feb 03 16:01:01 2025 +0000

@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results_soft_masking_memory.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_soft_masking_memory.tabular Mon Feb 03 16:01:01 2025 +0000

@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409

diff -r e8ac2b53f262 -r 60f307965815 test-data/diamond_results_swipe.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/diamond_results_swipe.tabular Mon Feb 03 16:01:01 2025 +0000

@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409