changeset 0:47189346d00d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/metasbt commit 5509a3fab8eb49e931a5fde335dd7030e16737f5
author iuc
date Fri, 22 Aug 2025 19:04:25 +0000
parents
children dff5f0dd17eb
files index.xml macros.xml test-data/MetaSBT-Test-20250620.1.tar.gz test-data/genome_1.fna.gz test-data/genome_1.tsv test-data/genome_2.fna.gz test-data/genome_2.tsv test-data/genome_3.fna.gz test-data/genome_3.tsv test-data/genome_4.fna.gz test-data/genome_4.tsv test-data/genome_5.fna.gz test-data/genome_5.tsv test-data/genome_6.fna.gz test-data/genome_6.tsv test-data/metasbt_databases.loc tool-data/metasbt_databases.loc tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 19 files changed, 506 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/index.xml	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,339 @@
+<?xml version="1.0"?>
+<tool name="index" id="metasbt_index" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@" profile="@PROFILE@" license="MIT">
+    <description>genomes with Sequence Bloom Trees or update an existing database</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <expand macro="requirements"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+## Overview:
+##   1. stage every input genome as an uncompressed .fna file under a local folder;
+##   2. either build a new database (metasbt index) or unpack and update an existing
+##      one (metasbt unpack + metasbt update);
+##   3. expose the result tables and the packed database under fixed file names so
+##      that the outputs can be collected with from_work_dir.
+#set input_dir = "./genomes"
+
+mkdir -p "${input_dir}" &&
+
+#for $genome in $genomes:
+    ## Replace every character outside [[:alnum:]_.-] so the identifier is a safe file name
+    genome_name="\$(echo '${genome.element_identifier}' | sed 's/[^[:alnum:]_.-]/_/g')" &&
+    target_fna="${input_dir}/\${genome_name}.fna" &&
+
+    #if $genome.ext.endswith("gz"):
+        gzip -dc '${genome}' > "\${target_fna}" &&
+    #else
+        ln -s '${genome}' "\${target_fna}" &&
+    #end if
+
+    ## List of staged genomes, consumed by "metasbt update" through the genomes option
+    echo -e "\${target_fna}" >> "./genomes.txt" &&
+#end for
+
+#if $advanced.index_update_conditional.index_update_option == "false":
+    ## Build the references table: one "path<TAB>taxonomy" row per staged genome that is
+    ## listed in the taxonomies table.
+    ## The shell variables are expanded inside double quotes; single quotes would pass the
+    ## literal text to sed and no staged file would ever match.
+    ## In the taxonomy expression the "-" is placed last so it is a literal hyphen rather
+    ## than a range; "|" is kept because it separates the taxonomic levels.
+    while IFS=$'\t' read -r genome_name genome_taxonomy; do
+        genome_name="\$(printf '%s' "\${genome_name}" | sed 's/[^[:alnum:]_.-]/_/g')" &&
+        genome_taxonomy="\$(printf '%s' "\${genome_taxonomy}" | sed 's/[^[:alnum:]_.|-]/_/g')" &&
+
+        if [ -f "${input_dir}/\${genome_name}.fna" ]; then
+            echo -e "${input_dir}/\${genome_name}.fna\t\${genome_taxonomy}" >> "${input_dir}/genomes.tsv";
+        fi
+    done < '${taxonomies}' &&
+
+    metasbt index --workdir "."
+                  --database "Database"
+                  --references "${input_dir}/genomes.tsv"
+                  --nproc "\${GALAXY_SLOTS:-4}"
+                  --pack
+
+            #if $advanced.index_update_conditional.bfs_selection.bfs_auto == "false":
+                  --filter-size '${advanced.index_update_conditional.bfs_selection.bfs_size}'
+            #else:
+                  --increase-filter-size '${advanced.index_update_conditional.bfs_selection.bfs_size_increase}'
+                  --min-kmer-occurrences '${advanced.index_update_conditional.bfs_selection.min_kmer_occurrences}'
+            #end if
+
+            #if $advanced.index_update_conditional.kmers_selection.kmers_auto == "false":
+                  --kmer-size '${advanced.index_update_conditional.kmers_selection.kmer_len}'
+            #else:
+                  --limit-kmer-size '${advanced.index_update_conditional.kmers_selection.max_kmer_len}'
+            #end if
+
+            ## The threshold lives in the dereplication_selection conditional
+            #if $advanced.dereplication_selection.dereplication == "true":
+                  --dereplicate '${advanced.dereplication_selection.distance_threshold}'
+            #end if
+
+            #if $advanced.quality_control_selection.quality_control == "true":
+                  --completeness '${advanced.quality_control_selection.completeness}'
+                  --contamination '${advanced.quality_control_selection.contamination}'
+            #end if
+
+            &&
+#else:
+    ## A data table entry provides the tarball location in its "path" column,
+    ## while a history dataset is used directly
+    #if $advanced.index_update_conditional.database_selection.source == "cvmfs":
+        ln -s '${advanced.index_update_conditional.database_selection.db_tarball.fields.path}' "./MetaSBT-Database.tar.gz" &&
+    #else:
+        ln -s '${advanced.index_update_conditional.database_selection.db_tarball}' "./MetaSBT-Database.tar.gz" &&
+    #end if
+
+    metasbt unpack --workdir "."
+                   --database "Database"
+                   --tarball "./MetaSBT-Database.tar.gz" &&
+
+    ## Drop the link to the input tarball so that the final "mv" below only
+    ## matches the tarball written by this run
+    rm "./MetaSBT-Database.tar.gz" &&
+
+    metasbt update --workdir "."
+                   --database "Database"
+                   --genomes "./genomes.txt"
+                   --nproc "\${GALAXY_SLOTS:-4}"
+                   --pack
+
+            #if $advanced.dereplication_selection.dereplication == "true":
+                   --dereplicate '${advanced.dereplication_selection.distance_threshold}'
+            #end if
+
+            #if $advanced.quality_control_selection.quality_control == "true":
+                   --completeness '${advanced.quality_control_selection.completeness}'
+                   --contamination '${advanced.quality_control_selection.contamination}'
+            #end if
+
+            &&
+#end if
+
+## Copy the tables without their first two lines; the column names are attached
+## to the outputs as metadata instead
+tail -n +3 "./Database/clusters.tsv" > clusters.tsv &&
+tail -n +3 "./Database/genomes.tsv" > genomes.tsv &&
+
+mv ./MetaSBT-Database*.tar.gz MetaSBT-Database.tar.gz
+    ]]></command>
+
+    <inputs>
+        <!-- Input genomes -->
+        <param name="genomes" format="fasta,fasta.gz" multiple="true" type="data"
+               label="Input genomes"
+               help="Select a set of input genomes." />
+
+        <!-- Optional file mapping the input file names to their taxonomic labels -->
+        <param name="taxonomies" format="tsv" multiple="false" type="data" optional="true"
+               label="Input table with taxonomic labels"
+               help="Optional two-columns table with the input file names and their full taxonomic labels. This is not required in case the input files are not reference genomes." />
+
+        <!-- Advanced options -->
+        <section name="advanced" expanded="true"
+                 title="Advanced options"
+                 help="Access advanced options to customize k-mer length, bloom filter size, and other settings.">
+
+            <!-- Show different options in case of index or update -->
+            <conditional name="index_update_conditional">
+                <param name="index_update_option" type="select" 
+                        label="MetaSBT database"
+                        help="When enabled, most of the advanced options are inherited from an already existing database and cannot be modified.">
+                    <option value="true">Update a MetaSBT database</option>
+                    <option value="false" selected="true">Build your own MetaSBT database from scratch</option>
+                </param>
+
+                <when value="false">
+                    <!-- Index -->
+                    <!-- Estimate a k-mer length -->
+                    <conditional name="kmers_selection">
+                        <!-- Enable k-mer length estimation -->
+                        <param name="kmers_auto" type="select" 
+                                label="K-mer length"
+                                help="Automatically estimate a proper k-mer length for the input set of genomes with Kitsune.">
+                            <option value="true" selected="true">Estimate a proper k-mer length for your set of genomes</option>
+                            <option value="false">Set a k-mer length</option>
+                        </param>
+
+                        <when value="true">
+                            <!-- Limit k-mer length -->
+                            <param name="max_kmer_len" type="integer" value="9" min="8" max="128"
+                                   label="Max k-mer length"
+                                   help="Limit the length of k-mers for the estimation of the best k-mer length with Kitsune." />
+                        </when>
+
+                        <when value="false">
+                            <!-- K-mer length -->
+                            <param name="kmer_len" type="integer" value="21" min="8" max="128"
+                                   label="Set a k-mer length"
+                                   help="Set the length of k-mers." />
+                        </when>
+                    </conditional>
+
+                    <!-- Estimate the bloom filter size -->
+                    <conditional name="bfs_selection">
+                        <!-- Enable bloom filter size estimation -->
+                        <param name="bfs_auto" type="select" 
+                                label="Bloom filter size"
+                                help="Automatically estimate the most appropriate bloom filter size that better fits with the input set of genomes with ntCard.">
+                            <option value="true" selected="true">Estimate the bloom filter size</option>
+                            <option value="false">Set a bloom filter size</option>
+                        </param>
+
+                        <when value="true">
+                            <!-- Increment the estimated bloom filter size -->
+                            <param name="bfs_size_increase" type="float" value="5" min="0" max="100"
+                                   label="Increment the bloom filter size"
+                                   help="Increment the estimated bloom filter size by this percentage in case you are planning to update the database with new genomes in future." />
+
+                            <!-- Minimum number of occurrences of k-mers computed with ntCard -->
+                            <param name="min_kmer_occurrences" type="integer" value="1" min="1"
+                                   label="Minimum k-mer occurrences"
+                                   help="Minimum number of occurrences of k-mers to be considered for the estimation of the bloom filter size and for building the bloom filter sketches." />
+                        </when>
+
+                        <when value="false">
+                            <!-- Bloom filter size -->
+                            <param name="bfs_size" type="integer" value="10000"
+                                   label="Set a bloom filter size"
+                                   help="Set a bloom filter size for building genome or sequence sketches." />
+                        </when>
+                    </conditional>
+                </when>
+
+                <when value="true">
+                    <expand macro="database"/>
+                </when>
+            </conditional>
+
+            <conditional name="dereplication_selection">
+                <!-- Enable the dereplication of input genomes -->
+                <param name="dereplication" type="select"
+                        label="Dereplication"
+                        help="Enable the dereplication of input genomes based on their ANI distance.">
+                    <option value="true">Dereplicate the input genomes</option>
+                    <option value="false" selected="true">Do not dereplicate the input genomes</option>
+                </param>
+
+                <when value="true">
+                    <!-- ANI distance threshold -->
+                    <param name="distance_threshold" type="float" value="0.01" min="0.0" max="1.0"
+                        label="Distance threshold"
+                        help="Set a threshold based on the ANI distance of the sketch representation of the input genomes versus themselves and the other genomes in the database (in case of an update)." />
+                </when>
+
+                <when value="false" />
+            </conditional>
+
+            <conditional name="quality_control_selection">
+                <!-- Enable the quality control of input genomes -->
+                <param name="quality_control" type="select"
+                        label="Quality Control"
+                        help="Filter out genomes based on their completeness and contamination.">
+                    <option value="true">Enable the quality control of input genomes</option>
+                    <option value="false" selected="true">Do not assess for the quality of genomes</option>
+                </param>
+
+                <when value="true">
+                    <!-- Completeness -->
+                    <param name="completeness" type="float" value="90.0" min="0.0" max="100.0"
+                        label="Completeness threshold"
+                        help="Set a threshold on the completeness and discard everything below this threshold." />
+
+                    <!-- Contamination -->
+                    <param name="contamination" type="float" value="5.0" min="0.0" max="100.0"
+                        label="Contamination threshold"
+                        help="Set a threshold on the contamination and discard everything above this threshold." />
+                </when>
+
+                <when value="false" />
+            </conditional>
+        </section>
+    </inputs>
+
+    <outputs>
+        <!-- Both tables are picked up from the job working directory, where the command
+             section writes them from ./Database with `tail -n +3` (first two lines removed).
+             Column names and types are therefore attached below as metadata. -->
+
+        <!-- Table with known and unknown clusters -->
+        <data format="tabular" name="clusters_table" label="${tool.name} on ${on_string}: clusters" from_work_dir="clusters.tsv">
+            <actions>
+                <action name="column_names" type="metadata" default="Cluster,Level,Bloom Filter Density,Number of Reference Genomes,Number of MAGs,List of Reference Genomes,List of MAGs,Cluster Centroid,Known,Assigned Taxonomy,Internal Taxonomy,Minimum pair-wise ANI,Maximum pair-wise ANI" />
+                <action name="column_types" type="metadata" default="str,str,float,int,int,str,str,str,str,str,str,float,float" />
+            </actions>
+        </data>
+
+        <!-- Table with list of genomes and their assignments -->
+        <data format="tabular" name="genomes_table" label="${tool.name} on ${on_string}: genomes" from_work_dir="genomes.tsv">
+            <actions>
+                <action name="column_names" type="metadata" default="Genome,Type,Assigned Taxonomy,Internal Taxonomy" />
+                <action name="column_types" type="metadata" default="str,str,str,str" />
+            </actions>
+        </data>
+
+        <!-- Database as compressed tarball; the command section renames the packed
+             tarball to MetaSBT-Database.tar.gz as its last step -->
+        <data format="tar" name="database" label="${tool.name} on ${on_string}: database" from_work_dir="MetaSBT-Database.tar.gz" />
+    </outputs>
+
+    <tests>
+        <!-- Both tests exercise the update branch (index_update_option = true) with the same
+             six genomes and the same assertions; they only differ in where the database
+             tarball comes from. The build-from-scratch branch is not covered here. -->
+
+        <!-- Test 1: update a database supplied as a tarball from the history -->
+        <test expect_num_outputs="3">
+            <param name="genomes" value="genome_1.fna.gz,genome_2.fna.gz,genome_3.fna.gz,genome_4.fna.gz,genome_5.fna.gz,genome_6.fna.gz" ftype="fasta.gz" />
+
+            <param name="advanced|index_update_conditional|index_update_option" value="true" />
+            <param name="advanced|index_update_conditional|database_selection|source" value="history" />
+            <param name="advanced|index_update_conditional|database_selection|db_tarball" ftype="tar" value="MetaSBT-Test-20250620.1.tar.gz" />
+
+            <output name="clusters_table" ftype="tabular">
+                <assert_contents>
+                    <has_text text="k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus" />
+                </assert_contents>
+            </output>
+
+            <output name="genomes_table" ftype="tabular">
+                <assert_contents>
+                    <has_text text="genome_1" />
+                    <has_text text="genome_2" />
+                    <has_text text="genome_3" />
+                    <has_text text="genome_4" />
+                </assert_contents>
+            </output>
+
+            <!-- The tarball is only checked by size, within the given delta -->
+            <output name="database" ftype="tar">
+                <assert_contents>
+                    <has_size value="468556" delta="10000"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Test 2: update a database selected from the metasbt_databases data table
+             (entry "test_db" of test-data/metasbt_databases.loc) -->
+        <test expect_num_outputs="3">
+            <param name="genomes" value="genome_1.fna.gz,genome_2.fna.gz,genome_3.fna.gz,genome_4.fna.gz,genome_5.fna.gz,genome_6.fna.gz" ftype="fasta.gz" />
+
+            <param name="advanced|index_update_conditional|index_update_option" value="true" />
+            <param name="advanced|index_update_conditional|database_selection|source" value="cvmfs" />
+            <param name="advanced|index_update_conditional|database_selection|db_tarball" value="test_db" />
+
+            <output name="clusters_table" ftype="tabular">
+                <assert_contents>
+                    <has_text text="k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus" />
+                </assert_contents>
+            </output>
+
+            <output name="genomes_table" ftype="tabular">
+                <assert_contents>
+                    <has_text text="genome_1" />
+                    <has_text text="genome_2" />
+                    <has_text text="genome_3" />
+                    <has_text text="genome_4" />
+                </assert_contents>
+            </output>
+
+            <!-- The tarball is only checked by size, within the given delta -->
+            <output name="database" ftype="tar">
+                <assert_contents>
+                    <has_size value="468556" delta="10000"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+**What it does**
+
+MetaSBT is a scalable framework for the characterization of known and still unknown microbial genomes with Sequence Bloom Trees.
+This tool acts as an interface to the `index` and `update` subroutines of MetaSBT for building new databases and updating existing or predefined public ones.
+
+-----
+
+.. class:: infomark
+
+Please visit the official GitHub repository_ for additional information about MetaSBT.
+Public MetaSBT Databases are available at the official MetaSBT-DBs_ repository.
+
+.. _repository: https://github.com/cumbof/MetaSBT
+.. _MetaSBT-DBs: https://github.com/cumbof/MetaSBT-DBs
+]]>
+    </help>
+
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,68 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.1.5</token>
+    <token name="@GALAXY_VERSION@">1</token>
+    <token name="@PROFILE@">24.00</token>
+
+    <xml name="creator">
+        <creator>
+            <person givenName="Fabio" familyName="Cumbo"
+                    url="https://cumbof.github.io/"
+                    email="fabio.cumbo@gmail.com" />
+
+            <person givenName="Daniel" familyName="Blankenberg"
+                    url="https://www.lerner.ccf.org/computational-medicine/blankenberg/"
+                    email="blanked2@ccf.org" />
+        </creator>
+    </xml>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="4.9">sed</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">metasbt</requirement>
+        </requirements>
+    </xml>
+
+    <!-- Selection of the MetaSBT database to update.
+         Both branches expose their parameter under the same name (db_tarball):
+         with source "cvmfs" it is an entry of the metasbt_databases data table and the
+         tool command reads the tarball location from its "path" column; with source
+         "history" it is a tar dataset that the command uses directly. -->
+    <xml name="database">
+        <conditional name="database_selection">
+            <param name="source" type="select"
+                    label="Database source"
+                    help="Public databases are hosted on the Galaxy CVMFS.">
+                <option value="cvmfs" selected="true">Use a public database</option>
+                <option value="history">Use a database from the history</option>
+            </param>
+
+            <when value="cvmfs">
+                <param name="db_tarball" type="select"
+                        label="Select a MetaSBT database"
+                        help="Choose a specific version of a database.">
+                    <options from_data_table="metasbt_databases">
+                        <!-- Column indexes follow the order declared for the data table:
+                             value, version, name, path -->
+                        <column name="value" index="0" />
+                        <column name="version" index="1" />
+                        <column name="name" index="2" />
+                        <column name="path" index="3" />
+                        <!-- Order the entries by the first column, in reverse order -->
+                        <filter type="sort_by" column="0" reverse_sort_order="true"/>
+                        <validator message="No MetaSBT database is available" type="no_options"/>
+                    </options>
+                </param>
+            </when>
+
+            <when value="history">
+                <param name="db_tarball" type="data" format="tar"
+                       label="Select a MetaSBT database tarball" />
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Citation for the MetaSBT software repository.
+         BibTeX separates multiple authors with " and "; a comma would be parsed as a
+         single "Last, First" name. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+@misc{metasbtrepo,
+    author={Fabio Cumbo and Daniel Blankenberg},
+    title={{MetaSBT: A scalable framework for automatically indexing microbial genomes and accurately characterizing metagenome-assembled genomes with Sequence Bloom Trees}},
+    publisher={Github},
+    url={https://github.com/cumbof/MetaSBT}
+}
+            </citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
Binary file test-data/MetaSBT-Test-20250620.1.tar.gz has changed
Binary file test-data/genome_1.fna.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_1.tsv	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,11 @@
+level	closest	ani
+kingdom	k__Viruses	0.01795
+phylum	k__Viruses|p__Nucleocytoviricota	0.01743
+class	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes	0.01743
+order	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales	0.01743
+family	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae	0.01743
+genus	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus	0.01743
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus	0.00154
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus	0.02526
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Raccoonpox_virus	0.02548
+genome	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus|t__GCA_028389905.1_ASM2838990v1_genomic	0.00154
Binary file test-data/genome_2.fna.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_2.tsv	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,11 @@
+level	closest	ani
+kingdom	k__Viruses	0.01846
+phylum	k__Viruses|p__Nucleocytoviricota	0.01743
+class	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes	0.01743
+order	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales	0.01743
+family	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae	0.01743
+genus	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus	0.01743
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus	0.00119
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus	0.02545
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Raccoonpox_virus	0.02566
+genome	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus|t__GCA_028389905.1_ASM2838990v1_genomic	0.00119
Binary file test-data/genome_3.fna.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_3.tsv	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,10 @@
+level	closest	ani
+kingdom	k__Viruses	0.01765
+phylum	k__Viruses|p__Nucleocytoviricota	0.01699
+class	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes	0.01699
+order	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales	0.01699
+family	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae	0.01699
+genus	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus	0.01699
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus	0.00034
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus	0.02521
+genome	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus|t__GCA_028389905.1_ASM2838990v1_genomic	0.00034
Binary file test-data/genome_4.fna.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_4.tsv	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,10 @@
+level	closest	ani
+kingdom	k__Viruses	0.01829
+phylum	k__Viruses|p__Nucleocytoviricota	0.01741
+class	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes	0.01741
+order	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales	0.01741
+family	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae	0.01741
+genus	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus	0.01741
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus	0.00101
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus	0.02553
+genome	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus|t__GCA_028389905.1_ASM2838990v1_genomic	0.00101
Binary file test-data/genome_5.fna.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_5.tsv	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,11 @@
+level	closest	ani
+kingdom	k__Viruses	0.39054
+phylum	k__Viruses|p__Nucleocytoviricota	0.38041
+class	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes	0.38041
+order	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales	0.38041
+family	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae	0.38041
+genus	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus	0.38041
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus	0.38091
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus	0.38065
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Raccoonpox_virus	0.37966
+genome	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus|t__GCA_028389905.1_ASM2838990v1_genomic	0.38065
Binary file test-data/genome_6.fna.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_6.tsv	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,11 @@
+level	closest	ani
+kingdom	k__Viruses	0.44364
+phylum	k__Viruses|p__Nucleocytoviricota	0.43076
+class	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes	0.43076
+order	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales	0.43076
+family	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae	0.43076
+genus	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus	0.43076
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus	0.42999
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Monkeypox_virus	0.43146
+species	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Raccoonpox_virus	0.43083
+genome	k__Viruses|p__Nucleocytoviricota|c__Pokkesviricetes|o__Chitovirales|f__Poxviridae|g__Orthopoxvirus|s__Skunkpox_virus|t__GCA_001745695.1_ViralProj344115_genomic	0.43072
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metasbt_databases.loc	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,7 @@
+# Tab separated with four columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - version (MetaSBT DB version)
+# - name (Galaxy shows this in the UI)
+# - path (file path to the MetaSBT database)
+#
+test_db	test	Test Database	${__HERE__}/MetaSBT-Test-20250620.1.tar.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metasbt_databases.loc	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,12 @@
+# Expect four columns, tab separated, as follows:
+# - value (Galaxy records this in the Galaxy DB)
+# - version (MetaSBT DB version)
+# - name (Galaxy shows this in the UI)
+# - path (file path to the MetaSBT database tarball, i.e. the .tar.gz file)
+#
+# e.g.
+# viruses_20250115<tab>20250115<tab>"Viruses (References)"<tab>/path/to/MetaSBT-Viruses-20250115.tar.gz
+# value	version	name	path
+viruses_20250115	20250115	Viruses (References)	/cvmfs/data.galaxyproject.org/byhand/MetaSBT/MetaSBT-Viruses-20250115.tar.gz
+viruses_20250118	20250118	Viruses (References+MAGs)	/cvmfs/data.galaxyproject.org/byhand/MetaSBT/MetaSBT-Viruses-20250118.tar.gz
+viruses_20250320	20250320	Viruses (References+MAGs+MGV)	/cvmfs/data.galaxyproject.org/byhand/MetaSBT/MetaSBT-Viruses-20250320.tar.gz
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of MetaSBT databases in the required format -->
+    <table name="metasbt_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, version, name, path</columns>
+        <file path="tool-data/metasbt_databases.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Aug 22 19:04:25 2025 +0000
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of MetaSBT databases in the required format -->
+    <table name="metasbt_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, version, name, path</columns>
+        <file path="${__HERE__}/test-data/metasbt_databases.loc" />
+    </table>
+</tables>
\ No newline at end of file