Mercurial > repos > ufz > genomad_end_to_end

--- a/genomad_end_to_end.xml	Mon Oct 07 11:51:02 2024 +0000
+++ b/genomad_end_to_end.xml	Fri Jun 13 20:40:32 2025 +0000
@@ -1,9 +1,9 @@
 <tool id="genomad_end_to_end" name="geNomad" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
     <description>identify virus and plasmid genomes from nucleotide sequences</description>
     <macros>
-        <token name="@TOOL_VERSION@">1.8.0</token>
-        <token name="@VERSION_SUFFIX@">1</token>
-        <token name="@MIN_DB_VERSION@">1.2</token>  <!-- https://portal.nersc.gov/genomad/__data__/releases.txt -->
+        <token name="@TOOL_VERSION@">1.11.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@DB_VERSION@">1.9</token>  <!-- https://portal.nersc.gov/genomad/__data__/releases.txt -->
         <xml name="summary_output_macro" tokens="type">
             <data name="summary_@TYPE@_fna" format="fasta" from_work_dir="output/sequence_summary/sequence_@TYPE@.fna" label="${tool.name} on ${on_string}: @TYPE@ fasta"/>
             <data name="summary_@TYPE@_genes" format="tabular" from_work_dir="output/sequence_summary/sequence_@TYPE@_genes.tsv" label="${tool.name} on ${on_string}: @TYPE@ genes">
@@ -47,7 +47,8 @@
             $basic.disable_find_proviruses
             $basic.disable_nn_classification
             $basic.enable_score_calibration
-            $annotation.conservative_taxonomy
+            $annotation.lenient_taxonomy
+            $annotation.full_ictv_lineage
             --sensitivity $annotation.sensitivity
             --splits $annotation.splits
             $provirus.skip_integrase_identification
@@ -64,9 +65,9 @@
         </param>
         <param argument="INPUT" type="data" format="fasta" label="Input sequences" help="geNomad will work for isolate genomes, metagenomes, and metatranscriptomes"/>
         <param name="DATABASE" type="select" label="Reference data" help="">
-            <options from_data_table="genomad"/>
-            <!-- TODO needs to be activated with https://github.com/galaxyproject/galaxy/pull/18411
-                <validator type="in_range" min="@MIN_DB_VERSION@"/> -->
+            <options from_data_table="genomad">
+                <filter type="static_value" value="@DB_VERSION@" column="0"/>
+            </options>
         </param>
         <conditional name="filter_cond">
             <param name="filtering_preset" type="select" label="Filtering presets" help="After classification, sequences are further filtered to remove possible false positives. The --conservative preset makes those filters even more aggressive, resulting in more restricted sets of plasmid and virus, containing only sequences whose classification is strongly supported. The --relaxed preset disables all post-classification filters.">
@@ -79,6 +80,7 @@
             <when value="">
                 <param argument="--min-score" type="float" min="0" max="1" value="0.7" label="Minimum score to flag a sequence as virus or plasmid" help=""/>
                 <param argument="--max-fdr" type="float" min="0" max="1" value="0.1" label="Maximum false discovery rate" help="This option will be ignored if the scores were not calibrated"/>
+                <param argument="--min-number-genes" type="integer" min="0" value="1" label="Minimum number of genes" help="The minimum number of genes a sequence must encode to be considered for classification as a plasmid or virus"/>
                 <param argument="--min-plasmid-marker-enrichment" type="float" value="0.1" label="Minimum allowed value for the plasmid marker enrichment score" help="This enrichment score represents the total enrichment of plasmid markers in the sequence. Sequences with multiple plasmid markers will have higher values than the ones that encode few or no markers. This option will be ignored if the annotation module was not executed." />
                 <param argument="--min-virus-marker-enrichment" type="float" value="0.0" label="Minimum allowed value for the virus marker enrichment score" help="This enrichment score represents the total enrichment of virus markers in the sequence. Sequences with multiple virus markers will have higher values than the ones that encode few or no markers. This option will be ignored if the annotation module was not executed." />
                 <param argument="--min-plasmid-hallmarks" type="integer" min="0" value="0" label="minimum number of plasmid hallmarks in the identified plasmids" help="this option will be ignored if the annotation module was not executed." />
@@ -90,11 +92,12 @@
         </conditional>
         <section name="basic" title="basic options" expanded="true">
             <param argument="--disable-find-proviruses" type="boolean" truevalue="" falsevalue="--disable-find-proviruses" checked="true" label="Execute the find-proviruses module" help="" />
-            <param argument="--disable-nn-classification" type="boolean" truevalue="" falsevalue="--disable-nn-classification" checked="true" label="Execute the find-proviruses module" help="" />
+            <param argument="--disable-nn-classification" type="boolean" truevalue="" falsevalue="--disable-nn-classification" checked="true" label="Execute the nn-classification and aggregated-classification modules" help="" />
             <param argument="--enable-score-calibration" type="boolean" truevalue="--enable-score-calibration" falsevalue="" checked="false" label="Execute the score-calibration module" help="" />
         </section>
         <section name="annotation" title="annotation options" expanded="true">
-            <param argument="--conservative-taxonomy" type="boolean" truevalue="--conservative-taxonomy" falsevalue="" checked="false" label="More conservative virus taxonomic assignment" help="This might reduce the amount of genomes assigned to the family level, but will decrease the rate of family misassignment" />
+            <param argument="--lenient-taxonomy" type="boolean" truevalue="--lenient-taxonomy" falsevalue="" checked="false" label="Allow classification of virus genomes to taxa below the family rank" help="That is, subfamily, genus, subgenus, and species. The subfamily and subgenus ranks are only shown if --full-ictv-lineage is also used" />
+            <param argument="--full-ictv-lineage" type="boolean" truevalue="--full-ictv-lineage" falsevalue="" checked="false" label="Output the full ICTV lineage of each virus genome" help="Includes ranks that are hidden by default (subrealm, subkingdom, subphylum, subclass, suborder, subfamily, and subgenus). The subfamily and subgenus ranks are only shown if --lenient-taxonomy is also used." />
             <param argument="--sensitivity" type="float" min="0" value="4.2" label="MMseqs2 marker search sensitivity" help="Higher values will annotate more proteins, but the search will be slower and consume more memory" />
             <param argument="--splits" type="integer" min="0" value="0" label="Split the data for the MMseqs2 search." help="Higher values will reduce memory usage, but will make the search slower. If the MMseqs2 search is failing, try to increase the number of splits. Consult you Galaxy admin if more memory may be used" />
         </section>
@@ -119,29 +122,29 @@
         <test>
             <param name="license" value="true"/>
             <param name="INPUT" value="GCF_009025895.1_ASM902589v1_genomic.fna" ftype="fasta"/>
-            <param name="DATABASE" value="1.2"/>
+            <param name="DATABASE" value="1.9"/>
             <section name="annotation">
                 <param name="splits" value="8"/><!-- needed for low mem CI-->
             </section>
             <output name="summary_plasmid_fna">
                 <assert_contents>
-                    <has_line_matching expression="^>.*" n="5"/>
+                    <has_line_matching expression="^>.*" n="6"/>
                 </assert_contents>
             </output>
             <output name="summary_plasmid_genes">
                 <assert_contents>
-                    <has_n_lines n="336"/>
+                    <has_n_lines n="343"/>
                     <has_n_columns n="20"/>
                 </assert_contents>
             </output>
             <output name="summary_plasmid_proteins">
                 <assert_contents>
-                    <has_line_matching expression="^>.*" n="335"/>
+                    <has_line_matching expression="^>.*" n="342"/>
                 </assert_contents>
             </output>
             <output name="summary_plasmid_summary">
                 <assert_contents>
-                    <has_n_lines n="6"/>
+                    <has_n_lines n="7"/>
                     <has_n_columns n="11"/>
                 </assert_contents>
             </output>
--- a/test-data.sh	Mon Oct 07 11:51:02 2024 +0000
+++ b/test-data.sh	Fri Jun 13 20:40:32 2025 +0000
@@ -1,4 +1,4 @@
 #!/bin/bash

 cd test-data/
-wget -O - https://zenodo.org/records/11945948/files/genomad_microdb.tar.gz?download=1 | tar -xz
\ No newline at end of file
+wget -O - https://github.com/user-attachments/files/20716815/genomad_microdb.tar.gz | tar -xz
\ No newline at end of file
--- a/test-data/genomad.loc	Mon Oct 07 11:51:02 2024 +0000
+++ b/test-data/genomad.loc	Fri Jun 13 20:40:32 2025 +0000
@@ -1,1 +1,1 @@
-1.2	version 1.2	${__HERE__}/genomad_microdb/
+1.9	version 1.9	${__HERE__}/genomad_microdb/