comparison genomad_end_to_end.xml @ 2:26f5822743aa draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/genomad/ commit fea5692fe5258a520e43b739c5aa4d109756123f
author ufz
date Fri, 13 Jun 2025 20:40:32 +0000
parents edb671f0661e
children
comparison
equal deleted inserted replaced
1:edb671f0661e 2:26f5822743aa
1 <tool id="genomad_end_to_end" name="geNomad" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT"> 1 <tool id="genomad_end_to_end" name="geNomad" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
2 <description>identify virus and plasmid genomes from nucleotide sequences</description> 2 <description>identify virus and plasmid genomes from nucleotide sequences</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">1.8.0</token> 4 <token name="@TOOL_VERSION@">1.11.1</token>
5 <token name="@VERSION_SUFFIX@">1</token> 5 <token name="@VERSION_SUFFIX@">0</token>
6 <token name="@MIN_DB_VERSION@">1.2</token> <!-- https://portal.nersc.gov/genomad/__data__/releases.txt --> 6 <token name="@DB_VERSION@">1.9</token> <!-- https://portal.nersc.gov/genomad/__data__/releases.txt -->
7 <xml name="summary_output_macro" tokens="type"> 7 <xml name="summary_output_macro" tokens="type">
8 <data name="summary_@TYPE@_fna" format="fasta" from_work_dir="output/sequence_summary/sequence_@TYPE@.fna" label="${tool.name} on ${on_string}: @TYPE@ fasta"/> 8 <data name="summary_@TYPE@_fna" format="fasta" from_work_dir="output/sequence_summary/sequence_@TYPE@.fna" label="${tool.name} on ${on_string}: @TYPE@ fasta"/>
9 <data name="summary_@TYPE@_genes" format="tabular" from_work_dir="output/sequence_summary/sequence_@TYPE@_genes.tsv" label="${tool.name} on ${on_string}: @TYPE@ genes"> 9 <data name="summary_@TYPE@_genes" format="tabular" from_work_dir="output/sequence_summary/sequence_@TYPE@_genes.tsv" label="${tool.name} on ${on_string}: @TYPE@ genes">
10 <actions> 10 <actions>
11 <action name="column_names" type="metadata" default="gene,start,end,length,strand,gc_content,genetic_code,rbs_motif,marker,evalue,bitscore,uscg,plasmid_hallmark,virus_hallmark,taxid,taxname,annotation_conjscan,annotation_amr,annotation_accessions,annotation_description"/> 11 <action name="column_names" type="metadata" default="gene,start,end,length,strand,gc_content,genetic_code,rbs_motif,marker,evalue,bitscore,uscg,plasmid_hallmark,virus_hallmark,taxid,taxname,annotation_conjscan,annotation_amr,annotation_accessions,annotation_description"/>
45 #end if 45 #end if
46 --threads \${GALAXY_SLOTS:-4} 46 --threads \${GALAXY_SLOTS:-4}
47 $basic.disable_find_proviruses 47 $basic.disable_find_proviruses
48 $basic.disable_nn_classification 48 $basic.disable_nn_classification
49 $basic.enable_score_calibration 49 $basic.enable_score_calibration
50 $annotation.conservative_taxonomy 50 $annotation.lenient_taxonomy
51 $annotation.full_ictv_lineage
51 --sensitivity $annotation.sensitivity 52 --sensitivity $annotation.sensitivity
52 --splits $annotation.splits 53 --splits $annotation.splits
53 $provirus.skip_integrase_identification 54 $provirus.skip_integrase_identification
54 $provirus.skip_trna_identification 55 $provirus.skip_trna_identification
55 --composition $score.composition 56 --composition $score.composition
62 <param name="license" type="boolean" checked="false" label="Agree with geNomad license?" help="geNomad is free to use for internal use, research &amp; development, and non-commercial purposes only."> 63 <param name="license" type="boolean" checked="false" label="Agree with geNomad license?" help="geNomad is free to use for internal use, research &amp; development, and non-commercial purposes only.">
63 <validator type="expression" message="You need to agree to the geNomad license in order to use this tool. License is linked in the help below.">value</validator> 64 <validator type="expression" message="You need to agree to the geNomad license in order to use this tool. License is linked in the help below.">value</validator>
64 </param> 65 </param>
65 <param argument="INPUT" type="data" format="fasta" label="Input sequences" help="geNomad will work for isolate genomes, metagenomes, and metatranscriptomes"/> 66 <param argument="INPUT" type="data" format="fasta" label="Input sequences" help="geNomad will work for isolate genomes, metagenomes, and metatranscriptomes"/>
66 <param name="DATABASE" type="select" label="Reference data" help=""> 67 <param name="DATABASE" type="select" label="Reference data" help="">
67 <options from_data_table="genomad"/> 68 <options from_data_table="genomad">
68 <!-- TODO needs to be activated with https://github.com/galaxyproject/galaxy/pull/18411 69 <filter type="static_value" value="@DB_VERSION@" column="0"/>
69 <validator type="in_range" min="@MIN_DB_VERSION@"/> --> 70 </options>
70 </param> 71 </param>
71 <conditional name="filter_cond"> 72 <conditional name="filter_cond">
72 <param name="filtering_preset" type="select" label="Filtering presets" help="After classification, sequences are further filtered to remove possible false positives. The --conservative preset makes those filters even more aggressive, resulting in more restricted sets of plasmid and virus, containing only sequences whose classification is strongly supported. The --relaxed preset disables all post-classification filters."> 73 <param name="filtering_preset" type="select" label="Filtering presets" help="After classification, sequences are further filtered to remove possible false positives. The --conservative preset makes those filters even more aggressive, resulting in more restricted sets of plasmid and virus, containing only sequences whose classification is strongly supported. The --relaxed preset disables all post-classification filters.">
73 <option value="--conservative">Conservative (--conservative)</option> 74 <option value="--conservative">Conservative (--conservative)</option>
74 <option value="--relaxed">Relaxed (--relaxed)</option> 75 <option value="--relaxed">Relaxed (--relaxed)</option>
77 <when value="--conservative"/> 78 <when value="--conservative"/>
78 <when value="--relaxed"/> 79 <when value="--relaxed"/>
79 <when value=""> 80 <when value="">
80 <param argument="--min-score" type="float" min="0" max="1" value="0.7" label="Minimum score to flag a sequence as virus or plasmid" help=""/> 81 <param argument="--min-score" type="float" min="0" max="1" value="0.7" label="Minimum score to flag a sequence as virus or plasmid" help=""/>
81 <param argument="--max-fdr" type="float" min="0" max="1" value="0.1" label="Maximum false discovery rate" help="This option will be ignored if the scores were not calibrated"/> 82 <param argument="--max-fdr" type="float" min="0" max="1" value="0.1" label="Maximum false discovery rate" help="This option will be ignored if the scores were not calibrated"/>
83 <param argument="--min-number-genes" type="integer" min="0" value="1" label="Minimum number of genes" help="The minimum number of genes a sequence must encode to be considered for classification as a plasmid or virus"/>
82 <param argument="--min-plasmid-marker-enrichment" type="float" value="0.1" label="Minimum allowed value for the plasmid marker enrichment score" help="This enrichment score represents the total enrichment of plasmid markers in the sequence. Sequences with multiple plasmid markers will have higher values than the ones that encode few or no markers. This option will be ignored if the annotation module was not executed." /> 84 <param argument="--min-plasmid-marker-enrichment" type="float" value="0.1" label="Minimum allowed value for the plasmid marker enrichment score" help="This enrichment score represents the total enrichment of plasmid markers in the sequence. Sequences with multiple plasmid markers will have higher values than the ones that encode few or no markers. This option will be ignored if the annotation module was not executed." />
83 <param argument="--min-virus-marker-enrichment" type="float" value="0.0" label="Minimum allowed value for the virus marker enrichment score" help="This enrichment score represents the total enrichment of virus markers in the sequence. Sequences with multiple virus markers will have higher values than the ones that encode few or no markers. This option will be ignored if the annotation module was not executed." /> 85 <param argument="--min-virus-marker-enrichment" type="float" value="0.0" label="Minimum allowed value for the virus marker enrichment score" help="This enrichment score represents the total enrichment of virus markers in the sequence. Sequences with multiple virus markers will have higher values than the ones that encode few or no markers. This option will be ignored if the annotation module was not executed." />
84 <param argument="--min-plasmid-hallmarks" type="integer" min="0" value="0" label="minimum number of plasmid hallmarks in the identified plasmids" help="this option will be ignored if the annotation module was not executed." /> 86 <param argument="--min-plasmid-hallmarks" type="integer" min="0" value="0" label="minimum number of plasmid hallmarks in the identified plasmids" help="this option will be ignored if the annotation module was not executed." />
85 <param argument="--min-plasmid-hallmarks-short-seqs" type="integer" min="0" value="1" label="minimum number of plasmid hallmarks in plasmids shorter than 2,500 bp" help="this option will be ignored if the annotation module was not executed." /> 87 <param argument="--min-plasmid-hallmarks-short-seqs" type="integer" min="0" value="1" label="minimum number of plasmid hallmarks in plasmids shorter than 2,500 bp" help="this option will be ignored if the annotation module was not executed." />
86 <param argument="--min-virus-hallmarks" type="integer" min="0" value="0" label="minimum number of virus hallmarks in the identified viruses" help="this option will be ignored if the annotation module was not executed." /> 88 <param argument="--min-virus-hallmarks" type="integer" min="0" value="0" label="minimum number of virus hallmarks in the identified viruses" help="this option will be ignored if the annotation module was not executed." />
88 <param argument="--max-uscg" type="integer" value="4" label="Maximum allowed number of universal single copy genes (USCGs) in a virus or a plasmid." help="Sequences with more than this number of USCGs will not be classified as viruses or plasmids, regardless of their score. This option will be ignored if the annotation module was not executed." /> 90 <param argument="--max-uscg" type="integer" value="4" label="Maximum allowed number of universal single copy genes (USCGs) in a virus or a plasmid." help="Sequences with more than this number of USCGs will not be classified as viruses or plasmids, regardless of their score. This option will be ignored if the annotation module was not executed." />
89 </when> 91 </when>
90 </conditional> 92 </conditional>
91 <section name="basic" title="basic options" expanded="true"> 93 <section name="basic" title="basic options" expanded="true">
92 <param argument="--disable-find-proviruses" type="boolean" truevalue="" falsevalue="--disable-find-proviruses" checked="true" label="Execute the find-proviruses module" help="" /> 94 <param argument="--disable-find-proviruses" type="boolean" truevalue="" falsevalue="--disable-find-proviruses" checked="true" label="Execute the find-proviruses module" help="" />
93 <param argument="--disable-nn-classification" type="boolean" truevalue="" falsevalue="--disable-nn-classification" checked="true" label="Execute the find-proviruses module" help="" /> 95 <param argument="--disable-nn-classification" type="boolean" truevalue="" falsevalue="--disable-nn-classification" checked="true" label="Execute the nn-classification and aggregated-classification modules" help="" />
94 <param argument="--enable-score-calibration" type="boolean" truevalue="--enable-score-calibration" falsevalue="" checked="false" label="Execute the score-calibration module" help="" /> 96 <param argument="--enable-score-calibration" type="boolean" truevalue="--enable-score-calibration" falsevalue="" checked="false" label="Execute the score-calibration module" help="" />
95 </section> 97 </section>
96 <section name="annotation" title="annotation options" expanded="true"> 98 <section name="annotation" title="annotation options" expanded="true">
97 <param argument="--conservative-taxonomy" type="boolean" truevalue="--conservative-taxonomy" falsevalue="" checked="false" label="More conservative virus taxonomic assignment" help="This might reduce the amount of genomes assigned to the family level, but will decrease the rate of family misassignment" /> 99 <param argument="--lenient-taxonomy" type="boolean" truevalue="--lenient-taxonomy" falsevalue="" checked="false" label="Allow classification of virus genomes to taxa below the family rank" help="That is, subfamily, genus, subgenus, and species. The subfamily and subgenus ranks are only shown if --full-ictv-lineage is also used" />
100 <param argument="--full-ictv-lineage" type="boolean" truevalue="--full-ictv-lineage" falsevalue="" checked="false" label="Output the full ICTV lineage of each virus genome" help="Includes ranks that are hidden by default (subrealm, subkingdom, subphylum, subclass, suborder, subfamily, and subgenus). The subfamily and subgenus ranks are only shown if --lenient-taxonomy is also used." />
98 <param argument="--sensitivity" type="float" min="0" value="4.2" label="MMseqs2 marker search sensitivity" help="Higher values will annotate more proteins, but the search will be slower and consume more memory" /> 101 <param argument="--sensitivity" type="float" min="0" value="4.2" label="MMseqs2 marker search sensitivity" help="Higher values will annotate more proteins, but the search will be slower and consume more memory" />
99 <param argument="--splits" type="integer" min="0" value="0" label="Split the data for the MMseqs2 search." help="Higher values will reduce memory usage, but will make the search slower. If the MMseqs2 search is failing, try to increase the number of splits. Consult your Galaxy admin if more memory may be used" /> 102 <param argument="--splits" type="integer" min="0" value="0" label="Split the data for the MMseqs2 search." help="Higher values will reduce memory usage, but will make the search slower. If the MMseqs2 search is failing, try to increase the number of splits. Consult your Galaxy admin if more memory may be used" />
100 </section> 103 </section>
101 <section name="provirus" title="find-proviruses options" expanded="true"> 104 <section name="provirus" title="find-proviruses options" expanded="true">
102 <param argument="--skip-integrase-identification" type="boolean" truevalue="--skip-integrase-identification" falsevalue="" checked="false" label="Disable provirus boundary extension using nearby integrases" /> 105 <param argument="--skip-integrase-identification" type="boolean" truevalue="--skip-integrase-identification" falsevalue="" checked="false" label="Disable provirus boundary extension using nearby integrases" />
117 </outputs> 120 </outputs>
118 <tests> 121 <tests>
119 <test> 122 <test>
120 <param name="license" value="true"/> 123 <param name="license" value="true"/>
121 <param name="INPUT" value="GCF_009025895.1_ASM902589v1_genomic.fna" ftype="fasta"/> 124 <param name="INPUT" value="GCF_009025895.1_ASM902589v1_genomic.fna" ftype="fasta"/>
122 <param name="DATABASE" value="1.2"/> 125 <param name="DATABASE" value="1.9"/>
123 <section name="annotation"> 126 <section name="annotation">
124 <param name="splits" value="8"/><!-- needed for low mem CI--> 127 <param name="splits" value="8"/><!-- needed for low mem CI-->
125 </section> 128 </section>
126 <output name="summary_plasmid_fna"> 129 <output name="summary_plasmid_fna">
127 <assert_contents> 130 <assert_contents>
128 <has_line_matching expression="^>.*" n="5"/> 131 <has_line_matching expression="^>.*" n="6"/>
129 </assert_contents> 132 </assert_contents>
130 </output> 133 </output>
131 <output name="summary_plasmid_genes"> 134 <output name="summary_plasmid_genes">
132 <assert_contents> 135 <assert_contents>
133 <has_n_lines n="336"/> 136 <has_n_lines n="343"/>
134 <has_n_columns n="20"/> 137 <has_n_columns n="20"/>
135 </assert_contents> 138 </assert_contents>
136 </output> 139 </output>
137 <output name="summary_plasmid_proteins"> 140 <output name="summary_plasmid_proteins">
138 <assert_contents> 141 <assert_contents>
139 <has_line_matching expression="^>.*" n="335"/> 142 <has_line_matching expression="^>.*" n="342"/>
140 </assert_contents> 143 </assert_contents>
141 </output> 144 </output>
142 <output name="summary_plasmid_summary"> 145 <output name="summary_plasmid_summary">
143 <assert_contents> 146 <assert_contents>
144 <has_n_lines n="6"/> 147 <has_n_lines n="7"/>
145 <has_n_columns n="11"/> 148 <has_n_columns n="11"/>
146 </assert_contents> 149 </assert_contents>
147 </output> 150 </output>
148 <output name="summary_virus_fna"> 151 <output name="summary_virus_fna">
149 <assert_contents> 152 <assert_contents>