comparison antismash.xml @ 4:e78e25d3b4bd draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/antismash commit f5f8e44e726c9f2cc57e0f0fe8182a73afa56669
author bgruening
date Tue, 31 May 2022 14:04:07 +0000
parents 5784e268efca
children bc88856eddab
comparison
equal deleted inserted replaced
3:5784e268efca 4:e78e25d3b4bd
1 <?xml version='1.0' encoding='utf-8'?> 1 <tool id="antismash" name="Antismash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <tool id="antismash" name="Antismash" version="5.1.2" profile="17.01">
3 <description>allows the genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters</description> 2 <description>allows the genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters</description>
4 <requirements> 3 <macros>
5 <requirement type="package" version="5.1.2">antismash</requirement> 4 <import>macros.xml</import>
6 </requirements> 5 </macros>
6 <expand macro='requirements'/>
7 <expand macro="bio_tools"/>
7 <version_command>antismash --version</version_command> 8 <version_command>antismash --version</version_command>
8 <command detect_errors="aggressive"> 9 <command detect_errors="aggressive">
9 <![CDATA[ 10 <![CDATA[
10 export PYTHONWARNINGS="ignore::FutureWarning" && 11 export PYTHONWARNINGS="ignore::FutureWarning" &&
11 12
16 #else: 17 #else:
17 #set $file_extension = $infile.ext 18 #set $file_extension = $infile.ext
18 #end if 19 #end if
19 20
20 ln -s '$infile' input_tempfile.$file_extension && 21 ln -s '$infile' input_tempfile.$file_extension &&
22 #if $genefinding_gff3
23 ln -s $genefinding_gff3 annotation.gff3 &&
24 #end if
25
21 26
22 ## create html folder 27 ## create html folder
23 mkdir -p '$htmloutputfolder' && 28 mkdir -p '$htmloutputfolder' &&
24 29
25 antismash 30 antismash
26 --cpus "\${GALAXY_SLOTS:-12}" 31 --cpus "\${GALAXY_SLOTS:-12}"
27 --taxon '${cond_taxon.taxon}' 32 --taxon '${cond_taxon.taxon}'
28 33 #if $genefinding_gff3
34 --genefinding-gff3 annotation.gff3
35 #end if
29 --genefinding-tool $cond_taxon.genefinding_tool 36 --genefinding-tool $cond_taxon.genefinding_tool
30 37
31 ${cb_general} 38 ${cb_general}
32 ${cb_subclusters} 39 ${cb_subclusters}
33 ${cb_knownclusters} 40 ${cb_knownclusters}
34 ${smcog_trees} 41 ${smcog_trees}
35 --tta-threshold ${tta_threshold} 42 --tta-threshold ${tta_threshold}
36 ${asf} 43 ${asf}
37 44
38 ${extra_cluster}
39 ${clusterhmmer} 45 ${clusterhmmer}
40 ${fullhmmer} 46 ${fullhmmer}
41 #if $cond_taxon.taxon == 'fungi': 47 #if $cond_taxon.taxon == 'fungi':
42 $cond_taxon.cassis 48 $cond_taxon.cassis
49 #else
50 $cond_taxon.tigrfam
43 #end if 51 #end if
52
53 ${cc_mibig}
54 ${rre}
55 --logfile $log
56
57 ## Advanced options
58 --minlength $advanced_options.minlength
59 --hmmdetection-strictness $advanced_options.hmmdetection_strictness
60 --cb-nclusters $advanced_options.cb_nclusters
61 --cb-min-homology-scale $advanced_options.cb_min_homology_scale
62 --rre-cutoff $advanced_options.rre_cutoff
63 --rre-minlength $advanced_options.rre_minlength
44 64
45 input_tempfile.$file_extension && 65 input_tempfile.$file_extension &&
46 66
47 ## copy all content to html folder 67 ## copy all content to html folder
48 cp input_tempfile/index.html '${html}' 2> /dev/null && 68 cp input_tempfile/index.html '${html}' 2> /dev/null &&
49 cp -r input_tempfile/* '${htmloutputfolder}' 69 cp -r input_tempfile/* '${htmloutputfolder}'
50
51 ]]> 70 ]]>
52 </command> 71 </command>
53 <inputs> 72 <inputs>
54 <param name="infile" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank,EMBL or FASTA format"/> 73 <param name="infile" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank,EMBL or FASTA format"/>
74 <param argument="--genefinding-gff3" type="data" format="gff3" optional="true" label="GFF3 file" help="Specify GFF3 file to extract features from" />
55 75
56 <conditional name="cond_taxon"> 76 <conditional name="cond_taxon">
57 <param argument="--taxon" type="select" label="Origin of DNA"> 77 <param argument="--taxon" type="select" label="Taxonomic classification of input sequence" help="Source of DNA">
58 <option value="bacteria" selected="True">Bacteria</option> 78 <option value="bacteria" selected="True">Bacteria</option>
59 <option value="fungi">Fungi</option> 79 <option value="fungi">Fungi</option>
60 </param> 80 </param>
61 <when value="bacteria"> 81 <when value="bacteria">
62 <param argument="--genefinding-tool" type="select" label="Specify algorithm used for gene finding" 82 <expand macro="genefinding">
63 help="The 'error' option will raise an error if genefinding is attempted. The 'none' option will not run genefinding">
64 <option value="prodigal" selected="True">Prodigal</option> 83 <option value="prodigal" selected="True">Prodigal</option>
65 <option value="prodigal-m">Prodigal Metagenomic/Anonymous</option> 84 <option value="prodigal-m">Prodigal Metagenomic/Anonymous</option>
66 <option value="glimmerhmm">GlimmerHMM</option> 85 </expand>
67 <option value="none">None</option> 86 <param argument="--tigrfam" type="boolean" truevalue="--tigrfam" falsevalue="" checked="false"
68 <option value="error">Error</option> 87 label="Annotate with TIGRFam" help="Annotate clusters using TIGRFam profiles. TIGRFAMs is a
69 </param> 88 collection of manually curated protein families focusing primarily on prokaryotic sequences" />
70 </when> 89 </when>
71 <when value="fungi"> 90 <when value="fungi">
72 <param argument="--genefinding-tool" type="select" label="Specify algorithm used for gene finding" 91 <expand macro="genefinding"/>
73 help="The 'error' option will raise an error if genefinding is attempted. The 'none' option will not run genefinding"> 92 <param argument="--cassis" type="boolean" truevalue="--cassis" falsevalue="" checked="false"
74 <option value="glimmerhmm">GlimmerHMM</option> 93 label="Motif based prediction of SM gene cluster regions" help="Improved prediction of gene cluster borders for fungal BGCs (CASSIS)"/>
75 <option value="none">None</option>
76 <option value="error">Error</option>
77 </param>
78 <param argument="--cassis" type="boolean" truevalue="--cassis" falsevalue="" checked="False"
79 label="Motif based prediction of SM gene cluster regions" />
80 </when> 94 </when>
81 </conditional> 95 </conditional>
82 96 <param argument="--fullhmmer" type="boolean" truevalue="--fullhmmer" falsevalue="" checked="false"
83 97 label="Full genome PFAM annotation" help="Each gene product encoded in the detected BGCs is analyzed against the PFAM database.
84 <param argument="--cb-general" type="boolean" truevalue="--cb-general" falsevalue="" checked="False" 98 Hits are annotated in the final Genbank/EMBL files. Also, selecting this option normally increases the runtime"/>
85 label="BLAST identified clusters against known clusters" 99
86 help="Compare identified clusters against a database of antiSMASH-predicted clusters." /> 100 <param argument="--clusterhmmer" type="boolean" truevalue="--clusterhmmer" falsevalue="" checked="false"
87 <param argument="--cb-subclusters" type="boolean" truevalue="--cb-subclusters" falsevalue="" checked="True" 101 label="PFAM annotation for only clusters" help="Run a cluster-limited HMMer analysis" />
88 label="Subcluster BLAST analysis" 102
89 help="Compare identified clusters against known subclusters responsible for synthesising precursors." /> 103 <param argument="--asf" type="boolean" truevalue="--asf" falsevalue="" checked="True"
90 <param argument="--cb-knownclusters" type="boolean" truevalue="--cb-knownclusters" falsevalue="" checked="True" 104 label="Run active site finder analysis" help="Active sites of several highly conserved biosynthetic enzymes are detected and variations of the active sites are reported"/>
91 label="KnowCluster BLAST analysis" 105
92 help="Compare identified clusters against known gene clusters from the MIBiG database."/> 106 <param argument="--cc-mibig" type="boolean" truevalue="--cc-mibig" falsevalue="" checked="false" label="Comparison against MIBiG database" help="Run a comparison against the MIBiG database" />
107
108 <param argument="--cb-general" type="boolean" truevalue="--cb-general" falsevalue="" checked="false"
109 label="BLAST identified clusters against known clusters"
110 help="Compare identified clusters against a database of antiSMASH-predicted clusters." />
111
112 <param argument="--cb-knownclusters" type="boolean" truevalue="--cb-knownclusters" falsevalue="" checked="true"
113 label="KnownCluster BLAST analysis"
114 help="Compare identified clusters against known gene clusters from the MIBiG database. MIBiG is a hand curated data collection of biosynthetic
115 gene clusters, which have been experimentally characterized"/>
116
117 <param argument="--cb-subclusters" type="boolean" truevalue="--cb-subclusters" falsevalue="" checked="true"
118 label="Subcluster BLAST analysis"
119 help="The identified clusters are searched against a database containing operons involved in the biosynthesis of common secondary metabolite building
120 blocks (e.g. the biosynthesis of non-proteinogenic amino acids)" />
121
122 <param argument="--pfam2go" type="boolean" truevalue="--pfam2go" falsevalue="" checked="true"
123 label="Run Pfam to Gene Ontology mapping module" />
124
125 <param argument="--rre" type="boolean" truevalue="--rre" falsevalue="" checked="true" label="RREFinder precision mode" help="Run RREFinder precision mode on all RiPP gene clusters. Many ribosomally
126 synthesized and posttranslationally modified peptide classes (RiPPs) are reliant on a domain called the RiPP recognition element (RRE). The RRE binds specifically to a precursor peptide and directs
127 the posttranslational modification enzymes to their substrates" />
128
93 <param argument="--smcog-trees" type="boolean" checked="True" truevalue="--smcog-trees" falsevalue="" 129 <param argument="--smcog-trees" type="boolean" checked="True" truevalue="--smcog-trees" falsevalue=""
94 label="Analysis of secondary metabolism gene families (smCOGs)" 130 label="Analysis of secondary metabolism gene families (smCOGs)"
95 help="Look for sec. met. clusters of orthologous groups."/> 131 help="It attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene family using profile hidden Markov models specific for
96 <param argument="--asf" type="boolean" truevalue="--asf" falsevalue="" checked="True" 132 the conserved sequence region characteristic of this family. In other words, each gene of the cluster is compared to a database of clusters of orthologous groups
97 label="Run active site finder analysus" /> 133 of proteins involved in secondary metabolism"/>
98 <param argument="-pfam2go" type="boolean" truevalue="-pfam2go" falsevalue="" checked="True" 134
99 label="Run Pfam to Gene Ontology mapping module" /> 135 <param argument="--tta-threshold" type="float" value="0.65" label="Lowest GC content to annotate TTA codons at"
100 <param argument="--tta-threshold" type="float" value="0.65" label="Lowest GC content to annotate TTA codons at" /> 136 help="High-GC containing bacterial sequences contain the rare Leu-codon “TTA” as a means of post-transcriptional regulation by limiting/controlling the amount of TTA-tRNA in the cell.
101 137 This type of regulation is commonly found in secondary metabolite BGCs. This feature will annotate such TTA codons in the identified BGCs. Default: 0.65"/>
102 <param argument="--clusterhmmer" type="boolean" truevalue="--clusterhmmer" falsevalue="" checked="False" 138 <section name="advanced_options" title="Advanced options">
103 label="Run a cluster-limited HMMer analysis" /> 139 <param argument="--minlength" type="integer" min="0" value="1000" label="Min length" help="Only process sequences larger than this value. Default: 1000" />
104 <param argument="--fullhmmer" type="boolean" truevalue="--fullhmmer" falsevalue="" checked="False" 140 <param argument="--hmmdetection-strictness" type="select" label="HMM detection strictness" help="Defines which level of strictness to use for HMM-based cluster detection. Default: relaxed">
105 label="Run a whole-genome HMMer analysis" /> 141 <option value="strict">Strict</option>
106 142 <option value="relaxed" selected="true">Relaxed</option>
107 <param name="extra_cluster" type="select" label="Clusters"> 143 <option value="loose">Loose</option>
108 <option value="--cf-create-clusters" selected="True">Find extra clusters</option> 144 </param>
109 <option value="--cf-borders-only">Only annotate borders of existing clusters</option> 145 <param argument="--cb-nclusters" type="integer" min="0" max="50" value="10" label="Number of clusters from ClusterBlast to display" help="Default: 10" />
110 </param> 146 <param argument="--cb-min-homology-scale" type="float" min="0" max="1" value="0" label="ClusterBlast minimum scaling factor" help="A minimum scaling factor
147 for the query BGC in ClusterBlast results. Default: 0" />
148 <param argument="--rre-cutoff" type="float" min="0" max="100" value="25" label="RRE cutoff" help="Bitscore cutoff for RRE pHMM detection. Default: 25.0" />
149 <param argument="--rre-minlength" type="integer" min="0" max="100" value="50" label="RRE minlength" help="Minimum amino acid length of RRE domains. Default: 50" />
150 </section>
111 151
112 <param name="outputs" type="select" multiple="true" label="Outputs"> 152 <param name="outputs" type="select" multiple="true" label="Outputs">
113 <option value="html" selected="True">HTML file</option> 153 <option value="html" selected="True">HTML file</option>
114 <option value="all">All results</option> 154 <option value="all">All results</option>
115 <option value="embl">EMBL files</option> 155 <option value="embl">EMBL files</option>
116 <option value="gb">GenBank files</option> 156 <option value="gb">GenBank files</option>
117 <option value="genecluster_tabular">Gene clusters</option> 157 <option value="genecluster_tabular">Gene clusters</option>
158 <option value="log">Log file</option>
118 </param> 159 </param>
119 160
120 </inputs> 161 </inputs>
121 <outputs> 162 <outputs>
122 <collection type="list" name="genecluster_tabular" label="${tool.name} on ${on_string} (Gene Cluster)"> 163 <collection type="list" name="genecluster_tabular" label="${tool.name} on ${on_string}: Gene Cluster">
123 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="input_tempfile" ext="txt" visible="false" /> 164 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="input_tempfile" ext="txt" visible="false" />
124 <filter>'genecluster_tabular' in outputs</filter> 165 <filter>'genecluster_tabular' in outputs</filter>
125 </collection> 166 </collection>
126 <collection name="genbank" type="list" label="${tool.name} on ${on_string} (GenBank)"> 167 <collection name="genbank" type="list" label="${tool.name} on ${on_string}: GenBank">
127 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" directory="input_tempfile" ext="genbank" visible="false" /> 168 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" directory="input_tempfile" ext="genbank" visible="false" />
128 <filter>'gb' in outputs</filter> 169 <filter>'gb' in outputs or fullhmmer</filter>
129 </collection> 170 </collection>
130 <collection name="embl" type="list" label="${tool.name} on ${on_string} (EMBL)"> 171 <collection name="embl" type="list" label="${tool.name} on ${on_string}: EMBL">
131 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" directory="input_tempfile" ext="embl" visible="false" /> 172 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" directory="input_tempfile" ext="embl" visible="false" />
132 <filter>'embl' in outputs</filter> 173 <filter>'embl' in outputs</filter>
133 </collection> 174 </collection>
134 <collection name="archive" type="list" label="${tool.name} on ${on_string} (all files compressed)"> 175 <collection name="archive" type="list" label="${tool.name} on ${on_string}: all files compressed">
135 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.zip" directory="input_tempfile" ext="zip" visible="false" /> 176 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.zip" directory="input_tempfile" ext="zip" visible="false" />
136 <filter>'all' in outputs</filter> 177 <filter>'all' in outputs</filter>
137 </collection> 178 </collection>
138 <data format="html" name="html" label="${tool.name} on ${on_string} (html report)" /> 179 <data format="html" name="html" label="${tool.name} on ${on_string}: HTML report" />
180 <data format="txt" name="log" label="${tool.name} on ${on_string}: log file">
181 <filter>'log' in outputs</filter>
182 </data>
139 </outputs> 183 </outputs>
140 <tests> 184 <tests>
141 <test> 185 <test expect_num_outputs="1">
142 <param name="infile" value="sequence.fasta"/> 186 <param name="infile" value="sequence.fasta"/>
143 <output name="html" file="index.html"/> 187 <output name="html" file="index.html"/>
144 </test> 188 </test>
145 <test> 189 <test expect_num_outputs="2">
146 <param name="infile" value="sequence.gb"/> 190 <param name="infile" value="sequence.gb"/>
147 <param name="outputs" value="html,gb"/> 191 <param name="outputs" value="html,gb"/>
148 <param name="taxon" value="fungi"/> 192 <param name="taxon" value="fungi"/>
149 <param name="clusterhmmer" value="True"/> 193 <param name="clusterhmmer" value="true"/>
150 <param name="fullhmmer" value="True"/> 194 <param name="fullhmmer" value="true"/>
151 <param name="extra_cluster" value="--cf-create-clusters"/> 195 <param name="cassis" value="true"/>
152 <param name="cassis" value="True"/> 196 <param name="cb_general" value="true"/>
153 <param name="cb_general" value="True"/>
154 <output_collection name="genbank" type="list"> 197 <output_collection name="genbank" type="list">
155 <element name="ARBH01000003.1.cluster001" file="ARBH01000003.1.cluster001" ftype="genbank" /> 198 <element name="input_tempfile" file="test_02.genbank" ftype="genbank" lines_diff="2"/>
156 <element name="ARBH01000003.1.final" file="ARBH01000003.1.final" ftype="genbank"/>
157 </output_collection> 199 </output_collection>
158 <output name="html" file="index.2.html"/> 200 <output name="html" file="index.2.html" ftype="html">
201 <assert_contents>
202 <has_text text="No results found on input"/>
203 </assert_contents>
204 </output>
159 </test> 205 </test>
206
207 <test expect_num_outputs="3">
208 <param name="infile" value="sequence_long.fasta"/>
209 <param name="genefinding_gff3" value="annotation.gff3"/>
210 <param name="fullhmmer" value="true"/>
211 <param name="cc_mibig" value="true"/>
212 <param name="pfam2go" value="true"/>
213 <param name="rre" value="true"/>
214 <param name="outputs" value="html,gb,log"/>
215 <section name="advanced_options">
216 <param name="minlength" value="1000"/>
217 <param name="hmmdetection_strictness" value="strict"/>
218 <param name="cb_nclusters" value="10"/>
219 <param name="cb_min_homology_scale" value="0.1"/>
220 <param name="rre_cutoff" value="10"/>
221 <param name="rre_minlength" value="50"/>
222 </section>
223 <output_collection name="genbank" type="list">
224 <element name="input_tempfile" file="test_03.genbank" ftype="genbank" lines_diff="2"/>
225 </output_collection>
226 <output name="html" file="index.3.html" ftype="html">
227 <assert_contents>
228 <has_text text="No results found on input"/>
229 </assert_contents>
230 </output>
231 <output name="log">
232 <assert_contents>
233 <has_text text="antiSMASH status: SUCCESS"/>
234 <has_text text="HMM detection using strictness: strict"/>
235 </assert_contents>
236 </output>
237 </test>
238
160 </tests> 239 </tests>
161 <help> 240 <help>
162 <![CDATA[ 241 <![CDATA[
163 242
164 **What it does** 243 **What it does**
193 The downward-pointing arrow will open a menu offering to download the complete set of results from the antiSMASH run, a summary Excel file and the summary EMBL/GenBank output file. 272 The downward-pointing arrow will open a menu offering to download the complete set of results from the antiSMASH run, a summary Excel file and the summary EMBL/GenBank output file.
194 The EMBL/GenBank file can be viewed in a genome browser such as Artemis. 273 The EMBL/GenBank file can be viewed in a genome browser such as Artemis.
195 274
196 ]]> 275 ]]>
197 </help> 276 </help>
198 <citations> 277 <expand macro="citations" />
199 <citation type="doi">10.1093/nar/gkv437</citation>
200 </citations>
201 </tool> 278 </tool>