comparison mitos2.xml @ 0:dd589aa77943 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mitos commit 791f28c8a7194fdd1ecec05ad166932d461899b2"
author iuc
date Fri, 27 Mar 2020 17:53:42 -0400
parents
children 80323066acd4
comparison
equal deleted inserted replaced
-1:000000000000 0:dd589aa77943
1 <tool id="mitos2" name="@MITOS_NAME@" version="@MITOS_VERSION@">
2 <description>de-novo annotation of metazoan mitochondrial genomes</description>
<!-- Tool name and version are defined once as tokens; the version token also pins the mitos package requirement below -->
3 <macros>
4 <import>macros.xml</import>
5 <token name="@MITOS_NAME@">MITOS2</token>
6 <token name="@MITOS_VERSION@">2.0.6</token>
7 </macros>
8 <requirements>
9 <requirement type="package" version="@MITOS_VERSION@">mitos</requirement>
<!-- zip is called by the command to pack the raw results into output.zip -->
10 <requirement type="package">zip</requirement>
11 </requirements>
<!-- Prints the version attribute of the installed mitos Python module -->
12 <version_command>python -c "import mitos; print(mitos.__version__)"</version_command>
<!-- Command template (Cheetah).
     1. Creates the directory "outdir" and runs runmitos.py on the input with the selected genetic code.
     2. The reference data location is the "path" field of the chosen data table entry, passed as refseqver
        with refdir fixed to "/" (presumably so that an absolute path can be used; confirm).
     3. Every feature type that is NOT selected in advanced.featuretypes is switched off by passing the
        type flag with the value 0.
     4. fragovl and cutoff are entered in percent and divided by 100 before being passed on.
     5. The noplots flag is added unless the protein or ncRNA prediction plot was requested.
     6. If "raw" was requested, outdir is zipped into output.zip after runmitos.py succeeded. -->
13 <command detect_errors="aggressive"><![CDATA[
14 mkdir outdir &&
15
16 runmitos.py
17 --input '$input'
18 --code $code
19 --outdir outdir
20 --refdir '/'
21 --refseqver '$refseqver.fields.path'
22 $linear
23 #for tpe in ["prot", "trna", "rrna", "intron", "oril", "orih"]
24 #if not $tpe in str($advanced.featuretypes).split(',')
25 --$tpe 0
26 #end if
27 #end for
28 --finovl $advanced.finovl
29 $advanced.best
30 #set fragovl=float($advanced.fragovl)/100.0
31 --fragovl $fragovl
32 --fragfac $advanced.fragfac
33
34 --evalue $advanced_prot.evalue
35 #set cutoff=float($advanced_prot.cutoff)/100.0
36 --cutoff $cutoff
37 --clipfac $advanced_prot.clipfac
38 $advanced_prot.ncbicode
39 $advanced_prot.alarab
40 $advanced_prot.oldstst
41 $advanced_ncrna.locandgloc
42 --ncev $advanced_ncrna.ncev
43 $advanced_ncrna.sensitive
44 --maxtrnaovl $advanced_ncrna.maxtrnaovl
45 --maxrrnaovl $advanced_ncrna.maxrrnaovl
46
47 #if not ("protein_plot" in str($addoutputs).split(',') or "ncRNA_plot" in str($addoutputs).split(',')):
48 --noplots
49 #end if
50
51 #if "raw" in str($addoutputs).split(','):
52 && zip -9 -y -r output.zip outdir/ > /dev/null
53 #end if
54 ]]></command>
55 <inputs>
<!-- NOTE(review): the options filter below appears intended to restrict the choice to datasets whose "sequences" metadata equals 1; confirm -->
56 <param argument="--input" label="Sequence" type="data" format="fasta" help="A single sequence in FASTA format">
57 <options options_filter_attribute="metadata.sequences">
58 <filter type="add_value" value="1"/>
59 </options>
60 </param>
61 <param argument="--code" label="Genetic code" type="select">
62 <option value="2">Vertebrate (2)</option>
63 <option value="4">Mold, Protozoan, Coelenterate (4)</option>
64 <option value="5">Invertebrate (5)</option>
65 <option value="9">Echinoderm, Flatworm (9)</option>
66 <option value="13">Ascidian (13)</option>
67 <option value="14">Alternative Flatworm (14)</option>
68 </param>
<!-- Reference data comes from the "mitos" data table, limited to rows whose column 2 equals "mitos2"; the command uses the "path" field of the selected row -->
69 <param argument="--refseqver" label="Reference data" type="select" help="contact the administrator of this Galaxy instance if you miss reference data">
70 <options from_data_table="mitos">
71 <filter type="static_value" value="mitos2" column="2"/>
72 </options>
73 <validator message="No reference annotation is available for MITOS2" type="no_options" />
74 </param>
75 <param argument="--linear" checked="false" label="Treat sequence as linear" type="boolean" truevalue="--linear" falsevalue=""/>
<!-- Selects which result files become history items; the two prediction plot choices and "raw" also change the command line -->
76 <param name="addoutputs" type="select" multiple="true" label="Outputs">
77 <option value="bed" selected="true">BED</option>
78 <option value="mito" selected="false">mito</option>
79 <option value="gff" selected="false">GFF file</option>
80 <option value="seq" selected="false">SEQ</option>
81 <option value="fas" selected="false">nucleotide FASTA</option>
82 <option value="faa" selected="false">protein FASTA</option>
83 <option value="geneorder" selected="false">geneorder</option>
84 <option value="protein_plot" selected="false">Protein prediction plot</option>
85 <option value="ncRNA_plot" selected="false">ncRNA prediction plot</option>
86 <!--<option value="ncRNA_structure_ps_plots" selected="false">ncRNA structure plots - postscript</option>-->
87 <option value="ncRNA_structure_svg_plots" selected="false">ncRNA structure plots - svg</option>
88 <option value="raw" selected="false">zipped raw results</option>
89 </param>
90 <section name="advanced" title="Advanced options">
<!-- Feature types left unselected here are switched off on the command line -->
91 <param name="featuretypes" label="Feature types" help="Feature types that should be predicted by MITOS (--noprot,--notrna,--norrna)" type="select" multiple="true">
92 <option value="prot" selected="true">Protein coding genes</option>
93 <option value="trna" selected="true">tRNAs</option>
94 <option value="rrna" selected="true">rRNAs</option>
95 <option value="intron" selected="false">Introns</option>
96 <option value="oril" selected="false">Origin of light strand replication</option>
97 <option value="orih" selected="false">Origin of heavy strand replication</option>
98 </param>
99 <param argument="--finovl" label="Final overlap (nt)" help="Maximum number of nucleotides by which genes of different types may overlap" type="integer" value="50" min="0"/>
100 <param argument="--best" checked="false" label="Annotate only the best copy of each feature" type="boolean" truevalue="--best" falsevalue=""/>
<!-- Entered in percent; the command divides the value by 100 before passing it on -->
101 <param argument="--fragovl" label="Fragment overlap" help="Maximum allowed overlap of proteins in the query (in percent of the shorter query range) for two hits to be counted as fragments of the same gene" type="integer" value="20" min="0" max="100"/>
102 <param argument="--fragfac" label="Fragment quality factor" help="Maximum factor by which fragments of the same protein may differ in their quality" type="float" min="0" value="10"/>
103 </section>
104 <section name="advanced_prot" title="Advanced options for protein coding gene prediction">
105 <param argument="--evalue" label="BLAST E-value Exponent" help="Negation of the exponent of the E-value threshold used by BLAST, i.e. a value X gives an E-value of 10^(-X)" type="float" value="2" min="1"/>
<!-- Entered in percent; the command divides the value by 100 before passing it on -->
106 <param argument="--cutoff" label="Quality cutoff" help="Minimum allowed quality in % of the maximum quality value per reading frame" type="integer" value="50" min="0" max="100"/>
107 <param argument="--clipfac" label="Clipping factor" help="Clip overlapping proteins with the same name that differ by less than the specified factor" type="float" value="10" min="0"/>
108 <param argument="--ncbicode" checked="false" label="use start/stop codons as in NCBI (default: learned start/stop codons)" type="boolean" truevalue="--ncbicode" falsevalue=""/>
109 <param argument="--alarab" checked="false" label="Use the hmmer based method of Al Arab et al. 2016. This will consider the evalue, ncbicode, fragovl, fragfac" type="boolean" truevalue="--alarab" falsevalue=""/>
110 <param argument="--oldstst" checked="false" label="Use the old start/stop prediction method of MITOS1" type="boolean" truevalue="--oldstst" falsevalue=""/>
111 </section>
112 <section name="advanced_ncrna" title="Advanced options for ncRNA gene prediction">
113 <param argument="--locandgloc" checked="false" label="Run mitfi in glocal and local mode (default: local only)" type="boolean" truevalue="--locandgloc" falsevalue=""/>
114 <param argument="--ncev" label="e-value to use for infernal fast mode" type="float" min="0" value="0.01"/>
115 <param argument="--sensitive" checked="false" label="Use infernal's sensitive mode only" type="boolean" truevalue="--sensitive" falsevalue=""/>
116 <param argument="--maxtrnaovl" label="Allow tRNA overlap of up to X nt for mitfi" type="integer" value="50"/>
117 <param argument="--maxrrnaovl" label="Allow rRNA overlap of up to X nt for mitfi" type="integer" value="50"/>
118 </section>
119 </inputs>
120 <outputs>
<!-- Each output is created only when its value occurs in the "addoutputs" selection (substring test on the stringified value).
     All files are collected from the "outdir" directory written by the command, except the zip archive. -->
121 <data name="bedout" format="bed" from_work_dir="outdir/result.bed" label="${tool.name} on ${on_string}: BED">
122 <filter>"bed" in str(addoutputs)</filter>
123 </data>
124 <data name="mitoout" format="tabular" from_work_dir="outdir/result.mitos" label="${tool.name} on ${on_string}: mito">
125 <filter>"mito" in str(addoutputs)</filter>
126 </data>
127 <data name="gffout" format="gff" from_work_dir="outdir/result.gff" label="${tool.name} on ${on_string}: GFF">
128 <filter>"gff" in str(addoutputs)</filter>
129 </data>
<!-- NOTE(review): labelled "TBL" although the corresponding option is called "SEQ"; confirm which name is intended -->
130 <data name="seqout" format="txt" from_work_dir="outdir/result.seq" label="${tool.name} on ${on_string}: TBL">
131 <filter>"seq" in str(addoutputs)</filter>
132 </data>
133 <data name="faa" format="fasta" from_work_dir="outdir/result.faa" label="${tool.name} on ${on_string}: aa FASTA">
134 <filter>"faa" in str(addoutputs)</filter>
135 </data>
136 <data name="fas" format="fasta" from_work_dir="outdir/result.fas" label="${tool.name} on ${on_string}: nt FASTA">
137 <filter>"fas" in str(addoutputs)</filter>
138 </data>
139 <data name="geneorderout" format="fasta" from_work_dir="outdir/result.geneorder" label="${tool.name} on ${on_string}: geneorder">
140 <filter>"geneorder" in str(addoutputs)</filter>
141 </data>
142 <data name="protein_plot_out" format="pdf" from_work_dir="outdir/plots/prot.pdf" label="${tool.name} on ${on_string}: Protein prediction plot">
143 <filter>"protein_plot" in str(addoutputs)</filter>
144 </data>
145 <data name="ncRNA_plot_out" format="pdf" from_work_dir="outdir/plots/rna.pdf" label="${tool.name} on ${on_string}: ncRNA prediction plot">
146 <filter>"ncRNA_plot" in str(addoutputs)</filter>
147 </data>
148 <!--<collection name="ncRNA_structure_plot_ps_out" type="list" label="${tool.name} on ${on_string}: ncRNA postscript structure plots">
149 <discover_datasets pattern="(?P&lt;name&gt;.+)\.ps" format="ps" directory="outdir/plots" />
150 <filter>"ncRNA_structure_ps_plots" in str(addoutputs)</filter>
151 </collection>-->
<!-- One list element per svg file found in outdir/plots; the element name is the file name without the extension -->
152 <collection name="ncRNA_structure_plot_svg_out" type="list" label="${tool.name} on ${on_string}: ncRNA svg structure plots">
153 <discover_datasets pattern="(?P&lt;name&gt;.+)\.svg" format="svg" directory="outdir/plots" />
154 <filter>"ncRNA_structure_svg_plots" in str(addoutputs)</filter>
155 </collection>
<!-- Archive of the complete outdir, written by the zip call at the end of the command -->
156 <data name="rawout" format="zip" from_work_dir="output.zip" label="${tool.name} on ${on_string}: raw data">
157 <filter>"raw" in str(addoutputs)</filter>
158 </data>
159 </outputs>
160 <tests>
161 <!-- default options: only the BED output is produced; introns and both replication origins are switched off and plots are suppressed -->
162 <test expect_num_outputs="1">
163 <param name="input" value="NC_012920.fasta"/>
164 <param name="code" value="2"/>
165 <param name="refseqver" value="mitos2-refdata" />
166 <output name="bedout" file="mitos2_NC_012920.bed" ftype="bed"/>
167 <assert_command>
168 <has_text text="--code 2"/>
169 <has_text text="--finovl 50"/>
170 <not_has_text text="--trna"/>
171 <not_has_text text="--rrna"/>
172 <not_has_text text="--prot"/>
173 <has_text text="--intron 0"/>
174 <has_text text="--oril 0"/>
175 <has_text text="--orih 0"/>
176 <has_text text="--evalue 2.0"/>
177 <has_text text="--cutoff 0.5"/>
178 <has_text text="--clipfac 10.0"/>
179 <not_has_text text="--best"/>
180 <has_text text="--fragovl 0.2"/>
181 <has_text text="--fragfac 10.0"/>
182 <has_text text="--ncev 0.01"/>
183 <has_text text="--maxtrnaovl 50"/>
184 <has_text text="--maxrrnaovl 50"/>
185 <has_text text="--noplots"/>
186 </assert_command>
187 </test>
188 <!-- different main options: all six feature types enabled and non-default values for every parameter of the "advanced" section -->
189 <test expect_num_outputs="1">
190 <param name="input" value="NC_012920.fasta"/>
191 <param name="code" value="5"/>
192 <param name="refseqver" value="mitos2-refdata" />
193 <output name="bedout" file="mitos2_NC_012920.bed" ftype="bed" compare="sim_size"/>
194 <section name="advanced">
195 <param name="featuretypes" value="prot,trna,rrna,intron,oril,orih"/>
196 <param name="finovl" value="49"/>
197 <param name="best" value="true"/>
198 <param name="fragovl" value="10"/>
199 <param name="fragfac" value="9"/>
200 </section>
201 <assert_command>
202 <has_text text="--code 5"/>
203 <has_text text="--finovl 49"/>
204 <not_has_text text="--trna"/>
205 <not_has_text text="--rrna"/>
206 <not_has_text text="--prot"/>
207 <not_has_text text="--intron"/>
208 <not_has_text text="--oril"/>
209 <not_has_text text="--orih"/>
210 <has_text text="--evalue 2.0"/>
211 <has_text text="--cutoff 0.5"/>
212 <has_text text="--clipfac 10.0"/>
213 <has_text text="--best"/>
214 <has_text text="--fragovl 0.1"/>
215 <has_text text="--fragfac 9.0"/>
216 <has_text text="--ncev 0.01"/>
217 <has_text text="--maxtrnaovl 50"/>
218 <has_text text="--maxrrnaovl 50"/>
219 <has_text text="--noplots"/>
220 </assert_command>
221 </test>
222 <!-- different protein coding gene (PCG) and ncRNA options, with every additional output requested -->
223 <test expect_num_outputs="11">
224 <param name="input" value="NC_012920.fasta"/>
225 <param name="code" value="2"/>
226 <param name="refseqver" value="mitos2-refdata" />
227 <section name="advanced_prot">
228 <param name="evalue" value="3"/>
229 <param name="cutoff" value="49"/>
230 <param name="clipfac" value="9"/>
231 <param name="ncbicode" value="true"/>
232 <param name="alarab" value="true"/>
233 <param name="oldstst" value="true"/>
234 </section>
235 <section name="advanced_ncrna">
236 <!-- <param name="locandgloc" value="true"/> should be possible from 2.0.5 https://gitlab.com/Bernt/MITOS/-/commit/9b4c55c29961c307dce02ac0319dadbd76f6b9e5-->
237 <param name="ncev" value="0.1"/>
238 <param name="sensitive" value="true"/>
239 <param name="maxtrnaovl" value="51"/>
240 <param name="maxrrnaovl" value="49"/>
241 </section>
242 <param name="addoutputs" value="bed,mito,gff,seq,fas,faa,geneorder,protein_plot,ncRNA_plot,ncRNA_structure_svg_plots,raw"/>
243 <output name="bedout" file="mitos2_NC_012920.bed" ftype="bed" compare="sim_size"/>
244 <output name="mitoout" file="mitos2_NC_012920.mitos" ftype="tabular"/>
245 <output name="gffout" file="mitos2_NC_012920.gff" ftype="gff"/>
246 <output name="seqout" file="mitos2_NC_012920.seq" ftype="txt"/>
247 <output name="faa" file="mitos2_NC_012920.faa" ftype="fasta"/>
248 <output name="fas" file="mitos2_NC_012920.fas" ftype="fasta"/>
249 <output name="geneorderout" file="mitos2_NC_012920.geneorder" ftype="fasta"/>
250 <output name="protein_plot_out" file="mitos2_NC_012920_prot.pdf" ftype="pdf" compare="sim_size"/>
251 <output name="ncRNA_plot_out" file="mitos2_NC_012920_ncrna.pdf" ftype="pdf" compare="sim_size"/>
252 <output name="rawout" ftype="zip">
253 <assert_contents>
254 <has_archive_member path=".*/result.bed"/>
255 </assert_contents>
256 </output>
257 <output_collection name="ncRNA_structure_plot_svg_out" type="list" count="17"/>
258 <assert_command>
259 <has_text text="--code 2"/>
260 <has_text text="--finovl 50"/>
261 <not_has_text text="--trna"/>
262 <not_has_text text="--rrna"/>
263 <not_has_text text="--prot"/>
264 <has_text text="--intron 0"/>
265 <has_text text="--oril 0"/>
266 <has_text text="--orih 0"/>
267 <has_text text="--evalue 3.0"/>
268 <has_text text="--cutoff 0.49"/>
269 <has_text text="--clipfac 9.0"/>
270 <has_text text="--alarab"/>
271 <has_text text="--oldstst"/>
272 <has_text text="--ncbicode"/>
273 <not_has_text text="--best"/>
274 <has_text text="--fragovl 0.2"/>
275 <has_text text="--fragfac 10.0"/>
276 <!--<has_text text="\-\-locandgloc"/>-->
277 <has_text text="--ncev 0.1"/>
278 <has_text text="--sensitive"/>
279 <has_text text="--maxtrnaovl 51"/>
280 <has_text text="--maxrrnaovl 49"/>
281 <not_has_text text="--noplots"/>
282 </assert_command>
283 </test>
284 </tests>
285 <help>@COMMON_HELP@
286 <![CDATA[
287
288
289 **Advanced options**
290
291 - Feature types
292
293 Select the feature types that should be annotated. By default this is protein coding genes, tRNA and rRNA which is useful for metazoan mitogenomes. In addition also the replication origins of the light (OL) and heavy (OH) strand and introns can be annotated. The annotation of the replication origins is most useful for chordate mitogenomes. Introns are usually only found in mitogenomes of non-metazoans and basal Metazoa.
294
295 - Final overlap (nt)
296
297 Maximum number of nucleotides by which genes of different types may overlap. Applies to merging of the final predictions.
298
299 - Annotate only the best copy of each feature
300
301 If there are several copies of the same feature type, only the one with the lowest e-value (for ncRNAs and OL) or the highest quality score (for protein coding genes and OH) is annotated.
302
303 - Fragment overlap
304
305 Maximum overlap (in percent of the shorter feature) that two hits may have in the query and still be counted as fragments of the same gene.
306
307 - Fragment quality factor
308
309 Maximum factor by which fragments may differ in their quality scores. Higher values allow that parts of a gene can differ more in their quality.
310
311 **Advanced options for protein coding gene prediction**
312
313 - BLAST E-value Exponent
314
315 The statistical significance threshold for considering matches in the BLASTX search. The value entered here is the negation of the exponent of the E-value threshold that should be used by BLAST, i.e. a value X gives an E-value of 10^(-X).
316
317 - Quality cutoff
318
319 Minimum allowed quality value (in percent) of the maximum quality value per reading frame. Higher values correspond to shorter protein predictions and therefore a reduced risk of conflicts with other features.
320
321 - Clipping factor
322
323 Clipping is started if overlapping predictions of hits with the same name differ by less than a factor X in their quality values.
324
325 - use start/stop codons as in NCBI (default: learned start/stop codons)
326
327 Instead of the codon probabilities derived from the protein coding genes annotated in RefSeq, the codons listed at NCBI taxonomy are used with equal probabilities (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi).
328
329 - Use the hmmer based method of Al Arab et al. 2016. This will consider the evalue, ncbicode, fragovl, fragfac parameters
330
331 Note: 1) this only works for Metazoa RefSeq release 63 reference data set. 2) This will only predict the protein coding genes that are typical for metazoan mitochondrial genomes.
332
333 - Use the old start/stop prediction method of MITOS1
334
335 The search for start and stop codons just takes the closest to the initial start / stop positions within 6aa (i.e. the method used in MITOS1)
336
337 **Advanced options for ncRNA gene prediction**
338
339 - Run mitfi in glocal and local mode (default: local only)
340
341 By default mitfi uses infernal's cmsearch in local search mode only. By enabling this option mitfi will also invoke cmsearch in glocal mode if a feature is missing.
342
343 - e-value to use for infernal fast mode
344
345 The e-value passed to the first (fast) pass of cmsearch. In the second pass (the sensitive search) an e-value of 0.1 is used.
346
347 - Use infernal's sensitive mode only
348
349 By default mitfi searches for ncRNAs using cmsearch's default fast mode first. If a ncRNA type is missing it is searched using the sensitive mode. Using the sensitive mode only can be useful if low scoring copies are expected which might be missed when searching in the two stage mode.
350
351 - Allow tRNA/rRNA overlap of up to X nt for mitfi
352
353 Allow that a tRNA/rRNA overlaps with another feature by this number of nucleotides.
354
355 ]]></help>
<!-- Publications to cite when using this tool, referenced by DOI -->
356 <citations>
357 <citation type="doi">10.1093/nar/gkz833</citation>
358 <citation type="doi">10.1016/j.ympev.2016.09.024</citation>
359 </citations>
360 </tool>
361