comparison maker.xml @ 0:16e44ec438c4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maker commit 2896dcfd180800d00ea413a59264ef8b11788b8e
author iuc
date Thu, 19 Oct 2017 15:58:39 -0400
parents
children 73a79dec987b
comparison
equal deleted inserted replaced
-1:000000000000 0:16e44ec438c4
1 <?xml version="1.0"?>
2 <tool id="maker" name="Maker" profile="16.04" version="@VERSION@">
3 <description>genome annotation pipeline</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <command><![CDATA[
9 maker -CTL
10
11 &&
12
13 sed "s/cpus=/cpus=\${GALAXY_SLOTS:-4}/g" '$ctl' > maker_opts.ctl
14
15 &&
16
17 #if $abinitio_gene_prediction.aug_prediction.augustus_mode == 'history'
18
19 ## The custom Augustus model from the history is a tar.gz archive: copy the Augustus config dir, unpack the model into its species/ folder and point AUGUSTUS_CONFIG_PATH at the copy
20
21 cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ &&
22
23 mkdir -p 'augustus_dir/species/' &&
24
25 tar -C 'augustus_dir/species/' -xzvf '${abinitio_gene_prediction.aug_prediction.augustus_model}' > /dev/null &&
26
27 export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ &&
28 #end if
29
30 maker maker_opts.ctl maker_bopts.ctl maker_exe.ctl
31
32 &&
33
34 gff3_merge -d *.maker.output/*_master_datastore_index.log -o '${output_full}'
35
36 &&
37
38 awk '{if ($2 == "maker" || $1 ~ /^\#/) {print}}' '${output_full}' | sed -n '/^\#\#FASTA\$/q;p' > '${output_gff}'
39
40 &&
41
42 awk '{if ($2 != "maker") {print}}' '${output_full}' | sed -n '/^\#\#FASTA\$/q;p' > '${output_evidences}'
43 ]]></command>
44 <configfiles>
45 <!-- Maker doesn't like indentation in its config file... -->
46 <configfile name="ctl"><![CDATA[
47 #-----Genome (these are always required)
48 genome=${genome} # genome sequence (fasta file or fasta embeded in GFF3 file)
49 organism_type=${organism_type} # eukaryotic or prokaryotic. Default is eukaryotic
50
51 #-----Re-annotation Using MAKER Derived GFF3
52 #if $reannotation.reannotate == 'no'
53 maker_gff= # MAKER derived GFF3 file
54 est_pass=0 # use ESTs in maker_gff: 1 = yes, 0 = no
55 altest_pass=0 # use alternate organism ESTs in maker_gff: 1 = yes, 0 = no
56 protein_pass=0 # use protein alignments in maker_gff: 1 = yes, 0 = no
57 rm_pass=0 # use repeats in maker_gff: 1 = yes, 0 = no
58 model_pass=0 # use gene models in maker_gff: 1 = yes, 0 = no
59 pred_pass=0 # use ab-initio predictions in maker_gff: 1 = yes, 0 = no
60 other_pass=0 # passthrough anything else in maker_gff: 1 = yes, 0 = no
61 #else
62 maker_gff=${reannotation.maker_gff} # MAKER derived GFF3 file
63 est_pass=${reannotation.est_pass} # use ESTs in maker_gff: 1 = yes, 0 = no
64 altest_pass=${reannotation.altest_pass} # use alternate organism ESTs in maker_gff: 1 = yes, 0 = no
65 protein_pass=${reannotation.protein_pass} # use protein alignments in maker_gff: 1 = yes, 0 = no
66 rm_pass=${reannotation.rm_pass} # use repeats in maker_gff: 1 = yes, 0 = no
67 model_pass=${reannotation.model_pass} # use gene models in maker_gff: 1 = yes, 0 = no
68 pred_pass=${reannotation.pred_pass} # use ab-initio predictions in maker_gff: 1 = yes, 0 = no
69 other_pass=${reannotation.other_pass} # passthrough anything else in maker_gff: 1 = yes, 0 = no
70 #end if
71
72 #-----EST Evidence (for best results provide a file for at least one)
73 #if $est_evidences.est
74 est=${est_evidences.est} # set of ESTs or assembled mRNA-seq in fasta format
75 #else
76 est= # set of ESTs or assembled mRNA-seq in fasta format
77 #end if
78 #if $est_evidences.altest
79 altest=${est_evidences.altest} # EST/cDNA sequence file in fasta format from an alternate organism
80 #else
81 altest= # EST/cDNA sequence file in fasta format from an alternate organism
82 #end if
83 #if $est_evidences.est_gff
84 est_gff=${est_evidences.est_gff} # aligned ESTs or mRNA-seq from an external GFF3 file
85 #else
86 est_gff= # aligned ESTs or mRNA-seq from an external GFF3 file
87 #end if
88 #if $est_evidences.altest_gff
89 altest_gff=${est_evidences.altest_gff} # aligned ESTs from a closly relate species in GFF3 format
90 #else
91 altest_gff= # aligned ESTs from a closly relate species in GFF3 format
92 #end if
93
94 #-----Protein Homology Evidence (for best results provide a file for at least one)
95 #if $protein_evidences.protein
96 protein=${protein_evidences.protein} # protein sequence file in fasta format (i.e. from mutiple oransisms)
97 #else
98 protein= # protein sequence file in fasta format (i.e. from mutiple oransisms)
99 #end if
100 #if $protein_evidences.protein_gff
101 protein_gff=${protein_evidences.protein_gff} # aligned protein homology evidence from an external GFF3 file
102 #else
103 protein_gff= # aligned protein homology evidence from an external GFF3 file
104 #end if
105
106 #-----Repeat Masking (leave values blank to skip repeat masking)
107 #if $repeat_masking.repeatmasker.do_rm == 'simple'
108 model_org=simple # select a model organism for RepBase masking in RepeatMasker
109 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
110 #else if $repeat_masking.repeatmasker.do_rm == 'lib'
111 model_org= # select a model organism for RepBase masking in RepeatMasker
112 rmlib=${repeat_masking.repeatmasker.rmlib} # provide an organism specific repeat library in fasta format for RepeatMasker
113 #else
114 model_org= # select a model organism for RepBase masking in RepeatMasker
115 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
116 #end if
117 #if $repeat_masking.repeat_protein
118 repeat_protein=${repeat_masking.repeat_protein} # provide a fasta file of transposable element proteins for RepeatRunner
119 #else
120 repeat_protein= # provide a fasta file of transposable element proteins for RepeatRunner
121 #end if
122 #if $repeat_masking.rm_gff
123 rm_gff=${repeat_masking.rm_gff} # pre-identified repeat elements from an external GFF3 file
124 #else
125 rm_gff= # pre-identified repeat elements from an external GFF3 file
126 #end if
127 prok_rm=0 # forces MAKER to repeatmask prokaryotes (no reason to change this), 1 = yes, 0 = no
128 softmask=${repeat_masking.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
129
130 #-----Gene Prediction
131 #if $abinitio_gene_prediction.snaphmm
132 snaphmm=${abinitio_gene_prediction.snaphmm} # SNAP HMM file
133 #else
134 snaphmm= # SNAP HMM file
135 #end if
136 gmhmm= # GeneMark HMM file, disabled in galaxy as not free
137 #if $abinitio_gene_prediction.aug_prediction.augustus_mode == 'builtin'
138 augustus_species=${abinitio_gene_prediction.aug_prediction.augustus_species} # Augustus gene prediction species model
139 #else if $abinitio_gene_prediction.aug_prediction.augustus_mode == 'history'
140 augustus_species=local # Augustus gene prediction species model
141 #else
142 augustus_species= # Augustus gene prediction species model
143 #end if
144 fgenesh_par_file= # FGENESH parameter file disabled in galaxy as not free
145 #if $gene_prediction.pred_gff
146 pred_gff=${gene_prediction.pred_gff} # ab-initio predictions from an external GFF3 file
147 #else
148 pred_gff= # ab-initio predictions from an external GFF3 file
149 #end if
150 #if $gene_prediction.model_gff
151 model_gff=${gene_prediction.model_gff} # annotated gene models from an external GFF3 file (annotation pass-through)
152 #else
153 model_gff= # annotated gene models from an external GFF3 file (annotation pass-through)
154 #end if
155 est2genome=${est_evidences.est2genome} # infer gene predictions directly from ESTs, 1 = yes, 0 = no
156 protein2genome=${protein_evidences.protein2genome} # infer predictions from protein homology, 1 = yes, 0 = no
157 trna=${gene_prediction.trna} # find tRNAs with tRNAscan, 1 = yes, 0 = no
158 #if $gene_prediction.snoscan_rrna
159 snoscan_rrna=${gene_prediction.snoscan_rrna} # rRNA file to have Snoscan find snoRNAs
160 #else
161 snoscan_rrna= # rRNA file to have Snoscan find snoRNAs
162 #end if
163 unmask=${abinitio_gene_prediction.unmask} # also run ab-initio prediction programs on unmasked sequence, 1 = yes, 0 = no
164
165 #-----Other Annotation Feature Types (features MAKER doesn't recognize)
166 #if $advanced.other_gff
167 other_gff=${advanced.other_gff} # extra features to pass-through to final MAKER generated GFF3 file
168 #else
169 other_gff= # extra features to pass-through to final MAKER generated GFF3 file
170 #end if
171
172 #-----External Application Behavior Options
173 alt_peptide=${advanced.alt_peptide} # amino acid used to replace non-standard amino acids in BLAST databases
174 cpus= # max number of cpus to use in BLAST and RepeatMasker (not for MPI, leave 1 when using MPI)
175
176 #-----MAKER Behavior Options
177 max_dna_len=${advanced.max_dna_len} # length for dividing up contigs into chunks (increases/decreases memory usage)
178 min_contig=${advanced.min_contig} # skip genome contigs below this length (under 10kb are often useless)
179
180 pred_flank=${advanced.pred_flank} # flank for extending evidence clusters sent to gene predictors
181 pred_stats=${advanced.pred_stats} # report AED and QI statistics for all predictions as well as models
182 AED_threshold=${advanced.AED_threshold} # Maximum Annotation Edit Distance allowed (bound by 0 and 1)
183 min_protein=${advanced.min_protein} # require at least this many amino acids in predicted proteins
184 alt_splice=${advanced.alt_splice} # Take extra steps to try and find alternative splicing, 1 = yes, 0 = no
185 always_complete=${advanced.always_complete} # extra steps to force start and stop codons, 1 = yes, 0 = no
186 map_forward=${advanced.map_forward} # map names and attributes forward from old GFF3 genes, 1 = yes, 0 = no
187 keep_preds=${advanced.keep_preds} # Concordance threshold to add unsupported gene prediction (bound by 0 and 1)
188
189 split_hit=${advanced.split_hit} # length for the splitting of hits (expected max intron size for evidence alignments)
190 single_exon=${advanced.single_exon.single_exon} # consider single exon EST evidence when generating annotations, 1 = yes, 0 = no
191 #if $advanced.single_exon.single_exon == '1'
192 single_length=${advanced.single_exon.single_length} # min length required for single exon ESTs if 'single_exon is enabled'
193 #else
194 single_length=250 # min length required for single exon ESTs if 'single_exon is enabled'
195 #end if
196 correct_est_fusion=${advanced.correct_est_fusion} # limits use of ESTs in annotation to avoid fusion genes
197
198 tries=2 # number of times to try a contig if there is a failure for some reason
199 clean_try=0 # remove all data from previous run before retrying, 1 = yes, 0 = no
200 clean_up=0 # removes theVoid directory with individual analysis files, 1 = yes, 0 = no
201 TMP= # specify a directory other than the system default temporary directory for temporary files
202 ]]></configfile>
203 </configfiles>
204
205 <inputs>
206 <param name="genome" type="data" format="fasta" label="Genome to annotate"/>
207 <param name="organism_type" type="select" label="Organism type">
208 <option value="eukaryotic">Eukaryotic</option>
209 <option value="prokaryotic">Prokaryotic</option>
210 </param>
211
212 <conditional name="reannotation">
213 <param name="reannotate" type="select" label="Re-annotate using an existing Maker annotation">
214 <option value="no" selected="true">No</option>
215 <option value="yes">Yes</option>
216 </param>
217 <when value="no"/>
218 <when value="yes">
219 <param name="maker_gff" type="data" format="gff" label="Previous Maker annotation"/>
220 <param name="est_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use ESTs"/>
221 <param name="altest_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use alternate organism ESTs"/>
222 <param name="protein_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use protein alignments"/>
223 <param name="rm_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use repeats"/>
224 <param name="model_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use gene models"/>
225 <param name="pred_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use ab-initio predictions"/>
226 <param name="other_pass" type="boolean" truevalue="1" falsevalue="0" label="Passthrough anything else"/>
227 </when>
228 </conditional>
229
230 <section name="est_evidences" title="EST evidences (for best results provide at least one of these)" expanded="True">
231 <param name="est2genome" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Infer gene predictions directly from all ESTs" help="Maker will blindly trust EST alignments to create gene models. Use this only before training ab-initio predictors."/>
232 <param name="est" type="data" format="fasta" label="ESTs or assembled cDNA" optional="True"/>
233 <param name="altest" type="data" format="fasta" label="EST/cDNA from an alternate organism" optional="True"/>
234 <param name="est_gff" type="data" format="gff" label="Aligned ESTs or cDNA" optional="True"/>
235 <param name="altest_gff" type="data" format="gff" label="Aligned EST/cDNA from an alternate organism" optional="True"/>
236 </section>
237
238 <section name="protein_evidences" title="Protein evidences (for best results provide at least one of these)" expanded="True">
239 <param name="protein2genome" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Infer gene predictions directly from all protein alignments" help="Maker will blindly trust protein alignments to create gene models. Use this only before training ab-initio predictors."/>
240 <param name="protein" type="data" format="fasta" label="Protein sequences" help="From multiple organisms" optional="True"/>
241 <param name="protein_gff" type="data" format="gff" label="Aligned proteins" help="From multiple organisms" optional="True"/>
242 </section>
243
244 <section name="abinitio_gene_prediction" title="Ab-initio gene prediction" expanded="True">
245 <param name="snaphmm" type="data" format="snaphmm" label="SNAP model" optional="True" help="Leave empty to disable gene prediction by SNAP"/>
246
247 <conditional name="aug_prediction">
248 <param name="augustus_mode" type="select" label="Prediction with Augustus">
249 <option value="no" selected="true">Don't use Augustus to predict genes</option>
250 <option value="builtin">Run Augustus with a predefined prediction model</option>
251 <option value="history">Run Augustus with a custom prediction model</option>
252 </param>
253 <when value="no"/>
254 <when value="history">
255 <param name="augustus_model" type="data" format="augustus" label="Augustus model"/>
256 </when>
257 <when value="builtin">
258 <param name="augustus_species" type="select" label="Augustus species model">
259 <!-- If you update this list, please also update it in augustus and busco tools (../augustus/augustus.xml and ../busco/busco.xml). Values are written verbatim to augustus_species= in the Maker control file, so keep them free of stray whitespace and list each value only once. -->
260 <option value="human">Homo sapiens</option>
261 <option value="fly">Drosophila melanogaster</option>
262 <option value="arabidopsis">Arabidopsis thaliana</option>
263 <option value="brugia">Brugia malayi</option>
264 <option value="aedes">Aedes aegypti</option>
265 <option value="tribolium2012">Tribolium castaneum</option>
266 <option value="schistosoma">Schistosoma mansoni</option>
267 <option value="tetrahymena">Tetrahymena thermophila</option>
268 <option value="galdieria">Galdieria sulphuraria</option>
269 <option value="maize">Zea mays</option>
270 <option value="toxoplasma">Toxoplasma gondii</option>
271 <option value="caenorhabditis">Caenorhabditis elegans</option>
272 <option value="aspergillus_fumigatus">Aspergillus fumigatus</option>
273 <option value="aspergillus_nidulans">Aspergillus nidulans</option>
274 <option value="aspergillus_oryzae">Aspergillus oryzae</option>
275 <option value="aspergillus_terreus">Aspergillus terreus</option>
276 <option value="botrytis_cinerea">Botrytis cinerea</option>
277 <option value="candida_albicans">Candida albicans</option>
278 <option value="candida_guilliermondii">Candida guilliermondii</option>
279 <option value="candida_tropicalis">Candida tropicalis</option>
280 <option value="chaetomium_globosum">Chaetomium globosum</option>
281 <option value="coccidioides_immitis">Coccidioides immitis</option>
282 <option value="coprinus">Coprinus cinereus</option>
283 <option value="coprinus_cinereus">Coprinus cinereus</option>
284 <option value="cryptococcus_neoformans_gattii">Cryptococcus neoformans gattii</option>
285 <option value="cryptococcus_neoformans_neoformans_B">Cryptococcus neoformans neoformans</option>
286 <option value="cryptococcus_neoformans_neoformans_JEC21">Cryptococcus neoformans neoformans</option>
287 <option value="cryptococcus">Cryptococcus neoformans</option>
288 <option value="debaryomyces_hansenii">Debaryomyces hansenii</option>
289 <option value="encephalitozoon_cuniculi_GB">Encephalitozoon cuniculi</option>
290 <option value="eremothecium_gossypii">Eremothecium gossypii</option>
291 <option value="fusarium_graminearum">Fusarium graminearum</option>
292 <option value="histoplasma_capsulatum">Histoplasma capsulatum</option>
293 <option value="histoplasma">Histoplasma capsulatum</option>
294 <option value="kluyveromyces_lactis">Kluyveromyces lactis</option>
295 <option value="laccaria_bicolor">Laccaria bicolor</option>
296 <option value="lamprey">Petromyzon marinus</option>
297 <option value="leishmania_tarentolae">Leishmania tarentolae</option>
298 <option value="lodderomyces_elongisporus">Lodderomyces elongisporus</option>
299 <option value="magnaporthe_grisea">Magnaporthe grisea</option>
300 <option value="neurospora_crassa">Neurospora crassa</option>
301 <option value="phanerochaete_chrysosporium">Phanerochaete chrysosporium</option>
302 <option value="pichia_stipitis">Pichia stipitis</option>
303 <option value="rhizopus_oryzae">Rhizopus oryzae</option>
304 <option value="saccharomyces_cerevisiae_S288C">Saccharomyces cerevisiae</option>
305 <option value="saccharomyces_cerevisiae_rm11-1a_1">Saccharomyces cerevisiae</option>
306 <option value="saccharomyces">Saccharomyces cerevisiae</option>
307 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option>
308 <option value="trichinella">Trichinella spiralis</option>
309 <option value="ustilago_maydis">Ustilago maydis</option>
310 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option>
311 <option value="nasonia">Nasonia vitripennis</option>
312 <option value="tomato">Solanum lycopersicum</option>
313 <option value="chlamydomonas">Chlamydomonas reinhardtii</option>
314 <option value="amphimedon">Amphimedon queenslandica</option>
315 <option value="pneumocystis">Pneumocystis jirovecii</option>
316 <option value="chicken">Gallus gallus domesticus (chicken)</option>
317 <option value="cacao">Theobroma cacao (cacao)</option>
318 <option value="heliconius_melpomene1">Heliconius melpomene</option>
319 <option value="xenoturbella">Xenoturbella</option>
320 <option value="E_coli_K12">E coli K12</option>
321 <option value="c_elegans_trsk">c elegans trsk</option>
322 <option value="camponotus_floridanus">Camponotus floridanus</option>
323 <option value="coyote_tobacco">Coyote tobacco</option>
324 <option value="s_aureus">Staphylococcus aureus</option>
325 <option value="thermoanaerobacter_tengcongensis">Thermoanaerobacter tengcongensis</option>
326 <option value="wheat">wheat</option>
327 <option value="zebrafish">Danio rerio</option>
328 <option value="anidulans">Aspergillus nidulans</option>
329 <option value="bombus_impatiens1">Bombus impatiens1</option>
330 <option value="bombus_terrestris2">Bombus terrestris2</option>
332 <option value="brugia_malayi">Brugia malayi</option>
333 <option value="conidiobolus_coronatus">Conidiobolus coronatus</option>
334 <option value="cryptococcus_neoformans">Cryptococcus neoformans</option>
335 <option value="culex_pipiens">Culex pipiens</option>
336 <option value="elephant_shark">Callorhinchus milii</option>
337 <option value="honeybee1">Apis mellifera</option>
339 <option value="pea_aphid">Acyrthosiphon pisum</option>
340 <option value="rhodnius_prolixus">Rhodnius prolixus</option>
342 <option value="verticillium_albo_atrum1">Verticillium albo atrum1</option>
343 <option value="verticillium_longisporum1">Verticillium longisporum1</option>
344 <option value="Xipophorus_maculatus">Xipophorus_maculatus</option>
345 <option value="adorsata">adorsata</option>
346 <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option>
347 <option value="maker2_athal1">maker2_athal1</option>
348 <option value="maker2_c_elegans1">maker2_c_elegans1</option>
349 <option value="maker2_dmel1">maker2_dmel1</option>
350 <option value="maker2_spomb1">maker2_spomb1</option>
351 <option value="parasteatoda">parasteatoda</option>
352 <option value="rice">rice</option>
353 <option value="schistosoma2">schistosoma2</option>
354 <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option>
355 </param>
356 </when>
357 </conditional>
358 <param name="unmask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Run ab-initio prediction programs on unmasked sequence" help="Predictors will look for genes in repeated elements (like transposons). Only useful when you believe that transposons might have been integrated into a real gene structure (rare)."/>
359 </section>
360
361 <section name="repeat_masking" title="Repeat masking" expanded="True">
362 <conditional name="repeatmasker">
363 <param name="do_rm" type="select" label="Enable repeat masking with RepeatMasker">
364 <option value="no">No</option>
365 <option value="simple" selected="true">Yes, run RepeatMasker with default simple models</option>
366 <option value="lib">Yes, use an organism specific repeat library (fasta)</option>
367 </param>
368 <!-- The full RepBase library cannot be redistributed (for licensing reasons, see https://hpc.nih.gov/apps/repbase_license.html),
369 so only the default library shipped with Maker is used here.
370 model_org is ignored and replaced by 'simple' if the full RepBase is not available.
371 model_org=simple means Maker will search using the simple models shipped by default.
372 Installing RepBase requires replacing files in the RepeatMasker installation directory.
373 -->
374 <when value="no"/>
375 <when value="simple"/>
376 <when value="lib">
377 <param name="rmlib" type="data" format="fasta" label="Organism specific repeat library for RepeatMasker (fasta)"/>
378 </when>
379 </conditional>
380 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner (fasta)" help="Leave empty to skip" optional="True"/>
381 <param name="rm_gff" type="data" format="gff" label="Pre-identified repeat elements from an external GFF file" help="Leave empty to skip" optional="True"/>
382 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
383 </section>
384
385 <section name="gene_prediction" title="Other predictions" expanded="True">
386 <param name="pred_gff" type="data" format="gff" label="Predictions from an external GFF3 file" optional="True"/>
387 <param name="model_gff" type="data" format="gff" label="Annotated gene models from an external GFF3 file" help="annotation pass-through" optional="True"/>
388 <param name="trna" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Find tRNAs with tRNAscan"/>
389 <param name="snoscan_rrna" type="data" format="fasta" label="rRNA file to have Snoscan find snoRNAs" optional="True"/>
390 </section>
391
392 <section name="advanced" title="Advanced settings" expanded="False">
393 <param name="other_gff" type="data" format="gff" label="Extra features to pass-through to final Maker generated GFF3 file" optional="True"/>
394 <param name="alt_peptide" type="text" value="C" size="1" label="Amino acid used to replace non-standard amino acids in BLAST databases">
395 <validator type="regex" message="This must be a single uppercase letter">^[A-Z]$</validator>
396 </param>
397 <param name="max_dna_len" type="integer" value="100000" label="Length for dividing up contigs into chunks" help="Increases/decreases memory usage"/>
398 <param name="min_contig" type="integer" value="1" label="Skip genome contigs below this length" help="Under 10kb are often useless"/>
399 <param name="pred_flank" type="integer" value="200" label="Flank for extending evidence clusters sent to gene predictors"/>
400 <param name="pred_stats" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Report AED and QI statistics for all predictions as well as models"/>
401 <param name="AED_threshold" type="float" min="0" max="1" value="1" label="Maximum Annotation Edit Distance allowed"/>
402 <param name="min_protein" type="integer" value="0" label="Require at least this many amino acids in predicted proteins"/>
403 <param name="alt_splice" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Take extra steps to try and find alternative splicing" help="Will try to output gene isoforms when detected instead of a single consensus isoform. Use this if you have good EST evidences allowing to detect isoforms."/>
404 <param name="always_complete" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Take extra steps to force the finding of start and stop codons" help="Only canonical gene structures will be reported, but it can lead to biologically incorrect sequences."/>
405 <param name="map_forward" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Map names and attributes forward from old GFF3 genes"/>
406 <param name="keep_preds" type="float" min="0" max="1" value="0" label="Concordance threshold to add unsupported gene prediction"/>
407 <param name="split_hit" type="integer" value="10000" label="Length for the splitting of hits" help="Expected max intron size for evidence alignments"/>
408 <param name="correct_est_fusion" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Limit use of ESTs in annotation to avoid fusion genes"/>
409 <conditional name="single_exon">
410 <param name="single_exon" type="select" label="Consider single exon EST evidence when generating annotations">
411 <option value="0" selected="true">No</option>
412 <option value="1">Yes</option>
413 </param>
414 <when value="0"/>
415 <when value="1">
416 <param name="single_length" type="integer" value="250" label="Min length required for single exon ESTs"/>
417 </when>
418 </conditional>
419 </section>
420 </inputs>
421 <outputs> <!-- the three datasets written by the command: the merged GFF3 plus two filtered views of it -->
422 <data format="gff3" name="output_gff" label="${tool.name} on ${on_string}: final annotation"/> <!-- lines of output_full whose 2nd column is "maker", plus "#" lines, cut at the ##FASTA marker -->
423 <data format="gff3" name="output_evidences" label="${tool.name} on ${on_string}: evidences"/> <!-- lines of output_full whose 2nd column is not "maker", cut at the ##FASTA marker -->
424 <data format="gff3" name="output_full" label="${tool.name} on ${on_string}: full gff (evidences + final annotation)"/> <!-- written directly by gff3_merge from the master datastore index -->
425 </outputs>
426 <tests> <!-- every test checks output_gff and output_evidences; output_full is not compared in any of them -->
427 <test> <!-- baseline: genome plus ESTs with est2genome enabled, everything else left at its default -->
428 <param name="genome" value="genome.fasta"/>
429 <param name="est_evidences|est" value="est.fasta"/>
430 <param name="est_evidences|est2genome" value="1"/>
431 <output name="output_gff" file="annot.gff3"/>
432 <output name="output_evidences" file="evidences.gff3" compare="sim_size"/>
433 </test>
434 <test> <!-- same inputs as the baseline, with organism_type set to prokaryotic -->
435 <param name="genome" value="genome.fasta"/>
436 <param name="organism_type" value="prokaryotic"/>
437 <param name="est_evidences|est" value="est.fasta"/>
438 <param name="est_evidences|est2genome" value="1"/>
439 <output name="output_gff" file="annot_proc.gff3"/>
440 <output name="output_evidences" file="evidences_proc.gff3" compare="sim_size"/>
441 </test>
442 <test> <!-- re-annotation: feeds evidences.gff3 back in as maker_gff and re-uses its ESTs (est_pass) -->
443 <param name="genome" value="genome.fasta"/>
444 <param name="reannotation|reannotate" value="yes"/>
445 <param name="reannotation|maker_gff" value="evidences.gff3"/>
446 <param name="reannotation|est_pass" value="true"/>
447 <param name="est_evidences|est2genome" value="1"/>
448 <output name="output_gff" file="annot_reuse.gff3"/>
449 <output name="output_evidences" file="evidences_reuse.gff3" compare="sim_size"/>
450 </test>
451 <test> <!-- ab-initio prediction: SNAP model plus Augustus with the predefined "human" species model -->
452 <param name="genome" value="genome.fasta"/>
453 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/>
454 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="builtin"/>
455 <param name="abinitio_gene_prediction|aug_prediction|augustus_species" value="human"/>
456 <param name="est_evidences|est" value="est.fasta"/>
457 <param name="est_evidences|est2genome" value="1"/>
458 <output name="output_gff" file="annot_human.gff3" compare="sim_size"/>
459 <output name="output_evidences" file="evidences_human.gff3" compare="sim_size"/>
460 </test>
461 <test> <!-- ab-initio prediction: SNAP model plus Augustus with a custom model archive (local.tar.gz) taken from the history -->
462 <param name="genome" value="genome.fasta"/>
463 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/>
464 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="history"/>
465 <param name="abinitio_gene_prediction|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/>
466 <param name="est_evidences|est" value="est.fasta"/>
467 <param name="est_evidences|est2genome" value="1"/>
468 <output name="output_gff" file="annot_model.gff3" compare="sim_size"/>
469 <output name="output_evidences" file="evidences_model.gff3" compare="sim_size"/>
470 </test>
471 <test> <!-- same inputs as the baseline, with RepeatMasker turned off (do_rm=no) -->
472 <param name="genome" value="genome.fasta"/>
473 <param name="est_evidences|est" value="est.fasta"/>
474 <param name="est_evidences|est2genome" value="1"/>
475 <param name="repeat_masking|repeatmasker|do_rm" value="no"/>
476 <output name="output_gff" file="annot_norm.gff3"/>
477 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/>
478 </test>
479 </tests>
480 <help><![CDATA[
481 MAKER_ is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources.
482
483 .. _Maker: http://www.yandell-lab.org/software/maker.html
484 ]]></help>
485 <expand macro="citations"/>
486 </tool>