Mercurial > repos > iuc > maker
comparison maker.xml @ 0:16e44ec438c4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maker commit 2896dcfd180800d00ea413a59264ef8b11788b8e
author | iuc |
---|---|
date | Thu, 19 Oct 2017 15:58:39 -0400 |
parents | |
children | 73a79dec987b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:16e44ec438c4 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="maker" name="Maker" profile="16.04" version="@VERSION@"> | |
3 <description>genome annotation pipeline</description> | |
4 <macros> | |
5 <import>macros.xml</import> | |
6 </macros> | |
7 <expand macro="requirements"/> | |
8 <command><![CDATA[ | |
9 maker -CTL | |
10 ## The call above presumably writes the default control files; maker_bopts.ctl and maker_exe.ctl are used unchanged further down (confirm against the MAKER docs) | |
11 && | |
12 ## Fill the empty cpus= entry of the rendered config with the slot count of the job (4 when GALAXY_SLOTS is unset) and save the result as maker_opts.ctl | |
13 sed "s/cpus=/cpus=\${GALAXY_SLOTS:-4}/g" '$ctl' > maker_opts.ctl | |
14 | |
15 && | |
16 | |
17 #if $abinitio_gene_prediction.aug_prediction.augustus_mode == 'history' | |
18 | |
19 ## Using an augustus model from history, we need to unzip it and let augustus find it | |
20 ## A private copy of the Augustus config dir is made so the archive can be unpacked under species/ | |
21 cp -r "\$AUGUSTUS_CONFIG_PATH/" augustus_dir/ && | |
22 | |
23 mkdir -p 'augustus_dir/species/' && | |
24 | |
25 tar -C 'augustus_dir/species/' -xzvf '${abinitio_gene_prediction.aug_prediction.augustus_model}' > /dev/null && | |
26 ## Point Augustus at the private copy for the rest of the job | |
27 export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ && | |
28 #end if | |
29 ## Run the annotation with the three control files found in the working directory | |
30 maker maker_opts.ctl maker_bopts.ctl maker_exe.ctl | |
31 | |
32 && | |
33 ## Merge the results referenced by the master datastore index log into the full GFF3 output | |
34 gff3_merge -d *.maker.output/*_master_datastore_index.log -o '${output_full}' | |
35 | |
36 && | |
37 ## Final annotation: keep comment lines and lines whose second column is maker; sed stops printing at the FASTA marker line | |
38 awk '{if ($2 == "maker" || $1 ~ /^\#/) {print}}' '${output_full}' | sed -n '/^\#\#FASTA\$/q;p' > '${output_gff}' | |
39 | |
40 && | |
41 ## Evidences: keep every line whose second column is not maker, again stopping at the FASTA marker line | |
42 awk '{if ($2 != "maker") {print}}' '${output_full}' | sed -n '/^\#\#FASTA\$/q;p' > '${output_evidences}' | |
43 ]]></command> | |
44 <configfiles> | |
45 <!-- Maker doesn't like indentation in its config file... --> | |
46 <configfile name="ctl"><![CDATA[ | |
47 #-----Genome (these are always required) | |
48 genome=${genome} # genome sequence (fasta file or fasta embeded in GFF3 file) | |
49 organism_type=${organism_type} # eukaryotic or prokaryotic. Default is eukaryotic | |
50 | |
51 #-----Re-annotation Using MAKER Derived GFF3 | |
52 #if $reannotation.reannotate == 'no' | |
53 maker_gff= # MAKER derived GFF3 file | |
54 est_pass=0 # use ESTs in maker_gff: 1 = yes, 0 = no | |
55 altest_pass=0 # use alternate organism ESTs in maker_gff: 1 = yes, 0 = no | |
56 protein_pass=0 # use protein alignments in maker_gff: 1 = yes, 0 = no | |
57 rm_pass=0 # use repeats in maker_gff: 1 = yes, 0 = no | |
58 model_pass=0 # use gene models in maker_gff: 1 = yes, 0 = no | |
59 pred_pass=0 # use ab-initio predictions in maker_gff: 1 = yes, 0 = no | |
60 other_pass=0 # passthrough anything else in maker_gff: 1 = yes, 0 = no | |
61 #else | |
62 maker_gff=${reannotation.maker_gff} # MAKER derived GFF3 file | |
63 est_pass=${reannotation.est_pass} # use ESTs in maker_gff: 1 = yes, 0 = no | |
64 altest_pass=${reannotation.altest_pass} # use alternate organism ESTs in maker_gff: 1 = yes, 0 = no | |
65 protein_pass=${reannotation.protein_pass} # use protein alignments in maker_gff: 1 = yes, 0 = no | |
66 rm_pass=${reannotation.rm_pass} # use repeats in maker_gff: 1 = yes, 0 = no | |
67 model_pass=${reannotation.model_pass} # use gene models in maker_gff: 1 = yes, 0 = no | |
68 pred_pass=${reannotation.pred_pass} # use ab-initio predictions in maker_gff: 1 = yes, 0 = no | |
69 other_pass=${reannotation.other_pass} # passthrough anything else in maker_gff: 1 = yes, 0 = no | |
70 #end if | |
71 | |
72 #-----EST Evidence (for best results provide a file for at least one) | |
73 #if $est_evidences.est | |
74 est=${est_evidences.est} # set of ESTs or assembled mRNA-seq in fasta format | |
75 #else | |
76 est= # set of ESTs or assembled mRNA-seq in fasta format | |
77 #end if | |
78 #if $est_evidences.altest | |
79 altest=${est_evidences.altest} # EST/cDNA sequence file in fasta format from an alternate organism | |
80 #else | |
81 altest= # EST/cDNA sequence file in fasta format from an alternate organism | |
82 #end if | |
83 #if $est_evidences.est_gff | |
84 est_gff=${est_evidences.est_gff} # aligned ESTs or mRNA-seq from an external GFF3 file | |
85 #else | |
86 est_gff= # aligned ESTs or mRNA-seq from an external GFF3 file | |
87 #end if | |
88 #if $est_evidences.altest_gff | |
89 altest_gff=${est_evidences.altest_gff} # aligned ESTs from a closly relate species in GFF3 format | |
90 #else | |
91 altest_gff= # aligned ESTs from a closly relate species in GFF3 format | |
92 #end if | |
93 | |
94 #-----Protein Homology Evidence (for best results provide a file for at least one) | |
95 #if $protein_evidences.protein | |
96 protein=${protein_evidences.protein} # protein sequence file in fasta format (i.e. from mutiple oransisms) | |
97 #else | |
98 protein= # protein sequence file in fasta format (i.e. from mutiple oransisms) | |
99 #end if | |
100 #if $protein_evidences.protein_gff | |
101 protein_gff=${protein_evidences.protein_gff} # aligned protein homology evidence from an external GFF3 file | |
102 #else | |
103 protein_gff= # aligned protein homology evidence from an external GFF3 file | |
104 #end if | |
105 | |
106 #-----Repeat Masking (leave values blank to skip repeat masking) | |
107 #if $repeat_masking.repeatmasker.do_rm == 'simple' | |
108 model_org=simple # select a model organism for RepBase masking in RepeatMasker | |
109 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker | |
110 #else if $repeat_masking.repeatmasker.do_rm == 'lib' | |
111 model_org= # select a model organism for RepBase masking in RepeatMasker | |
112 rmlib=${repeat_masking.repeatmasker.rmlib} # provide an organism specific repeat library in fasta format for RepeatMasker | |
113 #else | |
114 model_org= # select a model organism for RepBase masking in RepeatMasker | |
115 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker | |
116 #end if | |
117 #if $repeat_masking.repeat_protein | |
118 repeat_protein=${repeat_masking.repeat_protein} # provide a fasta file of transposable element proteins for RepeatRunner | |
119 #else | |
120 repeat_protein= # provide a fasta file of transposable element proteins for RepeatRunner | |
121 #end if | |
122 #if $repeat_masking.rm_gff | |
123 rm_gff=${repeat_masking.rm_gff} # pre-identified repeat elements from an external GFF3 file | |
124 #else | |
125 rm_gff= # pre-identified repeat elements from an external GFF3 file | |
126 #end if | |
127 prok_rm=0 # forces MAKER to repeatmask prokaryotes (no reason to change this), 1 = yes, 0 = no | |
128 softmask=${repeat_masking.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering) | |
129 | |
130 #-----Gene Prediction | |
131 #if $abinitio_gene_prediction.snaphmm | |
132 snaphmm=${abinitio_gene_prediction.snaphmm} # SNAP HMM file | |
133 #else | |
134 snaphmm= # SNAP HMM file | |
135 #end if | |
136 gmhmm= # GeneMark HMM file, disabled in galaxy as not free | |
137 #if $abinitio_gene_prediction.aug_prediction.augustus_mode == 'builtin' | |
138 augustus_species=${abinitio_gene_prediction.aug_prediction.augustus_species} # Augustus gene prediction species model | |
139 #else if $abinitio_gene_prediction.aug_prediction.augustus_mode == 'history' | |
140 augustus_species=local # Augustus gene prediction species model | |
141 #else | |
142 augustus_species= # Augustus gene prediction species model | |
143 #end if | |
144 fgenesh_par_file= # FGENESH parameter file disabled in galaxy as not free | |
145 #if $gene_prediction.pred_gff | |
146 pred_gff=${gene_prediction.pred_gff} # ab-initio predictions from an external GFF3 file | |
147 #else | |
148 pred_gff= # ab-initio predictions from an external GFF3 file | |
149 #end if | |
150 #if $gene_prediction.model_gff | |
151 model_gff=${gene_prediction.model_gff} # annotated gene models from an external GFF3 file (annotation pass-through) | |
152 #else | |
153 model_gff= # annotated gene models from an external GFF3 file (annotation pass-through) | |
154 #end if | |
155 est2genome=${est_evidences.est2genome} # infer gene predictions directly from ESTs, 1 = yes, 0 = no | |
156 protein2genome=${protein_evidences.protein2genome} # infer predictions from protein homology, 1 = yes, 0 = no | |
157 trna=${gene_prediction.trna} # find tRNAs with tRNAscan, 1 = yes, 0 = no | |
158 #if $gene_prediction.snoscan_rrna | |
159 snoscan_rrna=${gene_prediction.snoscan_rrna} # rRNA file to have Snoscan find snoRNAs | |
160 #else | |
161 snoscan_rrna= # rRNA file to have Snoscan find snoRNAs | |
162 #end if | |
163 unmask=${abinitio_gene_prediction.unmask} # also run ab-initio prediction programs on unmasked sequence, 1 = yes, 0 = no | |
164 | |
165 #-----Other Annotation Feature Types (features MAKER doesn't recognize) | |
166 #if $advanced.other_gff | |
167 other_gff=${advanced.other_gff} # extra features to pass-through to final MAKER generated GFF3 file | |
168 #else | |
169 other_gff= # extra features to pass-through to final MAKER generated GFF3 file | |
170 #end if | |
171 | |
172 #-----External Application Behavior Options | |
173 alt_peptide=${advanced.alt_peptide} # amino acid used to replace non-standard amino acids in BLAST databases | |
174 cpus= # max number of cpus to use in BLAST and RepeatMasker (not for MPI, leave 1 when using MPI) | |
175 | |
176 #-----MAKER Behavior Options | |
177 max_dna_len=${advanced.max_dna_len} # length for dividing up contigs into chunks (increases/decreases memory usage) | |
178 min_contig=${advanced.min_contig} # skip genome contigs below this length (under 10kb are often useless) | |
179 | |
180 pred_flank=${advanced.pred_flank} # flank for extending evidence clusters sent to gene predictors | |
181 pred_stats=${advanced.pred_stats} # report AED and QI statistics for all predictions as well as models | |
182 AED_threshold=${advanced.AED_threshold} # Maximum Annotation Edit Distance allowed (bound by 0 and 1) | |
183 min_protein=${advanced.min_protein} # require at least this many amino acids in predicted proteins | |
184 alt_splice=${advanced.alt_splice} # Take extra steps to try and find alternative splicing, 1 = yes, 0 = no | |
185 always_complete=${advanced.always_complete} # extra steps to force start and stop codons, 1 = yes, 0 = no | |
186 map_forward=${advanced.map_forward} # map names and attributes forward from old GFF3 genes, 1 = yes, 0 = no | |
187 keep_preds=${advanced.keep_preds} # Concordance threshold to add unsupported gene prediction (bound by 0 and 1) | |
188 | |
189 split_hit=${advanced.split_hit} # length for the splitting of hits (expected max intron size for evidence alignments) | |
190 single_exon=${advanced.single_exon.single_exon} # consider single exon EST evidence when generating annotations, 1 = yes, 0 = no | |
191 #if $advanced.single_exon.single_exon == '1' | |
192 single_length=${advanced.single_exon.single_length} # min length required for single exon ESTs if 'single_exon is enabled' | |
193 #else | |
194 single_length=250 # min length required for single exon ESTs if 'single_exon is enabled' | |
195 #end if | |
196 correct_est_fusion=${advanced.correct_est_fusion} # limits use of ESTs in annotation to avoid fusion genes | |
197 | |
198 tries=2 # number of times to try a contig if there is a failure for some reason | |
199 clean_try=0 # remove all data from previous run before retrying, 1 = yes, 0 = no | |
200 clean_up=0 # removes theVoid directory with individual analysis files, 1 = yes, 0 = no | |
201 TMP= # specify a directory other than the system default temporary directory for temporary files | |
202 ]]></configfile> | |
203 </configfiles> | |
204 | |
205 <inputs> | |
206 <param name="genome" type="data" format="fasta" label="Genome to annotate"/> | |
207 <param name="organism_type" type="select" label="Organism type"> | |
208 <option value="eukaryotic">Eukaryotic</option> | |
209 <option value="prokaryotic">Prokaryotic</option> | |
210 </param> | |
211 | |
212 <conditional name="reannotation"> | |
213 <param name="reannotate" type="select" label="Re-annotate using an existing Maker annotation"> | |
214 <option value="no" selected="true">No</option> | |
215 <option value="yes">Yes</option> | |
216 </param> | |
217 <when value="no"/> | |
218 <when value="yes"> | |
219 <param name="maker_gff" type="data" format="gff" label="Previous Maker annotation"/> | |
220 <param name="est_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use ESTs"/> | |
221 <param name="altest_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use alternate organism ESTs"/> | |
222 <param name="protein_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use protein alignments"/> | |
223 <param name="rm_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use repeats"/> | |
224 <param name="model_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use gene models"/> | |
225 <param name="pred_pass" type="boolean" truevalue="1" falsevalue="0" label="Re-use ab-initio predictions"/> | |
226 <param name="other_pass" type="boolean" truevalue="1" falsevalue="0" label="Passthrough anything else"/> | |
227 </when> | |
228 </conditional> | |
229 | |
230 <section name="est_evidences" title="EST evidences (for best results provide at least one of these)" expanded="True"> | |
231 <param name="est2genome" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Infer gene predictions directly from all ESTs" help="Maker will blindly trust EST alignments to create gene models. Use this only before training ab-initio predictors."/> | |
232 <param name="est" type="data" format="fasta" label="ESTs or assembled cDNA" optional="True"/> | |
233 <param name="altest" type="data" format="fasta" label="EST/cDNA from an alternate organism" optional="True"/> | |
234 <param name="est_gff" type="data" format="gff" label="Aligned ESTs or cDNA" optional="True"/> | |
235 <param name="altest_gff" type="data" format="gff" label="Aligned EST/cDNA from an alternate organism" optional="True"/> | |
236 </section> | |
237 | |
238 <section name="protein_evidences" title="Protein evidences (for best results provide at least one of these)" expanded="True"> <!-- Values feed protein=, protein_gff= and protein2genome= in the ctl configfile --> | |
239 <param name="protein2genome" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Infer gene predictions directly from all protein alignments" help="Maker will blindly trust protein alignments to create gene models. Use this only before training ab-initio predictors."/> | |
240 <param name="protein" type="data" format="fasta" label="Protein sequences" help="From multiple organisms" optional="True"/> | |
241 <param name="protein_gff" type="data" format="gff" label="Aligned proteins" help="From multiple organisms" optional="True"/> | |
242 </section> | |
243 | |
244 <section name="abinitio_gene_prediction" title="Ab-initio gene prediction" expanded="True"> | |
245 <param name="snaphmm" type="data" format="snaphmm" label="SNAP model" optional="True" help="Leave empty to disable gene prediction by SNAP"/> | |
246 | |
247 <conditional name="aug_prediction"> | |
248 <param name="augustus_mode" type="select" label="Prediction with Augustus"> | |
249 <option value="no" selected="true">Don't use Augustus to predict genes</option> | |
250 <option value="builtin">Run Augustus with a predefined prediction model</option> | |
251 <option value="history">Run Augustus with a custom prediction model</option> | |
252 </param> | |
253 <when value="no"/> | |
254 <when value="history"> | |
255 <param name="augustus_model" type="data" format="augustus" label="Augustus model"/> | |
256 </when> | |
257 <when value="builtin"> | |
258 <param name="augustus_species" type="select" label="Augustus species model"> | |
259 <!-- If you update this list, please also update it in augustus and busco tools (../augustus/augustus.xml and ../busco/busco.xml). Option values are written verbatim to augustus_species= in the ctl configfile, so keep them unique and free of surrounding whitespace. --> | |
260 <option value="human">Homo sapiens</option> | |
261 <option value="fly">Drosophila melanogaster</option> | |
262 <option value="arabidopsis">Arabidopsis thaliana</option> | |
263 <option value="brugia">Brugia malayi</option> | |
264 <option value="aedes">Aedes aegypti</option> | |
265 <option value="tribolium2012">Tribolium castaneum</option> | |
266 <option value="schistosoma">Schistosoma mansoni</option> | |
267 <option value="tetrahymena">Tetrahymena thermophila</option> | |
268 <option value="galdieria">Galdieria sulphuraria</option> | |
269 <option value="maize">Zea mays</option> | |
270 <option value="toxoplasma">Toxoplasma gondii</option> | |
271 <option value="caenorhabditis">Caenorhabditis elegans</option> | |
272 <option value="aspergillus_fumigatus">Aspergillus fumigatus</option> | |
273 <option value="aspergillus_nidulans">Aspergillus nidulans</option> | |
274 <option value="aspergillus_oryzae">Aspergillus oryzae</option> | |
275 <option value="aspergillus_terreus">Aspergillus terreus</option> | |
276 <option value="botrytis_cinerea">Botrytis cinerea</option> | |
277 <option value="candida_albicans">Candida albicans</option> | |
278 <option value="candida_guilliermondii">Candida guilliermondii</option> | |
279 <option value="candida_tropicalis">Candida tropicalis</option> | |
280 <option value="chaetomium_globosum">Chaetomium globosum</option> | |
281 <option value="coccidioides_immitis">Coccidioides immitis</option> | |
282 <option value="coprinus">Coprinus cinereus</option> | |
283 <option value="coprinus_cinereus">Coprinus cinereus</option> | |
284 <option value="cryptococcus_neoformans_gattii">Cryptococcus neoformans gattii</option> | |
285 <option value="cryptococcus_neoformans_neoformans_B">Cryptococcus neoformans neoformans</option> | |
286 <option value="cryptococcus_neoformans_neoformans_JEC21">Cryptococcus neoformans neoformans</option> | |
287 <option value="cryptococcus">Cryptococcus neoformans</option> | |
288 <option value="debaryomyces_hansenii">Debaryomyces hansenii</option> | |
289 <option value="encephalitozoon_cuniculi_GB">Encephalitozoon cuniculi</option> | |
290 <option value="eremothecium_gossypii">Eremothecium gossypii</option> | |
291 <option value="fusarium_graminearum">Fusarium graminearum</option> | |
292 <option value="histoplasma_capsulatum">Histoplasma capsulatum</option> | |
293 <option value="histoplasma">Histoplasma capsulatum</option> | |
294 <option value="kluyveromyces_lactis">Kluyveromyces lactis</option> | |
295 <option value="laccaria_bicolor">Laccaria bicolor</option> | |
296 <option value="lamprey">Petromyzon marinus</option> | |
297 <option value="leishmania_tarentolae">Leishmania tarentolae</option> | |
298 <option value="lodderomyces_elongisporus">Lodderomyces elongisporus</option> | |
299 <option value="magnaporthe_grisea">Magnaporthe grisea</option> | |
300 <option value="neurospora_crassa">Neurospora crassa</option> | |
301 <option value="phanerochaete_chrysosporium">Phanerochaete chrysosporium</option> | |
302 <option value="pichia_stipitis">Pichia stipitis</option> | |
303 <option value="rhizopus_oryzae">Rhizopus oryzae</option> | |
304 <option value="saccharomyces_cerevisiae_S288C">Saccharomyces cerevisiae</option> | |
305 <option value="saccharomyces_cerevisiae_rm11-1a_1">Saccharomyces cerevisiae</option> | |
306 <option value="saccharomyces">Saccharomyces cerevisiae</option> | |
307 <option value="schizosaccharomyces_pombe">Schizosaccharomyces pombe</option> | |
308 <option value="trichinella">Trichinella spiralis</option> | |
309 <option value="ustilago_maydis">Ustilago maydis</option> | |
310 <option value="yarrowia_lipolytica">Yarrowia lipolytica</option> | |
311 <option value="nasonia">Nasonia vitripennis</option> | |
312 <option value="tomato">Solanum lycopersicum</option> | |
313 <option value="chlamydomonas">Chlamydomonas reinhardtii</option> | |
314 <option value="amphimedon">Amphimedon queenslandica</option> | |
315 <option value="pneumocystis">Pneumocystis jirovecii</option> | |
316 <option value="chicken">Gallus gallus domesticus (chicken)</option> | |
317 <option value="cacao">Theobroma cacao (cacao)</option> | |
318 <option value="heliconius_melpomene1">Heliconius melpomene</option> | |
319 <option value="xenoturbella">Xenoturbella</option> | |
320 <option value="E_coli_K12">E coli K12</option> | |
321 <option value="c_elegans_trsk">c elegans trsk</option> | |
322 <option value="camponotus_floridanus">Camponotus floridanus</option> | |
323 <option value="coyote_tobacco">Coyote tobacco</option> | |
324 <option value="s_aureus">Staphylococcus aureus</option> | |
325 <option value="thermoanaerobacter_tengcongensis">Thermoanaerobacter tengcongensis</option> | |
326 <option value="wheat">wheat</option> | |
327 <option value="zebrafish">Danio rerio</option> | |
328 <option value="anidulans">Aspergillus nidulans</option> | |
329 <option value="bombus_impatiens1">Bombus impatiens1</option> | |
330 <option value="bombus_terrestris2">Bombus terrestris2</option> | |
332 <option value="brugia_malayi">Brugia malayi</option> | |
333 <option value="conidiobolus_coronatus">Conidiobolus coronatus</option> | |
334 <option value="cryptococcus_neoformans">Cryptococcus neoformans</option> | |
335 <option value="culex_pipiens">Culex pipiens</option> | |
336 <option value="elephant_shark">Callorhinchus milii</option> | |
337 <option value="honeybee1">Apis mellifera</option> | |
339 <option value="pea_aphid">Acyrthosiphon pisum</option> | |
340 <option value="rhodnius_prolixus">Rhodnius prolixus</option> | |
342 <option value="verticillium_albo_atrum1">Verticillium albo atrum1</option> | |
343 <option value="verticillium_longisporum1">Verticillium longisporum1</option> | |
344 <option value="Xipophorus_maculatus">Xipophorus_maculatus</option> | |
345 <option value="adorsata">adorsata</option> | |
346 <option value="ancylostoma_ceylanicum">ancylostoma_ceylanicum</option> | |
347 <option value="maker2_athal1">maker2_athal1</option> | |
348 <option value="maker2_c_elegans1">maker2_c_elegans1</option> | |
349 <option value="maker2_dmel1">maker2_dmel1</option> | |
350 <option value="maker2_spomb1">maker2_spomb1</option> | |
351 <option value="parasteatoda">parasteatoda</option> | |
352 <option value="rice">rice</option> | |
353 <option value="schistosoma2">schistosoma2</option> | |
354 <option value="sulfolobus_solfataricus">sulfolobus_solfataricus</option> | |
355 </param> | |
356 </when> | |
357 </conditional> | |
358 <param name="unmask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Run ab-initio prediction programs on unmasked sequence" help="Predictors will look for genes in repeated elements (like transposons). Only useful when you believe that transposons might have been integrated into a real gene structure (rare)."/> | |
359 </section> | |
360 | |
361 <section name="repeat_masking" title="Repeat masking" expanded="True"> | |
362 <conditional name="repeatmasker"> | |
363 <param name="do_rm" type="select" label="Enable repeat masking with RepeatMasker"> | |
364 <option value="no">No</option> | |
365 <option value="simple" selected="true">Yes, run RepeatMasker with default simple models</option> | |
366 <option value="lib">Yes, use an organism specific repeat library (fasta)</option> | |
367 </param> | |
368 <!-- full repbase cannot be redistributed (for licensing reasons, see https://hpc.nih.gov/apps/repbase_license.html), | |
369 using only the default maker base | |
370 model_org is ignored and replaced by 'simple' if the full RepBase is not available. | |
371 model_org=simple means maker will search using the simple models shipped by default | |
372 Installing RepBase requires to replace files in the RepeatMasker installation dir | |
373 --> | |
374 <when value="no"/> | |
375 <when value="simple"/> | |
376 <when value="lib"> | |
377 <param name="rmlib" type="data" format="fasta" label="Organism specific repeat library for RepeatMasker (fasta)"/> | |
378 </when> | |
379 </conditional> | |
380 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner (fasta)" help="Leave empty to skip" optional="True"/> | |
381 <param name="rm_gff" type="data" format="gff" label="Pre-identified repeat elements from an external GFF file" help="Leave empty to skip" optional="True"/> | |
382 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/> | |
383 </section> | |
384 | |
385 <section name="gene_prediction" title="Other predictions" expanded="True"> <!-- Values feed pred_gff=, model_gff=, trna= and snoscan_rrna= in the ctl configfile --> | |
386 <param name="pred_gff" type="data" format="gff" label="Predictions from an external GFF3 file" optional="True"/> | |
387 <param name="model_gff" type="data" format="gff" label="Annotated gene models from an external GFF3 file" help="annotation pass-through" optional="True"/> | |
388 <param name="trna" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Find tRNAs with tRNAscan"/> | |
389 <param name="snoscan_rrna" type="data" format="fasta" label="rRNA file to have Snoscan find snoRNAs" optional="True"/> | |
390 </section> | |
391 | |
392 <section name="advanced" title="Advanced settings" expanded="False"> <!-- Values feed the pass-through, external application and MAKER behavior entries of the ctl configfile --> | |
393 <param name="other_gff" type="data" format="gff" label="Extra features to pass-through to final Maker generated GFF3 file" optional="True"/> | |
394 <param name="alt_peptide" type="text" value="C" size="1" label="Amino acid used to replace non-standard amino acids in BLAST databases"> | |
395 <validator type="regex" message="This must be a single uppercase letter">^[A-Z]$</validator> | |
396 </param> | |
397 <param name="max_dna_len" type="integer" value="100000" label="Length for dividing up contigs into chunks" help="Increases/decreases memory usage"/> | |
398 <param name="min_contig" type="integer" value="1" label="Skip genome contigs below this length" help="Under 10kb are often useless"/> | |
399 <param name="pred_flank" type="integer" value="200" label="Flank for extending evidence clusters sent to gene predictors"/> | |
400 <param name="pred_stats" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Report AED and QI statistics for all predictions as well as models"/> | |
401 <param name="AED_threshold" type="float" min="0" max="1" value="1" label="Maximum Annotation Edit Distance allowed"/> | |
402 <param name="min_protein" type="integer" value="0" label="Require at least this many amino acids in predicted proteins"/> | |
403 <param name="alt_splice" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Take extra steps to try and find alternative splicing" help="Will try to output gene isoforms when detected instead of a single consensus isoform. Use this if you have good EST evidences allowing to detect isoforms."/> | |
404 <param name="always_complete" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Take extra steps to force the finding of start and stop codons" help="Only canonical gene structures will be reported, but it can lead to biologically incorrect sequences."/> | |
405 <param name="map_forward" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Map names and attributes forward from old GFF3 genes"/> | |
406 <param name="keep_preds" type="float" min="0" max="1" value="0" label="Concordance threshold to add unsupported gene prediction"/> | |
407 <param name="split_hit" type="integer" value="10000" label="length for the splitting of hits" help="Expected max intron size for evidence alignments"/> | |
408 <param name="correct_est_fusion" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Limit use of ESTs in annotation to avoid fusion genes"/> | |
409 <conditional name="single_exon"> <!-- single_length only exists when single_exon is 1; the configfile writes 250 otherwise --> | |
410 <param name="single_exon" type="select" label="Consider single exon EST evidence when generating annotations"> | |
411 <option value="0" selected="true">No</option> | |
412 <option value="1">Yes</option> | |
413 </param> | |
414 <when value="0"/> | |
415 <when value="1"> | |
416 <param name="single_length" type="integer" value="250" label="Min length required for single exon ESTs"/> | |
417 </when> | |
418 </conditional> | |
419 </section> | |
420 </inputs> | |
421 <outputs> | |
422 <data format="gff3" name="output_gff" label="${tool.name} on ${on_string}: final annotation"/> | |
423 <data format="gff3" name="output_evidences" label="${tool.name} on ${on_string}: evidences"/> | |
424 <data format="gff3" name="output_full" label="${tool.name} on ${on_string}: full gff (evidences + final annotation)"/> | |
425 </outputs> | |
426 <tests> <!-- NOTE(review): no test below asserts the output_full dataset --> | |
427 <test> <!-- Baseline: EST evidence with est2genome enabled, every other input left at its default --> | |
428 <param name="genome" value="genome.fasta"/> | |
429 <param name="est_evidences|est" value="est.fasta"/> | |
430 <param name="est_evidences|est2genome" value="1"/> | |
431 <output name="output_gff" file="annot.gff3"/> | |
432 <output name="output_evidences" file="evidences.gff3" compare="sim_size"/> | |
433 </test> | |
434 <test> <!-- Same inputs as the baseline, with organism_type set to prokaryotic --> | |
435 <param name="genome" value="genome.fasta"/> | |
436 <param name="organism_type" value="prokaryotic"/> | |
437 <param name="est_evidences|est" value="est.fasta"/> | |
438 <param name="est_evidences|est2genome" value="1"/> | |
439 <output name="output_gff" file="annot_proc.gff3"/> | |
440 <output name="output_evidences" file="evidences_proc.gff3" compare="sim_size"/> | |
441 </test> | |
442 <test> <!-- Re-annotation: ESTs are taken from a previous Maker GFF (est_pass) and no EST fasta is supplied --> | |
443 <param name="genome" value="genome.fasta"/> | |
444 <param name="reannotation|reannotate" value="yes"/> | |
445 <param name="reannotation|maker_gff" value="evidences.gff3"/> | |
446 <param name="reannotation|est_pass" value="true"/> | |
447 <param name="est_evidences|est2genome" value="1"/> | |
448 <output name="output_gff" file="annot_reuse.gff3"/> | |
449 <output name="output_evidences" file="evidences_reuse.gff3" compare="sim_size"/> | |
450 </test> | |
451 <test> <!-- Ab-initio prediction with a SNAP model and the builtin human Augustus model; both outputs compared by size only --> | |
452 <param name="genome" value="genome.fasta"/> | |
453 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/> | |
454 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="builtin"/> | |
455 <param name="abinitio_gene_prediction|aug_prediction|augustus_species" value="human"/> | |
456 <param name="est_evidences|est" value="est.fasta"/> | |
457 <param name="est_evidences|est2genome" value="1"/> | |
458 <output name="output_gff" file="annot_human.gff3" compare="sim_size"/> | |
459 <output name="output_evidences" file="evidences_human.gff3" compare="sim_size"/> | |
460 </test> | |
461 <test> <!-- Ab-initio prediction with a SNAP model and a custom Augustus model archive supplied as a dataset; both outputs compared by size only --> | |
462 <param name="genome" value="genome.fasta"/> | |
463 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/> | |
464 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="history"/> | |
465 <param name="abinitio_gene_prediction|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/> | |
466 <param name="est_evidences|est" value="est.fasta"/> | |
467 <param name="est_evidences|est2genome" value="1"/> | |
468 <output name="output_gff" file="annot_model.gff3" compare="sim_size"/> | |
469 <output name="output_evidences" file="evidences_model.gff3" compare="sim_size"/> | |
470 </test> | |
471 <test> <!-- Baseline inputs with RepeatMasker disabled (do_rm set to no) --> | |
472 <param name="genome" value="genome.fasta"/> | |
473 <param name="est_evidences|est" value="est.fasta"/> | |
474 <param name="est_evidences|est2genome" value="1"/> | |
475 <param name="repeat_masking|repeatmasker|do_rm" value="no"/> | |
476 <output name="output_gff" file="annot_norm.gff3"/> | |
477 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/> | |
478 </test> | |
479 </tests> | |
480 <help><![CDATA[ | |
481 MAKER is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources. | |
482 | |
483 .. _Maker: http://www.yandell-lab.org/software/maker.html | |
484 ]]></help> | |
485 <expand macro="citations"/> | |
486 </tool> |