comparison maker.xml @ 5:5201ec38c01f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maker commit 5bef07276e14b38cca31ef724d0b4d2f55809715"
author iuc
date Mon, 28 Dec 2020 23:19:04 +0000
parents 5e96efe6e6c6
children d46d803ca6cc
comparison
equal deleted inserted replaced
4:5e96efe6e6c6 5:5201ec38c01f
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="maker" name="Maker" profile="16.04" version="@VERSION@+galaxy1"> 2 <tool id="maker" name="Maker" profile="16.04" version="@VERSION@">
3 <description>genome annotation pipeline</description> 3 <description>genome annotation pipeline</description>
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command><![CDATA[ 8 <command><![CDATA[
## Locate RepeatMasker. `|| true` keeps the && chain alive when `which` fails, so the guard on the next line can print its diagnostic before exiting.
9 RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries && 9 RM_PATH=\$(which RepeatMasker || true) &&
10 mkdir lib && 10 if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi &&
11 export REPEATMASKER_LIB_DIR=\$(pwd)/lib && 11
12 for file in \$(ls \$RM_LIB_PATH) ; do ln -s \$RM_LIB_PATH/\$file lib/\$file ; done && 12 LIBDIR=\$(dirname "\$RM_PATH")/../share/RepeatMasker/Libraries &&
13 #if $repeat_masking.repeat_source.source_type == "repbase": 13 #if $repeat_masking.repeat_source.source_type == "dfam_up":
14 cp '${repeat_masking.repeat_source.repbase_file}' 'lib/${repeat_masking.repeat_source.repbase_file_name}' && 14 mkdir lib/ &&
15 ln -s '${repeat_masking.repeat_source.dfam_lib}' lib/RepeatMaskerLib.h5 &&
16 LIBDIR=\$(pwd)/lib &&
15 #end if 17 #end if
18
19 export LIBDIR &&
16 20
17 maker -CTL 21 maker -CTL
18 22
19 && 23 &&
20 24
33 tar -C 'augustus_dir/species/' -xzvf '${abinitio_gene_prediction.aug_prediction.augustus_model}' > /dev/null && 37 tar -C 'augustus_dir/species/' -xzvf '${abinitio_gene_prediction.aug_prediction.augustus_model}' > /dev/null &&
34 38
35 export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ && 39 export AUGUSTUS_CONFIG_PATH=`pwd`/augustus_dir/ &&
36 #end if 40 #end if
37 41
## Run maker through mpiexec by default; MAKER_NO_MPI=1 in the job environment opts out and runs maker directly.
38 mpiexec -n \${GALAXY_SLOTS:-4} maker --ignore_nfs_tmp maker_opts.ctl maker_bopts.ctl maker_exe.ctl < /dev/null 42 MPI_CMD="mpiexec -n \${GALAXY_SLOTS:-4}" &&
43 if [ "\$MAKER_NO_MPI" = "1" ]; then
44 MPI_CMD="";
45 fi &&
46
47 \${MPI_CMD} maker --ignore_nfs_tmp maker_opts.ctl maker_bopts.ctl maker_exe.ctl < /dev/null
39 48
40 && 49 &&
41 50
42 gff3_merge -d *.maker.output/*_master_datastore_index.log -o '${output_full}' 51 gff3_merge -d *.maker.output/*_master_datastore_index.log -o '${output_full}'
43 52
110 #else 119 #else
111 protein_gff= # aligned protein homology evidence from an external GFF3 file 120 protein_gff= # aligned protein homology evidence from an external GFF3 file
112 #end if 121 #end if
113 122
114 #-----Repeat Masking (leave values blank to skip repeat masking) 123 #-----Repeat Masking (leave values blank to skip repeat masking)
115 #if $repeat_masking.repeat_source.source_type == 'repbase' 124 #if $repeat_masking.repeat_source.source_type == 'dfam'
116 125
117 #if $repeat_masking.repeat_source.species_source.species_from_list == 'yes' 126 #if $repeat_masking.repeat_source.species_source.species_from_list == 'yes'
118 model_org=${repeat_masking.repeat_source.species_source.species_list} 127 model_org=${repeat_masking.repeat_source.species_source.species_list}
119 #else 128 #else
120 model_org=${repeat_masking.repeat_source.species_source.species_name} 129 model_org=${repeat_masking.repeat_source.species_source.species_name}
121 #end if 130 #end if
122 131
132 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
133 softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
134
135 #else if $repeat_masking.repeat_source.source_type == 'dfam_up'
136 model_org=${repeat_masking.repeat_source.species_name}
123 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker 137 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
124 softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering) 138 softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
125 139
126 #else if $repeat_masking.repeat_source.source_type == 'library' 140 #else if $repeat_masking.repeat_source.source_type == 'library'
127 model_org= # select a model organism for RepBase masking in RepeatMasker 141 model_org= # select a model organism for RepBase masking in RepeatMasker
383 <param name="unmask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Run ab-initio prediction programs on unmasked sequence" help="Predictors will look for genes in repeated elements (like transposons). Only useful when you believe that transposons might have been integrated into a real gene structure (rare)."/> 397 <param name="unmask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Run ab-initio prediction programs on unmasked sequence" help="Predictors will look for genes in repeated elements (like transposons). Only useful when you believe that transposons might have been integrated into a real gene structure (rare)."/>
384 </section> 398 </section>
385 399
386 <section name="repeat_masking" title="Repeat masking" expanded="True"> 400 <section name="repeat_masking" title="Repeat masking" expanded="True">
387 <conditional name="repeat_source"> 401 <conditional name="repeat_source">
388 <param label="Repeat library source" name="source_type" type="select"> 402 <param label="Repeat library source" name="source_type" type="select" help="To use RepBase, choose 'Custom library of repeats' and select a fasta version of this non-free database.">
389 <option selected="true" value="repbase">RepBase</option> 403 <option selected="true" value="dfam">DFam (curated only, bundled with RepeatMasker)</option>
404 <option value="dfam_up">DFam (full/specific version)</option>
390 <option value="library">Custom library of repeats</option> 405 <option value="library">Custom library of repeats</option>
391 <option value="no">Disable repeat masking (not recommended)</option> 406 <option value="no">Disable repeat masking (not recommended)</option>
392 </param> 407 </param>
393 <when value="repbase"> 408 <when value="dfam">
394 <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
395 <param name="repbase_file_name" type="hidden" value="RMRBSeqs.embl"/> <!-- This is an ugly hack to allow testing with a fake repbase -->
396 <conditional name="species_source"> 409 <conditional name="species_source">
397 <param label="Select species name from a list?" name="species_from_list" type="select"> 410 <param label="Select species name from a list?" name="species_from_list" type="select">
398 <option value="yes" selected="true">Yes</option> 411 <option value="yes" selected="true">Yes</option>
399 <option value="no">No</option> 412 <option value="no">No</option>
400 </param> 413 </param>
401 <when value="yes"> 414 <when value="yes">
402 <param name="species_list" type="select" label="Species"> 415 <param name="species_list" type="select" label="Species">
403 <option value="anopheles" selected="true">anopheles</option> 416 <option value="vertebrate">Vertebrate (other than below)</option>
404 <option value="arabidopsis">arabidopsis</option> 417 <option value="mammal">Mammal (other than below)</option>
405 <option value="artiodactyl">artiodactyl</option> 418 <option value="human" selected="true">Human</option>
406 <option value="aspergillus">aspergillus</option> 419 <option value="rodent">Rodent</option>
407 <option value="carnivore">carnivore</option> 420 <option value="mouse">Mouse</option>
408 <option value="cat">cat</option> 421 <option value="rat">Rat</option>
409 <option value="chicken">chicken</option> 422 <option value="danio">Danio (zebra fish)</option>
410 <option value="ciona intestinalis">ciona intestinalis</option> 423 <option value="drosophila">Fruit fly (Drosophila melanogaster)</option>
411 <option value="ciona savignyi">ciona savignyi</option> 424 <option value="elegans">Caenorhabditis elegans (nematode)</option>
412 <option value="cow">cow</option>
413 <option value="danio">danio</option>
414 <option value="diatoaea">diatomea</option>
415 <option value="dog">dog</option>
416 <option value="drosophila">drosophila</option>
417 <option value="elegans">elegans</option>
418 <option value="fugu">fugu</option>
419 <option value="fungi" selected="true">fungi</option>
420 <option value="human">human</option>
421 <option value="maize">maize</option>
422 <option value="mammal">mammal</option>
423 <option value="mouse">mouse</option>
424 <option value="pig">pig</option>
425 <option value="rat">rat</option>
426 <option value="rice">rice</option>
427 <option value="rodentia">rodentia</option>
428 <option value="ruminantia">ruminantia</option>
429 <option value="wheat">wheat</option>
430 </param> 425 </param>
431 </when> 426 </when>
432 <when value="no"> 427 <when value="no">
433 <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" /> 428 <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
434 </when> 429 </when>
435 </conditional> 430 </conditional>
436 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/> 431 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
432 </when>
433 <when value="dfam_up">
434 <param name="dfam_lib" type="data" format="h5" label="DFam library" help="The full DFam library can be downloaded from https://www.dfam.org/releases/current/families/Dfam.h5.gz" />
435 <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
436 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
437 </when> 437 </when>
438 <when value="library"> 438 <when value="library">
439 <param name="rmlib" type="data" format="fasta" label="Transposable element sequences for RepeatRunner" optional="True" /> 439 <param name="rmlib" type="data" format="fasta" label="Transposable element sequences for RepeatRunner" optional="True" />
440 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner" optional="True" /> 440 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner" optional="True" />
441 <param name="rm_gff" type="data" format="gff" label="Pre-identified repeat elements from an external GFF file" optional="True" /> 441 <param name="rm_gff" type="data" format="gff" label="Pre-identified repeat elements from an external GFF file" optional="True" />
546 </test> 546 </test>
547 <test> 547 <test>
548 <param name="genome" value="genome.fasta"/> 548 <param name="genome" value="genome.fasta"/>
549 <param name="est_evidences|est" value="est.fasta"/> 549 <param name="est_evidences|est" value="est.fasta"/>
550 <param name="est_evidences|est2genome" value="1"/> 550 <param name="est_evidences|est2genome" value="1"/>
551 <param name="repeat_masking|repeat_source|source_type" value="repbase"/> 551 <param name="repeat_masking|repeat_source|source_type" value="dfam"/>
552 <param name="repeat_masking|repeat_source|repbase_file" value="fake_repbase.embl" /> 552 <param name="repeat_masking|repeat_source|species_list" value="drosophila" />
553 <param name="repeat_masking|repeat_source|repbase_file_name" value="fake.embl" /> 553 <output name="output_gff" file="annot_dfam.gff3"/>
554 <param name="repeat_masking|repeat_source|species_list" value="anopheles" />
555 <output name="output_gff" file="annot_repbase.gff3"/>
556 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/> 554 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/>
555 </test>
556 <test>
557 <param name="genome" value="genome.fasta"/>
558 <param name="est_evidences|est" value="est.fasta"/>
559 <param name="est_evidences|est2genome" value="1"/>
560 <param name="repeat_masking|repeat_source|source_type" value="dfam_up"/>
561 <param name="repeat_masking|repeat_source|dfam_lib" value="Dfam_partial_test.h5" ftype="h5" />
562 <param name="repeat_masking|repeat_source|species_name" value="rodent" />
563 <output name="output_gff" file="annot_dfam_up.gff3"/>
564 <output name="output_evidences" file="evidences_norm_dfam_up.gff3" compare="sim_size"/>
557 </test> 565 </test>
558 </tests> 566 </tests>
559 <help><![CDATA[ 567 <help><![CDATA[
560 MAKER is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources. 568 MAKER is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. 
MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources.
561 569