comparison braker3.xml @ 0:3c0865d1172f draft

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/braker commit d6e73c0417506d93c905b4aeb77d92350b662fbb
author genouest
date Wed, 06 Sep 2023 09:50:28 +0000
parents
children bd103884a09c
comparison
equal deleted inserted replaced
-1:000000000000 0:3c0865d1172f
1 <tool id="braker3" name="BRAKER3" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
2 <description>genome annotation</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <requirements>
7 <expand macro="requirements" />
8 </requirements>
9 <version_command><![CDATA[braker.pl --version; "$GENEMARK_PATH/gmes/gmes_petap.pl" | grep version]]></version_command> <!-- Prints the BRAKER version, then the line(s) of gmes_petap.pl output containing "version"; the GeneMark path is quoted so an install directory with spaces is not word-split. -->
10 <command><![CDATA[
11 if [ -z "\$GENEMARK_PATH" ] ; then echo "GeneMark is not installed on this Galaxy server." >&2 ; exit 1 ; fi &&
12 if [ ! -f "\$GENEMARK_PATH/gmes/gmes_petap.pl" ] ; then echo "GeneMark is not installed properly on this Galaxy server." >&2 ; exit 1 ; fi &&
13 ## The two guards above abort with exit status 1 when GENEMARK_PATH is unset or gmes_petap.pl is missing.
14 ## This specific GeneMark version has some tools bundled with it, forced to use it.
15 ## I feel dirty. Pardon me.
16
17 export PATH="\$GENEMARK_PATH/../tools/:\$PATH" &&
18
19 ## GeneMark only searches for its license in ~/.gm_key
20 cp '${genemark_license}' ~/.gm_key &&
21
22 braker.pl
23 --genome '${genome}'
24 ## Expands to nothing when the genome is soft-masked, to the softmasking_off flag otherwise.
25 $softmasking
26
27 #if $evidences.bam:
28 --bam '${evidences.bam}'
29 #end if
30
31 #if $evidences.prot_seq:
32 --prot_seq '${evidences.prot_seq}'
33 #end if
34
35 ## No hints, use esmode
36 #if not $evidences.bam and not $evidences.prot_seq
37 --esmode
38 #end if
39 ## GTF is produced unless GFF3 is explicitly requested.
40 #if $output_format == 'gff3'
41 --gff3
42 #end if
43 ## Boolean parameters below expand to their flag (truevalue) or to an empty string (falsevalue).
44 $genemark.fungus
45
46 $augustus.crf
47 --rounds $augustus.rounds
48 $augustus.AUGUSTUS_ab_initio
49 $augustus.keepCrf
50
51 $advanced.UTR
52
53 $advanced.filterOutShort
54 ## Optional datasets: only passed when selected; paths are single-quoted like the genome path.
55 #if $advanced.eval:
56 --eval '${advanced.eval}'
57 #end if
58
59 #if $advanced.eval_pseudo:
60 --eval_pseudo '${advanced.eval_pseudo}'
61 #end if
62
63 #if $species:
64 --species '$species'
65 #end if
66
67 $advanced.alternatives_from_evidence
68
69 #if $dev.splice_sites:
70 --splice_sites '$dev.splice_sites'
71 #end if
72
73 #if $dev.min_contig:
74 --min_contig $dev.min_contig
75 #end if
76
77 --gc_probability $dev.gc_probability
78 --downsampling_lambda $dev.downsampling_lambda
79
80 #if $dev.gm_max_intergenic:
81 --gm_max_intergenic $dev.gm_max_intergenic
82 #end if
83 ## Use the slot count allocated by Galaxy, falling back to 2 when GALAXY_SLOTS is unset.
84 --threads \${GALAXY_SLOTS:-2}
85 ]]></command>
86 <inputs>
87 <param name="genemark_license" type="data" format="txt" label="GeneMark license file" help="Braker uses GeneMark, which is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators." />
88 <!-- Genome assembly to annotate and its masking state -->
89 <param argument="--genome" type="data" format="fasta" label="Assembly to annotate" help="The assembly should preferably be soft-masked (with RepeatMasker for example)" />
90
91 <param argument="--softmasking" type="boolean" checked="true" truevalue="" falsevalue="--softmasking_off" label="Genome sequence is soft-masked" />
92
93 <param argument="--species" type="text" label="Species name" optional="true" help="Using Sp_1, if no species is assigned"/>
94 <!-- Optional extrinsic evidence; when neither dataset is supplied the command template falls back to esmode -->
95 <section name="evidences" expanded="true" title="Evidences">
96 <param argument="--bam" type="data" format="bam" optional="true" label="RNA-seq mapped to genome to train Augustus/GeneMark" />
97 <param argument="--prot_seq" type="data" format="fasta" optional="true" label="Proteins to map to genome" />
98 </section>
99
100 <section name="genemark" expanded="true" title="GeneMark">
101 <param argument="--fungus" type="boolean" checked="false" truevalue="--fungus" falsevalue="" label="Fungal genome" help="GeneMark-EX option, run algorithm with branch point model (most useful for fungal genomes)" />
102 </section>
103
104 <section name="augustus" expanded="true" title="Augustus">
105 <param argument="--crf" type="boolean" checked="false" truevalue="--crf" falsevalue="" label="Use CRF training for Augustus" help="Alternate training method (Conditional Random Field)" />
106 <param argument="--rounds" type="integer" value="5" label="Number of optimization rounds used in optimize_augustus.pl" />
107 <param argument="--AUGUSTUS_ab_initio" type="boolean" checked="false" truevalue="--AUGUSTUS_ab_initio" falsevalue="" label="Output ab initio predictions by AUGUSTUS"/>
108 <param argument="--keepCrf" type="boolean" checked="false" truevalue="--keepCrf" falsevalue="" label="CRF parameters" help="keep CRF parameters even if they are no better than HMM parameters"/>
109 </section>
110 <!-- Advanced options; the UTR switch previously had an empty label -->
111 <section name="advanced" expanded="false" title="Advanced">
112 <param argument="--UTR" type="boolean" checked="false" truevalue="--UTR=on" falsevalue="" label="Enable UTR training and prediction" help="Experimental, requires RNASeq data (bam) and a softmasked genome" />
113 <param argument="--filterOutShort" type="boolean" checked="false" truevalue="--filterOutShort" falsevalue="" label="Filter out too short training genes predicted by GeneMark-EX" />
114 <param argument="--eval" type="data" format="gtf" optional="true" label="Reference set to evaluate predictions" help="using evaluation scripts from GaTech" />
115 <param argument="--eval_pseudo" type="data" format="gff3" optional="true" label="File with pseudogenes that will be excluded from accuracy evaluation" />
116 <param argument="--alternatives-from-evidence" type="boolean" checked="true" truevalue="--alternatives-from-evidence=true" falsevalue="--alternatives-from-evidence=false" label="Output alternative transcripts based on explicit evidence from hints"/>
117 </section>
118 <!-- Expert options: empty optional values are skipped by the command template -->
119 <section name="dev" expanded="false" title="Expert options">
120 <param argument="--splice_sites" type="text" label="List of splice site patterns for UTR prediction" help="Only affects UTR training example generation, not gene prediction by AUGUSTUS (default: GTAG)"/>
121 <param argument="--min_contig" type="integer" optional="true" label="Minimal contig length for GeneMark-EX"/>
122 <param argument="--gc_probability" type="float" min="0" max="1" value="0.001" label="Probability for donor splice site pattern GC for gene prediction with GeneMark-EX"/>
123 <param argument="--gm_max_intergenic" type="integer" optional="true" label="Maximum allowed size of intergenic regions in GeneMark-EX" help="If not set, the value is automatically determined by GeneMark-EX"/>
124 <param argument="--downsampling_lambda" type="integer" min="0" value="2" label="Lambda parameter of the Poisson distribution" help="for downsampling of training gene structures according to their number of introns distribution"/>
125 </section>
126 <!-- Chooses between GTF (default) and GFF3 output -->
127 <param name="output_format" type="select" label="Output format">
128 <option value="gtf" selected="true">GTF</option>
129 <option value="gff3">GFF3</option>
130 </param>
131 </inputs>
132 <!-- One annotation file is exposed depending on output_format; both are collected from the braker/ directory under the job working directory. -->
133 <outputs>
134 <data name="output_gtf" format="gtf" from_work_dir="braker/braker.gtf" label="GTF Annotation">
135 <filter>output_format == 'gtf'</filter>
136 </data>
137 <data name="output_gff" format="gff3" from_work_dir="braker/braker.gff3" label="GFF Annotation">
138 <filter>output_format == 'gff3'</filter>
139 </data>
140 </outputs>
141 <!-- Single smoke test, expected to fail: the command exits with status 1 when GENEMARK_PATH is unset (presumably the test host has no manual GeneMark install; confirm). -->
142 <tests>
143 <test expect_failure="true">
144 <param name="genemark_license" value="gm_key_64" />
145 <param name="genome" value="genome_masked.fa" />
146 <param name="output_format" value="gtf" />
147 <section name="evidences">
148 <param name="bam" value="SRR7458692.bam" />
149 </section>
150 </test>
151 </tests>
152
153 <!-- <test expect_num_outputs="1">
154 <param name="genemark_license" value="gm_key_64" />
155 <param name="genome" value="genome_masked.fa" />
156 <section name="augustus">
157 <param name="rounds" value="2" />
158 </section>
159 <section name="evidences">
160 <param name="bam" value="SRR7458692.bam" />
161 </section>
162 <param name="output_format" value="gtf" />
163 <output name="output_gtf" file="out_genome/braker.gtf" sort="true"/>
164 </test>
165
166 <test expect_num_outputs="1">
167 <param name="genemark_license" value="gm_key_64" />
168 <param name="genome" value="genome_masked.fa" />
169 <section name="augustus">
170 <param name="rounds" value="2" />
171 </section>
172 <section name="evidences">
173 <param name="bam" value="SRR7458692.bam" />
174 <param name="prot_seq" value="proteins.fa" />
175 </section>
176 <param name="output_format" value="gff3" />
177 <output name="output_gff" file="out_genome/braker.gff3"/>
178 </test>
179
180 <test expect_num_outputs="1">
181 <param name="genemark_license" value="gm_key_64" />
182 <param name="genome" value="genome_masked.fa" />
183 <section name="augustus">
184 <param name="rounds" value="2" />
185 </section>
186 <section name="evidences">
187 <param name="prot_seq" value="proteins.fa" />
188 </section>
189 <param name="output_format" value="gff3" />
190 <output name="output_gff" file="out_genome/braker.gff3"/>
191 </test> -->
192
193
194 <help><![CDATA[
195
196 Braker3_ allows for fully automated training of the gene prediction tools GeneMark-EX and AUGUSTUS from RNA-Seq and/or protein homology information, and integrates the extrinsic evidence from RNA-Seq and protein homology information into the prediction.
197
198 In contrast to other available methods that rely on protein homology information, BRAKER3 reaches high gene prediction accuracy even in the absence of the annotation of very closely related species and in the absence of RNA-Seq data.
199
200 .. _Braker3: https://github.com/Gaius-Augustus/BRAKER
201 ]]></help>
202 <expand macro="citations" />
203 </tool>