comparison funannotate_predict.xml @ 1:1a59958c1f76 draft

"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author iuc
date Mon, 04 Oct 2021 19:37:44 +0000
parents 40b87aef5241
children 33092577d65d
comparison
equal deleted inserted replaced
0:40b87aef5241 1:1a59958c1f76
30 --database `pwd`'/hacked_database' 30 --database `pwd`'/hacked_database'
31 #else 31 #else
32 --database '$database.fields.path' 32 --database '$database.fields.path'
33 #end if 33 #end if
34 34
35 $force
36
35 --species '${organism.species}' 37 --species '${organism.species}'
36 --isolate '${organism.isolate}' 38 --isolate '${organism.isolate}'
37 --strain '${organism.strain}' 39 --strain '${organism.strain}'
38 --organism '${organism.organism}' 40 --organism '${organism.organism}'
39 --ploidy ${organism.ploidy} 41 --ploidy ${organism.ploidy}
44 46
45 #if $parameters: 47 #if $parameters:
46 --parameters '${parameters}' 48 --parameters '${parameters}'
47 #end if 49 #end if
48 50
49 #if $evidences.rna_bam: 51 #if $evidences.rna_bam
50 --rna_bam ${evidences.rna_bam} 52 --rna_bam ${evidences.rna_bam}
51 #end if 53 #end if
52 54
53 #set est_list = "" 55 #set est_list = ""
54 #if len($evidences.transcript_evidence) > 0: 56 #if len($evidences.transcript_evidence) > 0:
69 #end for 71 #end for
70 #end if 72 #end if
71 --p2g_pident ${evidences.p2g_pident} 73 --p2g_pident ${evidences.p2g_pident}
72 --p2g_prefilter ${evidences.p2g_prefilter} 74 --p2g_prefilter ${evidences.p2g_prefilter}
73 75
76 --busco_seed_species '${busco.busco_seed_species}'
77 --busco_db '${busco.busco_db}'
78
74 #if $augustus.augustus_species != 'none': 79 #if $augustus.augustus_species != 'none':
75 --augustus_species '${augustus.augustus_species}' 80 --augustus_species '${augustus.augustus_species}'
76 #end if 81 #end if
77 --min_training_models ${augustus.min_training_models} 82 --min_training_models ${augustus.min_training_models}
78 ${augustus.optimize_augustus} 83 ${augustus.optimize_augustus}
82 #if $genemark.genemark_mod: 87 #if $genemark.genemark_mod:
83 --genemark_mod '${genemark.genemark_mod}' 88 --genemark_mod '${genemark.genemark_mod}'
84 #end if 89 #end if
85 --soft_mask ${genemark.soft_mask} 90 --soft_mask ${genemark.soft_mask}
86 #end if 91 #end if
87
88 --busco_seed_species '${busco.busco_seed_species}'
89 --busco_db '${busco.busco_db}'
90 92
91 $evm.repeats2evm 93 $evm.repeats2evm
92 #if $evm.evm_partitioning.evm_partition == "yes": 94 #if $evm.evm_partitioning.evm_partition == "yes":
93 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval} 95 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval}
94 #else: 96 #else:
142 <column name="path" index="3" /> 144 <column name="path" index="3" />
143 <filter type="sort_by" column="0" /> 145 <filter type="sort_by" column="0" />
144 <filter type="static_value" column="2" value="1.0" /> 146 <filter type="static_value" column="2" value="1.0" />
145 </options> 147 </options>
146 </param> 148 </param>
149
150 <param argument="--force" type="boolean" checked="true" truevalue="" falsevalue="--force" label="Check the genome sequence" help="Disable at your own risk if you want to ignore problems in the genome sequence reported by Funannotate" />
147 151
148 <section name="organism" expanded="true" title="Organism"> 152 <section name="organism" expanded="true" title="Organism">
149 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> 153 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species">
150 <validator type="empty_field" /> 154 <validator type="empty_field" />
151 </param> 155 </param>
177 <option value="diamond" selected="True">Diamond</option> 181 <option value="diamond" selected="True">Diamond</option>
178 <option value="tblastn">tblastn (slower)</option> 182 <option value="tblastn">tblastn (slower)</option>
179 </param> 183 </param>
180 </section> 184 </section>
181 185
186 <section name="busco" expanded="true" title="Busco">
187 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will be used to perform initial training of ab initio predictors (e.g. Augustus).">
188 <expand macro="busco_species"/>
189 </param>
190 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Used when BUSCO runs Augustus internally.">
191 <expand macro="augustus_species"/>
192 </param>
193 </section>
194
195 <section name="filtering" expanded="true" title="Filtering">
196 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" />
197 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" />
198 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" />
199 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" />
200 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons">
201 <option value="overlap blast" selected="True">overlap + blast</option>
202 <option value="overlap">overlap</option>
203 <option value="blast">blast</option>
204 <option value="none">none</option>
205 </param>
206 </section>
207
182 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will over-rule any other training presets based on sepcies selection." /> 208 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will over-rule any other training presets based on sepcies selection." />
183 209
184 <section name="other_predictors" expanded="false" title="Other annotations"> 210 <section name="augustus" expanded="false" title="Augustus settings (advanced)">
211 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list">
212 <option value="none" selected="True">No corresponding species, train from scratch</option>
213 <expand macro="augustus_species"/>
214 </param>
215 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" />
216 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" />
217 </section>
218
219 <section name="genemark" expanded="false" title="GeneMark settings (advanced)">
220 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." />
221 <param argument="--genemark_mode" type="select" label="GeneMark mode">
222 <option value="ES" selected="True">ES</option>
223 <option value="ET">ET</option>
224 </param>
225 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" />
226 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" />
227 </section>
228
229 <section name="other_predictors" expanded="false" title="Other annotations (advanced)">
185 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" /> 230 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" />
186 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" /> 231 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" />
187 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" /> 232 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" />
188 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" /> 233 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" />
189 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" /> 234 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" />
190 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" /> 235 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" />
191 </section> 236 </section>
192 237
193 <section name="augustus" expanded="true" title="Augustus settings"> 238 <section name="evm" expanded="false" title="EVM settings (advanced)">
194 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list">
195 <option value="none" selected="True">No corresponding species, train from scratch</option>
196 <expand macro="augustus_species"/>
197 </param>
198 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" />
199 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" />
200 </section>
201
202 <section name="genemark" expanded="false" title="GeneMark settings">
203 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." />
204 <param argument="--genemark_mode" type="select" label="GeneMark mode">
205 <option value="ES" selected="True">ES</option>
206 <option value="ET">ET</option>
207 </param>
208 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" />
209 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" />
210 </section>
211
212 <section name="busco" expanded="true" title="BUSCO settings">
213 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Select the closest species. BUSCO will only be used if no RNASeq (bam) data is given as evidence.">
214 <expand macro="augustus_species"/>
215 </param>
216 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will only be used if no RNASeq (bam) data is given as evidence.">
217 <expand macro="busco_species"/>
218 </param>
219 </section>
220
221 <section name="evm" expanded="false" title="EVM settings">
222 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." /> 239 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." />
223 <conditional name="evm_partitioning"> 240 <conditional name="evm_partitioning">
224 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs in smaller overlaping chunks to reduce memory usage and parallelize"> 241 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs in smaller overlaping chunks to reduce memory usage and parallelize">
225 <option value="yes" selected="True">Yes</option> 242 <option value="yes" selected="True">Yes</option>
226 <option value="no">No</option> 243 <option value="no">No</option>
233 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10"> 250 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10">
234 <validator type="regex" message="Key must consist of alphanumeric characters only, possibly separated by the period character ('.')">^[\w: ]+$</validator> 251 <validator type="regex" message="Key must consist of alphanumeric characters only, possibly separated by the period character ('.')">^[\w: ]+$</validator>
235 </param> 252 </param>
236 </section> 253 </section>
237 254
238 <section name="filtering" expanded="true" title="Filtering"> 255 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
239 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" /> 256 <option value="gbk" selected="true">Annotated genome (genbank)</option>
240 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" /> 257 <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option>
241 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" /> 258 <option value="gff3">Annotation in GFF3 format</option>
242 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" /> 259 <option value="proteins_fa">Multi-fasta file of protein coding genes</option>
243 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons"> 260 <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option>
244 <option value="overlap blast" selected="True">overlap + blast</option> 261 <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option>
245 <option value="overlap">overlap</option> 262 <option value="tbl2asn_report">tbl2asn summary report of annotated genome</option>
246 <option value="blast">blast</option> 263 <option value="tbl2asn_error">tbl2asn error summary report</option>
247 <option value="none">none</option> 264 <option value="tbl2asn_validation">tbl2asn genome validation report</option>
248 </param> 265 <option value="stats">statistics</option>
249 </section> 266 </param>
250 267
251 <!-- Need this to change path in the test funannotate_db --> 268 <!-- Need this to change path in the test funannotate_db -->
252 <param type="hidden" name="uglyTestingHack" value="" /> 269 <param type="hidden" name="uglyTestingHack" value="" />
253 </inputs> 270 </inputs>
254 <outputs> 271 <outputs>
255 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk" /> 272 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk">
256 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl" /> 273 <filter>outputs and 'gbk' in outputs</filter>
257 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3" /> 274 </data>
258 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa" /> 275 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl">
259 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa" /> 276 <filter>outputs and 'tbl' in outputs</filter>
260 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa" /> 277 </data>
261 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt" /> 278 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3">
262 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt" /> 279 <filter>outputs and 'gff3' in outputs</filter>
263 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt" /> 280 </data>
264 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json" /> 281 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa">
282 <filter>outputs and 'proteins_fa' in outputs</filter>
283 </data>
284 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa">
285 <filter>outputs and 'mrna_transcripts_fa' in outputs</filter>
286 </data>
287 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa">
288 <filter>outputs and 'cds_transcripts_fa' in outputs</filter>
289 </data>
290 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt">
291 <filter>outputs and 'tbl2asn_report' in outputs</filter>
292 </data>
293 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt">
294 <filter>outputs and 'tbl2asn_error' in outputs</filter>
295 </data>
296 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt">
297 <filter>outputs and 'tbl2asn_validation' in outputs</filter>
298 </data>
299 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json">
300 <filter>outputs and 'stats' in outputs</filter>
301 </data>
265 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs 302 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs
266 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) --> 303 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) -->
267 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /--> 304 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /-->
268 </outputs> 305 </outputs>
269 <tests> 306 <tests>
270 <!-- training from scratch --> 307 <!-- training from scratch -->
271 <test> 308 <test>
272 <param name="input" value="genome_masked.fa" /> 309 <param name="input" value="genome_masked.fa" />
273 <param name="database" value="2021-07-20-120000" /> 310 <param name="database" value="2021-07-20-120000" />
274 <section name="organism">
275 <param name="species" value="Genus species" />
276 </section>
277 <section name="augustus">
278 <param name="min_training_models" value="3" />
279 </section>
280 <section name="busco"> 311 <section name="busco">
281 <param name="busco_seed_species" value="fly" /> 312 <param name="busco_seed_species" value="fly" />
282 <param name="busco_db" value="insecta" /> 313 <param name="busco_db" value="insecta" />
283 </section> 314 </section>
315 <section name="organism">
316 <param name="species" value="Genus species" />
317 </section>
318 <section name="augustus">
319 <param name="min_training_models" value="3" />
320 </section>
321 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" />
284 <!-- non deterministic results, so can't be more precise here --> 322 <!-- non deterministic results, so can't be more precise here -->
285 <output name="annot_gbk"> 323 <output name="annot_gbk">
286 <assert_contents> 324 <assert_contents>
287 <has_text text=" TITLE Direct Submission" /> 325 <has_text text=" TITLE Direct Submission" />
288 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> 326 <has_text text="/locus_tag=&quot;FUN_000001&quot;" />
348 <section name="busco"> 386 <section name="busco">
349 <param name="busco_seed_species" value="fly" /> 387 <param name="busco_seed_species" value="fly" />
350 <param name="busco_db" value="insecta" /> 388 <param name="busco_db" value="insecta" />
351 </section> 389 </section>
352 <param name="uglyTestingHack" value="true" /> 390 <param name="uglyTestingHack" value="true" />
391 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" />
353 <!-- non deterministic results, so can't be more precise here --> 392 <!-- non deterministic results, so can't be more precise here -->
354 <output name="annot_gbk"> 393 <output name="annot_gbk">
355 <assert_contents> 394 <assert_contents>
356 <has_text text=" TITLE Direct Submission" /> 395 <has_text text=" TITLE Direct Submission" />
357 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> 396 <has_text text="/locus_tag=&quot;FUN_000001&quot;" />
397 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> 436 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/>
398 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> 437 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/>
399 </assert_stderr> 438 </assert_stderr>
400 </test> 439 </test>
401 440
402 <!-- bam --> 441 <!-- bam and transcripts and proteins -->
403 <test> 442 <test>
404 <param name="input" value="genome_masked.fa" /> 443 <param name="input" value="genome_masked.fa" />
405 <param name="database" value="2021-07-20-120000" /> 444 <param name="database" value="2021-07-20-120000" />
406 <section name="organism"> 445 <section name="organism">
407 <param name="species" value="Genus species" /> 446 <param name="species" value="Genus species" />
412 <conditional name="prot_evidence"> 451 <conditional name="prot_evidence">
413 <param name="prot_evidence_source" value="custom" /> 452 <param name="prot_evidence_source" value="custom" />
414 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> 453 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" />
415 </conditional> 454 </conditional>
416 </section> 455 </section>
417 <section name="augustus">
418 <param name="min_training_models" value="3" />
419 </section>
420 <section name="busco"> 456 <section name="busco">
421 <param name="busco_seed_species" value="fly" /> 457 <param name="busco_seed_species" value="fly" />
422 <param name="busco_db" value="insecta" /> 458 <param name="busco_db" value="insecta" />
423 </section> 459 </section>
460 <section name="augustus">
461 <param name="min_training_models" value="3" />
462 </section>
463 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" />
424 <!-- non deterministic results, so can't be more precise here --> 464 <!-- non deterministic results, so can't be more precise here -->
425 <output name="annot_gbk"> 465 <output name="annot_gbk">
426 <assert_contents> 466 <assert_contents>
427 <has_text text=" TITLE Direct Submission" /> 467 <has_text text=" TITLE Direct Submission" />
428 <has_text text="/locus_tag=&quot;FUN_000001&quot;" /> 468 <has_text text="/locus_tag=&quot;FUN_000001&quot;" />
468 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> 508 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/>
469 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> 509 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/>
470 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> 510 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/>
471 </assert_stderr> 511 </assert_stderr>
472 </test> 512 </test>
513
514 <!-- proteins -->
515 <test>
516 <param name="input" value="genome_masked.fa" />
517 <param name="database" value="2021-07-20-120000" />
518 <section name="organism">
519 <param name="species" value="Genus species" />
520 </section>
521 <section name="evidences">
522 <conditional name="prot_evidence">
523 <param name="prot_evidence_source" value="custom" />
524 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" />
525 </conditional>
526 </section>
527 <section name="busco">
528 <param name="busco_seed_species" value="fly" />
529 <param name="busco_db" value="insecta" />
530 </section>
531 <section name="augustus">
532 <param name="min_training_models" value="3" />
533 </section>
534 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" />
535 <!-- non deterministic results, so can't be more precise here -->
536 <output name="annot_gbk">
537 <assert_contents>
538 <has_text text=" TITLE Direct Submission" />
539 <has_text text="/locus_tag=&quot;FUN_000001&quot;" />
540 </assert_contents>
541 </output>
542 <output name="annot_tbl">
543 <assert_contents>
544 <has_text text=">Feature sample" />
545 <has_text text="gnl|ncbi|FUN_000001-T1_mrna" />
546 </assert_contents>
547 </output>
548 <output name="annot_gff3">
549 <assert_contents>
550 <has_text text="##gff-version 3" />
551 <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" />
552 </assert_contents>
553 </output>
554 <output name="fasta_proteins">
555 <assert_contents>
556 <has_text text=">FUN_000001-T1 FUN_000001" />
557 </assert_contents>
558 </output>
559 <output name="fasta_transcripts_mrna">
560 <assert_contents>
561 <has_text text=">FUN_000001-T1 FUN_000001" />
562 </assert_contents>
563 </output>
564 <output name="fasta_transcripts_cds">
565 <assert_contents>
566 <has_text text=">FUN_000001-T1 FUN_000001" />
567 </assert_contents>
568 </output>
569 <assert_stderr>
570 <has_text text="augustus busco"/>
571 <has_text text="glimmerhmm busco"/>
572 <has_text text="snap busco"/>
573 <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/>
574 <has_text text="Skipping CodingQuarry as no --rna_bam passed"/>
575 <has_text text="Running Augustus gene prediction using genus_species parameters"/>
576 <has_text text="Training Augustus using BUSCO gene models"/>
577 <not_has_text text="Aligning transcript evidence to genome with minimap2"/>
578 <not_has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/>
579 <not_has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/>
580 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/>
581 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/>
582 </assert_stderr>
583 </test>
473 </tests> 584 </tests>
474 <help><![CDATA[ 585 <help><![CDATA[
475 Funannotate_ predict 586 Funannotate_ predict
476 -------------------- 587 --------------------
477 588