Mercurial > repos > iuc > funannotate_predict
comparison funannotate_predict.xml @ 1:1a59958c1f76 draft
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author | iuc |
---|---|
date | Mon, 04 Oct 2021 19:37:44 +0000 |
parents | 40b87aef5241 |
children | 33092577d65d |
comparison
equal
deleted
inserted
replaced
0:40b87aef5241 | 1:1a59958c1f76 |
---|---|
30 --database `pwd`'/hacked_database' | 30 --database `pwd`'/hacked_database' |
31 #else | 31 #else |
32 --database '$database.fields.path' | 32 --database '$database.fields.path' |
33 #end if | 33 #end if |
34 | 34 |
35 $force | |
36 | |
35 --species '${organism.species}' | 37 --species '${organism.species}' |
36 --isolate '${organism.isolate}' | 38 --isolate '${organism.isolate}' |
37 --strain '${organism.strain}' | 39 --strain '${organism.strain}' |
38 --organism '${organism.organism}' | 40 --organism '${organism.organism}' |
39 --ploidy ${organism.ploidy} | 41 --ploidy ${organism.ploidy} |
44 | 46 |
45 #if $parameters: | 47 #if $parameters: |
46 --parameters '${parameters}' | 48 --parameters '${parameters}' |
47 #end if | 49 #end if |
48 | 50 |
49 #if $evidences.rna_bam: | 51 #if $evidences.rna_bam |
50 --rna_bam ${evidences.rna_bam} | 52 --rna_bam ${evidences.rna_bam} |
51 #end if | 53 #end if |
52 | 54 |
53 #set est_list = "" | 55 #set est_list = "" |
54 #if len($evidences.transcript_evidence) > 0: | 56 #if len($evidences.transcript_evidence) > 0: |
69 #end for | 71 #end for |
70 #end if | 72 #end if |
71 --p2g_pident ${evidences.p2g_pident} | 73 --p2g_pident ${evidences.p2g_pident} |
72 --p2g_prefilter ${evidences.p2g_prefilter} | 74 --p2g_prefilter ${evidences.p2g_prefilter} |
73 | 75 |
76 --busco_seed_species '${busco.busco_seed_species}' | |
77 --busco_db '${busco.busco_db}' | |
78 | |
74 #if $augustus.augustus_species != 'none': | 79 #if $augustus.augustus_species != 'none': |
75 --augustus_species '${augustus.augustus_species}' | 80 --augustus_species '${augustus.augustus_species}' |
76 #end if | 81 #end if |
77 --min_training_models ${augustus.min_training_models} | 82 --min_training_models ${augustus.min_training_models} |
78 ${augustus.optimize_augustus} | 83 ${augustus.optimize_augustus} |
82 #if $genemark.genemark_mod: | 87 #if $genemark.genemark_mod: |
83 --genemark_mod '${genemark.genemark_mod}' | 88 --genemark_mod '${genemark.genemark_mod}' |
84 #end if | 89 #end if |
85 --soft_mask ${genemark.soft_mask} | 90 --soft_mask ${genemark.soft_mask} |
86 #end if | 91 #end if |
87 | |
88 --busco_seed_species '${busco.busco_seed_species}' | |
89 --busco_db '${busco.busco_db}' | |
90 | 92 |
91 $evm.repeats2evm | 93 $evm.repeats2evm |
92 #if $evm.evm_partitioning.evm_partition == "yes": | 94 #if $evm.evm_partitioning.evm_partition == "yes": |
93 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval} | 95 --evm-partition-interval ${evm.evm_partitioning.evm_partition_interval} |
94 #else: | 96 #else: |
142 <column name="path" index="3" /> | 144 <column name="path" index="3" /> |
143 <filter type="sort_by" column="0" /> | 145 <filter type="sort_by" column="0" /> |
144 <filter type="static_value" column="2" value="1.0" /> | 146 <filter type="static_value" column="2" value="1.0" /> |
145 </options> | 147 </options> |
146 </param> | 148 </param> |
149 | |
150 <param argument="--force" type="boolean" checked="true" truevalue="" falsevalue="--force" label="Check the genome sequence" help="Disable at your own risk if you want to ignore problems in the genome sequence reported by Funannotate" /> | |
147 | 151 |
148 <section name="organism" expanded="true" title="Organism"> | 152 <section name="organism" expanded="true" title="Organism"> |
149 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> | 153 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> |
150 <validator type="empty_field" /> | 154 <validator type="empty_field" /> |
151 </param> | 155 </param> |
177 <option value="diamond" selected="True">Diamond</option> | 181 <option value="diamond" selected="True">Diamond</option> |
178 <option value="tblastn">tblastn (slower)</option> | 182 <option value="tblastn">tblastn (slower)</option> |
179 </param> | 183 </param> |
180 </section> | 184 </section> |
181 | 185 |
186 <section name="busco" expanded="true" title="Busco"> | |
187 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will be used to perform initial training of ab initio predictors (e.g. Augustus)."> | |
188 <expand macro="busco_species"/> | |
189 </param> | |
190 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Used when BUSCO runs Augustus internally."> | |
191 <expand macro="augustus_species"/> | |
192 </param> | |
193 </section> | |
194 | |
195 <section name="filtering" expanded="true" title="Filtering"> | |
196 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" /> | |
197 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" /> | |
198 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" /> | |
199 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" /> | |
200 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons"> | |
201 <option value="overlap blast" selected="True">overlap + blast</option> | |
202 <option value="overlap">overlap</option> | |
203 <option value="blast">blast</option> | |
204 <option value="none">none</option> | |
205 </param> | |
206 </section> | |
207 | |
182 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will override any other training presets based on species selection." /> | 208 <param argument="--parameters" type="data" format="json" optional="true" label="Ab-initio training parameters from a previous run" help="If specified, will override any other training presets based on species selection." /> |
183 | 209 |
184 <section name="other_predictors" expanded="false" title="Other annotations"> | 210 <section name="augustus" expanded="false" title="Augustus settings (advanced)"> |
211 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list"> | |
212 <option value="none" selected="True">No corresponding species, train from scratch</option> | |
213 <expand macro="augustus_species"/> | |
214 </param> | |
215 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" /> | |
216 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" /> | |
217 </section> | |
218 | |
219 <section name="genemark" expanded="false" title="GeneMark settings (advanced)"> | |
220 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." /> | |
221 <param argument="--genemark_mode" type="select" label="GeneMark mode"> | |
222 <option value="ES" selected="True">ES</option> | |
223 <option value="ET">ET</option> | |
224 </param> | |
225 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" /> | |
226 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" /> | |
227 </section> | |
228 | |
229 <section name="other_predictors" expanded="false" title="Other annotations (advanced)"> | |
185 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" /> | 230 <param argument="--stringtie" type="data" format="gtf" optional="true" label="StringTie GTF result" /> |
186 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" /> | 231 <param argument="--maker_gff" type="data" format="gff3" optional="true" label="MAKER2 GFF file" help="Parse results directly to EVM" /> |
187 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" /> | 232 <param argument="--pasa_gff" type="data" format="gff3" optional="true" label="PASA generated gene models" /> |
188 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" /> | 233 <param name="pasa_gff_weight" type="integer" value="1" label="Weight for PASA generated gene models" /> |
189 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" /> | 234 <param argument="--other_gff" type="data" format="gff3" optional="true" label="Annotation pass-through to EVM" /> |
190 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" /> | 235 <param name="other_gff_weight" type="integer" value="1" label="Weight for annotation pass-through to EVM" /> |
191 </section> | 236 </section> |
192 | 237 |
193 <section name="augustus" expanded="true" title="Augustus settings"> | 238 <section name="evm" expanded="false" title="EVM settings (advanced)"> |
194 <param argument="--augustus_species" type="select" label="Augustus species training set" help="Select a species from the list"> | |
195 <option value="none" selected="True">No corresponding species, train from scratch</option> | |
196 <expand macro="augustus_species"/> | |
197 </param> | |
198 <param argument="--min_training_models" type="integer" value="200" label="Minimum number of models to train Augustus" /> | |
199 <param argument="--optimize_augustus" type="boolean" checked="false" truevalue="--optimize_augustus" falsevalue="" label="Run 'optimize_augustus.pl' to refine training (long runtime)" /> | |
200 </section> | |
201 | |
202 <section name="genemark" expanded="false" title="GeneMark settings"> | |
203 <param name="genemark_license" type="data" format="txt" optional="true" label="GeneMark license file" help="GeneMark is not a free software, to use it download and unzip a license from http://topaz.gatech.edu/GeneMark/license_download.cgi (ES/ET/EP version). GeneMark needs to be installed manually by Galaxy administrators, it might not be available on this server." /> | |
204 <param argument="--genemark_mode" type="select" label="GeneMark mode"> | |
205 <option value="ES" selected="True">ES</option> | |
206 <option value="ET">ET</option> | |
207 </param> | |
208 <param argument="--genemark_mod" type="data" format="txt" optional="true" label="Use pre-existing Genemark training file (e.g. gmhmm.mod)" /> | |
209 <param argument="--soft_mask" type="integer" value="2000" label="Softmasked length threshold for GeneMark" help="GeneMark will skip prediction on repeat regions shorter than this value" /> | |
210 </section> | |
211 | |
212 <section name="busco" expanded="true" title="BUSCO settings"> | |
213 <param argument="--busco_seed_species" type="select" label="Initial Augustus species training set for BUSCO alignment" help="Select the closest species. BUSCO will only be used if no RNASeq (bam) data is given as evidence."> | |
214 <expand macro="augustus_species"/> | |
215 </param> | |
216 <param argument="--busco_db" type="select" label="BUSCO models to align" help="BUSCO will only be used if no RNASeq (bam) data is given as evidence."> | |
217 <expand macro="busco_species"/> | |
218 </param> | |
219 </section> | |
220 | |
221 <section name="evm" expanded="false" title="EVM settings"> | |
222 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." /> | 239 <param argument="--repeats2evm" type="boolean" checked="false" truevalue="--repeats2evm" falsevalue="" label="Use repeats in EVM consensus model building" help="Not recommended for fungal genomes that have high gene density. You might want to turn this option on for larger genomes or those that have a high repeat content." /> |
223 <conditional name="evm_partitioning"> | 240 <conditional name="evm_partitioning"> |
224 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs into smaller overlapping chunks to reduce memory usage and parallelize"> | 241 <param name="evm_partition" type="select" label="Split contigs into partitions for EVM processing?" help="Splits big contigs into smaller overlapping chunks to reduce memory usage and parallelize"> |
225 <option value="yes" selected="True">Yes</option> | 242 <option value="yes" selected="True">Yes</option> |
226 <option value="no">No</option> | 243 <option value="no">No</option> |
233 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10"> | 250 <param argument="--weights" type="text" optional="true" label="Custom ab-initio predictor and EVM weight" help="e.g. augustus:2 pasa:10"> |
234 <validator type="regex" message="Weights must consist of alphanumeric characters, underscores, colons and spaces only (e.g. augustus:2 pasa:10)">^[\w: ]+$</validator> | 251 <validator type="regex" message="Weights must consist of alphanumeric characters, underscores, colons and spaces only (e.g. augustus:2 pasa:10)">^[\w: ]+$</validator> |
235 </param> | 252 </param> |
236 </section> | 253 </section> |
237 | 254 |
238 <section name="filtering" expanded="true" title="Filtering"> | 255 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> |
239 <param argument="--min_intronlen" type="integer" value="10" label="Minimum intron length" /> | 256 <option value="gbk" selected="true">Annotated genome (genbank)</option> |
240 <param argument="--max_intronlen" type="integer" value="3000" label="Maximum intron length" /> | 257 <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option> |
241 <param argument="--min_protlen" type="integer" value="50" label="Minimum protein length" /> | 258 <option value="gff3">Annotation in GFF3 format</option> |
242 <param argument="--keep_no_stops" type="boolean" checked="false" truevalue="--keep_no_stops" falsevalue="" label="Keep gene models without valid stops" /> | 259 <option value="proteins_fa">Multi-fasta file of protein coding genes</option> |
243 <param argument="--repeat_filter" type="select" label="Repetitive gene model filtering" help="'overlap' drops gene models that are more than 90% contained within a repeat region; 'blast' compares the amino acid sequences to a small database of known transposons"> | 260 <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option> |
244 <option value="overlap blast" selected="True">overlap + blast</option> | 261 <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option> |
245 <option value="overlap">overlap</option> | 262 <option value="tbl2asn_report">tbl2asn summary report of annotated genome</option> |
246 <option value="blast">blast</option> | 263 <option value="tbl2asn_error">tbl2asn error summary report</option> |
247 <option value="none">none</option> | 264 <option value="tbl2asn_validation">tbl2asn genome validation report</option> |
248 </param> | 265 <option value="stats">statistics</option> |
249 </section> | 266 </param> |
250 | 267 |
251 <!-- Need this to change path in the test funannotate_db --> | 268 <!-- Need this to change path in the test funannotate_db --> |
252 <param type="hidden" name="uglyTestingHack" value="" /> | 269 <param type="hidden" name="uglyTestingHack" value="" /> |
253 </inputs> | 270 </inputs> |
254 <outputs> | 271 <outputs> |
255 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk" /> | 272 <data name='annot_gbk' format='genbank' label="${tool.name} on ${on_string}: annotation (genbank)" from_work_dir="out.gbk"> |
256 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl" /> | 273 <filter>outputs and 'gbk' in outputs</filter> |
257 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3" /> | 274 </data> |
258 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa" /> | 275 <data name='annot_tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl"> |
259 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa" /> | 276 <filter>outputs and 'tbl' in outputs</filter> |
260 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa" /> | 277 </data> |
261 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt" /> | 278 <data name='annot_gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3"> |
262 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt" /> | 279 <filter>outputs and 'gff3' in outputs</filter> |
263 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt" /> | 280 </data> |
264 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json" /> | 281 <data name='fasta_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa"> |
282 <filter>outputs and 'proteins_fa' in outputs</filter> | |
283 </data> | |
284 <data name='fasta_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa"> | |
285 <filter>outputs and 'mrna_transcripts_fa' in outputs</filter> | |
286 </data> | |
287 <data name='fasta_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa"> | |
288 <filter>outputs and 'cds_transcripts_fa' in outputs</filter> | |
289 </data> | |
290 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt"> | |
291 <filter>outputs and 'tbl2asn_report' in outputs</filter> | |
292 </data> | |
293 <data name='tbl2asn_error' format='txt' label="${tool.name} on ${on_string}: tbl2asn error summary report" from_work_dir="out.error.summary.txt"> | |
294 <filter>outputs and 'tbl2asn_error' in outputs</filter> | |
295 </data> | |
296 <data name='tbl2asn_validation' format='txt' label="${tool.name} on ${on_string}: tbl2asn genome validation report" from_work_dir="out.validation.txt"> | |
297 <filter>outputs and 'tbl2asn_validation' in outputs</filter> | |
298 </data> | |
299 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json"> | |
300 <filter>outputs and 'stats' in outputs</filter> | |
301 </data> | |
265 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs | 302 <!-- TODO some day: provide trained models as output, reusable as input to other funannotate runs |
266 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) --> | 303 (parameters.json file references files with absolute paths, would probably need to create an archive + edit paths in parameters.json) --> |
267 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /--> | 304 <!--data name='abinitio' format='json' label="${tool.name} on ${on_string}: ab-initio training parameters" from_work_dir="output/predict_results/*.parameters.json" /--> |
268 </outputs> | 305 </outputs> |
269 <tests> | 306 <tests> |
270 <!-- training from scratch --> | 307 <!-- training from scratch --> |
271 <test> | 308 <test> |
272 <param name="input" value="genome_masked.fa" /> | 309 <param name="input" value="genome_masked.fa" /> |
273 <param name="database" value="2021-07-20-120000" /> | 310 <param name="database" value="2021-07-20-120000" /> |
274 <section name="organism"> | |
275 <param name="species" value="Genus species" /> | |
276 </section> | |
277 <section name="augustus"> | |
278 <param name="min_training_models" value="3" /> | |
279 </section> | |
280 <section name="busco"> | 311 <section name="busco"> |
281 <param name="busco_seed_species" value="fly" /> | 312 <param name="busco_seed_species" value="fly" /> |
282 <param name="busco_db" value="insecta" /> | 313 <param name="busco_db" value="insecta" /> |
283 </section> | 314 </section> |
315 <section name="organism"> | |
316 <param name="species" value="Genus species" /> | |
317 </section> | |
318 <section name="augustus"> | |
319 <param name="min_training_models" value="3" /> | |
320 </section> | |
321 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
284 <!-- non deterministic results, so can't be more precise here --> | 322 <!-- non deterministic results, so can't be more precise here --> |
285 <output name="annot_gbk"> | 323 <output name="annot_gbk"> |
286 <assert_contents> | 324 <assert_contents> |
287 <has_text text=" TITLE Direct Submission" /> | 325 <has_text text=" TITLE Direct Submission" /> |
288 <has_text text="/locus_tag="FUN_000001"" /> | 326 <has_text text="/locus_tag="FUN_000001"" /> |
348 <section name="busco"> | 386 <section name="busco"> |
349 <param name="busco_seed_species" value="fly" /> | 387 <param name="busco_seed_species" value="fly" /> |
350 <param name="busco_db" value="insecta" /> | 388 <param name="busco_db" value="insecta" /> |
351 </section> | 389 </section> |
352 <param name="uglyTestingHack" value="true" /> | 390 <param name="uglyTestingHack" value="true" /> |
391 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
353 <!-- non deterministic results, so can't be more precise here --> | 392 <!-- non deterministic results, so can't be more precise here --> |
354 <output name="annot_gbk"> | 393 <output name="annot_gbk"> |
355 <assert_contents> | 394 <assert_contents> |
356 <has_text text=" TITLE Direct Submission" /> | 395 <has_text text=" TITLE Direct Submission" /> |
357 <has_text text="/locus_tag="FUN_000001"" /> | 396 <has_text text="/locus_tag="FUN_000001"" /> |
397 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | 436 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> |
398 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | 437 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> |
399 </assert_stderr> | 438 </assert_stderr> |
400 </test> | 439 </test> |
401 | 440 |
402 <!-- bam --> | 441 <!-- bam and transcripts and proteins --> |
403 <test> | 442 <test> |
404 <param name="input" value="genome_masked.fa" /> | 443 <param name="input" value="genome_masked.fa" /> |
405 <param name="database" value="2021-07-20-120000" /> | 444 <param name="database" value="2021-07-20-120000" /> |
406 <section name="organism"> | 445 <section name="organism"> |
407 <param name="species" value="Genus species" /> | 446 <param name="species" value="Genus species" /> |
412 <conditional name="prot_evidence"> | 451 <conditional name="prot_evidence"> |
413 <param name="prot_evidence_source" value="custom" /> | 452 <param name="prot_evidence_source" value="custom" /> |
414 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> | 453 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> |
415 </conditional> | 454 </conditional> |
416 </section> | 455 </section> |
417 <section name="augustus"> | |
418 <param name="min_training_models" value="3" /> | |
419 </section> | |
420 <section name="busco"> | 456 <section name="busco"> |
421 <param name="busco_seed_species" value="fly" /> | 457 <param name="busco_seed_species" value="fly" /> |
422 <param name="busco_db" value="insecta" /> | 458 <param name="busco_db" value="insecta" /> |
423 </section> | 459 </section> |
460 <section name="augustus"> | |
461 <param name="min_training_models" value="3" /> | |
462 </section> | |
463 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
424 <!-- non deterministic results, so can't be more precise here --> | 464 <!-- non deterministic results, so can't be more precise here --> |
425 <output name="annot_gbk"> | 465 <output name="annot_gbk"> |
426 <assert_contents> | 466 <assert_contents> |
427 <has_text text=" TITLE Direct Submission" /> | 467 <has_text text=" TITLE Direct Submission" /> |
428 <has_text text="/locus_tag="FUN_000001"" /> | 468 <has_text text="/locus_tag="FUN_000001"" /> |
468 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | 508 <has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> |
469 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | 509 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> |
470 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | 510 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> |
471 </assert_stderr> | 511 </assert_stderr> |
472 </test> | 512 </test> |
513 | |
514 <!-- proteins --> | |
515 <test> | |
516 <param name="input" value="genome_masked.fa" /> | |
517 <param name="database" value="2021-07-20-120000" /> | |
518 <section name="organism"> | |
519 <param name="species" value="Genus species" /> | |
520 </section> | |
521 <section name="evidences"> | |
522 <conditional name="prot_evidence"> | |
523 <param name="prot_evidence_source" value="custom" /> | |
524 <param name="protein_evidence" value="predict_scratch/Genus_species.proteins.fa" /> | |
525 </conditional> | |
526 </section> | |
527 <section name="busco"> | |
528 <param name="busco_seed_species" value="fly" /> | |
529 <param name="busco_db" value="insecta" /> | |
530 </section> | |
531 <section name="augustus"> | |
532 <param name="min_training_models" value="3" /> | |
533 </section> | |
534 <param name="outputs" value="gbk,tbl,gff3,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,tbl2asn_error,tbl2asn_report,tbl2asn_validation,stats" /> | |
535 <!-- non deterministic results, so can't be more precise here --> | |
536 <output name="annot_gbk"> | |
537 <assert_contents> | |
538 <has_text text=" TITLE Direct Submission" /> | |
539 <has_text text="/locus_tag="FUN_000001"" /> | |
540 </assert_contents> | |
541 </output> | |
542 <output name="annot_tbl"> | |
543 <assert_contents> | |
544 <has_text text=">Feature sample" /> | |
545 <has_text text="gnl|ncbi|FUN_000001-T1_mrna" /> | |
546 </assert_contents> | |
547 </output> | |
548 <output name="annot_gff3"> | |
549 <assert_contents> | |
550 <has_text text="##gff-version 3" /> | |
551 <has_text text="ID=FUN_000001-T1;Parent=FUN_000001;product=hypothetical protein;" /> | |
552 </assert_contents> | |
553 </output> | |
554 <output name="fasta_proteins"> | |
555 <assert_contents> | |
556 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
557 </assert_contents> | |
558 </output> | |
559 <output name="fasta_transcripts_mrna"> | |
560 <assert_contents> | |
561 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
562 </assert_contents> | |
563 </output> | |
564 <output name="fasta_transcripts_cds"> | |
565 <assert_contents> | |
566 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
567 </assert_contents> | |
568 </output> | |
569 <assert_stderr> | |
570 <has_text text="augustus busco"/> | |
571 <has_text text="glimmerhmm busco"/> | |
572 <has_text text="snap busco"/> | |
573 <has_text text="Running BUSCO to find conserved gene models for training ab-initio predictors"/> | |
574 <has_text text="Skipping CodingQuarry as no --rna_bam passed"/> | |
575 <has_text text="Running Augustus gene prediction using genus_species parameters"/> | |
576 <has_text text="Training Augustus using BUSCO gene models"/> | |
577 <not_has_text text="Aligning transcript evidence to genome with minimap2"/> | |
578 <not_has_text text="Found 16 alignments, wrote GFF3 and Augustus hints to file"/> | |
579 <not_has_text text="Extracting hints from RNA-seq BAM file using bam2hints"/> | |
580 <has_text text="Mapping 13 proteins to genome using diamond and exonerate"/> | |
581 <has_text text="Found 4 preliminary alignments --> aligning with exonerate"/> | |
582 </assert_stderr> | |
583 </test> | |
473 </tests> | 584 </tests> |
474 <help><![CDATA[ | 585 <help><![CDATA[ |
475 Funannotate_ predict | 586 Funannotate_ predict |
476 -------------------- | 587 -------------------- |
477 | 588 |