comparison bakta.xml @ 3:865ece5ca178 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit 9f516ce5c84b251b9041cd79129e5b5e1979a738
author iuc
date Fri, 10 Feb 2023 14:20:09 +0000
parents debdc1469b41
children 3f0aa1b3e816
comparison
equal deleted inserted replaced
2:debdc1469b41 3:865ece5ca178
1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description> 2 <description>
3 genome annotation via alignment-free sequence identification 3 Genome annotation via alignment-free sequence identification
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macro.xml</import> 6 <import>macro.xml</import>
7 </macros> 7 </macros>
8 <expand macro='edam'/> 8 <expand macro='edam'/>
9 <expand macro='xrefs'/> 9 <expand macro='xrefs'/>
10 <expand macro="requirements"/> 10 <expand macro="requirements"/>
11 <expand macro="version_command"/> 11 <expand macro="version_command"/>
12 12
13 <command detect_errors="aggressive"><![CDATA[ 13 <command detect_errors="aggressive"><![CDATA[
14 mkdir ./database_path && 14 mkdir -p ./database_path/amrfinderplus-db &&
15 ln -s '$(input_option.bakta_db_select.fields.path)/'* database_path && 15 ln -s '$(input_option.bakta_db_select.fields.path)'/* database_path &&
16 ln -s '$(input_option.amrfinder_db_select.fields.path)' database_path && 16 ln -s '$(input_option.amrfinder_db_select.fields.path)/' database_path/amrfinderplus-db/latest &&
17
18 bakta 17 bakta
19 #*====================================== 18 #*======================================
20 CPU option 19 CPU option
21 ======================================*# 20 ======================================*#
22 --threads \${GALAXY_SLOTS:-1} 21 --threads \${GALAXY_SLOTS:-1}
57 --prodigal-tf '$annotation.prodigal' 56 --prodigal-tf '$annotation.prodigal'
58 #end if 57 #end if
59 #if $annotation.translation_table 58 #if $annotation.translation_table
60 --translation-table '$annotation.translation_table' 59 --translation-table '$annotation.translation_table'
61 #end if 60 #end if
62 #if $annotation.gram 61 --gram '?'
63 --gram '$annotation.gram'
64 #end if
65 $annotation.keep_contig_headers 62 $annotation.keep_contig_headers
66 #if $annotation.replicons 63 #if $annotation.replicons
67 --replicons '$annotation.replicons' 64 --replicons '$annotation.replicons'
68 #end if 65 #end if
69 $annotation.compliant 66 $annotation.compliant
125 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> 122 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
126 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11"> 123 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
127 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> 124 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
128 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option> 125 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
129 </param> 126 </param>
130 <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
131 <option value="+">Gram+</option>
132 <option value="-">Gram-</option>
133 <option value="?" selected="true">Unknown</option>
134 </param>
135 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> 127 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
136 <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> 128 <param argument="--replicons" type="data" format="tabular,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
137 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/> 129 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
138 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> 130 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
139 </section> 131 </section>
140 <!-- PARAMETER FOR WORKFLOW ANALYSIS --> 132 <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
141 <section name="workflow" title="Workflow option to skip steps"> 133 <section name="workflow" title="Workflow option to skip steps">
213 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter> 205 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
214 </data> 206 </data>
215 </outputs> 207 </outputs>
216 <tests> 208 <tests>
217 <test expect_num_outputs="13"> <!-- TEST_1 database + input --> 209 <test expect_num_outputs="13"> <!-- TEST_1 database + input -->
218 <section name="input_option" > 210 <section name="input_option" >
219 <param name="bakta_db_select" value="V0.1_2022-08-29"/> 211 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
220 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> 212 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
221 <param name="input_file" value="NC_002127.1.fna"/> 213 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
222 <param name="min_contig_length" value="250"/> 214 <param name="min_contig_length" value="250"/>
223 </section> 215 </section>
224 <section name="output_files"> 216 <section name="output_files">
225 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> 217 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/>
226 </section> 218 </section>
227 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/> 219 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/>
228 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/> 220 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/>
229 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/> 221 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/>
230 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/> 222 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/>
231 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/> 223 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/>
232 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/> 224 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/>
233 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/> 225 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/>
234 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/> 226 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/>
235 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/> 227 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
236 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/> 228 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/>
237 <output name="annotation_plot"> 229 <output name="annotation_plot">
238 <assert_contents> 230 <assert_contents>
239 <has_size value="418991" delta="1000"/> 231 <has_size value="418991" delta="1000"/>
240 </assert_contents> 232 </assert_contents>
241 </output> 233 </output>
242 234 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/>
243 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/> 235 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/>
244 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/> 236 </test>
245 </test> 237 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
246 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps --> 238 <section name="input_option" >
247 <section name="input_option" > 239 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
248 <param name="bakta_db_select" value="V0.1_2022-08-29"/> 240 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
249 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> 241 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
250 <param name="input_file" value="NC_002127.1.fna"/> 242 <param name="min_contig_length" value="250"/>
251 <param name="min_contig_length" value="250"/> 243 </section>
252 </section> 244 <section name="organism">
253 <section name="organism"> 245 <param name="genus" value="Escherichia"/>
254 <param name="genus" value="Escherichia"/> 246 <param name="species" value="coli O157:H7"/>
255 <param name="species" value="coli O157:H7"/> 247 <param name="strain" value="Sakai"/>
256 <param name="strain" value="Sakai"/> 248 <param name="plasmid" value="pOSAK1"/>
257 <param name="plasmid" value="pOSAK1"/> 249 </section>
258 </section> 250 <section name="annotation">
259 <section name="annotation"> 251 <param name="keep_contig_headers" value="true"/>
260 <param name="--gram" value="-"/> 252 </section>
261 <param name="keep_contig_headers" value="true"/> 253 <section name="workflow">
262 </section> 254 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/>
263 <section name="workflow"> 255 </section>
264 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/> 256 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4">
265 </section> 257 <assert_contents>
266 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4"> 258 <has_text_matching expression="IHHALP_00005"/>
267 <assert_contents> 259 </assert_contents>
268 <has_text_matching expression="IHHALP_00005"/> 260 </output>
269 </assert_contents> 261 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4">
270 </output> 262 <assert_contents>
271 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4"> 263 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
272 <assert_contents> 264 </assert_contents>
273 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> 265 </output>
274 </assert_contents> 266 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
275 </output> 267 <output name="annotation_plot">
276 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/> 268 <assert_contents>
269 <has_size value="418991" delta="1000"/>
270 </assert_contents>
271 </output>
272 </test>
273 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps -->
274 <section name="input_option" >
275 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
276 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
277 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
278 <param name="min_contig_length" value="350"/>
279 </section>
280 <section name="workflow">
281 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
282 </section>
283 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/>
284 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/>
285 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
286 <output name="annotation_plot">
287 <assert_contents>
288 <has_size value="418399" delta="1000"/>
289 </assert_contents>
290 </output>
291 </test>
292 <test expect_num_outputs="4"> <!-- TEST_4 annotations -->
293 <section name="input_option" >
294 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
295 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
296 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
297 </section>
298 <section name="annotation">
299 <param name="complete" value="true"/>
300 <param name="prodigal" value="prodigal.tf"/>
301 <param name="translation_table" value="4"/>
302 <param name="replicons" value="replicons.tsv" ftype="tabular"/>
303 <param name="compliant" value="true"/>
304 <param name="proteins" value="user-proteins.faa" ftype="fasta"/>
305 </section>
306 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
307 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
308 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
277 <output name="annotation_plot"> 309 <output name="annotation_plot">
278 <assert_contents> 310 <assert_contents>
279 <has_size value="418991" delta="1000"/> 311 <has_size value="418399" delta="1000"/>
280 </assert_contents> 312 </assert_contents>
281 </output> 313 </output>
282 </test> 314 </test>
283 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps --> 315 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
284 <section name="input_option" > 316 <section name="input_option" >
285 <param name="bakta_db_select" value="V0.1_2022-08-29"/> 317 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
286 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> 318 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
287 <param name="input_file" value="NC_002127.1.fna"/> 319 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
288 <param name="min_contig_length" value="350"/> 320 </section>
289 </section> 321 <section name="annotation">
290 <section name="workflow"> 322 <param name="complete" value="true"/>
291 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/> 323 <param name="translation_table" value="4"/>
292 </section> 324 </section>
293 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/> 325 <section name="workflow">
294 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/> 326 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
295 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/> 327 </section>
296 <output name="annotation_plot"> 328 <section name="output_files">
297 <assert_contents> 329 <param name="output_selection" value="log_txt,sum_txt"/>
298 <has_size value="418399" delta="1000"/> 330 </section>
299 </assert_contents> 331 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/>
300 </output> 332 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
301 </test> 333 </test>
302 <test expect_num_outputs="4"> <!-- TEST_4 annotations -->
303 <section name="input_option" >
304 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
305 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
306 <param name="input_file" value="NC_002127.1.fna"/>
307 </section>
308 <section name="annotation">
309 <param name="complete" value="true"/>
310 <param name="prodigal" value="prodigal.tf"/>
311 <param name="translation_table" value="4"/>
312 <param name="replicons" value="replicons.tsv"/>
313 <param name="compliant" value="true"/>
314 <param name="proteins" value="user-proteins.faa"/>
315 </section>
316 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
317 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
318 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
319 <output name="annotation_plot">
320 <assert_contents>
321 <has_size value="418399" delta="1000"/>
322 </assert_contents>
323 </output>
324 </test>
325 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
326 <section name="input_option" >
327 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
328 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
329 <param name="input_file" value="NC_002127.1.fna"/>
330 </section>
331 <section name="annotation">
332 <param name="complete" value="true"/>
333 <param name="translation_table" value="4"/>
334 </section>
335 <section name="workflow">
336 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
337 </section>
338 <section name="output_files">
339 <param name="output_selection" value="log_txt,sum_txt"/>
340 </section>
341 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/>
342 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
343 </test>
344 </tests> 334 </tests>
345 <help><![CDATA[**What it does** 335 <help><![CDATA[**What it does**
346 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. 336 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.
347 337
348 *Comprehensive & taxonomy-independent database* 338 *Comprehensive & taxonomy-independent database*
388 378
389 **Annotation options** 379 **Annotation options**
390 1. You can specify if all sequences (chromosome or plasmids) are complete or not 380 1. You can specify if all sequences (chromosome or plasmids) are complete or not
391 2. You can add your own prodigal training file for CDS prediction 381 2. You can add your own prodigal training file for CDS prediction
392 3. The translation table could be modified, default is the 11th for bacteria 382 3. The translation table could be modified, default is the 11th for bacteria
393 4. You can specify if bacteria is gram -/+ or unknown (default value unknown) 383 4. You can specify if bacteria is gram -/+ or unknown (default value is unknown)
394 5. You can keep the name of contig present in the input file 384 5. You can keep the name of contig present in the input file
395 6. You can specify your own replicon table as a TSV/CSV file 385 6. You can specify your own replicon table as a TSV/CSV file
396 7. The compliance option makes the annotation files ready to submit to public databases 386 7. The compliance option makes the annotation files ready to submit to public databases
397 such as ENA, GenBank, EMBL 387 such as ENA, GenBank, EMBL
398 8. You can specify a protein sequence file for annotation in GenBank or fasta formats 388 8. You can specify a protein sequence file for annotation in GenBank or fasta formats