comparison ectyper.xml @ 4:08d801182fa1 draft default tip

"planemo upload for repository https://github.com/phac-nml/ecoli_serotyping commit 6615f6e5ae2eac1f8e90f25e1707c8b7ab161517"
author nml
date Fri, 29 May 2020 13:09:54 -0400
parents fb3683870b74
children
comparison
equal deleted inserted replaced
3:fb3683870b74 4:08d801182fa1
1 <tool id="ectyper" name="ectyper" version="0.9.1"> 1 <tool id="ectyper" name="ectyper" version="1.0.0">
2 <description>ectyper is a standalone serotyping module for Escherichia coli. It supports fasta and fastq file formats.</description> 2 <description>ectyper is a standalone serotyping module for Escherichia coli. It supports fasta and fastq file formats.</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.9.1">ectyper</requirement> 4 <requirement type="package" version="1.0.0">ectyper</requirement>
5 </requirements> 5 </requirements>
6 <command detect_errors="exit_code"> 6 <command detect_errors="exit_code">
7 <![CDATA[ 7 <![CDATA[
8 #set $genomes = '' 8 #set $genomes = ''
9 #if hasattr($input, '__iter__') 9 #if hasattr($input, '__iter__')
18 #else 18 #else
19 ln -s "${input}" "${input.element_identifier}" && 19 ln -s "${input}" "${input.element_identifier}" &&
20 #set $genomes = $input.element_identifier 20 #set $genomes = $input.element_identifier
21 #end if 21 #end if
22 22
23 #if $mash_input 23 #if $adv_param.mash_input
24 ln -s "${mash_input}" mash_sketch.msh && 24 ln -s "${adv_param.mash_input}" mash_sketch.msh &&
25 #end if 25 #end if
26
27
28 #if $adv_param.db_input
29 ln -s "${adv_param.db_input}" custom_db.json &&
30 #end if
31
26 32
27 ectyper --cores \${GALAXY_SLOTS:-4} 33 ectyper --cores \${GALAXY_SLOTS:-4}
28 --input "${genomes}" 34 --input "${genomes}"
29 --percentIdentity '$adv_param.min_percentIdentity' 35 -opid '$adv_param.opid'
30 --percentLength '$adv_param.percentLength' 36 -opcov '$adv_param.opcov'
37 -hpid '$adv_param.hpid'
38 -hpcov '$adv_param.hpcov'
39
31 #if $adv_param.verifyEcoli 40 #if $adv_param.verifyEcoli
32 --verify 41 --verify
33 #end if 42 #end if
34 #if $mash_input 43
44 #if $adv_param.mash_input
35 --refseq mash_sketch.msh 45 --refseq mash_sketch.msh
36 #end if
37 #if $adv_param.alleleSequence
38 --sequence
39 #end if 46 #end if
47
48 #if $adv_param.db_input
49 --dbpath custom_db.json
50 #end if
51
40 --output '.' 52 --output '.'
41 ]]> 53 ]]>
42 </command> 54 </command>
43 <inputs> 55 <inputs>
44 <param name="input" type="data" format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s)"/> 56 <param name="input" type="data" format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s)"/>
45 <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide custom MASH genome sketch to help with species identification (otherwise default RefSeq sketch is used)"/>
46 <section name="adv_param" title="Advanced parameters" expanded="False"> 57 <section name="adv_param" title="Advanced parameters" expanded="False">
47 <param name="min_percentIdentity" type="integer" value="90" min="1" max="100"/> 58 <param name="opid" label="O antigen minimum %identity" type="integer" value="90" min="1" max="100"/>
48 <param name="percentLength" type="integer" value="10" min="1" max="100"/> 59 <param name="opcov" label="O antigen minimum %coverage" type="integer" value="90" min="1" max="100"/>
60 <param name="hpid" label="H antigen minimum %identity" type="integer" value="95" min="1" max="100"/>
61 <param name="hpcov" label="H antigen minimum %coverage" type="integer" value="50" min="1" max="100"/>
49 <param name="verifyEcoli" type="boolean" checked="true" label="Enable E. coli species verification"/> 62 <param name="verifyEcoli" type="boolean" checked="true" label="Enable E. coli species verification"/>
50 <param name="alleleSequence" type="boolean" checked="false" label="Print the allele sequences as the final columns of the output?"/> 63 <param name="blastresults" type="boolean" checked="false" label="Include BLAST allele alignment results tab-delim file in the outputs?" />
51 <param name="logging" type="boolean" checked="false" label="Include log file in the run outputs?" /> 64 <param name="logging" type="boolean" checked="false" label="Include log file in the run outputs?" />
52 </section> 65 <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide custom MASH genome sketch to help with species identification (otherwise default RefSeq sketch is used)"/>
66 <param name="db_input" type="data" optional="true" format="json" label="Custom database of alleles (Optional)" help="Optionally provide custom database of alleles in JSON format"/>
67 </section>
53 </inputs> 68 </inputs>
54 <outputs> 69 <outputs>
55 <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report on ${input.element_identifier}"> </data> 70 <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report on ${input.element_identifier}"> </data>
56 <data name="output_log" format="txt" from_work_dir="ectyper.log" label="${tool.name} log file on ${input.element_identifier}"> 71 <data name="output_log" format="txt" from_work_dir="ectyper.log" label="${tool.name} log file on ${input.element_identifier}">
57 <filter>adv_param['logging']==True</filter> 72 <filter>adv_param['logging']==True</filter>
58 </data> 73 </data>
74 <data name="output_blast" format="tabular" from_work_dir="blast_output_alleles.txt" label="${tool.name} BLAST results file on ${input.element_identifier}">
75 <filter>adv_param['blastresults']==True</filter>
76 </data>
59 </outputs> 77 </outputs>
60 <tests> 78 <tests>
61 <test> 79 <test>
62 <param name="input" value="Escherichia2.fastq"/> 80 <param name="input" value="Escherichia2.fastq"/>
63 <assert_stderr> 81 <assert_stderr>
74 92
75 <help> 93 <help>
76 **Syntax** 94 **Syntax**
77 95
78 96
79 This tool identifies the serotype of assembled or assembly-free Escherichia coli genome sample based on a set of either *wzm/wzt* or *wzx/wzy* and *fliC/flkA/flmA* alleles corresponding to O and H antigens, respectively. 97 This tool identifies the serotype of both assembled and assembly-free Escherichia coli genome samples based on a set of the key O and H antigen determinant genes including *wzm/wzt* or *wzx/wzy* and *fliC/flkA/flmA*.
80 The non-E.coli genomes and other Escherichia genus species are successfully identified and well handled. The 0.9.0 version improves tool sensitivity when target alleles are truncated or 98 Unique to this tool, the species identification module allows identification of non-E.coli genomes, including other Escherichia genus species.
81 poorly covered by raw reads. 99 This version improves antigen call rates on "difficult samples" by use of an adaptive threshold. This is especially useful when antigen genes are truncated or poorly covered by raw reads.
100 If the tool predicts no antigen call, try lowering the %coverage parameter first. For more information on the new Quality Control module and running parameter details please visit https://github.com/phac-nml/ecoli_serotyping.
82 101
83
84 For more information please visit https://github.com/phac-nml/ecoli_serotyping.
85 102
86 ----- 103 -----
87 104
88 **Input:** 105 **Input:**
89 106
90 Accepts a variety of inputs including both single and/or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results, draft assemblies are recommended. 107 Accepts a variety of inputs including both single and/or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results, draft assemblies are recommended.
91 108
92 The default MASH RefSeq genome sketch is included and updated every 6 months, but one can supply custom sketch file for species identification.
93 109
94 One can download RefSeq genome sketch containing approximately 91,283 genomes from https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh. 110 The default MASH RefSeq genome sketch (https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh) containing approximately 91K genomes is included and automatically updated every 6 months.
111
95 112
96 113
97 **Output:** 114 **Output:**
98 115
99 Tab-delimited report listing identified O and H antigens together with corresponding highest scoring alleles and normalized BLAST score defined as (%identity x query coverage length) / 10000 116 Tab-delimited report listing identified O and H antigens together with the corresponding highest-scoring alleles and normalized BLAST score defined as (%identity x %coverage) / 1e4.
117 If the *verifyEcoli* parameter is enabled, the final report will contain allele quality control information for reporting purposes. A PASS (REPORTABLE) QC flag means that the O and H antigen calls are of sufficient quality to unambiguously resolve them from all other antigens.
100 118
101 ----- 119 -----
102 120
103 **Parameters (Optional):** 121 **Parameters (Optional):**
104 122 - **Enable E. coli species verification:** for species verification in case samples are of non-E.coli origin
105 - **Print the allele sequences as the final columns of the output?** Turn ON/OFF addition of the actual O and H antigen allelic sequences in the report 123 - **Include BLAST allele alignment results tab-delim file in the outputs?** Get reference allele sequences and detailed BLAST output
106 - **Enable E. coli species verification:** Turn ON/OFF for more rigorous species verification (recommended) 124 - **Include log file in the run outputs?:** Get optional logs of the ectyper run for a more detailed results assessment and troubleshooting
107 - **Include log file in the run outputs?:** Turn ON/OFF optional output of the ectyper log file for a more detailed results assessment
108 125
109 </help> 126 </help>
110 <citations> 127 <citations>
111 <citation type="bibtex"> 128 <citation type="bibtex">
112 @misc{githubectyper, 129 @misc{githubectyper,