comparison ectyper.xml @ 2:e79a8dad83b4 draft

"planemo upload for repository https://github.com/phac-nml/ecoli_serotyping commit 42aaf40ea7e6658cf115edd8f2014ed0db9dbe38"
author nml
date Tue, 08 Oct 2019 15:48:45 -0400
parents b02c775b27c8
children fb3683870b74
comparison
equal deleted inserted replaced
1:b02c775b27c8 2:e79a8dad83b4
1 <tool id="ectyper" name="ectyper" version="0.8.1"> 1 <tool id="ectyper" name="ectyper" version="0.9.0">
2 <description>ectyper is a standalone serotyping module for Escherichia coli. It supports fasta and fastq file formats.</description> 2 <description>ectyper is a standalone serotyping module for Escherichia coli. It supports fasta and fastq file formats.</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.8.1">ectyper</requirement> 4 <requirement type="package" version="0.9.0">ectyper</requirement>
5 </requirements> 5 </requirements>
6 <command detect_errors="exit_code"> 6 <command detect_errors="exit_code">
7 <![CDATA[ 7 <![CDATA[
8 #set $genomes = '' 8 #set $genomes = ''
9 #if hasattr($input, '__iter__') 9 #if hasattr($input, '__iter__')
10 #for $i in $input 10 #for $i in $input
11 ln -s "${i}" "${i.name}" && 11 ln -s "${i}" "${i.element_identifier}" &&
12 #if len($genomes) > 0 12 #if len($genomes) > 0
13 #set $genomes = $genomes + ',' + str($i.name) 13 #set $genomes = $genomes + ',' + str($i.element_identifier)
14 #else 14 #else
15 #set $genomes = str($i.name) 15 #set $genomes = str($i.element_identifier)
16 #end if 16 #end if
17 #end for 17 #end for
18 #else 18 #else
19 ln -s "${input}" "${input.name}" && 19 ln -s "${input}" "${input.element_identifier}" &&
20 #set $genomes = $input.name 20 #set $genomes = $input.element_identifier
21 #end if 21 #end if
22 22
23 #if $mash_input 23 #if $mash_input
24 ln -s "${mash_input}" mash_sketch.msh && 24 ln -s "${mash_input}" mash_sketch.msh &&
25 #end if 25 #end if
26 26
27 ectyper --cores \${GALAXY_SLOTS:-4} 27 ectyper --cores \${GALAXY_SLOTS:-4}
28 --input "${genomes}" 28 --input "${genomes}"
29 --percentIdentity '$adv_param.min_percentIdentity' 29 --percentIdentity '$adv_param.min_percentIdentity'
30 --percentLength '$adv_param.percentLength' 30 --percentLength '$adv_param.percentLength'
31 #if $adv_param.verifyEcoli 31 #if $adv_param.verifyEcoli
39 #end if 39 #end if
40 --output '.' 40 --output '.'
41 ]]> 41 ]]>
42 </command> 42 </command>
43 <inputs> 43 <inputs>
44 <param name="input" type="data" format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s) with contig(s)"/> 44 <param name="input" type="data" format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s)"/>
45 <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide MASH sketches to find closest genome (in case O/H typing fails)"/> 45 <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide custom MASH genome sketch to help with species identification (otherwise default RefSeq sketch is used)"/>
46 <section name="adv_param" title="Advanced parameters" expanded="False"> 46 <section name="adv_param" title="Advanced parameters" expanded="False">
47 <param name="min_percentIdentity" type="integer" value="90" min="1" max="100"/> 47 <param name="min_percentIdentity" type="integer" value="90" min="1" max="100"/>
48 <param name="percentLength" type="integer" value="50" min="1" max="100"/> 48 <param name="percentLength" type="integer" value="10" min="1" max="100"/>
49 <param name="verifyEcoli" type="boolean" checked="true" label="Enable E. coli species verification"/> 49 <param name="verifyEcoli" type="boolean" checked="true" label="Enable E. coli species verification"/>
50 <param name="alleleSequence" type="boolean" checked="false" label="Print the allele sequences as the final columns of the output?"/> 50 <param name="alleleSequence" type="boolean" checked="false" label="Print the allele sequences as the final columns of the output?"/>
51 <param name="logging" type="boolean" checked="false" label="Include log file in the run outputs?" /> 51 <param name="logging" type="boolean" checked="false" label="Include log file in the run outputs?" />
52 </section> 52 </section>
53 </inputs> 53 </inputs>
54 <outputs> 54 <outputs>
55 <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report"> </data> 55 <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report on ${input.element_identifier}"> </data>
56 <data name="output_log" format="text" from_work_dir="ectyper.log" label="${tool.name} log file"> 56 <data name="output_log" format="txt" from_work_dir="ectyper.log" label="${tool.name} log file on ${input.element_identifier}">
57 <filter>adv_param['logging']==True</filter> 57 <filter>adv_param['logging']==True</filter>
58 </data> 58 </data>
59 </outputs> 59 </outputs>
60 <tests> 60 <tests>
61 <test> 61 <test>
73 </tests> 73 </tests>
74 74
75 <help> 75 <help>
76 **Syntax** 76 **Syntax**
77 77
78 This tool identifies the serotype of Escherichia coli genome sequences based on a set of *wzm/wzt*, *wzx/wzy* and *fliC/flkA/flmA* alleles corresponding to O and H antigens, respectively. 78
79 This tool identifies the serotype of assembled or assembly-free Escherichia coli genome sample based on a set of either *wzm/wzt* or *wzx/wzy* and *fliC/flkA/flmA* alleles corresponding to O and H antigens, respectively.
80 The non-E.coli genomes and other Escherichia genus species are successfully identified and well handled. The 0.9.0 version improves tool sensitivity when target alleles are truncated or
81 poorly covered by raw reads.
82
79 83
80 For more information please visit https://github.com/phac-nml/ecoli_serotyping. 84 For more information please visit https://github.com/phac-nml/ecoli_serotyping.
81 85
82 ----- 86 -----
83 87
84 **Input:** 88 **Input:**
85 89
86 Accepts a variety of inputs including single or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results draft assemblies are recommended. 90 Accepts a variety of inputs including both single and/or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results, draft assemblies are recommended.
87 91
88 Optionally select a MASH RefSeq genome sketch (version 2.0 and above) for cases when O/H typing would fail. Download RefSeq genome sketch containing 91,283 genomes with 1000 hashes each directly from https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh . 92 The default MASH RefSeq genome sketch is included and updated every 6 months, but one can supply a custom sketch file for species identification.
93
94 One can download the RefSeq genome sketch containing approximately 91,283 genomes from https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh.
95
89 96
90 **Output:** 97 **Output:**
91 98
92 Tab-delimited report listing identified O and H antigens together with corresponding highest scoring alleles and normalized BLAST score defined as (%identity x query coverage length) / 10000 99 Tab-delimited report listing identified O and H antigens together with corresponding highest scoring alleles and normalized BLAST score defined as (%identity x query coverage length) / 10000
93 100