changeset 2:e79a8dad83b4 draft

"planemo upload for repository https://github.com/phac-nml/ecoli_serotyping commit 42aaf40ea7e6658cf115edd8f2014ed0db9dbe38"
author nml
date Tue, 08 Oct 2019 15:48:45 -0400
parents b02c775b27c8
children fb3683870b74
files README.md ectyper.xml
diffstat 2 files changed, 26 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Thu Jan 31 09:39:04 2019 -0500
+++ b/README.md	Tue Oct 08 15:48:45 2019 -0400
@@ -1,4 +1,4 @@
-# EC-Typer - in silico serotyping of *Escherichia coli* species
-EC-Typer is a standalone serotyping module for *Escherichia coli* typing. It supports fasta and fastq file formats. Designed by Dr. Chad Liang et al. Wrapped into Galaxy wrapper by Dr. Kyrylo Bessonov
+# EC-Typer - in silico serotyping of *Escherichia coli* and other cryptic *Escherichia* species
+EC-Typer is a standalone serotyping module for *Escherichia coli* typing with automatic identification of non-*E. coli* species. It supports fasta and fastq file formats (i.e. assembled genomes and raw reads). Designed by Dr. Chad Liang et al. Maintained by Dr. Kyrylo Bessonov. This Galaxy wrapper is designed and produced by Dr. Kyrylo Bessonov.
 
 For more information please refer to https://github.com/phac-nml/ecoli_serotyping
--- a/ectyper.xml	Thu Jan 31 09:39:04 2019 -0500
+++ b/ectyper.xml	Tue Oct 08 15:48:45 2019 -0400
@@ -1,29 +1,29 @@
-<tool id="ectyper" name="ectyper" version="0.8.1">
+<tool id="ectyper" name="ectyper" version="0.9.0">
   <description>ectyper is a standalone serotyping module for Escherichia coli. It supports fasta and fastq file formats.</description>
   <requirements>
-     <requirement type="package" version="0.8.1">ectyper</requirement>
+     <requirement type="package" version="0.9.0">ectyper</requirement>
   </requirements>
   <command detect_errors="exit_code">
   <![CDATA[
   #set $genomes = ''
   #if hasattr($input, '__iter__')
     #for $i in $input
-        ln -s "${i}" "${i.name}" &&
+        ln -s "${i}" "${i.element_identifier}" &&
         #if len($genomes) > 0
-          #set $genomes = $genomes + ',' + str($i.name)
+          #set $genomes = $genomes + ',' + str($i.element_identifier)
         #else
-          #set $genomes = str($i.name)
+          #set $genomes = str($i.element_identifier)
         #end if      
     #end for
   #else
-    ln -s "${input}" "${input.name}" &&
-    #set $genomes = $input.name
+    ln -s "${input}" "${input.element_identifier}" &&
+    #set $genomes = $input.element_identifier
   #end if
 
   #if $mash_input
     ln -s "${mash_input}" mash_sketch.msh &&
   #end if
- 
+
   ectyper  --cores \${GALAXY_SLOTS:-4} 
   --input "${genomes}" 
   --percentIdentity '$adv_param.min_percentIdentity'
@@ -41,19 +41,19 @@
   ]]>
   </command>
   <inputs>
-    <param name="input" type="data"  format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s) with contig(s)"/>
-    <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide MASH sketches to find closest genome (in case O/H typing fails)"/>
+    <param name="input" type="data"  format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s)"/>
+    <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide custom MASH genome sketch to help with species identification (otherwise default RefSeq sketch is used)"/>
     <section name="adv_param" title="Advanced parameters" expanded="False">
       <param name="min_percentIdentity" type="integer" value="90" min="1" max="100"/>
-      <param name="percentLength" type="integer" value="50" min="1" max="100"/>
+      <param name="percentLength" type="integer" value="10" min="1" max="100"/>
       <param name="verifyEcoli" type="boolean" checked="true" label="Enable E. coli species verification"/>
       <param name="alleleSequence" type="boolean" checked="false" label="Print the allele sequences as the final columns of the output?"/> 
       <param name="logging" type="boolean" checked="false"  label="Include log file in the run outputs?" />
     </section>  
   </inputs>
   <outputs>
-    <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report"> </data> 
-    <data name="output_log" format="text" from_work_dir="ectyper.log" label="${tool.name} log file"> 
+    <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report on ${input.element_identifier}"> </data>
+    <data name="output_log" format="txt" from_work_dir="ectyper.log" label="${tool.name} log file on ${input.element_identifier}">
         <filter>adv_param['logging']==True</filter>
     </data>   
   </outputs>
@@ -75,7 +75,11 @@
   <help>
 **Syntax**
 
-This tool identifies the serotype of Escherichia coli genome sequences based on a set of *wzm/wzt*, *wzx/wzy* and *fliC/flkA/flmA* alleles corresponding to O and H antigens, respectively. 
+
+This tool identifies the serotype of an Escherichia coli sample, supplied either as an assembled genome or as unassembled raw reads, based on a set of either *wzm/wzt* or *wzx/wzy* alleles and *fliC/flkA/flmA* alleles corresponding to the O and H antigens, respectively.
+Non-E. coli genomes, including other species of the Escherichia genus, are successfully identified and handled. The 0.9.0 version improves tool sensitivity when target alleles are truncated or
+poorly covered by raw reads.
+
 
 For more information please visit https://github.com/phac-nml/ecoli_serotyping. 
 
@@ -83,9 +87,12 @@
 
 **Input:**
 
-Accepts a variety of inputs including single or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results draft assemblies are recommended. 
+Accepts a variety of inputs including single or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results, draft assemblies are recommended.
 
-Optionally select a MASH RefSeq genome sketch (version 2.0 and above) for cases when O/H typing would fail. Download RefSeq genome sketch containing 91,283 genomes with 1000 hashes each directly from https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh . 
+The default MASH RefSeq genome sketch is included and updated every 6 months, but one can supply a custom sketch file for species identification.
+
+The RefSeq genome sketch, containing approximately 91,283 genomes, can be downloaded from https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh
+
 
 **Output:**
 
@@ -110,4 +117,4 @@
   url = {https://github.com/phac-nml/ecoli_serotyping}
     }</citation>
 </citations>
-</tool>
\ No newline at end of file
+</tool>