Mercurial > repos > nml > ectyper
view ectyper.xml @ 6:9cd096bee567 draft default tip
planemo upload for repository https://github.com/phac-nml/galaxy_tools/tree/master/tools/ectyper commit 20e805e46acfc9171da5e3281f3248295b6f8018
author | nml |
---|---|
date | Mon, 30 Dec 2024 17:58:28 +0000 |
parents | daba54cd25ca |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for ectyper (Escherichia coli serotyping).
    The command template symlinks the input dataset(s) into the job working
    directory, runs ectyper against them and writes results to the working
    directory, from which the outputs below are collected.
-->
<tool id="ectyper" name="ectyper" version="@VERSION@+galaxy0">
    <description>ectyper is a standalone serotyping module for Escherichia coli. It supports fasta and fastq file formats.</description>
    <macros>
        <!-- Single source of truth for the wrapped ectyper version (tool version and package requirement). -->
        <token name="@VERSION@">2.0.0</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">ectyper</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@VERSION@">ectyper</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Dataset names are user-controlled. They are used both as symlink names
        ## and as members of the comma-separated input list handed to ectyper, so
        ## every character outside [A-Za-z0-9_.-] is replaced by an underscore.
        ## This keeps quotes, dollar signs, slashes and commas out of the shell
        ## command line and out of the list separator.
        #import re

        ## The input may be a single dataset or an iterable of datasets;
        ## normalise to a list so that one loop handles both cases.
        #if hasattr($input, '__iter__')
            #set $datasets = $input
        #else
            #set $datasets = [$input]
        #end if

        #set $link_names = []
        #for $dataset in $datasets
            #set $link_name = re.sub('[^\w\-\.]', '_', str($dataset.element_identifier))
            ln -s '$dataset' '$link_name' &&
            #silent $link_names.append($link_name)
        #end for
        #set $genomes = ','.join($link_names)

        ## Optional user-supplied resources are linked under fixed names.
        #if $adv_param.mash_input
            ln -s '$adv_param.mash_input' mash_sketch.msh &&
        #end if
        #if $adv_param.db_input
            ln -s '$adv_param.db_input' custom_db.json &&
        #end if

        ectyper
            --cores \${GALAXY_SLOTS:-4}
            --input '$genomes'
            -opid '$adv_param.opid'
            -opcov '$adv_param.opcov'
            -hpid '$adv_param.hpid'
            -hpcov '$adv_param.hpcov'
            #if $adv_param.verifyEcoli
                --verify
            #end if
            #if $adv_param.mash_input
                --refseq mash_sketch.msh
            #end if
            #if $adv_param.pathotype
                --pathotype
                -pathpid '$adv_param.pathotype_adv_param.pathpid'
                -pathpcov '$adv_param.pathotype_adv_param.pathpcov'
            #end if
            #if $adv_param.db_input
                --dbpath custom_db.json
            #end if
            #if $adv_param.longreadsmode
                --longreads
            #end if
            #if $adv_param.debugmode
                --debug
            #end if
            --output '.'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="fastq,fasta" label="Genome(s) input(s)" help="FASTA or FASTQ file(s)"/>
        <section name="adv_param" title="Advanced parameters" expanded="False">
            <param name="opid" label="O antigen minimum %identity" type="integer" value="95" min="1" max="100"/>
            <param name="opcov" label="O antigen minimum %coverage" type="integer" value="90" min="1" max="100"/>
            <param name="hpid" label="H antigen minimum %identity" type="integer" value="95" min="1" max="100"/>
            <param name="hpcov" label="H antigen minimum %coverage" type="integer" value="50" min="1" max="100"/>
            <param name="verifyEcoli" type="boolean" checked="false" label="Enable E. coli species and results QC verification"/>
            <!-- blastresults and logging do not alter the command line; they only gate the optional outputs below. -->
            <param name="blastresults" type="boolean" checked="false" label="Include BLAST allele alignment results tab-delim file in the outputs?"/>
            <param name="logging" type="boolean" checked="false" label="Include log file in the run outputs?"/>
            <param name="mash_input" type="data" optional="true" format="binary" label="Mash genome sketches (Optional)" help="Optionally provide custom MASH genome sketch to help with species identification (otherwise default RefSeq sketch is used)"/>
            <param name="pathotype" type="boolean" checked="true" label="Enable E.coli pathotyping and Shiga toxin (stx) subtyping (Recommended)"/>
            <section name="pathotype_adv_param" title="Advanced parameters (pathotyping and Shiga toxin subtyping)" expanded="False">
                <param name="pathpid" label="Minimum %identity required for a pathotype/toxin database reference allele match" type="integer" value="90" min="1" max="100"/>
                <param name="pathpcov" label="Minimum %coverage required for a pathotype/toxin database reference allele match" type="integer" value="50" min="1" max="100"/>
            </section>
            <param name="longreadsmode" type="boolean" checked="false" label="Enable long read mapping mode (Optional)" help="Enable this mode if no typing results are obtained"/>
            <param name="db_input" type="data" optional="true" format="json" label="Custom database of O and H antigen alleles (Optional)" help="Optionally provide custom database of alleles in JSON format"/>
            <param name="debugmode" type="boolean" checked="false" label="Enable debug mode with high log messages verbosity"/>
        </section>
    </inputs>
    <outputs>
        <!-- The serotype report is always collected. -->
        <data name="output_result" format="tabular" from_work_dir="output.tsv" label="${tool.name} serotype report on ${input.element_identifier}"/>
        <!-- Collected only when the "logging" advanced option is checked. -->
        <data name="output_log" format="txt" from_work_dir="ectyper.log" label="${tool.name} log file on ${input.element_identifier}">
            <filter>adv_param['logging']==True</filter>
        </data>
        <!-- Collected only when the "blastresults" advanced option is checked. -->
        <data name="output_blast" format="tabular" from_work_dir="blast_output_alleles.txt" label="${tool.name} BLAST results file on ${input.element_identifier}">
            <filter>adv_param['blastresults']==True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Raw reads (FASTQ) input with default parameters. -->
        <test expect_num_outputs="1">
            <param name="input" value="Escherichia2.fastq" ftype="fastq"/>
            <output name="output_result" ftype="tabular">
                <assert_contents>
                    <has_text_matching expression="O22"/>
                    <has_text text="H8"/>
                </assert_contents>
            </output>
        </test>
        <!-- Assembly (FASTA) input; pathotyping is on by default, so pathotype and toxin columns are checked as well. -->
        <test expect_num_outputs="1">
            <param name="input" value="Escherichia1.fasta" ftype="fasta"/>
            <output name="output_result" ftype="tabular">
                <assert_contents>
                    <has_text text="O103:H2"/>
                    <has_text text="EHEC-STEC"/>
                    <has_text text="stx1a;stx2a"/>
                    <has_text text="eae,ehxA,hlyE,stx1,stx2"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**Syntax**

This tool identifies the serotype of both assembled and assembly-free Escherichia coli genome samples based on a set of the key O and H antigen determinant genes including *wzm/wzt* or *wzx/wzy* and *fliC/flkA/flmA*.
Unique to the tool, species identification module allows for non-E.coli genomes identification including other Escherichia genus species.
This version improves antigen call rates on "difficult samples" by use of an adaptive threshold. This is especially useful when antigen genes are truncated or poorly covered by raw reads.
If no antigen call is being predicted by the tool, try to lower %coverage parameter first.

For more information on the new Quality Control module and running parameter details please visit https://github.com/phac-nml/ecoli_serotyping.

-----

**Input:**

Accepts a variety of inputs including both single and/or multiple FASTQ and/or FASTA file(s). Inputs might contain pure raw reads, but for more accurate results, draft assemblies are recommended.
The default MASH RefSeq genome sketch (https://gembox.cbcb.umd.edu/mash/refseq.genomes.k21s1000.msh) containing approximately 91K genomes is included and automatically updated every 6 months.

**Output:**

Tab-delimited report listing identified O and H antigens together with the corresponding highest-scoring alleles and normalized BLAST score defined as (%identity x %coverage) / 1e4.
If *verifyEcoli* parameter is enabled, final report will contain allele quality control information on results for reporting purposes.
PASS (REPORTABLE) QC flag means that O and H antigen calls are of sufficient quality to unambiguously resolve them from all other antigens.

-----

**Parameters (Optional):**

- **Enable E. coli species verification:** for species verification in case samples are of non-E.coli origin
- **Include BLAST allele alignment results tab-delim file in the outputs?:** Get reference allele sequences and detailed BLAST output
- **Include log file in the run outputs?:** Get optional logs of the ectyper run for a more detailed results assessment and troubleshooting
    </help>
    <citations>
        <citation type="bibtex">
@misc{githubectyper,
  author = {Laing Chad},
  title = {ECtyper - serotyping module for Escherichia coli},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/phac-nml/ecoli_serotyping}
}</citation>
    </citations>
</tool>