Mercurial > repos > iuc > seqsero2

diff seqsero2.xml @ 0:e13b5be2de4e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqsero2 commit 2268c10b0cf71b1a1967ce84720137f01db24c3b
author: iuc
date: Tue, 07 Nov 2023 19:18:04 +0000
children: 5a9a6074d99b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqsero2.xml	Tue Nov 07 19:18:04 2023 +0000
@@ -0,0 +1,208 @@
+<tool id="seqsero2" name="SeqSero2" version="@TOOL_VERSION@+galaxy0" profile="21.05">
+    <description>Salmonella serotype prediction</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <version_command>SeqSero2_package.py -v</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    #import re
+    #set $seqsero_t_value = ''
+    #set $seqsero_workflow_type = ''
+
+    #if str($input_type_cond.input_type) in ['single', 'paired', 'assembly', 'nanopore']:
+        #set read1 = $input_type_cond.read1
+
+        #if str($input_type_cond.input_type) in ['assembly', 'nanopore']:
+            #set ext = '.fasta'
+        #else:
+            #set ext = '.fastq'
+        #end if
+        #if $read1.ext.endswith('.gz'):
+            #set ext = $ext+'.gz'
+        #end if
+
+        #if str($input_type_cond.input_type) == 'paired':
+            #set read1_file = re.sub('[^\w\-\.]', '_', str($read1.element_identifier)) + '_forward' + $ext
+            #set read2 = $input_type_cond.read2
+            #set read2_file = re.sub('[^\w\-\.]', '_', str($read2.element_identifier)) + '_reverse' + $ext
+            ln -s '$read2' '$read2_file' &&
+        #else:
+            #set read1_file = re.sub('[^\w\-\.]', '_', str($read1.element_identifier)) + $ext
+        #end if
+
+        ln -s '$read1' '$read1_file' &&
+    #else:
+        #set read1 = $input_type_cond.input_collection.forward
+
+        #set ext = '.fastq'
+        #if $read1.ext.endswith('.gz'):
+            #set ext = $ext+'.gz'
+        #end if
+
+        #set read1_file = re.sub('[^\w\-\.]', '_', str($read1.element_identifier)) + $ext
+        ln -s '$read1' '$read1_file' &&
+
+        #set read2 = $input_type_cond.input_collection.reverse
+        #set read2_file = re.sub('[^\w\-\.]', '_', str($read2.element_identifier)) + $ext
+        ln -s '$read2' '$read2_file' &&
+
+    #end if
+
+    #if str($input_type_cond.input_type) in ['paired', 'collection']:
+        #set $seqsero_t_value = '2'
+    #elif str($input_type_cond.input_type) == 'assembly':
+        #set $seqsero_t_value = '4'
+    #elif str($input_type_cond.input_type) == 'single':
+        #set $seqsero_t_value = '3'
+    #elif str($input_type_cond.input_type) == 'nanopore':
+        #set $seqsero_t_value = '5'
+    #end if
+
+    #if str($input_type_cond.input_type) in ['assembly', 'nanopore']:
+        #set $seqsero_workflow_type = 'k'
+    #else:
+        #set $seqsero_workflow_type = $input_type_cond.workflow
+    #end if
+
+    SeqSero2_package.py 
+    -m '$seqsero_workflow_type'
+    -t '$seqsero_t_value'
+    -i '$read1_file'
+    #if str($input_type_cond.input_type) in ['paired', 'collection']:
+        '$read2_file'
+    #end if
+    -p \${GALAXY_SLOTS:-4}
+    -d output
+    ]]>    </command>
+    <inputs>
+        <conditional name="input_type_cond">
+            <param name="input_type" type="select" label="Input type" help="Select 'paired end' reads or 'sequence' for genomes/contigs">
+                <option value="paired">Paired End</option>
+                <option value="collection">Collection</option>
+                <option value="assembly">Contigs</option>
+                <option value="single">Interleaved</option>
+                <option value="nanopore">Nanopore reads</option>
+            </param>
+            <when value="paired">
+                <param name="read1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
+                <param name="read2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
+                <expand macro="select_workflow_type" />
+            </when>
+            <when value="collection">
+                <param name="input_collection" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Paired collection"/>
+                <expand macro="select_workflow_type" />
+            </when>
+            <when value="single">
+                <param name="read1" type="data" format="fastqsanger,fastqsanger.gz" multiple="false" label="Interleaved" />
+                <expand macro="select_workflow_type" />
+            </when>
+            <when value="nanopore">
+                <param name="read1" type="data" format="fasta" multiple="false" label="Nanopore reads" />
+            </when>
+            <when value="assembly">
+                <param name="read1" type="data" format="fasta" multiple="false" label="Contigs/genomes" />
+            </when>
+        </conditional>
+        <section name="output_options" title="Output options">
+            <param argument="logfile" type="boolean" checked="false" truevalue="true" falsevalue="" label="Include log as output file."/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="results" format="tabular" label="${tool.name} on ${on_string} Results" from_work_dir="output/SeqSero_result.tsv"/>
+        <data name="log" format="txt" label="${tool.name} on ${on_string} Log" from_work_dir="output/SeqSero_log.txt">
+            <filter>output_options['logfile']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="input_type" value="assembly" />
+            <param name="read1" value="CP009102.1.fasta" ftype="fasta" />
+            <param name="logfile" value="true" />
+            <output name="results">
+                <assert_contents>
+                    <has_text text="Salmonella enterica subspecies enterica (subspecies I)" />
+                    <has_text text="Typhimurium" />
+                </assert_contents>
+            </output>
+            <output name="log">
+                <assert_contents>
+                    <has_n_lines n="104" />
+                    <has_text text="O_scores" />
+                    <has_text text="H_scores" />
+                    <has_text text="Special_scores" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_type" value="paired" />
+            <param name="workflow" value="a" />
+            <param name="read1" value="SRR10859038_R1.fastq.gz" ftype="fastqsanger.gz" />
+            <param name="read2" value="SRR10859038_R2.fastq.gz" ftype="fastqsanger.gz" />
+            <output name="results">
+                <assert_contents>
+                    <has_text text="The input genome cannot be identified as Salmonella." />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_type" value="collection" />
+            <param name="workflow" value="k" />
+            <param name="input_collection">
+                <collection type="paired">
+                    <element name="forward" value="SRR10859038_R1.fastq.gz" ftype="fastqsanger.gz" />
+                    <element name="reverse" value="SRR10859038_R2.fastq.gz" ftype="fastqsanger.gz" />
+                </collection>
+            </param>
+            <output name="results">
+                <assert_contents>
+                    <has_text text="The input genome cannot be identified as Salmonella." />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_type" value="single" />
+            <param name="workflow" value="a" />
+            <param name="read1" value="SRR10859038_R1.fastq.gz" ftype="fastqsanger.gz" />
+            <output name="results">
+                <assert_contents>
+                    <has_text text="The input genome cannot be identified as Salmonella" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_type" value="nanopore" />
+            <param name="read1" value="CP009102.1.fasta" ftype="fasta" />
+            <output name="results">
+                <assert_contents>
+                    <has_text text="Salmonella enterica subspecies enterica (subspecies I)" />
+                    <has_text text="Typhimurium" />
+                </assert_contents>
+            </output>
+        </test>
+     </tests>
+    <help><![CDATA[
+    **What is SeqSero2**
+
+SeqSero2 is a pipeline for Salmonella serotype prediction from raw sequencing reads or genome assemblies.
+SeqSero is based on curated databases of Salmonella serotype determinants (rfb gene cluster, fliC and fljB alleles) and is predicted to determine serotype rapidly and accurately for nearly the full spectrum of Salmonella serotypes (more than 2,300 serotypes), from both raw sequencing reads and genome assemblies.
+
+**Workflows**
+
+* Allele micro-assembly (workflow: allele). This workflow takes raw reads as input and performs targeted assembly of serotype determinant alleles. Assembled alleles are used to predict serotype and flag potential inter-serotype contamination in sequencing data (i.e., presence of reads from multiple serotypes due to, for example, cross or carryover contamination during sequencing).
+* Raw reads k-mer (workflow: k-mer). This workflow takes raw reads as input and performs rapid serotype prediction based on unique k-mers of serotype determinants.
+* Genome assembly k-mer (input type: contigs). This workflow takes genome assemblies as input and the rest of the workflow largely overlaps with the raw reads k-mer workflow.
+
+**Extra parameter information.**
+
+The '-t' parameter is automatically determined using the input files, and therefore not customizable by the user. The logic used is based on the tool's parameter description:
+
+* -t {1,2,3,4,5}  <int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore reads (fasta/fastq)
+
+-----
+
+_`Document`:  https://github.com/denglab/SeqSero2
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
author	iuc
date	Tue, 07 Nov 2023 19:18:04 +0000
parents
children	5a9a6074d99b