comparison seqsero2.xml @ 0:e13b5be2de4e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqsero2 commit 2268c10b0cf71b1a1967ce84720137f01db24c3b
author iuc
date Tue, 07 Nov 2023 19:18:04 +0000
parents
children 5a9a6074d99b
comparison
equal deleted inserted replaced
-1:000000000000 0:e13b5be2de4e
1 <tool id="seqsero2" name="SeqSero2" version="@TOOL_VERSION@+galaxy0" profile="21.05">
2 <description>Salmonella serotype prediction</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <version_command>SeqSero2_package.py -v</version_command>
<command detect_errors="exit_code"><![CDATA[
    #import re
    ## Cache the selected input type once; every decision below keys off this string.
    #set $mode = str($input_type_cond.input_type)
    ## FASTA-based modes (assembly / nanopore) always run the k-mer workflow.
    #set $is_fasta_mode = $mode in ['assembly', 'nanopore']
    ## Modes that hand two read files to SeqSero2.
    #set $is_two_file_mode = $mode in ['paired', 'collection']

    ## Pick the first (or only) input dataset.
    #if $mode == 'collection':
        #set $read1 = $input_type_cond.input_collection.forward
    #else:
        #set $read1 = $input_type_cond.read1
    #end if

    ## Build the file extension for the symlinks from the mode and the
    ## compression state of the first dataset.
    #if $is_fasta_mode:
        #set $ext = '.fasta'
    #else:
        #set $ext = '.fastq'
    #end if
    #if $read1.ext.endswith('.gz'):
        #set $ext = $ext + '.gz'
    #end if

    ## Sanitise the element identifiers so they are safe to use as file names.
    #set $clean_name1 = re.sub('[^\w\-\.]', '_', str($read1.element_identifier))
    #if $mode == 'paired':
        #set $read1_file = $clean_name1 + '_forward' + $ext
        #set $read2 = $input_type_cond.read2
        #set $read2_file = re.sub('[^\w\-\.]', '_', str($read2.element_identifier)) + '_reverse' + $ext
    #elif $mode == 'collection':
        #set $read1_file = $clean_name1 + $ext
        #set $read2 = $input_type_cond.input_collection.reverse
        #set $read2_file = re.sub('[^\w\-\.]', '_', str($read2.element_identifier)) + $ext
    #else:
        #set $read1_file = $clean_name1 + $ext
    #end if

    ## Link the inputs into the working directory under their sanitised names.
    #if $mode == 'paired':
    ln -s '$read2' '$read2_file' &&
    #end if
    ln -s '$read1' '$read1_file' &&
    #if $mode == 'collection':
    ln -s '$read2' '$read2_file' &&
    #end if

    ## Translate the input type into the value passed to "-t".
    #set $t_by_mode = {'paired': '2', 'collection': '2', 'assembly': '4', 'single': '3', 'nanopore': '5'}
    #set $seqsero_t_value = $t_by_mode.get($mode, '')

    ## Read-based modes use the workflow chosen by the user.
    #if $is_fasta_mode:
        #set $seqsero_workflow_type = 'k'
    #else:
        #set $seqsero_workflow_type = $input_type_cond.workflow
    #end if

    SeqSero2_package.py
        -m '$seqsero_workflow_type'
        -t '$seqsero_t_value'
        -i '$read1_file'
        #if $is_two_file_mode:
            '$read2_file'
        #end if
        -p \${GALAXY_SLOTS:-4}
        -d output
]]></command>
<inputs>
    <!-- The selected input type decides which datasets are requested here and,
         in the command template, which value is passed to SeqSero2 via "-t". -->
    <conditional name="input_type_cond">
        <param name="input_type" type="select" label="Input type" help="Select the kind of data to analyse: paired-end reads (two datasets or a paired collection), single-end reads, Nanopore reads, or assembled contigs/genomes">
            <option value="paired">Paired End</option>
            <option value="collection">Collection</option>
            <option value="assembly">Contigs</option>
            <!-- The command template runs this mode with "-t 3", which the help
                 section describes as single reads, so it is labelled accordingly.
                 The option value itself is unchanged. -->
            <option value="single">Single End</option>
            <option value="nanopore">Nanopore reads</option>
        </param>
        <when value="paired">
            <param name="read1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
            <param name="read2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
            <expand macro="select_workflow_type" />
        </when>
        <when value="collection">
            <param name="input_collection" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Paired collection"/>
            <expand macro="select_workflow_type" />
        </when>
        <when value="single">
            <param name="read1" type="data" format="fastqsanger,fastqsanger.gz" multiple="false" label="Single-end reads" />
            <expand macro="select_workflow_type" />
        </when>
        <when value="nanopore">
            <param name="read1" type="data" format="fasta" multiple="false" label="Nanopore reads" />
        </when>
        <when value="assembly">
            <param name="read1" type="data" format="fasta" multiple="false" label="Contigs/genomes" />
        </when>
    </conditional>
    <section name="output_options" title="Output options">
        <!-- This switch only controls the output filter for the log dataset; it is
             not forwarded to the command line, hence "name" rather than "argument". -->
        <param name="logfile" type="boolean" checked="false" truevalue="true" falsevalue="" label="Include log as output file."/>
    </section>
</inputs>
<outputs>
    <!-- Result table, collected from the directory passed to the command via "-d output". -->
    <data name="results"
          format="tabular"
          label="${tool.name} on ${on_string} Results"
          from_work_dir="output/SeqSero_result.tsv"/>
    <!-- Log file from the same directory; only created when the "logfile" option is enabled. -->
    <data name="log"
          format="txt"
          label="${tool.name} on ${on_string} Log"
          from_work_dir="output/SeqSero_log.txt">
        <filter>output_options['logfile']</filter>
    </data>
</outputs>
<tests>
    <!-- Assembly input with the optional log output enabled. -->
    <test expect_num_outputs="2">
        <param name="input_type" value="assembly" />
        <param name="read1" value="CP009102.1.fasta" ftype="fasta" />
        <param name="logfile" value="true" />
        <output name="results">
            <assert_contents>
                <has_text text="Salmonella enterica subspecies enterica (subspecies I)" />
                <has_text text="Typhimurium" />
            </assert_contents>
        </output>
        <output name="log">
            <assert_contents>
                <has_n_lines n="104" />
                <has_text text="O_scores" />
                <has_text text="H_scores" />
                <has_text text="Special_scores" />
            </assert_contents>
        </output>
    </test>

    <!-- Two separate paired-end datasets, allele workflow. -->
    <test expect_num_outputs="1">
        <param name="input_type" value="paired" />
        <param name="workflow" value="a" />
        <param name="read1" value="SRR10859038_R1.fastq.gz" ftype="fastqsanger.gz" />
        <param name="read2" value="SRR10859038_R2.fastq.gz" ftype="fastqsanger.gz" />
        <output name="results">
            <assert_contents>
                <has_text text="The input genome cannot be identified as Salmonella." />
            </assert_contents>
        </output>
    </test>

    <!-- The same reads supplied as a paired collection, k-mer workflow. -->
    <test expect_num_outputs="1">
        <param name="input_type" value="collection" />
        <param name="workflow" value="k" />
        <param name="input_collection">
            <collection type="paired">
                <element name="forward" value="SRR10859038_R1.fastq.gz" ftype="fastqsanger.gz" />
                <element name="reverse" value="SRR10859038_R2.fastq.gz" ftype="fastqsanger.gz" />
            </collection>
        </param>
        <output name="results">
            <assert_contents>
                <has_text text="The input genome cannot be identified as Salmonella." />
            </assert_contents>
        </output>
    </test>

    <!-- A single read file through the "single" input type, allele workflow. -->
    <test expect_num_outputs="1">
        <param name="input_type" value="single" />
        <param name="workflow" value="a" />
        <param name="read1" value="SRR10859038_R1.fastq.gz" ftype="fastqsanger.gz" />
        <output name="results">
            <assert_contents>
                <has_text text="The input genome cannot be identified as Salmonella" />
            </assert_contents>
        </output>
    </test>

    <!-- FASTA input through the "nanopore" input type. -->
    <test expect_num_outputs="1">
        <param name="input_type" value="nanopore" />
        <param name="read1" value="CP009102.1.fasta" ftype="fasta" />
        <output name="results">
            <assert_contents>
                <has_text text="Salmonella enterica subspecies enterica (subspecies I)" />
                <has_text text="Typhimurium" />
            </assert_contents>
        </output>
    </test>
</tests>
184 <help><![CDATA[
185 **What is SeqSero2**
186
187 SeqSero2 is a pipeline for Salmonella serotype prediction from raw sequencing reads or genome assemblies.
188 SeqSero is based on curated databases of Salmonella serotype determinants (rfb gene cluster, fliC and fljB alleles) and is expected to determine serotype rapidly and accurately for nearly the full spectrum of Salmonella serotypes (more than 2,300 serotypes), from both raw sequencing reads and genome assemblies.
189
190 **Workflows**
191
192 * Allele micro-assembly (workflow: allele). This workflow takes raw reads as input and performs targeted assembly of serotype determinant alleles. Assembled alleles are used to predict serotype and flag potential inter-serotype contamination in sequencing data (i.e., presence of reads from multiple serotypes due to, for example, cross or carryover contamination during sequencing).
193 * Raw reads k-mer (workflow: k-mer). This workflow takes raw reads as input and performs rapid serotype prediction based on unique k-mers of serotype determinants.
194 * Genome assembly k-mer (input type: contigs). This workflow takes genome assemblies as input and the rest of the workflow largely overlaps with the raw reads k-mer workflow.
195
196 **Extra parameter information.**
197
198 The '-t' parameter is set automatically from the selected input type and is therefore not customizable by the user. The mapping is based on the tool's parameter description:
199
200 * -t {1,2,3,4,5} <int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore reads (fasta/fastq)
201
202 -----
203
204 _`Document`: https://github.com/denglab/SeqSero2
205
206 ]]></help>
207 <expand macro="citations"/>
208 </tool>