comparison processing_short_reads_macros.xml @ 0:cd7675c5b15a draft

planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit 78ca62b54aee22893d278d9c3d495527be405f8a
author frogs
date Wed, 04 Feb 2026 13:15:55 +0000
parents
children 76ff9af5c0a3
comparison
equal deleted inserted replaced
-1:000000000000 0:cd7675c5b15a
1 <?xml version="1.0"?>
2 <macros>
<!-- Cheetah fragment building the arguments of "reads_processing.py illumina" from the parameters declared in the "processing_short_reads_inputs" macro. -->
<token name="@PROCESSING_SHORT_READS_CMD_LINE@">
    ## Separator written in front of every per-sample value emitted by the loops below.
    #set $sep = ' '

    reads_processing.py illumina
        @CPUS@

    ## PAIRED END READS
    #if $fastq_input.input_type == "paired"
        ## INPUTS: either one archive, or three parallel lists (names, R1 files, R2 files)
        ## built by walking the same "samples" repeat three times so the order matches.
        #if $fastq_input.input_files.file_type == "archive"
            --input-archive '${fastq_input.input_files.input_archive}'
        #else
            --samples-names
            #for $current in $fastq_input.input_files.samples
                $sep'${current.samples_names.strip()}'
            #end for
            --input-R1
            #for $current in $fastq_input.input_files.samples
                $sep'${current.input_R1}'
            #end for
            --input-R2
            #for $current in $fastq_input.input_files.samples
                $sep'${current.input_R2}'
            #end for
        #end if

        ## MERGING PARAMETERS
        --R1-size $fastq_input.R1_size
        --R2-size $fastq_input.R2_size
        --mismatch-rate $fastq_input.mismatch_rate
        --merge-software $fastq_input.merge_software_type.merge_software
        ## The expected amplicon size is only declared (and only passed) for flash.
        #if $fastq_input.merge_software_type.merge_software == "flash"
            --expected-amplicon-size $fastq_input.merge_software_type.expected_amplicon_size
        #end if
        #if $fastq_input.keep_unmerged
            --keep-unmerged
        #end if

    ## SINGLE END READS
    #elif $fastq_input.input_type == "single"
        --already-contiged
        ## INPUTS: same layout as the paired branch, without R2.
        #if $fastq_input.input_files.file_type == "archive"
            --input-archive '${fastq_input.input_files.input_archive}'
        #else
            --samples-names
            #for $current in $fastq_input.input_files.samples
                $sep'${current.samples_names.strip()}'
            #end for
            --input-R1
            #for $current in $fastq_input.input_files.samples
                $sep'${current.input_R1}'
            #end for
        #end if
    #end if

    ## AMPLICON PARAMETERS
    --min-amplicon-size $min_amplicon_size
    --max-amplicon-size $max_amplicon_size

    ## PRIMERS
    #if $is_primer_in_seq.primer_choice == "true"
        --five-prim-primer '$is_primer_in_seq.five_prim_primer'
        --three-prim-primer '$is_primer_in_seq.three_prim_primer'
    #else
        --without-primers
    #end if

    ## PROCESS TYPE
    #if $process_type.process == "Preprocess only"
        --process preprocess-only
    #elif $process_type.process == "Clustering Swarm"
        --process swarm
        --distance $process_type.distance
        ## The select value is itself the flag to emit, except for the "no-refinement" placeholder.
        #if $process_type.cluster_improvements.clustering_options != "no-refinement"
            $process_type.cluster_improvements.clustering_options
        #end if
    #elif $process_type.process == "Denoising DADA2"
        --process dada2
        --sample-inference $process_type.sample_inference
    #end if

    ## OUTPUTS: dataset paths are single-quoted, like the input paths above.
    --output-biom '$output_biom'
    --output-fasta '$output_fasta'
    --html '$html'
    ## The cluster composition file is only requested for the Swarm process.
    #if $process_type.process == "Clustering Swarm"
        --output-compo '$output_cluster_compo'
    #end if
</token>
93
<!-- Input form for short-read processing: read layout and files, amplicon size bounds, primers and processing mode. The parameter names declared here are the ones read by the command-line token of this macros file. -->
<xml name="processing_short_reads_inputs">
    <!-- Files -->
    <conditional name="fastq_input">
        <param name="input_type" type="select" display="radio" label="Paired-end or Single-end reads" help="">
            <option value="paired" selected="true">Paired-end reads</option>
            <option value="single">Single-end reads or Paired reads that have already been merged.</option>
        </param>
        <when value="paired">
            <conditional name="input_files">
                <param name="file_type" type="select" label="Input" help="Sample files can be provided either as a single TAR archive or as separate files per sample (one or two files each).">
                    <option value="archive" selected="true">TAR Archive</option>
                    <option value="files_per_sample">Files per sample</option>
                </param>
                <when value="archive">
                    <param argument="--input-archive" type="data" format="tar,tgz" label="Archive file (.tar.gz)" help="The TAR file containing the short R1 R2 read pairs (.fastq.gz) for each sample. Each sample should ideally have its R1 and R2 reads named consistently (e.g., sampleX_R1.fastq.gz and sampleX_R2.fastq.gz). Supported sequencers: Illumina, Aviti and IonTorrent." />
                </when>
                <when value="files_per_sample">
                    <!-- NOTE(review): the labels below mention .fastq.gz while format is "fastq"; confirm whether gzipped fastq datatypes should also be accepted. -->
                    <repeat name="samples" title="Samples" default="1" min="1">
                        <param argument="--samples-names" type="text" label="Sample name" help="The sample name.">
                            <expand macro="restricted_sanitizer_validator"/>
                        </param>
                        <param argument="--input-R1" type="data" format="fastq" label="R1 reads (.fastq.gz)" help="R1 reads fastq file. Supported sequencers: Illumina and Aviti." />
                        <param argument="--input-R2" type="data" format="fastq" label="R2 reads (.fastq.gz)" help="R2 reads fastq file. Supported sequencers: Illumina and Aviti." />
                    </repeat>
                </when>
            </conditional>
            <!-- Paired parameters -->
            <param argument="--R1-size" type="integer" label="R1 read length" help="Please provide the maximum length of the R1 reads." />
            <param argument="--R2-size" type="integer" label="R2 read length" help="Please provide the maximum length of the R2 reads." />
            <param argument="--mismatch-rate" type="float" value="0.1" label="Mismatch rate (used for R1-R2 merging)" help="Maximum allowed mismatch rate in the overlap region between R1 and R2 reads."/>
            <conditional name="merge_software_type">
                <param argument="--merge-software" type="select" display="radio" label="Paired-end merging tool" help="Select the tool used to merge paired-end reads">
                    <!-- <option value="pear" >Pear (only for non commercial, non military use. See Pear licence)</option> -->
                    <option value="vsearch">Vsearch</option>
                    <option value="flash">Flash</option>
                </param>
                <when value="vsearch"/>
                <when value="flash">
                    <param argument="--expected-amplicon-size" type="integer" min="0" label="Expected amplicon length" help="Specify the maximum expected amplicon length (covers ~90% of amplicons)" />
                </when>
            </conditional>
            <!-- The argument carries the real command-line flag, like its siblings; the derived parameter name is still keep_unmerged. -->
            <param argument="--keep-unmerged" type="boolean" truevalue="--keep-unmerged" falsevalue="" checked="false" label="Would you like to keep unmerged reads?" help="No = unmerged reads will be removed; Yes = unmerged reads will be artificially combined with 100 N to allow further processing." />
        </when>
        <when value="single">
            <conditional name="input_files">
                <param name="file_type" type="select" label="Input" help="Sample files can be provided either as a single TAR archive or as separate files per sample (one or two files each).">
                    <option value="archive" selected="true">TAR Archive</option>
                    <option value="files_per_sample">Files per sample</option>
                </param>
                <when value="archive">
                    <param argument="--input-archive" type="data" format="tar,tgz" label="Archive file (tar format)" help="The TAR file containing the short single-end reads or merged paired-end reads (.fastq.gz) for each sample. Supported sequencers: Illumina and Aviti." />
                </when>
                <when value="files_per_sample">
                    <repeat name="samples" title="Samples" default="1" min="1">
                        <param argument="--samples-names" type="text" label="Name" help="The sample name.">
                            <expand macro="restricted_sanitizer_validator"/>
                        </param>
                        <param argument="--input-R1" type="data" format="fastq" label="The short single-end reads or merged paired-end reads (.fastq.gz)" help="Single-end short reads or merged paired-end reads (.fastq.gz). Supported sequencers: Illumina and Aviti." />
                    </repeat>
                </when>
            </conditional>
        </when>
    </conditional>

    <!-- Amplicons Parameters: value is left empty, so no default is pre-filled -->
    <param argument="--min-amplicon-size" type="integer" value="" label="Minimum amplicon length" help="The minimum length of the amplicons (including primers). For paired-end reads, subtract 10 bases to account for the minimum overlap between R1 and R2 reads."/>
    <param argument="--max-amplicon-size" type="integer" value="" label="Maximum amplicon length" help="The maximum length of the amplicons (including primers). For paired-end reads, subtract 10 bases to account for the minimum overlap between R1 and R2 reads."/>

    <!-- Primers -->
    <conditional name="is_primer_in_seq">
        <param name="primer_choice" type="select" display="radio" label="Do the sequences include PCR primers?" help="Indicate whether the sequences still include PCR primers. Select “Yes” if primers are present, “No” if they have already been removed." >
            <option value="true" selected="true">Yes</option>
            <option value="false">No</option>
        </param>
        <when value="true">
            <param argument="--five-prim-primer" type="text" label="5' primer" help="Enter the 5' primer sequence. Wildcards are allowed. The sequence must be provided in 5' → 3' orientation.">
                <expand macro="only_letter_sanitizer_validator"/>
            </param>
            <param argument="--three-prim-primer" type="text" label="3' primer" help="Enter the 3' primer sequence. Wildcards are allowed. The sequence must be provided in 5' → 3' orientation.">
                <expand macro="only_letter_sanitizer_validator"/>
            </param>
        </when>
        <when value="false"/>
    </conditional>

    <!-- Preprocessing only, clustering or denoising -->
    <conditional name="process_type">
        <param argument="--process" type="select" display="radio" label="Process type" help="Select the type of process to run">
            <option value="Preprocess only">Preprocessing only</option>
            <option value="Clustering Swarm" selected="true">Preprocessing and clustering with Swarm</option>
            <option value="Denoising DADA2">Preprocessing and denoising with DADA2</option>
        </param>
        <when value="Preprocess only"/>
        <when value="Clustering Swarm">
            <param argument="--distance" type="integer" min="1" value="1" optional="false" label="Swarm distance threshold" help="Distance threshold used by Swarm for clustering."/>
            <!-- Option values are the literal flags appended to the command line; "no-refinement" is a placeholder that emits nothing. -->
            <conditional name="cluster_improvements">
                <param name="clustering_options" type="select" display="radio" label="Clustering refinement" help="(i) With --distance = 1, use the Swarm --fastidious option to refine clustering (recommended since FROGS 3.2). (ii) With --distance > 1, enable pre-clustering to reduce redundancy before final clustering step. (iii) Select this option to apply neither refinement nor pre-clustering.">
                    <option value="--fastidious" selected="true">With --distance = 1, refine clusters with Swarm --fastidious option (recommended since FROGS 3.2)</option>
                    <option value="--pre-clustering">With --distance > 1, perform a pre-clustering step with FROGS --pre-clustering option</option>
                    <option value="no-refinement">No clustering refinement</option>
                </param>
                <when value="--fastidious"/>
                <when value="--pre-clustering"/>
                <when value="no-refinement"/>
            </conditional>
        </when>
        <when value="Denoising DADA2">
            <param argument="--sample-inference" type="select" display="radio" label="DADA2 pooling method" help="Choose how to consider sample prior to sample inference">
                <option value="pseudo-pooling" selected="true">Pseudo pooling, samples will be pseudo-pooled prior to sample inference.</option>
                <option value="independent">Independent, sample inference will be performed on each sample individually.</option>
                <option value="pooling">Full pooling, all samples will be pooled together prior to sample inference.</option>
            </param>
        </when>
    </conditional>
</xml>
209
210 <!-- Test swarm -->
<xml name="swarm_processing_short_reads_test_input">
    <!-- Read layout and input archive (paired-end, TAR archive) -->
    <conditional name="fastq_input">
        <param name="input_type" value="paired"/>
        <conditional name="input_files">
            <param name="file_type" value="archive"/>
            <param name="input_archive" value="input/test_dataset.tar.gz" ftype="tgz"/>
        </conditional>
        <!-- Read lengths and mismatch rate used for merging -->
        <param name="R1_size" value="266"/>
        <param name="R2_size" value="267"/>
        <param name="mismatch_rate" value="0.15"/>
    </conditional>

    <!-- Amplicon size bounds -->
    <param name="min_amplicon_size" value="44"/>
    <param name="max_amplicon_size" value="490"/>

    <!-- Primers are present in the reads -->
    <conditional name="is_primer_in_seq">
        <param name="primer_choice" value="true"/>
        <param name="five_prim_primer" value="GGCGVACGGGTGAGTAA"/>
        <param name="three_prim_primer" value="GTGCCAGCNGCNGCGG"/>
    </conditional>

    <!-- Processing mode: Swarm clustering, other Swarm settings left at their defaults -->
    <conditional name="process_type">
        <param name="process" value="Clustering Swarm"/>
    </conditional>
</xml>
241
<xml name="swarm_processing_short_reads_test_output">
    <!-- Expected outputs of the Swarm test: BIOM compared by size with zero tolerance, the others by exact diff. -->
    <output name="output_biom"
            file="references/01-reads_processing-swarm-vsearch.biom"
            compare="sim_size"
            delta="0"/>
    <output name="output_fasta"
            file="references/01-reads_processing-swarm-vsearch.fasta"
            compare="diff"
            lines_diff="0"/>
    <output name="output_cluster_compo"
            file="references/01-reads_processing-swarm-vsearch_compo.tsv"
            compare="diff"
            lines_diff="0"/>
    <output name="html"
            file="references/01-reads_processing-swarm-vsearch.html"
            compare="diff"
            lines_diff="0"/>
</xml>
248
249 <!-- Test dada2 -->
<xml name="dada2_processing_short_reads_test_input">
    <!-- Read layout and input archive (paired-end, TAR archive) -->
    <conditional name="fastq_input">
        <param name="input_type" value="paired"/>
        <conditional name="input_files">
            <param name="file_type" value="archive"/>
            <param name="input_archive" value="input/verysmallITS.tar.gz" ftype="tgz"/>
        </conditional>
        <!-- Read lengths; unmerged pairs are kept in this test -->
        <param name="R1_size" value="300"/>
        <param name="R2_size" value="300"/>
        <param name="keep_unmerged" value="true"/>
    </conditional>

    <!-- Amplicon size bounds -->
    <param name="min_amplicon_size" value="50"/>
    <param name="max_amplicon_size" value="1000"/>

    <!-- Primers are present in the reads -->
    <conditional name="is_primer_in_seq">
        <param name="primer_choice" value="true"/>
        <param name="five_prim_primer" value="TAGACTCGTCAHCGATGAAGAACGYRG"/>
        <param name="three_prim_primer" value="GCATATCAATAAGCGSAGGAA"/>
    </conditional>

    <!-- Processing mode: DADA2 denoising, pooling method left at its default -->
    <conditional name="process_type">
        <param name="process" value="Denoising DADA2"/>
    </conditional>
</xml>
280
<xml name="dada2_processing_short_reads_test_output">
    <!-- Expected outputs of the DADA2 test: BIOM compared by size with zero tolerance, the others by exact diff. No cluster composition output is checked here. -->
    <output name="output_biom"
            file="references/01-reads_processing-dada2-clusters.biom"
            compare="sim_size"
            delta="0"/>
    <output name="output_fasta"
            file="references/01-reads_processing-dada2-clusters.fasta"
            compare="diff"
            lines_diff="0"/>
    <output name="html"
            file="references/01-reads_processing-dada2.html"
            compare="diff"
            lines_diff="0"/>
</xml>
286 </macros>