comparison mgnify_seqprep.xml @ 0:76ea9d4604bc draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mgnify_seqprep commit fd696b8f2ce44287b6ad19fe52277cfdbd7e94fb
author bgruening
date Tue, 14 May 2024 09:49:32 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:76ea9d4604bc
1 <tool id="mgnify_seqprep" name="Merging paired-end Illumina reads (SeqPrep, modified for use with MGnify pipelines)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
2 <description>Merge and Trim Adapter Sequences from Paired-End Illumina Reads</description>
3 <macros>
4 <import>macros.xml</import> <!-- presumably defines the @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens and the macros expanded in this file; verify against macros.xml -->
5 </macros>
6 <expand macro="biotools"/>
7 <expand macro="requirements"/>
8 <expand macro="creators"/>
9 <command detect_errors="exit_code"><![CDATA[
10 SeqPrep
11 -f '${input1}'
12 -r '${input2}'
13 -1 '${output1}'
14 -2 '${output2}'
15 ## The merged-reads file (-s) is only requested when merging is enabled.
16 #if $merge_reads
17 -s '${merged}'
18 #end if
19 ## Optional numeric values are tested with str(...) != '' so that an explicit 0 or 0.0 is still passed on.
20 ## General Arguments (boolean switches are emitted as bare flags) ##
21 #if $general_options.first_read_discarded
22 -3 '${general_options.first_read_discarded}'
23 #end if
24 #if $general_options.second_read_discarded
25 -4 '${general_options.second_read_discarded}'
26 #end if
27 #if $general_options.phred64
28 -6
29 #end if
30 #if str($general_options.quality_cutoff) != ''
31 -q '${general_options.quality_cutoff}'
32 #end if
33 #if str($general_options.min_length) != ''
34 -L '${general_options.min_length}'
35 #end if
36
37 ## Additional Adapter/Primer Trimming Arguments ##
38 #if $trimming_options.adapter_a
39 -A '${trimming_options.adapter_a}'
40 #end if
41 #if $trimming_options.adapter_b
42 -B '${trimming_options.adapter_b}'
43 #end if
44 #if str($trimming_options.adapter_overlap) != ''
45 -O '${trimming_options.adapter_overlap}'
46 #end if
47 #if str($trimming_options.max_mismatch_fraction) != ''
48 -M '${trimming_options.max_mismatch_fraction}'
49 #end if
50 #if str($trimming_options.min_match_fraction) != ''
51 -N '${trimming_options.min_match_fraction}'
52 #end if
53 #if str($trimming_options.adapter_bandwidth) != ''
54 -b '${trimming_options.adapter_bandwidth}'
55 #end if
56 #if str($trimming_options.gap_open) != ''
57 -Q '${trimming_options.gap_open}'
58 #end if
59 #if str($trimming_options.gap_extend) != ''
60 -t '${trimming_options.gap_extend}'
61 #end if
62 #if str($trimming_options.gap_end) != ''
63 -e '${trimming_options.gap_end}'
64 #end if
65 #if str($trimming_options.local_alignment_score) != ''
66 -Z '${trimming_options.local_alignment_score}'
67 #end if
68 #if str($trimming_options.read_alignment_bandwidth) != ''
69 -w '${trimming_options.read_alignment_bandwidth}'
70 #end if
71 #if str($trimming_options.read_alignment_gap_open) != ''
72 -W '${trimming_options.read_alignment_gap_open}'
73 #end if
74 #if str($trimming_options.read_alignment_gap_extend) != ''
75 -p '${trimming_options.read_alignment_gap_extend}'
76 #end if
77 #if str($trimming_options.read_alignment_gap_end) != ''
78 -P '${trimming_options.read_alignment_gap_end}'
79 #end if
80 #if str($trimming_options.read_alignment_max_gap_fraction) != ''
81 -X '${trimming_options.read_alignment_max_gap_fraction}'
82 #end if
83
84 ## Additional Arguments for Merging ##
85 #if $merging_options.maximum_quality_score
86 -y '${merging_options.maximum_quality_score}'
87 #end if
88 #if $merging_options.print_overhang
89 -g
90 #end if
91 #if str($merging_options.min_base_pair_overlap) != ''
92 -o '${merging_options.min_base_pair_overlap}'
93 #end if
94 #if str($merging_options.max_mismatch_fraction) != ''
95 -m '${merging_options.max_mismatch_fraction}'
96 #end if
97 #if str($merging_options.min_match_fraction) != ''
98 -n '${merging_options.min_match_fraction}'
99 #end if
100 ]]></command>
101 <inputs>
102 <param name="input1" type="data" format="fastq,fastq.gz" label="First Read Input" help="Select the FASTQ file (plain or gzip-compressed) containing the first set of paired-end reads." />
103 <param name="input2" type="data" format="fastq,fastq.gz" label="Second Read Input" help="Select the FASTQ file (plain or gzip-compressed) containing the second set of paired-end reads." />
104 <param name="merge_reads" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Merge Reads" help="Enable this to merge overlapping reads from the provided paired-end FASTQ files." />
105
106 <!-- General arguments: discarded-read file names, input quality encoding, quality cutoff and minimum read length -->
107 <section name="general_options" title="General Arguments (Optional)" expanded="false">
108 <param name="first_read_discarded" argument="-3" type="text" optional="true" label="First Read Discarded FASTQ Filename" help="first read discarded fastq filename" />
109 <param name="second_read_discarded" argument="-4" type="text" optional="true" label="Second Read Discarded FASTQ Filename" help="second read discarded fastq filename" />
110 <param name="phred64" argument="-6" type="boolean" truevalue="-6" falsevalue="" checked="false" label="Input Sequence is in Phred+64 Format" help="Input sequence is in phred+64 rather than phred+33 format, the output will still be phred+33"/>
111 <param name="quality_cutoff" argument="-q" type="integer" optional="true" value="13" label="Quality Score Cutoff" help="Quality score cutoff for mismatches to be counted in overlap" />
112 <param name="min_length" argument="-L" type="integer" optional="true" value="30" label="Minimum Length of Reads" help="Minimum length of a trimmed or merged read to print it" />
113 </section>
114
115 <!-- Adapter/primer trimming arguments: adapter sequences plus the adapter and read alignment scoring settings -->
116 <section name="trimming_options" title="Additional Adapter/Primer Trimming Arguments" expanded="false">
117 <param name="adapter_a" argument="-A" label="Adapter Sequence A" type="text" optional="true" value="AGATCGGAAGAGCGGTTCAG" help="Forward read primer/adapter sequence to trim as it would appear at the end of a read" />
118 <param name="adapter_b" argument="-B" label="Adapter Sequence B" type="text" optional="true" value="AGATCGGAAGAGCGTCGTGT" help="Reverse read primer/adapter sequence to trim as it would appear at the end of a read" />
119 <param name="adapter_overlap" argument="-O" label="Minimum Overall Base Pair Overlap with Adapter" type="integer" value="10" optional="true" help="minimum overall base pair overlap with adapter sequence to trim" />
120 <param name="max_mismatch_fraction" argument="-M" label="Maximum Fraction of Good Quality Mismatching Bases" type="float" value="0.02" optional="true" help="maximum fraction of good quality mismatching bases for primer/adapter overlap" />
121 <param name="min_match_fraction" argument="-N" label="Minimum Fraction of Matching Bases" type="float" value="0.87" optional="true" help="minimum fraction of matching bases for primer/adapter overlap" />
122 <param name="adapter_bandwidth" argument="-b" label="Adapter Alignment Band-width" type="integer" value="50" optional="true" />
123 <param name="gap_open" argument="-Q" label="Adapter Alignment Gap-Open" type="integer" value="8" optional="true" />
124 <param name="gap_extend" argument="-t" label="Adapter Alignment Gap-Extension" type="integer" value="2" optional="true" />
125 <param name="gap_end" argument="-e" label="Adapter Alignment Gap-End" type="integer" value="2" optional="true" />
126 <param name="local_alignment_score" argument="-Z" label="Minimum Local Alignment Score Cutoff" type="integer" value="26" optional="true" help="Adapter alignment minimum local alignment score cutoff [roughly (2*num_hits) - (num_gaps*gap_open) - (num_gaps*gap_close) - (gap_len*gap_extend) - (2*num_mismatches)]" />
127 <param name="read_alignment_bandwidth" argument="-w" label="Read Alignment Band-width" type="integer" value="50" optional="true" />
128 <param name="read_alignment_gap_open" argument="-W" label="Read Alignment Gap-Open" type="integer" value="26" optional="true" />
129 <param name="read_alignment_gap_extend" argument="-p" label="Read Alignment Gap-Extension" type="integer" value="9" optional="true" />
130 <param name="read_alignment_gap_end" argument="-P" label="Read Alignment Gap-End" type="integer" value="5" optional="true" />
131 <param name="read_alignment_max_gap_fraction" argument="-X" label="Read Alignment Maximum Fraction Gap Cutoff" type="float" value="0.125" optional="true" help="Read alignment maximum fraction gap cutoff" />
132 </section>
133
134 <!-- Merging arguments: output quality cap, overhang printing and the overlap thresholds used to merge read pairs -->
135 <section name="merging_options" title="Optional Arguments for Merging" expanded="false">
136 <param name="maximum_quality_score" argument="-y" label="Maximum Quality Score in Output" type="text" optional="true" help="Maximum quality score in output (phred 33), default = ']'"/>
137 <param name="print_overhang" argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="false" label="Print Overhang When Adapters Are Present and Stripped" help="Use this if reads are different lengths"/>
138 <param name="min_base_pair_overlap" argument="-o" type="integer" optional="true" value="15" label="Minimum Overall Base Pair Overlap" help="Minimum overall base pair overlap to merge two reads"/>
139 <param name="max_mismatch_fraction" argument="-m" type="float" optional="true" value="0.02" label="Maximum Fraction of Good Quality Mismatching Bases" help="Maximum fraction of good quality mismatching bases to overlap reads"/>
140 <param name="min_match_fraction" argument="-n" type="float" optional="true" value="0.9" label="Minimum Fraction of Matching Bases" help="Minimum fraction of matching bases to overlap reads"/>
141 </section>
142 </inputs>
143 <outputs>
144 <data format="fastq.gz" name="output1" label="${tool.name} on ${on_string}: First Read Output">
145 <!-- Always created: the command passes this dataset to SeqPrep unconditionally, so no filter is applied. -->
146 </data>
147 <data format="fastq.gz" name="output2" label="${tool.name} on ${on_string}: Second Read Output">
148 <!-- Always created: the command passes this dataset to SeqPrep unconditionally, so no filter is applied. -->
149 </data>
150 <data format="fastq.gz" name="merged" label="${tool.name} on ${on_string}: Merged Reads">
151 <filter>merge_reads</filter>
152 </data>
153 </outputs>
154 <tests>
155 <!-- Test #1: merging enabled with the listed parameters set explicitly to their declared default values; three outputs expected -->
156 <test expect_num_outputs="3">
157 <param name="input1" value="input1.fq" />
158 <param name="input2" value="input2.fq" />
159 <param name="merge_reads" value="true" />
160
161 <!-- General Arguments: quality cutoff and minimum read length at their default values -->
162 <section name="general_options" >
163 <param name="quality_cutoff" value="13" />
164 <param name="min_length" value="30" />
165 </section>
166
167 <!-- Adapter/Primer Trimming Arguments: every parameter of the section at its default value -->
168 <section name="trimming_options">
169 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" />
170 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" />
171 <param name="adapter_overlap" value="10" />
172 <param name="max_mismatch_fraction" value="0.02" />
173 <param name="min_match_fraction" value="0.87" />
174 <param name="adapter_bandwidth" value="50" />
175 <param name="gap_open" value="8" />
176 <param name="gap_extend" value="2" />
177 <param name="gap_end" value="2" />
178 <param name="local_alignment_score" value="26" />
179 <param name="read_alignment_bandwidth" value="50" />
180 <param name="read_alignment_gap_open" value="26" />
181 <param name="read_alignment_gap_extend" value="9" />
182 <param name="read_alignment_gap_end" value="5" />
183 <param name="read_alignment_max_gap_fraction" value="0.125" />
184 </section>
185 <output name="output1" file="output1.fq.gz" />
186 <output name="output2" file="output2.fq.gz" />
187 <output name="merged" file="merged_output.fq.gz" />
188 </test>
189
190 <!-- Test #2: same parameter values as test #1 but with merging disabled, so only the two unmerged read outputs are expected -->
191 <test expect_num_outputs="2">
192 <param name="input1" value="input1.fq" />
193 <param name="input2" value="input2.fq" />
194 <param name="merge_reads" value="false" />
195
196 <!-- General Arguments: quality cutoff and minimum read length at their default values -->
197 <section name="general_options" >
198 <param name="quality_cutoff" value="13" />
199 <param name="min_length" value="30" />
200 </section>
201
202 <!-- Adapter/Primer Trimming Arguments: every parameter of the section at its default value -->
203 <section name="trimming_options">
204 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" />
205 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" />
206 <param name="adapter_overlap" value="10" />
207 <param name="max_mismatch_fraction" value="0.02" />
208 <param name="min_match_fraction" value="0.87" />
209 <param name="adapter_bandwidth" value="50" />
210 <param name="gap_open" value="8" />
211 <param name="gap_extend" value="2" />
212 <param name="gap_end" value="2" />
213 <param name="local_alignment_score" value="26" />
214 <param name="read_alignment_bandwidth" value="50" />
215 <param name="read_alignment_gap_open" value="26" />
216 <param name="read_alignment_gap_extend" value="9" />
217 <param name="read_alignment_gap_end" value="5" />
218 <param name="read_alignment_max_gap_fraction" value="0.125" />
219 </section>
220 <output name="output1" file="outputNoMerge1.fq.gz" />
221 <output name="output2" file="outputNoMerge2.fq.gz" />
222 </test>
223 <!-- Test #3: empty input files with merging enabled and default parameter values; three outputs are still expected -->
224 <test expect_num_outputs="3">
225 <param name="input1" value="empty1.fq" />
226 <param name="input2" value="empty2.fq" />
227 <param name="merge_reads" value="true" />
228
229 <!-- General Arguments: quality cutoff and minimum read length at their default values -->
230 <section name="general_options" >
231 <param name="quality_cutoff" value="13" />
232 <param name="min_length" value="30" />
233 </section>
234
235 <!-- Adapter/Primer Trimming Arguments: every parameter of the section at its default value -->
236 <section name="trimming_options">
237 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" />
238 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" />
239 <param name="adapter_overlap" value="10" />
240 <param name="max_mismatch_fraction" value="0.02" />
241 <param name="min_match_fraction" value="0.87" />
242 <param name="adapter_bandwidth" value="50" />
243 <param name="gap_open" value="8" />
244 <param name="gap_extend" value="2" />
245 <param name="gap_end" value="2" />
246 <param name="local_alignment_score" value="26" />
247 <param name="read_alignment_bandwidth" value="50" />
248 <param name="read_alignment_gap_open" value="26" />
249 <param name="read_alignment_gap_extend" value="9" />
250 <param name="read_alignment_gap_end" value="5" />
251 <param name="read_alignment_max_gap_fraction" value="0.125" />
252 </section>
253 <output name="output1" file="empty_output1.fq.gz" />
254 <output name="output2" file="empty_output2.fq.gz" />
255 <output name="merged" file="empty_merged_output.fq.gz" />
256 </test>
257
258 <!-- Advanced Functional Tests. Test #4: non-default General Arguments (quality cutoff 15, minimum length 25) without merging -->
259 <test expect_num_outputs="2">
260 <param name="input1" value="input1.fq" />
261 <param name="input2" value="input2.fq" />
262 <param name="merge_reads" value="false" />
263 <section name="general_options">
264 <param name="quality_cutoff" value="15" />
265 <param name="min_length" value="25" />
266 </section>
267 <output name="output1" file="output1_general_args.fq.gz" />
268 <output name="output2" file="output2_general_args.fq.gz" />
269 </test>
270
271 <!-- Test #5: non-default Adapter/Primer Trimming Arguments (custom adapters and adapter alignment settings) without merging -->
272 <test expect_num_outputs="2">
273 <param name="input1" value="input1.fq" />
274 <param name="input2" value="input2.fq" />
275 <param name="merge_reads" value="false" />
276 <section name="trimming_options">
277 <param name="adapter_a" value="ACTGACTG" />
278 <param name="adapter_b" value="GTGACTGA" />
279 <param name="adapter_overlap" value="12" />
280 <param name="max_mismatch_fraction" value="0.03" />
281 <param name="min_match_fraction" value="0.85" />
282 <param name="adapter_bandwidth" value="55" />
283 <param name="gap_open" value="10" />
284 <param name="gap_extend" value="3" />
285 <param name="gap_end" value="3" />
286 <param name="local_alignment_score" value="28" />
287 </section>
288 <output name="output1" file="output1_adapter_trim.fq.gz" />
289 <output name="output2" file="output2_adapter_trim.fq.gz" />
290 </test>
291 <!-- Test #6: gzipped input files with merging enabled and all other parameters left at their defaults; three outputs expected -->
292 <test expect_num_outputs="3">
293 <param name="input1" value="input1.fastq.gz" />
294 <param name="input2" value="input2.fastq.gz" />
295 <param name="merge_reads" value="true" />
296 <output name="output1" file="output1_from_gzipped.fq.gz" />
297 <output name="output2" file="output2_from_gzipped.fq.gz" />
298 <output name="merged" file="merged_output_from_gzipped.fq.gz" />
299 </test>
300 </tests>
301 <help><![CDATA[
302 .. class:: warningmark
303
304 **Caution**
305 -----------
306 ::
307
308 This is a modified version of the SeqPrep 1.2 release, made for use with the MGnify pipeline.
309
310 Difference in `utils.h`:
311
312 ::
313
314 -#define MAX_SEQ_LEN (256)
315 +#define MAX_SEQ_LEN (1024)
316
317 **SeqPrep**
318 -----------
319 ::
320
321 SeqPrep is a versatile tool designed for merging overlapping paired-end Illumina reads into a single, longer read.
322 Additionally, it can trim adapter sequences from reads, making it a useful tool for preprocessing Illumina sequencing data.
323
324 **Usage**
325 =========
326 ::
327
328 To utilize SeqPrep, start by selecting your input FASTQ files: one for the first set of reads and another for the second set.
329 SeqPrep provides several options to customize your data processing:
330
331 - Adapter Sequences: You can provide specific sequences for adapter trimming if they are known. SeqPrep will remove these sequences from the reads.
332 - Quality Score Cutoff: Set the quality score cutoff that determines which mismatches are counted when read overlaps are evaluated.
333 - Minimum Read Length: Define the minimum length for reads to be retained after trimming. Reads shorter than this length will be discarded.
334
335 If the merging feature is enabled, SeqPrep will combine overlapping reads into longer sequences, thereby enhancing the data quality for downstream analysis.
336
337 **Outputs**
338 ===========
339 ::
340
341 SeqPrep generates outputs in gzipped FASTQ format.
342
343 See more details on `SeqPrep GitHub repository <https://github.com/jstjohn/SeqPrep>`_.
344
345 ]]></help>
346 <expand macro="citations"/>
347 </tool>