Mercurial > repos > bgruening > mgnify_seqprep
comparison mgnify_seqprep.xml @ 0:76ea9d4604bc draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mgnify_seqprep commit fd696b8f2ce44287b6ad19fe52277cfdbd7e94fb
author | bgruening |
---|---|
date | Tue, 14 May 2024 09:49:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:76ea9d4604bc |
---|---|
1 <tool id="mgnify_seqprep" name="Merging paired-end Illumina reads (SeqPrep, modified for use with MGnify pipelines)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> | |
2 <description>Merge and Trim Adapter Sequences from Paired-End Illumina Reads</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="biotools"/> | |
7 <expand macro="requirements"/> | |
8 <expand macro="creators"/> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 SeqPrep | |
11 -f '${input1}' | |
12 -r '${input2}' | |
13 -1 '${output1}' | |
14 -2 '${output2}' | |
15 ## The merged-read file (-s) is only requested when "Merge Reads" is enabled. | |
16 #if $merge_reads | |
17 -s '${merged}' | |
18 #end if | |
19 ## -6 and -g are bare switches (no value); numeric options are tested with str(...) != '' so an explicit 0 is still passed. | |
20 ## General Arguments ## | |
21 #if $general_options.first_read_discarded | |
22 -3 '${general_options.first_read_discarded}' | |
23 #end if | |
24 #if $general_options.second_read_discarded | |
25 -4 '${general_options.second_read_discarded}' | |
26 #end if | |
27 #if $general_options.phred64 | |
28 -6 | |
29 #end if | |
30 #if str($general_options.quality_cutoff) != '' | |
31 -q '${general_options.quality_cutoff}' | |
32 #end if | |
33 #if str($general_options.min_length) != '' | |
34 -L '${general_options.min_length}' | |
35 #end if | |
36 | |
37 ## Additional Adapter/Primer Trimming Arguments ## | |
38 #if $trimming_options.adapter_a | |
39 -A '${trimming_options.adapter_a}' | |
40 #end if | |
41 #if $trimming_options.adapter_b | |
42 -B '${trimming_options.adapter_b}' | |
43 #end if | |
44 #if str($trimming_options.adapter_overlap) != '' | |
45 -O '${trimming_options.adapter_overlap}' | |
46 #end if | |
47 #if str($trimming_options.max_mismatch_fraction) != '' | |
48 -M '${trimming_options.max_mismatch_fraction}' | |
49 #end if | |
50 #if str($trimming_options.min_match_fraction) != '' | |
51 -N '${trimming_options.min_match_fraction}' | |
52 #end if | |
53 #if str($trimming_options.adapter_bandwidth) != '' | |
54 -b '${trimming_options.adapter_bandwidth}' | |
55 #end if | |
56 #if str($trimming_options.gap_open) != '' | |
57 -Q '${trimming_options.gap_open}' | |
58 #end if | |
59 #if str($trimming_options.gap_extend) != '' | |
60 -t '${trimming_options.gap_extend}' | |
61 #end if | |
62 #if str($trimming_options.gap_end) != '' | |
63 -e '${trimming_options.gap_end}' | |
64 #end if | |
65 #if str($trimming_options.local_alignment_score) != '' | |
66 -Z '${trimming_options.local_alignment_score}' | |
67 #end if | |
68 #if str($trimming_options.read_alignment_bandwidth) != '' | |
69 -w '${trimming_options.read_alignment_bandwidth}' | |
70 #end if | |
71 #if str($trimming_options.read_alignment_gap_open) != '' | |
72 -W '${trimming_options.read_alignment_gap_open}' | |
73 #end if | |
74 #if str($trimming_options.read_alignment_gap_extend) != '' | |
75 -p '${trimming_options.read_alignment_gap_extend}' | |
76 #end if | |
77 #if str($trimming_options.read_alignment_gap_end) != '' | |
78 -P '${trimming_options.read_alignment_gap_end}' | |
79 #end if | |
80 #if str($trimming_options.read_alignment_max_gap_fraction) != '' | |
81 -X '${trimming_options.read_alignment_max_gap_fraction}' | |
82 #end if | |
83 | |
84 ## Additional Arguments for Merging ## | |
85 #if $merging_options.maximum_quality_score | |
86 -y '${merging_options.maximum_quality_score}' | |
87 #end if | |
88 #if $merging_options.print_overhang | |
89 -g | |
90 #end if | |
91 #if str($merging_options.min_base_pair_overlap) != '' | |
92 -o '${merging_options.min_base_pair_overlap}' | |
93 #end if | |
94 #if str($merging_options.max_mismatch_fraction) != '' | |
95 -m '${merging_options.max_mismatch_fraction}' | |
96 #end if | |
97 #if str($merging_options.min_match_fraction) != '' | |
98 -n '${merging_options.min_match_fraction}' | |
99 #end if | |
100 ]]></command> | |
101 <inputs> | |
102 <param name="input1" type="data" format="fastq,fastq.gz" label="First Read Input" help="Select the FASTQ file containing the first set of paired-end reads." /> | |
103 <param name="input2" type="data" format="fastq,fastq.gz" label="Second Read Input" help="Select the FASTQ file containing the second set of paired-end reads." /> | |
104 <param name="merge_reads" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Merge Reads" help="Enable this to merge overlapping reads from the provided paired-end FASTQ files." /> | |
105 <!-- The two read inputs above accept plain or gzipped FASTQ; a gzipped pair is exercised by the last test. --> | |
106 <!-- General options: discarded-read filenames (-3/-4), Phred+64 input switch (-6), quality cutoff (-q) and minimum read length (-L) --> | |
107 <section name="general_options" title="General Arguments (Optional)" expanded="false"> | |
108 <param name="first_read_discarded" argument="-3" type="text" optional="true" label="First Read Discarded FASTQ Filename" help="first read discarded fastq filename" /> | |
109 <param name="second_read_discarded" argument="-4" type="text" optional="true" label="Second Read Discarded FASTQ Filename" help="second read discarded fastq filename" /> | |
110 <param name="phred64" argument="-6" type="boolean" truevalue="-6" falsevalue="" checked="false" label="Input Sequence is in Phred+64 Format" help="Input sequence is in phred+64 rather than phred+33 format, the output will still be phred+33"/> | |
111 <param name="quality_cutoff" argument="-q" type="integer" optional="true" value="13" label="Quality Score Cutoff" help="Quality score cutoff for mismatches to be counted in overlap" /> | |
112 <param name="min_length" argument="-L" type="integer" optional="true" value="30" label="Minimum Length of Reads" help="Minimum length of a trimmed or merged read to print it" /> | |
113 </section> | |
114 | |
115 <!-- Adapter/primer trimming options: adapter sequences (-A/-B), adapter alignment settings (-O -M -N -b -Q -t -e -Z) and read alignment settings (-w -W -p -P -X) --> | |
116 <section name="trimming_options" title="Additional Adapter/Primer Trimming Arguments" expanded="false"> | |
117 <param name="adapter_a" argument="-A" label="Adapter Sequence A" type="text" optional="true" value="AGATCGGAAGAGCGGTTCAG" help="Forward read primer/adapter sequence to trim as it would appear at the end of a read" /> | |
118 <param name="adapter_b" argument="-B" label="Adapter Sequence B" type="text" optional="true" value="AGATCGGAAGAGCGTCGTGT" help="Reverse read primer/adapter sequence to trim as it would appear at the end of a read" /> | |
119 <param name="adapter_overlap" argument="-O" label="Minimum Overall Base Pair Overlap with Adapter" type="integer" value="10" optional="true" help="minimum overall base pair overlap with adapter sequence to trim" /> | |
120 <param name="max_mismatch_fraction" argument="-M" label="Maximum Fraction of Good Quality Mismatching Bases" type="float" value="0.02" optional="true" help="maximum fraction of good quality mismatching bases for primer/adapter overlap" /> | |
121 <param name="min_match_fraction" argument="-N" label="Minimum Fraction of Matching Bases" type="float" value="0.87" optional="true" help="minimum fraction of matching bases for primer/adapter overlap" /> | |
122 <param name="adapter_bandwidth" argument="-b" label="Adapter Alignment Band-width" type="integer" value="50" optional="true" /> | |
123 <param name="gap_open" argument="-Q" label="Adapter Alignment Gap-Open" type="integer" value="8" optional="true" /> | |
124 <param name="gap_extend" argument="-t" label="Adapter Alignment Gap-Extension" type="integer" value="2" optional="true" /> | |
125 <param name="gap_end" argument="-e" label="Adapter Alignment Gap-End" type="integer" value="2" optional="true" /> | |
126 <param name="local_alignment_score" argument="-Z" label="Minimum Local Alignment Score Cutoff" type="integer" value="26" optional="true" help="Adapter alignment minimum local alignment score cutoff [roughly (2*num_hits) - (num_gaps*gap_open) - (num_gaps*gap_close) - (gap_len*gap_extend) - (2*num_mismatches)]" /> | |
127 <param name="read_alignment_bandwidth" argument="-w" label="Read Alignment Band-width" type="integer" value="50" optional="true" /> | |
128 <param name="read_alignment_gap_open" argument="-W" label="Read Alignment Gap-Open" type="integer" value="26" optional="true" /> | |
129 <param name="read_alignment_gap_extend" argument="-p" label="Read Alignment Gap-Extension" type="integer" value="9" optional="true" /> | |
130 <param name="read_alignment_gap_end" argument="-P" label="Read Alignment Gap-End" type="integer" value="5" optional="true" /> | |
131 <param name="read_alignment_max_gap_fraction" argument="-X" label="Read Alignment Maximum Fraction Gap Cutoff" type="float" value="0.125" optional="true" help="read alignment maximum fraction gap cutoff" /> | |
132 </section> | |
133 | |
134 <!-- Merging options: maximum output quality character (-y), overhang switch (-g), minimum overlap (-o) and mismatch/match fractions (-m/-n) --> | |
135 <section name="merging_options" title="Optional Arguments for Merging" expanded="false"> | |
136 <param name="maximum_quality_score" argument="-y" label="Maximum Quality Score in Output" type="text" optional="true" help="Maximum quality score in output (phred 33), default = ']'"/> | |
137 <param name="print_overhang" argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="false" label="Print Overhang When Adapters Are Present and Stripped" help="Use this if reads are different lengths"/> | |
138 <param name="min_base_pair_overlap" argument="-o" type="integer" optional="true" value="15" label="Minimum Overall Base Pair Overlap" help="Minimum overall base pair overlap to merge two reads"/> | |
139 <param name="max_mismatch_fraction" argument="-m" type="float" optional="true" value="0.02" label="Maximum Fraction of Good Quality Mismatching Bases" help="Maximum fraction of good quality mismatching bases to overlap reads"/> | |
140 <param name="min_match_fraction" argument="-n" type="float" optional="true" value="0.9" label="Minimum Fraction of Matching Bases" help="Minimum fraction of matching bases to overlap reads"/> | |
141 </section> | |
142 </inputs> | |
143 <outputs> | |
144 <!-- output1/output2 are always requested (-1/-2 are passed unconditionally), so they carry no filter. --> | |
145 <data format="fastq.gz" name="output1" label="${tool.name} on ${on_string}: First Read Output"/> | |
146 <data format="fastq.gz" name="output2" label="${tool.name} on ${on_string}: Second Read Output"/> | |
147 <!-- The merged file is only created when the "Merge Reads" input is enabled. --> | |
150 <data format="fastq.gz" name="merged" label="${tool.name} on ${on_string}: Merged Reads"> | |
151 <filter>merge_reads</filter> | |
152 </data> | |
153 </outputs> | |
154 <tests> | |
155 <!-- Test #1: default parameter values with merging enabled; expects both read outputs plus the merged output --> | |
156 <test expect_num_outputs="3"> | |
157 <param name="input1" value="input1.fq" /> | |
158 <param name="input2" value="input2.fq" /> | |
159 <param name="merge_reads" value="true" /> | |
160 | |
161 <!-- Section for General Arguments --> | |
162 <section name="general_options" > | |
163 <param name="quality_cutoff" value="13" /> | |
164 <param name="min_length" value="30" /> | |
165 </section> | |
166 | |
167 <!-- Section for Additional Adapter/Primer Trimming Arguments --> | |
168 <section name="trimming_options"> | |
169 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" /> | |
170 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" /> | |
171 <param name="adapter_overlap" value="10" /> | |
172 <param name="max_mismatch_fraction" value="0.02" /> | |
173 <param name="min_match_fraction" value="0.87" /> | |
174 <param name="adapter_bandwidth" value="50" /> | |
175 <param name="gap_open" value="8" /> | |
176 <param name="gap_extend" value="2" /> | |
177 <param name="gap_end" value="2" /> | |
178 <param name="local_alignment_score" value="26" /> | |
179 <param name="read_alignment_bandwidth" value="50" /> | |
180 <param name="read_alignment_gap_open" value="26" /> | |
181 <param name="read_alignment_gap_extend" value="9" /> | |
182 <param name="read_alignment_gap_end" value="5" /> | |
183 <param name="read_alignment_max_gap_fraction" value="0.125" /> | |
184 </section> | |
185 <output name="output1" file="output1.fq.gz" /> | |
186 <output name="output2" file="output2.fq.gz" /> | |
187 <output name="merged" file="merged_output.fq.gz" /> | |
188 </test> | |
189 | |
190 <!-- Test #2: default parameter values with merging disabled; only the two read outputs are expected --> | |
191 <test expect_num_outputs="2"> | |
192 <param name="input1" value="input1.fq" /> | |
193 <param name="input2" value="input2.fq" /> | |
194 <param name="merge_reads" value="false" /> | |
195 | |
196 <!-- Section for General Arguments --> | |
197 <section name="general_options" > | |
198 <param name="quality_cutoff" value="13" /> | |
199 <param name="min_length" value="30" /> | |
200 </section> | |
201 | |
202 <!-- Section for Additional Adapter/Primer Trimming Arguments --> | |
203 <section name="trimming_options"> | |
204 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" /> | |
205 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" /> | |
206 <param name="adapter_overlap" value="10" /> | |
207 <param name="max_mismatch_fraction" value="0.02" /> | |
208 <param name="min_match_fraction" value="0.87" /> | |
209 <param name="adapter_bandwidth" value="50" /> | |
210 <param name="gap_open" value="8" /> | |
211 <param name="gap_extend" value="2" /> | |
212 <param name="gap_end" value="2" /> | |
213 <param name="local_alignment_score" value="26" /> | |
214 <param name="read_alignment_bandwidth" value="50" /> | |
215 <param name="read_alignment_gap_open" value="26" /> | |
216 <param name="read_alignment_gap_extend" value="9" /> | |
217 <param name="read_alignment_gap_end" value="5" /> | |
218 <param name="read_alignment_max_gap_fraction" value="0.125" /> | |
219 </section> | |
220 <output name="output1" file="outputNoMerge1.fq.gz" /> | |
221 <output name="output2" file="outputNoMerge2.fq.gz" /> | |
222 </test> | |
223 <!-- Test #3: empty input files with merging enabled; all three outputs are still expected --> | |
224 <test expect_num_outputs="3"> | |
225 <param name="input1" value="empty1.fq" /> | |
226 <param name="input2" value="empty2.fq" /> | |
227 <param name="merge_reads" value="true" /> | |
228 | |
229 <!-- Section for General Arguments --> | |
230 <section name="general_options" > | |
231 <param name="quality_cutoff" value="13" /> | |
232 <param name="min_length" value="30" /> | |
233 </section> | |
234 | |
235 <!-- Section for Additional Adapter/Primer Trimming Arguments --> | |
236 <section name="trimming_options"> | |
237 <param name="adapter_a" value="AGATCGGAAGAGCGGTTCAG" /> | |
238 <param name="adapter_b" value="AGATCGGAAGAGCGTCGTGT" /> | |
239 <param name="adapter_overlap" value="10" /> | |
240 <param name="max_mismatch_fraction" value="0.02" /> | |
241 <param name="min_match_fraction" value="0.87" /> | |
242 <param name="adapter_bandwidth" value="50" /> | |
243 <param name="gap_open" value="8" /> | |
244 <param name="gap_extend" value="2" /> | |
245 <param name="gap_end" value="2" /> | |
246 <param name="local_alignment_score" value="26" /> | |
247 <param name="read_alignment_bandwidth" value="50" /> | |
248 <param name="read_alignment_gap_open" value="26" /> | |
249 <param name="read_alignment_gap_extend" value="9" /> | |
250 <param name="read_alignment_gap_end" value="5" /> | |
251 <param name="read_alignment_max_gap_fraction" value="0.125" /> | |
252 </section> | |
253 <output name="output1" file="empty_output1.fq.gz" /> | |
254 <output name="output2" file="empty_output2.fq.gz" /> | |
255 <output name="merged" file="empty_merged_output.fq.gz" /> | |
256 </test> | |
257 | |
258 <!-- Advanced Functional Tests --> <!-- General Arguments Test #4: non-default -q/-L, nested in their section like the other tests --> | |
259 <test expect_num_outputs="2"> | |
260 <param name="input1" value="input1.fq" /> | |
261 <param name="input2" value="input2.fq" /> | |
262 <param name="merge_reads" value="false" /> | |
263 <section name="general_options"> | |
264 <param name="quality_cutoff" value="15" /> | |
265 <param name="min_length" value="25" /> | |
266 </section> | |
267 <output name="output1" file="output1_general_args.fq.gz" /> | |
268 <output name="output2" file="output2_general_args.fq.gz" /> | |
269 </test> | |
270 | |
271 <!-- Test #5: Adapter/Primer Trimming Arguments with non-default adapter sequences and adapter alignment settings, merging disabled --> | |
272 <test expect_num_outputs="2"> | |
273 <param name="input1" value="input1.fq" /> | |
274 <param name="input2" value="input2.fq" /> | |
275 <param name="merge_reads" value="false" /> | |
276 <section name="trimming_options"> | |
277 <param name="adapter_a" value="ACTGACTG" /> | |
278 <param name="adapter_b" value="GTGACTGA" /> | |
279 <param name="adapter_overlap" value="12" /> | |
280 <param name="max_mismatch_fraction" value="0.03" /> | |
281 <param name="min_match_fraction" value="0.85" /> | |
282 <param name="adapter_bandwidth" value="55" /> | |
283 <param name="gap_open" value="10" /> | |
284 <param name="gap_extend" value="3" /> | |
285 <param name="gap_end" value="3" /> | |
286 <param name="local_alignment_score" value="28" /> | |
287 </section> | |
288 <output name="output1" file="output1_adapter_trim.fq.gz" /> | |
289 <output name="output2" file="output2_adapter_trim.fq.gz" /> | |
290 </test> | |
291 <!-- Test #6: gzipped input files with merging enabled and every other option left unset in the test; expects three outputs --> | |
292 <test expect_num_outputs="3"> | |
293 <param name="input1" value="input1.fastq.gz" /> | |
294 <param name="input2" value="input2.fastq.gz" /> | |
295 <param name="merge_reads" value="true" /> | |
296 <output name="output1" file="output1_from_gzipped.fq.gz" /> | |
297 <output name="output2" file="output2_from_gzipped.fq.gz" /> | |
298 <output name="merged" file="merged_output_from_gzipped.fq.gz" /> | |
299 </test> | |
300 </tests> | |
301 <help><![CDATA[ | |
302 .. class:: warningmark | |
303 | |
304 **Caution** | |
305 ----------- | |
306 :: | |
307 | |
308 This is a modified version of the SeqPrep 1.2 release, made for use with the MGnify pipeline. | |
309 | |
310 Difference in `utils.h`: | |
311 | |
312 :: | |
313 | |
314 -#define MAX_SEQ_LEN (256) | |
315 +#define MAX_SEQ_LEN (1024) | |
316 | |
317 **SeqPrep** | |
318 ----------- | |
319 :: | |
320 | |
321 SeqPrep is a versatile tool designed for merging overlapping paired-end Illumina reads into a single, longer read. | |
322 Additionally, it offers the functionality to trim adapter sequences from reads, making it a useful tool for preprocessing Illumina sequencing data. | |
323 | |
324 **Usage** | |
325 ========= | |
326 :: | |
327 | |
328 To utilize SeqPrep, start by selecting your input FASTQ files: one for the first set of reads and another for the second set. | |
329 SeqPrep provides several options to customize your data processing: | |
330 | |
331 - Adapter Sequences: You can provide specific sequences for adapter trimming if they are known. SeqPrep will remove these sequences from the reads. | |
332 - Quality Score Cutoff: Set the minimum quality score a mismatching base must have to be counted as a mismatch in the overlap. | |
333 - Minimum Read Length: Define the minimum length for reads to be retained after trimming. Reads shorter than this length will be discarded. | |
334 | |
335 If the merging feature is enabled, SeqPrep will combine overlapping reads into longer sequences, thereby enhancing the data quality for downstream analysis. | |
336 | |
337 **Outputs** | |
338 =========== | |
339 :: | |
340 | |
341 SeqPrep generates outputs in gzipped FASTQ format. | |
342 | |
343 See more details on `SeqPrep GitHub repository <https://github.com/jstjohn/SeqPrep>`_. | |
344 | |
345 ]]></help> | |
346 <expand macro="citations"/> | |
347 </tool> |