comparison sickle.xml @ 0:1405432d1b9c draft default tip

Uploaded
author slegras
date Sun, 27 Sep 2015 15:52:49 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1405432d1b9c
1 <tool id="sickle" name="Sickle" version="1.33">
2 <description>Windowed Adaptive Trimming of FastQ data</description>
3 <requirements>
4 <requirement type="package" version="1.33">sickle</requirement> <!-- package version is the same as the tool's own version attribute (1.33) -->
5 </requirements>
6 <command>
7 sickle
8 ## The sub-command (se / pe) and its file options depend on the selected read layout.
9 #if str($readtype.single_or_paired) == "se":
10 se -f '$input_single' -o '$output_single'
11
12 #if $input_single.ext == "fastqillumina":
13 -t illumina
14 #else if $input_single.ext == "fastqsolexa":
15 -t solexa
16 #else
17 ## fastq, fastqsanger and any other accepted subtype: always emit -t,
18 ## falling back to sanger (the default named in the single_or_paired help).
19 -t sanger
20 #end if
21
22 #end if
23 ## Interleaved paired-end input: -M writes one combined file, -m/-s split pairs and singletons.
24 #if str($readtype.single_or_paired) == "pe_combo":
25 #if $readtype.output_n:
26 pe -c '$input_combo' -M '$output_combo'
27 #else
28 pe -c '$input_combo' -m '$output_combo' -s '$output_combo_single'
29 #end if
30
31 #if $input_combo.ext == "fastqillumina":
32 -t illumina
33 #else if $input_combo.ext == "fastqsolexa":
34 -t solexa
35 #else
36 ## fastq, fastqsanger and any other accepted subtype: always emit -t,
37 ## falling back to sanger (the default named in the single_or_paired help).
38 -t sanger
39 #end if
40
41 #end if
42 ## Paired-end input given as separate forward (-f) and reverse (-r) files.
43 #if str($readtype.single_or_paired) == "pe_sep":
44 pe -f '$input_paired1' -r '$input_paired2' -o '$output_paired1' -p '$output_paired2' -s '$output_paired_single'
45
46 #if $input_paired1.ext == "fastqillumina":
47 -t illumina
48 #else if $input_paired1.ext == "fastqsolexa":
49 -t solexa
50 #else
51 ## fastq, fastqsanger and any other accepted subtype: always emit -t,
52 ## falling back to sanger; the type is read from the forward file only.
53 -t sanger
54 #end if
55
56 #end if
57 ## Trimming options shared by every mode; each one is only emitted when set.
58 #if str($qual_threshold) != "":
59 -q $qual_threshold
60 #end if
61
62 #if str($length_threshold) != "":
63 -l $length_threshold
64 #end if
65
66 #if $no_five_prime:
67 -x
68 #end if
69
70 #if $trunc_n:
71 -n
72 #end if
73
74 </command>
75
76 <inputs>
77 <conditional name="readtype">
78 <param name="single_or_paired" type="select" optional="false" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger.">
79 <option selected="true" value="se">Single-End</option>
80 <option value="pe_combo">Paired-End (one interleaved input file)</option>
81 <option value="pe_sep">Paired-End (two separate input files)</option>
82 </param>
83 <!-- Each read layout below exposes its own dataset input(s). -->
84 <when value="se">
85 <param name="input_single" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Single-End FastQ Reads"/>
86 </when>
87
88 <when value="pe_combo">
89 <param name="input_combo" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Interleaved FastQ Reads"/>
90 <param name="output_n" type="boolean" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/>
91 </when>
92
93 <when value="pe_sep">
94 <param name="input_paired1" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Forward Strand FastQ Reads"/>
95 <param name="input_paired2" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Reverse Strand FastQ Reads"/>
96 </when>
97 </conditional>
98 <!-- Trimming thresholds: optional integers preset to 20 and validated as non-negative. -->
99 <param name="qual_threshold" type="integer" value="20" optional="true" label="Quality Threshold">
100 <validator type="in_range" min="0" message="Minimum value is 0"/>
101 </param>
102
103 <param name="length_threshold" type="integer" value="20" optional="true" label="Length Threshold">
104 <validator type="in_range" min="0" message="Minimum value is 0"/>
105 </param>
106 <!-- Boolean switches for the optional trimming behaviours. -->
107 <param name="no_five_prime" type="boolean" label="Don't do 5' trimming"/>
108 <param name="trunc_n" type="boolean" label="Truncate sequences with Ns at first N position"/>
109 </inputs>
110
111 <outputs>
112 <data name="output_single" format_source="input_single" label="Single-End output of ${tool.name} on ${on_string}">
113 <filter>readtype['single_or_paired'] == 'se'</filter>
114 </data>
115 <!-- Interleaved mode: the combined output, plus a singletons file unless output_n is checked. -->
116 <data name="output_combo" format_source="input_combo" label="Paired-End interleaved output of ${tool.name} on ${on_string}">
117 <filter>readtype['single_or_paired'] == 'pe_combo'</filter>
118 </data>
119
120 <data name="output_combo_single" format_source="input_combo" label="Singletons from Paired-End interleaved output of ${tool.name} on ${on_string}">
121 <filter>readtype['single_or_paired'] == 'pe_combo'</filter>
122 <filter>readtype['output_n'] == False</filter>
123 </data>
124 <!-- Separate-files mode: forward, reverse and singleton outputs. -->
125 <data name="output_paired1" format_source="input_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}">
126 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
127 </data>
128
129 <data name="output_paired2" format_source="input_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}">
130 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
131 </data>
132
133 <data name="output_paired_single" format_source="input_paired1" label="Singletons from Paired-End output of ${tool.name} on ${on_string}">
134 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
135 </data>
136 </outputs>
137
138 <help>
139 **Sickle - A windowed adaptive trimming tool for FASTQ files using quality**
140
141 .. class:: infomark
142
143 **About**
144
145 Most modern sequencing technologies produce reads that have
146 deteriorating quality towards the 3'-end and some towards the 5'-end
147 as well. Incorrectly called bases in both regions negatively impact
148 assemblies, mapping, and downstream bioinformatics analyses.
149
150 Sickle is a tool that uses sliding windows along with quality and
151 length thresholds to determine when quality is sufficiently low to
152 trim the 3'-end of reads and also determines when the quality is
153 sufficiently high to trim the 5'-end of reads. It will also
154 discard reads based upon the length threshold. It takes the quality
155 values and slides a window across them whose length is 0.1 times the
156 length of the read. If this length is less than 1, then the window is
157 set to be equal to the length of the read. Otherwise, the window
158 slides along the quality values until the average quality in the
159 window rises above the threshold, at which point the algorithm
160 determines where within the window the rise occurs and cuts the read
161 and quality there for the 5'-end cut. Then when the average quality
162 in the window drops below the threshold, the algorithm determines
163 where in the window the drop occurs and cuts both the read and quality
164 strings there for the 3'-end cut. However, if the length of the
165 remaining sequence is less than the minimum length threshold, then the
166 read is discarded entirely (or replaced with an "N" record). 5'-end
167 trimming can be disabled. Sickle also has an option to truncate reads
168 with Ns at the first N position.
169
170 Sickle supports three types of quality values: Illumina, Solexa, and
171 Sanger. Note that the Solexa quality setting is an approximation (the
172 actual conversion is a non-linear transformation). The resulting
173 approximation is close. Illumina quality refers to qualities encoded
174 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina
175 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will
176 be determined from the datatype of the data, i.e. a fastqsanger datatype
177 is assumed to be Sanger encoded.
178
179 Note that Sickle will remove the 2nd fastq record header (on the "+"
180 line) and replace it with simply a "+". This is the default format for
181 CASAVA >= 1.8.
182
183 -----
184
185 .. class:: infomark
186
187 **Options**
188
189 **Single-end**
190
191 This option takes one single-end input file and outputs one single-end
192 output file of reads that passed the filters.
193
194 **Paired-End (one interleaved input file)**
195
196 This option takes as input one interleaved paired-end file. If you then
197 check the "Output only one file with all reads" checkbox, it will output
198 one interleaved file where any read that did not pass filter will be replaced
199 with a FastQ record where the sequence is a single "N" and the quality is the
200 lowest quality possible for that quality type. This will preserve the paired
201 nature of the data. If you leave the checkbox unchecked, it will output two files,
202 one interleaved file with all the passed pairs and one singletons file where only
203 one of the pair passed filter.
204
205 **Paired-End (two separate input files)**
206
207 This option takes two separate (forward and reverse) paired-end files as input.
208 The output is three files: Two paired-end files with pairs that passed filter and
209 a singletons file where only one of the pair passed filter.
210
211 **Quality threshold**
212
213 Input your desired quality threshold. This threshold is Phred-scaled; quality
214 values typically range from 0 to 41 for FastQ data.
215
216 **Length threshold**
217
218 Input your desired length threshold. This is the threshold to determine if a read is kept
219 after all the trimming steps are done.
220
221 **Disable 5-prime trimming**
222
223 An option to disable trimming the read on the 5-prime end. By default, the 5-prime end
224 is trimmed when the average quality values there are below the quality threshold.
225
226 **Truncate sequences with Ns**
227
228 This option will trim a read at the first "N" base in the read after doing quality trimming.
229 It is then still subject to the length threshold.
230
231 -----
232
233 .. class:: infomark
234
235 **Citation**
236
237 Sickle doesn't have a paper, but you can cite it like this::
238
239 Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files
240 (Version 1.33) [Software]. Available at https://github.com/najoshi/sickle.
241
242 -----
243
244 Copyright: Nikhil Joshi
245
246 http://bioinformatics.ucdavis.edu
247
248 http://github.com/ucdavis-bioinformatics
249
250 http://github.com/najoshi
251
252 </help>
253
254 </tool>