comparison sickle.xml @ 4:c70137414dcd draft

sickle v1.33
author nikhil-joshi
date Wed, 23 Jul 2014 18:35:10 -0400
parents
children
comparison
equal deleted inserted replaced
3:f6ebdaca9925 4:c70137414dcd
1 <tool id="sickle" name="Sickle" version="1.33">
2 <description>Windowed Adaptive Trimming of FastQ data</description>
3
4 <command>
5 sickle
6 ## Exactly one of the three read-type sections below is emitted.
7 #if str($readtype.single_or_paired) == "se":
8 se -f $input_single -o $output_single
9
10 ## Quality type follows the input datatype; plain "fastq" and any
11 ## unrecognised subtype are treated as sanger so -t is always passed.
12 #if $input_single.ext == "fastqillumina":
13 -t illumina
14 #else if $input_single.ext == "fastqsolexa":
15 -t solexa
16 #else
17 -t sanger
18 #end if
19
20 #end if
21
22 #if str($readtype.single_or_paired) == "pe_combo":
23 #if $readtype.output_n:
24 pe -c $input_combo -M $output_combo
25 #else
26 pe -c $input_combo -m $output_combo -s $output_combo_single
27 #end if
28
29 ## Quality type follows the input datatype; plain "fastq" and any
30 ## unrecognised subtype are treated as sanger so -t is always passed.
31 #if $input_combo.ext == "fastqillumina":
32 -t illumina
33 #else if $input_combo.ext == "fastqsolexa":
34 -t solexa
35 #else
36 -t sanger
37 #end if
38
39 #end if
40
41 #if str($readtype.single_or_paired) == "pe_sep":
42 pe -f $input_paired1 -r $input_paired2 -o $output_paired1 -p $output_paired2 -s $output_paired_single
43
44 ## Quality type is taken from the forward-read datatype only; plain
45 ## "fastq" and any unrecognised subtype are treated as sanger.
46 #if $input_paired1.ext == "fastqillumina":
47 -t illumina
48 #else if $input_paired1.ext == "fastqsolexa":
49 -t solexa
50 #else
51 -t sanger
52 #end if
53
54 #end if
55 ## Optional thresholds and switches shared by every read type.
56 #if str($qual_threshold) != "":
57 -q $qual_threshold
58 #end if
59
60 #if str($length_threshold) != "":
61 -l $length_threshold
62 #end if
63
64 #if $no_five_prime:
65 -x
66 #end if
67
68 #if $trunc_n:
69 -n
70 #end if
71
72 --quiet
73 </command>
74
75 <inputs>
76 <conditional name="readtype">
77 <param name="single_or_paired" type="select" optional="false" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger.">
78 <option value="se" selected="true">Single-End</option>
79 <option value="pe_combo">Paired-End (one interleaved input file)</option>
80 <option value="pe_sep">Paired-End (two separate input files)</option>
81 </param>
82 <!-- Single-end: one FastQ dataset. -->
83 <when value="se">
84 <param name="input_single" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Single-End FastQ Reads"/>
85 </when>
86 <!-- Interleaved paired-end: one FastQ dataset plus the single-output-file switch. -->
87 <when value="pe_combo">
88 <param name="input_combo" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Interleaved FastQ Reads"/>
89 <param name="output_n" type="boolean" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/>
90 </when>
91 <!-- Separate paired-end: one forward and one reverse FastQ dataset. -->
92 <when value="pe_sep">
93 <param name="input_paired1" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Forward Strand FastQ Reads"/>
94 <param name="input_paired2" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Reverse Strand FastQ Reads"/>
95 </when>
96 </conditional>
97 <!-- Optional integer thresholds (default 20), validated to be at least 0 when given. -->
98 <param name="qual_threshold" type="integer" value="20" optional="true" label="Quality Threshold">
99 <validator type="in_range" min="0" message="Minimum value is 0"/>
100 </param>
101
102 <param name="length_threshold" type="integer" value="20" optional="true" label="Length Threshold">
103 <validator type="in_range" min="0" message="Minimum value is 0"/>
104 </param>
105 <!-- Boolean switches passed to sickle as -x and -n by the command template. -->
106 <param name="no_five_prime" type="boolean" label="Don't do 5' trimming"/>
107 <param name="trunc_n" type="boolean" label="Truncate sequences with Ns at first N position"/>
108 </inputs>
109
110 <outputs>
111 <data name="output_single" format_source="input_single" label="Single-End output of ${tool.name} on ${on_string}">
112 <filter>(readtype['single_or_paired'] == 'se')</filter>
113 </data>
114 <!-- Interleaved paired-end mode: the combined file is always created. -->
115 <data name="output_combo" format_source="input_combo" label="Paired-End interleaved output of ${tool.name} on ${on_string}">
116 <filter>(readtype['single_or_paired'] == 'pe_combo')</filter>
117 </data>
118 <!-- The singletons file is created only when the single-output-file switch (output_n) is off. -->
119 <data name="output_combo_single" format_source="input_combo" label="Singletons from Paired-End interleaved output of ${tool.name} on ${on_string}">
120 <filter>(readtype['single_or_paired'] == 'pe_combo')</filter>
121 <filter>(readtype['output_n'] == False)</filter>
122 </data>
123 <!-- Separate paired-end mode: forward, reverse and singletons files. -->
124 <data name="output_paired1" format_source="input_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}">
125 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
126 </data>
127
128 <data name="output_paired2" format_source="input_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}">
129 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
130 </data>
131 <!-- Singletons inherit their datatype from the forward-read input. -->
132 <data name="output_paired_single" format_source="input_paired1" label="Singletons from Paired-End output of ${tool.name} on ${on_string}">
133 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
134 </data>
135 </outputs>
136
137 <help>
138 **Sickle - A windowed adaptive trimming tool for FASTQ files using quality**
139
140 .. class:: infomark
141
142 **About**
143
144 Most modern sequencing technologies produce reads that have
145 deteriorating quality towards the 3'-end and some towards the 5'-end
146 as well. Incorrectly called bases in both regions negatively impact
147 assemblies, mapping, and downstream bioinformatics analyses.
148
149 Sickle is a tool that uses sliding windows along with quality and
150 length thresholds to determine when quality is sufficiently low to
151 trim the 3'-end of reads and also determines when the quality is
152 sufficiently high to trim the 5'-end of reads. It will also
153 discard reads based upon the length threshold. It takes the quality
154 values and slides a window across them whose length is 0.1 times the
155 length of the read. If this length is less than 1, then the window is
156 set to be equal to the length of the read. Otherwise, the window
157 slides along the quality values until the average quality in the
158 window rises above the threshold, at which point the algorithm
159 determines where within the window the rise occurs and cuts the read
160 and quality there for the 5'-end cut. Then when the average quality
161 in the window drops below the threshold, the algorithm determines
162 where in the window the drop occurs and cuts both the read and quality
163 strings there for the 3'-end cut. However, if the length of the
164 remaining sequence is less than the minimum length threshold, then the
165 read is discarded entirely (or replaced with an "N" record). 5'-end
166 trimming can be disabled. Sickle also has an option to truncate reads
167 with Ns at the first N position.
168
169 Sickle supports three types of quality values: Illumina, Solexa, and
170 Sanger. Note that the Solexa quality setting is an approximation (the
171 actual conversion is a non-linear transformation). The resulting
172 approximation is close. Illumina quality refers to qualities encoded
173 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina
174 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will
175 be determined from the datatype of the data, i.e. a fastqsanger datatype
176 is assumed to be Sanger encoded.
177
178 Note that Sickle will remove the 2nd fastq record header (on the "+"
179 line) and replace it with simply a "+". This is the default format for
180 CASAVA >= 1.8.
181
182 -----
183
184 .. class:: infomark
185
186 **Options**
187
188 **Single-end**
189
190 This option takes one single-end input file and outputs one single-end
191 output file of reads that passed the filters.
192
193 **Paired-End (one interleaved input file)**
194
195 This option takes as input one interleaved paired-end file. If you then
196 check the "Output only one file with all reads" checkbox, it will output
197 one interleaved file where any read that did not pass filter will be replaced
198 with a FastQ record where the sequence is a single "N" and the quality is the
199 lowest quality possible for that quality type. This will preserve the paired
200 nature of the data. If you leave the checkbox unchecked, it will output two files,
201 one interleaved file with all the passed pairs and one singletons file where only
202 one of the pair passed filter.
203
204 **Paired-End (two separate input files)**
205
206 This option takes two separate (forward and reverse) paired-end files as input.
207 The output is three files: Two paired-end files with pairs that passed filter and
208 a singletons file where only one of the pair passed filter.
209
210 **Quality threshold**
211
212 Input your desired quality threshold. The threshold is Phred-scaled; FastQ quality values
213 typically range from 0 to 41.
214
215 **Length threshold**
216
217 Input your desired length threshold. This is the threshold to determine if a read is kept
218 after all the trimming steps are done.
219
220 **Disable 5-prime trimming**
221
222 An option to disable trimming the read on the 5-prime end. By default, the 5-prime end is
223 trimmed until the average quality in the window rises above the quality threshold.
224
225 **Truncate sequences with Ns**
226
227 This option will trim a read at the first "N" base in the read after doing quality trimming.
228 It is then still subject to the length threshold.
229
230 -----
231
232 .. class:: infomark
233
234 **Citation**
235
236 Sickle doesn't have a paper, but you can cite it like this::
237
238 Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files
239 (Version 1.33) [Software]. Available at https://github.com/najoshi/sickle.
240
241 -----
242
243 Copyright: Nikhil Joshi
244
245 http://bioinformatics.ucdavis.edu
246
247 http://github.com/ucdavis-bioinformatics
248
249 http://github.com/najoshi
250
251 </help>
252
253 </tool>