comparison sickle.xml @ 9:7939dd56c4b4 draft

Uploaded
author nikhil-joshi
date Sat, 14 Mar 2015 18:19:57 -0400
parents
children
comparison
equal deleted inserted replaced
8:3ef3eb63a297 9:7939dd56c4b4
1 <tool id="sickle" name="Sickle" version="1.33">
2 <description>Windowed Adaptive Trimming of FastQ data</description>
3
4 <command>
5 sickle
6 ## Assemble one sickle call: sub-command and files first, then the quality type, then the shared trimming options.
7 #if str($readtype.single_or_paired) == "se":
8 se -f '$input_single' -o '$output_single'
9
10 #if $input_single.ext == "fastqillumina":
11 -t illumina
12 #else if $input_single.ext == "fastqsolexa":
13 -t solexa
14 #else
15 ## "fastq", "fastqsanger" and any other accepted datatype are read as Sanger,
16 ## so sickle always receives a -t value.
17 -t sanger
18 #end if
19
20 #end if
21 ## Interleaved pairs: -M writes one file (failed reads become "N" records); otherwise -m/-s split pairs and singletons.
22 #if str($readtype.single_or_paired) == "pe_combo":
23 #if $readtype.output_n:
24 pe -c '$input_combo' -M '$output_combo'
25 #else
26 pe -c '$input_combo' -m '$output_combo' -s '$output_combo_single'
27 #end if
28
29 #if $input_combo.ext == "fastqillumina":
30 -t illumina
31 #else if $input_combo.ext == "fastqsolexa":
32 -t solexa
33 #else
34 ## Sanger is the fallback for "fastq", "fastqsanger" and any other
35 ## accepted datatype.
36 -t sanger
37 #end if
38
39 #end if
40 ## Separate forward/reverse files: trimmed pairs go to -o/-p, singletons to -s.
41 #if str($readtype.single_or_paired) == "pe_sep":
42 pe -f '$input_paired1' -r '$input_paired2' -o '$output_paired1' -p '$output_paired2' -s '$output_paired_single'
43 ## The quality type is taken from the forward file only; the reverse file is assumed to share it.
44 #if $input_paired1.ext == "fastqillumina":
45 -t illumina
46 #else if $input_paired1.ext == "fastqsolexa":
47 -t solexa
48 #else
49 ## Sanger is the fallback for "fastq", "fastqsanger" and any other
50 ## accepted datatype.
51 -t sanger
52 #end if
53
54 #end if
55 ## Both thresholds are optional in the form; an empty value omits the flag.
56 #if str($qual_threshold) != "":
57 -q $qual_threshold
58 #end if
59
60 #if str($length_threshold) != "":
61 -l $length_threshold
62 #end if
63 ## -x is passed when "Don't do 5' trimming" is checked.
64 #if $no_five_prime:
65 -x
66 #end if
67 ## -n is passed when "Truncate sequences with Ns at first N position" is checked.
68 #if $trunc_n:
69 -n
70 #end if
71
72 </command>
73
74 <inputs><!-- Form fields: the read layout with its input files, then the trimming thresholds and switches. -->
75 <conditional name="readtype">
76 <param type="select" name="single_or_paired" optional="false" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger.">
77 <option selected="true" value="se">Single-End</option>
78 <option value="pe_combo">Paired-End (one interleaved input file)</option>
79 <option value="pe_sep">Paired-End (two separate input files)</option>
80 </param>
81 <!-- Each choice below exposes only the inputs that layout needs. -->
82 <when value="se">
83 <param type="data" name="input_single" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Single-End FastQ Reads"/>
84 </when>
85
86 <when value="pe_combo">
87 <param type="data" name="input_combo" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Interleaved FastQ Reads"/>
88 <param type="boolean" name="output_n" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/>
89 </when>
90
91 <when value="pe_sep">
92 <param type="data" name="input_paired1" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Forward Strand FastQ Reads"/>
93 <param type="data" name="input_paired2" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Reverse Strand FastQ Reads"/>
94 </when>
95 </conditional>
96 <!-- Optional integer thresholds, pre-filled with 20 and validated to be 0 or greater. -->
97 <param type="integer" name="qual_threshold" value="20" optional="true" label="Quality Threshold">
98 <validator type="in_range" min="0" message="Minimum value is 0"/>
99 </param>
100
101 <param type="integer" name="length_threshold" value="20" optional="true" label="Length Threshold">
102 <validator type="in_range" min="0" message="Minimum value is 0"/>
103 </param>
104 <!-- Boolean switches read by the command template to add -x and -n. -->
105 <param type="boolean" name="no_five_prime" label="Don't do 5' trimming"/>
106 <param type="boolean" name="trunc_n" label="Truncate sequences with Ns at first N position"/>
107 </inputs>
108
109 <outputs><!-- Each dataset takes its format from the named input and is kept only when its filter(s) hold. -->
110 <data name="output_single" format_source="input_single" label="Single-End output of ${tool.name} on ${on_string}">
111 <filter>(readtype['single_or_paired'] == 'se')</filter>
112 </data>
113 <!-- Interleaved mode: the trimmed interleaved file. -->
114 <data name="output_combo" format_source="input_combo" label="Paired-End interleaved output of ${tool.name} on ${on_string}">
115 <filter>(readtype['single_or_paired'] == 'pe_combo')</filter>
116 </data>
117 <!-- Interleaved mode: singletons, only when the single-file ("N" record) option is off. -->
118 <data name="output_combo_single" format_source="input_combo" label="Singletons from Paired-End interleaved output of ${tool.name} on ${on_string}">
119 <filter>(readtype['single_or_paired'] == 'pe_combo')</filter>
120 <filter>(readtype['output_n'] == False)</filter>
121 </data>
122 <!-- Separate-files mode: forward reads, reverse reads, then singletons. -->
123 <data name="output_paired1" format_source="input_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}">
124 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
125 </data>
126
127 <data name="output_paired2" format_source="input_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}">
128 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
129 </data>
130 <!-- The singletons file takes its format from the forward input. -->
131 <data name="output_paired_single" format_source="input_paired1" label="Singletons from Paired-End output of ${tool.name} on ${on_string}">
132 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter>
133 </data>
134 </outputs>
135
136 <help>
137 **Sickle - A windowed adaptive trimming tool for FASTQ files using quality**
138
139 .. class:: infomark
140
141 **About**
142
143 Most modern sequencing technologies produce reads that have
144 deteriorating quality towards the 3'-end and some towards the 5'-end
145 as well. Incorrectly called bases in both regions negatively impact
146 assemblies, mapping, and downstream bioinformatics analyses.
147
148 Sickle is a tool that uses sliding windows along with quality and
149 length thresholds to determine when quality is sufficiently low to
150 trim the 3'-end of reads and also determines when the quality is
151 sufficiently high to trim the 5'-end of reads. It will also
152 discard reads based upon the length threshold. It takes the quality
153 values and slides a window across them whose length is 0.1 times the
154 length of the read. If this length is less than 1, then the window is
155 set to be equal to the length of the read. Otherwise, the window
156 slides along the quality values until the average quality in the
157 window rises above the threshold, at which point the algorithm
158 determines where within the window the rise occurs and cuts the read
159 and quality there for the 5'-end cut. Then when the average quality
160 in the window drops below the threshold, the algorithm determines
161 where in the window the drop occurs and cuts both the read and quality
162 strings there for the 3'-end cut. However, if the length of the
163 remaining sequence is less than the minimum length threshold, then the
164 read is discarded entirely (or replaced with an "N" record). 5'-end
165 trimming can be disabled. Sickle also has an option to truncate reads
166 with Ns at the first N position.
167
168 Sickle supports three types of quality values: Illumina, Solexa, and
169 Sanger. Note that the Solexa quality setting is an approximation (the
170 actual conversion is a non-linear transformation). The resulting
171 approximation is close. Illumina quality refers to qualities encoded
172 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina
173 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will
174 be determined from the datatype of the data, i.e. a fastqsanger datatype
175 is assumed to be Sanger encoded.
176
177 Note that Sickle will remove the 2nd fastq record header (on the "+"
178 line) and replace it with simply a "+". This is the default format for
179 CASAVA >= 1.8.
180
181 -----
182
183 .. class:: infomark
184
185 **Options**
186
187 **Single-end**
188
189 This option takes one single-end input file and outputs one single-end
190 output file of reads that passed the filters.
191
192 **Paired-End (one interleaved input file)**
193
194 This option takes as input one interleaved paired-end file. If you then
195 check the "Output only one file with all reads" checkbox, it will output
196 one interleaved file where any read that did not pass filter will be replaced
197 with a FastQ record where the sequence is a single "N" and the quality is the
198 lowest quality possible for that quality type. This will preserve the paired
199 nature of the data. If you leave the checkbox unchecked, it will output two files,
200 one interleaved file with all the passed pairs and one singletons file where only
201 one of the pair passed filter.
202
203 **Paired-End (two separate input files)**
204
205 This option takes two separate (forward and reverse) paired-end files as input.
206 The output is three files: Two paired-end files with pairs that passed filter and
207 a singletons file where only one of the pair passed filter.
208
209 **Quality threshold**
210
211 Input your desired quality threshold. This threshold is Phred-scaled; quality values
212 in FastQ data typically range from 0 to 41.
213
214 **Length threshold**
215
216 Input your desired length threshold. This is the threshold to determine if a read is kept
217 after all the trimming steps are done.
218
219 **Disable 5-prime trimming**
220
221 An option to disable trimming the read on the 5-prime end. By default, the read is trimmed
222 at the 5-prime end if the average quality values there dip below the quality threshold.
223
224 **Truncate sequences with Ns**
225
226 This option will trim a read at the first "N" base in the read after doing quality trimming.
227 It is then still subject to the length threshold.
228
229 -----
230
231 .. class:: infomark
232
233 **Citation**
234
235 Sickle doesn't have a paper, but you can cite it like this::
236
237 Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files
238 (Version 1.33) [Software]. Available at https://github.com/najoshi/sickle.
239
240 -----
241
242 Copyright: Nikhil Joshi
243
244 http://bioinformatics.ucdavis.edu
245
246 http://github.com/ucdavis-bioinformatics
247
248 http://github.com/najoshi
249
250 </help>
251
252 </tool>