Mercurial > repos > nikhil-joshi > sickle
comparison sickle.xml @ 4:c70137414dcd draft
sickle v1.33
author | nikhil-joshi |
---|---|
date | Wed, 23 Jul 2014 18:35:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:f6ebdaca9925 | 4:c70137414dcd |
---|---|
1 <tool id="sickle" name="Sickle" version="1.33"> | |
2 <description>Windowed Adaptive Trimming of FastQ data</description> | |
3 | |
4 <command> | |
5 sickle | |
6 ## Single-end mode. Inputs declared inside the "readtype" conditional are referenced through it; outputs are top-level. | |
7 #if str($readtype.single_or_paired) == "se": | |
8 se -f $readtype.input_single -o $output_single | |
9 | |
10 #if $readtype.input_single.ext == "fastq": | |
11 -t sanger | |
12 #else if $readtype.input_single.ext == "fastqsanger": | |
13 -t sanger | |
14 #else if $readtype.input_single.ext == "fastqillumina": | |
15 -t illumina | |
16 #else if $readtype.input_single.ext == "fastqsolexa": | |
17 -t solexa | |
18 #end if | |
19 | |
20 #end if | |
21 ## Interleaved paired-end mode. With output_n checked only -M (one combined file) is written; otherwise -m and -s write the pairs and singletons files. | |
22 #if str($readtype.single_or_paired) == "pe_combo": | |
23 #if $readtype.output_n: | |
24 pe -c $readtype.input_combo -M $output_combo | |
25 #else | |
26 pe -c $readtype.input_combo -m $output_combo -s $output_combo_single | |
27 #end if | |
28 | |
29 #if $readtype.input_combo.ext == "fastq": | |
30 -t sanger | |
31 #else if $readtype.input_combo.ext == "fastqsanger": | |
32 -t sanger | |
33 #else if $readtype.input_combo.ext == "fastqillumina": | |
34 -t illumina | |
35 #else if $readtype.input_combo.ext == "fastqsolexa": | |
36 -t solexa | |
37 #end if | |
38 | |
39 #end if | |
40 ## Separate-files paired-end mode. The quality type is taken from the datatype of the forward-read input only. | |
41 #if str($readtype.single_or_paired) == "pe_sep": | |
42 pe -f $readtype.input_paired1 -r $readtype.input_paired2 -o $output_paired1 -p $output_paired2 -s $output_paired_single | |
43 | |
44 #if $readtype.input_paired1.ext == "fastq": | |
45 -t sanger | |
46 #else if $readtype.input_paired1.ext == "fastqsanger": | |
47 -t sanger | |
48 #else if $readtype.input_paired1.ext == "fastqillumina": | |
49 -t illumina | |
50 #else if $readtype.input_paired1.ext == "fastqsolexa": | |
51 -t solexa | |
52 #end if | |
53 | |
54 #end if | |
55 ## Options shared by every mode. Both thresholds are optional and are only passed when a value is set. | |
56 #if str($qual_threshold) != "": | |
57 -q $qual_threshold | |
58 #end if | |
59 | |
60 #if str($length_threshold) != "": | |
61 -l $length_threshold | |
62 #end if | |
63 | |
64 #if $no_five_prime: | |
65 -x | |
66 #end if | |
67 | |
68 #if $trunc_n: | |
69 -n | |
70 #end if | |
71 | |
72 --quiet | |
73 </command> | |
74 | |
75 <inputs> | |
76 <conditional name="readtype"> | |
77 <param name="single_or_paired" type="select" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger." optional="false"> | |
78 <option value="se" selected="true">Single-End</option> | |
79 <option value="pe_combo">Paired-End (one interleaved input file)</option> | |
80 <option value="pe_sep">Paired-End (two separate input files)</option> | |
81 </param> | |
82 <!-- One "when" branch per read layout; each branch declares only the inputs that layout needs. --> | |
83 <when value="se"> | |
84 <param name="input_single" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Single-End FastQ Reads"/> | |
85 </when> | |
86 | |
87 <when value="pe_combo"> | |
88 <param name="input_combo" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Interleaved FastQ Reads"/> | |
89 <param name="output_n" type="boolean" label="Output only one file with all reads" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded."/> | |
90 </when> | |
91 | |
92 <when value="pe_sep"> | |
93 <param name="input_paired1" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Forward Strand FastQ Reads"/> | |
94 <param name="input_paired2" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" optional="false" label="Paired-End Reverse Strand FastQ Reads"/> | |
95 </when> | |
96 </conditional> | |
97 <!-- Trimming thresholds: optional integers with an initial value of 20, validated as non-negative. --> | |
98 <param name="qual_threshold" type="integer" value="20" optional="true" label="Quality Threshold"> | |
99 <validator type="in_range" min="0" message="Minimum value is 0"/> | |
100 </param> | |
101 | |
102 <param name="length_threshold" type="integer" value="20" optional="true" label="Length Threshold"> | |
103 <validator type="in_range" min="0" message="Minimum value is 0"/> | |
104 </param> | |
105 <!-- Boolean switches; when checked the command template adds -x and -n respectively. --> | |
106 <param name="no_five_prime" type="boolean" label="Don't do 5' trimming"/> | |
107 <param name="trunc_n" type="boolean" label="Truncate sequences with Ns at first N position"/> | |
108 </inputs> | |
109 | |
110 <outputs> | |
111 <data name="output_single" format_source="input_single" label="Single-End output of ${tool.name} on ${on_string}"> | |
112 <filter>readtype['single_or_paired'] == 'se'</filter> | |
113 </data> | |
114 <!-- Interleaved mode: the combined output is always created. --> | |
115 <data name="output_combo" format_source="input_combo" label="Paired-End interleaved output of ${tool.name} on ${on_string}"> | |
116 <filter>readtype['single_or_paired'] == 'pe_combo'</filter> | |
117 </data> | |
118 | |
119 <data name="output_combo_single" format_source="input_combo" label="Singletons from Paired-End interleaved output of ${tool.name} on ${on_string}"> | |
120 <!-- Singletons exist only in interleaved mode and only when output_n is unchecked; the mode test comes first so output_n is read only in that mode. --> | |
121 <filter>readtype['single_or_paired'] == 'pe_combo' and readtype['output_n'] == False</filter> | |
122 </data> | |
123 <!-- Separate-files mode: forward, reverse and singletons outputs share the same filter. --> | |
124 <data name="output_paired1" format_source="input_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}"> | |
125 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> | |
126 </data> | |
127 | |
128 <data name="output_paired2" format_source="input_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}"> | |
129 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> | |
130 </data> | |
131 | |
132 <data name="output_paired_single" format_source="input_paired1" label="Singletons from Paired-End output of ${tool.name} on ${on_string}"> | |
133 <filter>readtype['single_or_paired'] == 'pe_sep'</filter> | |
134 </data> | |
135 </outputs> | |
136 | |
137 <help> | |
138 **Sickle - A windowed adaptive trimming tool for FASTQ files using quality** | |
139 | |
140 .. class:: infomark | |
141 | |
142 **About** | |
143 | |
144 Most modern sequencing technologies produce reads that have | |
145 deteriorating quality towards the 3'-end and some towards the 5'-end | |
146 as well. Incorrectly called bases in both regions negatively impact | |
147 assemblies, mapping, and downstream bioinformatics analyses. | |
148 | |
149 Sickle is a tool that uses sliding windows along with quality and | |
150 length thresholds to determine when quality is sufficiently low to | |
151 trim the 3'-end of reads and also determines when the quality is | |
152 sufficiently high to trim the 5'-end of reads. It will also | |
153 discard reads based upon the length threshold. It takes the quality | |
154 values and slides a window across them whose length is 0.1 times the | |
155 length of the read. If this length is less than 1, then the window is | |
156 set to be equal to the length of the read. Otherwise, the window | |
157 slides along the quality values until the average quality in the | |
158 window rises above the threshold, at which point the algorithm | |
159 determines where within the window the rise occurs and cuts the read | |
160 and quality there for the 5'-end cut. Then when the average quality | |
161 in the window drops below the threshold, the algorithm determines | |
162 where in the window the drop occurs and cuts both the read and quality | |
163 strings there for the 3'-end cut. However, if the length of the | |
164 remaining sequence is less than the minimum length threshold, then the | |
165 read is discarded entirely (or replaced with an "N" record). 5'-end | |
166 trimming can be disabled. Sickle also has an option to truncate reads | |
167 with Ns at the first N position. | |
168 | |
169 Sickle supports three types of quality values: Illumina, Solexa, and | |
170 Sanger. Note that the Solexa quality setting is an approximation (the | |
171 actual conversion is a non-linear transformation). The end | |
172 approximation is close. Illumina quality refers to qualities encoded | |
173 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina | |
174 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will | |
175 be determined from the datatype of the data, i.e. a fastqsanger datatype | |
176 is assumed to be Sanger encoded. | |
177 | |
178 Note that Sickle will remove the 2nd fastq record header (on the "+" | |
179 line) and replace it with simply a "+". This is the default format for | |
180 CASAVA >= 1.8. | |
181 | |
182 ----- | |
183 | |
184 .. class:: infomark | |
185 | |
186 **Options** | |
187 | |
188 **Single-end** | |
189 | |
190 This option takes one single-end input file and outputs one single-end | |
191 output file of reads that passed the filters. | |
192 | |
193 **Paired-End (one interleaved input file)** | |
194 | |
195 This option takes as input one interleaved paired-end file. If you then | |
196 check the "Output only one file with all reads" checkbox, it will output | |
197 one interleaved file where any read that did not pass filter will be replaced | |
198 with a FastQ record where the sequence is a single "N" and the quality is the | |
199 lowest quality possible for that quality type. This will preserve the paired | |
200 nature of the data. If you leave the checkbox unchecked, it will output two files, | |
201 one interleaved file with all the passed pairs and one singletons file where only | |
202 one of the pair passed filter. | |
203 | |
204 **Paired-End (two separate input files)** | |
205 | |
206 This option takes two separate (forward and reverse) paired-end files as input. | |
207 The output is three files: Two paired-end files with pairs that passed filter and | |
208 a singletons file where only one of the pair passed filter. | |
209 | |
210 **Quality threshold** | |
211 | |
212 Input your desired quality threshold. This threshold is phred-scaled; values typically | |
213 range from 0 to 41 for FastQ data. | |
214 | |
215 **Length threshold** | |
216 | |
217 Input your desired length threshold. This is the threshold to determine if a read is kept | |
218 after all the trimming steps are done. | |
219 | |
220 **Disable 5-prime trimming** | |
221 | |
222 An option to disable trimming the read on the 5-prime end. When 5-prime trimming is enabled, the start | |
223 of the read is cut at the point where the average quality first rises above the quality threshold. | |
224 | |
225 **Truncate sequences with Ns** | |
226 | |
227 This option will trim a read at the first "N" base in the read after doing quality trimming. | |
228 It is then still subject to the length threshold. | |
229 | |
230 ----- | |
231 | |
232 .. class:: infomark | |
233 | |
234 **Citation** | |
235 | |
236 Sickle doesn't have a paper, but you can cite it like this:: | |
237 | |
238 Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files | |
239 (Version 1.33) [Software]. Available at https://github.com/najoshi/sickle. | |
240 | |
241 ----- | |
242 | |
243 Copyright: Nikhil Joshi | |
244 | |
245 http://bioinformatics.ucdavis.edu | |
246 | |
247 http://github.com/ucdavis-bioinformatics | |
248 | |
249 http://github.com/najoshi | |
250 | |
251 </help> | |
252 | |
253 </tool> |