Mercurial > repos > slegras > sickle_1_33
comparison sickle.xml @ 0:1405432d1b9c draft default tip
Uploaded
author | slegras |
---|---|
date | Sun, 27 Sep 2015 15:52:49 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1405432d1b9c |
---|---|
1 <tool id="sickle" name="Sickle" version="1.33"> | |
2 <description>Windowed Adaptive Trimming of FastQ data</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.33">sickle</requirement> | |
5 </requirements> | |
6 <command> | |
7 sickle | |
8 ## Sub-command, files and -t quality type follow the selected read layout; dataset paths are single-quoted for the shell. | |
9 #if str($readtype.single_or_paired) == "se": | |
10 se -f '$input_single' -o '$output_single' | |
11 | |
12 #if $input_single.ext == "fastqillumina": | |
13 -t illumina | |
14 #else if $input_single.ext == "fastqsolexa": | |
15 -t solexa | |
16 #else | |
17 ## fastq, fastqsanger and any other accepted subtype fall back to sanger, | |
18 ## so that -t is never left off the command line | |
19 -t sanger | |
20 #end if | |
21 | |
22 #end if | |
23 | |
24 #if str($readtype.single_or_paired) == "pe_combo": | |
25 #if $readtype.output_n: | |
26 pe -c '$input_combo' -M '$output_combo' | |
27 #else | |
28 pe -c '$input_combo' -m '$output_combo' -s '$output_combo_single' | |
29 #end if | |
30 | |
31 #if $input_combo.ext == "fastqillumina": | |
32 -t illumina | |
33 #else if $input_combo.ext == "fastqsolexa": | |
34 -t solexa | |
35 #else | |
36 ## fastq, fastqsanger and any other accepted subtype fall back to sanger, | |
37 ## so that -t is never left off the command line | |
38 -t sanger | |
39 #end if | |
40 | |
41 #end if | |
42 | |
43 #if str($readtype.single_or_paired) == "pe_sep": | |
44 pe -f '$input_paired1' -r '$input_paired2' -o '$output_paired1' -p '$output_paired2' -s '$output_paired_single' | |
45 | |
46 #if $input_paired1.ext == "fastqillumina": | |
47 -t illumina | |
48 #else if $input_paired1.ext == "fastqsolexa": | |
49 -t solexa | |
50 #else | |
51 ## the quality type is taken from the forward file only; | |
52 ## fastq, fastqsanger and any other accepted subtype fall back to sanger | |
53 -t sanger | |
54 #end if | |
55 | |
56 #end if | |
57 ## Optional thresholds: the flag is emitted only when the field was not left empty. | |
58 #if str($qual_threshold) != "": | |
59 -q $qual_threshold | |
60 #end if | |
61 | |
62 #if str($length_threshold) != "": | |
63 -l $length_threshold | |
64 #end if | |
65 | |
66 #if $no_five_prime: | |
67 -x | |
68 #end if | |
69 | |
70 #if $trunc_n: | |
71 -n | |
72 #end if | |
73 | |
74 </command> | |
75 | |
76 <inputs> | |
77 <conditional name="readtype"> | |
78 <param name="single_or_paired" type="select" label="Single-End or Paired-End reads?" help="Note: Sickle will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger." optional="false"> | |
79 <option selected="true" value="se">Single-End</option> | |
80 <option value="pe_combo">Paired-End (one interleaved input file)</option> | |
81 <option value="pe_sep">Paired-End (two separate input files)</option> | |
82 </param> | |
83 <!-- Single-end layout: one FastQ dataset --> | |
84 <when value="se"> | |
85 <param name="input_single" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" label="Single-End FastQ Reads" optional="false"/> | |
86 </when> | |
87 <!-- Interleaved paired-end layout: one FastQ dataset plus the single-output switch --> | |
88 <when value="pe_combo"> | |
89 <param name="input_combo" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" label="Paired-End Interleaved FastQ Reads" optional="false"/> | |
90 <param name="output_n" type="boolean" help="This will output only one file with all the reads, where the reads that did not pass filter will be replaced with a single 'N', rather than discarded." label="Output only one file with all reads"/> | |
91 </when> | |
92 <!-- Separate paired-end layout: forward and reverse FastQ datasets --> | |
93 <when value="pe_sep"> | |
94 <param name="input_paired1" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" label="Paired-End Forward Strand FastQ Reads" optional="false"/> | |
95 <param name="input_paired2" type="data" format="fastq, fastqsanger, fastqillumina, fastqsolexa" label="Paired-End Reverse Strand FastQ Reads" optional="false"/> | |
96 </when> | |
97 </conditional> | |
98 <!-- Optional non-negative thresholds, both defaulting to 20 --> | |
99 <param name="qual_threshold" type="integer" value="20" label="Quality Threshold" optional="true"> | |
100 <validator min="0" type="in_range" message="Minimum value is 0"/> | |
101 </param> | |
102 | |
103 <param name="length_threshold" type="integer" value="20" label="Length Threshold" optional="true"> | |
104 <validator min="0" type="in_range" message="Minimum value is 0"/> | |
105 </param> | |
106 <!-- Boolean switches read by the command template --> | |
107 <param type="boolean" name="no_five_prime" label="Don't do 5' trimming"/> | |
108 <param type="boolean" name="trunc_n" label="Truncate sequences with Ns at first N position"/> | |
109 </inputs> | |
110 | |
111 <outputs> | |
112 <data name="output_single" format_source="input_single" label="Single-End output of ${tool.name} on ${on_string}"> | |
113 <filter>(readtype['single_or_paired'] == 'se')</filter> | |
114 </data> | |
115 <!-- Interleaved paired-end mode: trimmed pairs, plus singletons unless one combined file was requested --> | |
116 <data name="output_combo" format_source="input_combo" label="Paired-End interleaved output of ${tool.name} on ${on_string}"> | |
117 <filter>(readtype['single_or_paired'] == 'pe_combo')</filter> | |
118 </data> | |
119 | |
120 <data name="output_combo_single" format_source="input_combo" label="Singletons from Paired-End interleaved output of ${tool.name} on ${on_string}"> | |
121 <filter>(readtype['single_or_paired'] == 'pe_combo')</filter> | |
122 <filter>(readtype['output_n'] == False)</filter> | |
123 </data> | |
124 <!-- Separate paired-end mode: forward, reverse and singleton outputs --> | |
125 <data name="output_paired1" format_source="input_paired1" label="Paired-End forward strand output of ${tool.name} on ${on_string}"> | |
126 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter> | |
127 </data> | |
128 | |
129 <data name="output_paired2" format_source="input_paired2" label="Paired-End reverse strand output of ${tool.name} on ${on_string}"> | |
130 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter> | |
131 </data> | |
132 | |
133 <data name="output_paired_single" format_source="input_paired1" label="Singletons from Paired-End output of ${tool.name} on ${on_string}"> | |
134 <filter>(readtype['single_or_paired'] == 'pe_sep')</filter> | |
135 </data> | |
136 </outputs> | |
137 | |
138 <help> | |
139 **Sickle - A windowed adaptive trimming tool for FASTQ files using quality** | |
140 | |
141 .. class:: infomark | |
142 | |
143 **About** | |
144 | |
145 Most modern sequencing technologies produce reads that have | |
146 deteriorating quality towards the 3'-end and some towards the 5'-end | |
147 as well. Incorrectly called bases in both regions negatively impact | |
148 assemblies, mapping, and downstream bioinformatics analyses. | |
149 | |
150 Sickle is a tool that uses sliding windows along with quality and | |
151 length thresholds to determine when quality is sufficiently low to | |
152 trim the 3'-end of reads and also determines when the quality is | |
153 sufficiently high to trim the 5'-end of reads. It will also | |
154 discard reads based upon the length threshold. It takes the quality | |
155 values and slides a window across them whose length is 0.1 times the | |
156 length of the read. If this length is less than 1, then the window is | |
157 set to be equal to the length of the read. Otherwise, the window | |
158 slides along the quality values until the average quality in the | |
159 window rises above the threshold, at which point the algorithm | |
160 determines where within the window the rise occurs and cuts the read | |
161 and quality there for the 5'-end cut. Then when the average quality | |
162 in the window drops below the threshold, the algorithm determines | |
163 where in the window the drop occurs and cuts both the read and quality | |
164 strings there for the 3'-end cut. However, if the length of the | |
165 remaining sequence is less than the minimum length threshold, then the | |
166 read is discarded entirely (or replaced with an "N" record). 5'-end | |
167 trimming can be disabled. Sickle also has an option to truncate reads | |
168 with Ns at the first N position. | |
169 | |
170 Sickle supports three types of quality values: Illumina, Solexa, and | |
171 Sanger. Note that the Solexa quality setting is an approximation (the | |
172 actual conversion is a non-linear transformation). The resulting | |
173 approximation is close. Illumina quality refers to qualities encoded | |
174 with the CASAVA pipeline between versions 1.3 and 1.7. Illumina | |
175 quality using CASAVA >= 1.8 is Sanger encoded. The quality value will | |
176 be determined from the datatype of the data, i.e. a fastqsanger datatype | |
177 is assumed to be Sanger encoded. | |
178 | |
179 Note that Sickle will remove the 2nd fastq record header (on the "+" | |
180 line) and replace it with simply a "+". This is the default format for | |
181 CASAVA >= 1.8. | |
182 | |
183 ----- | |
184 | |
185 .. class:: infomark | |
186 | |
187 **Options** | |
188 | |
189 **Single-end** | |
190 | |
191 This option takes one single-end input file and outputs one single-end | |
192 output file of reads that passed the filters. | |
193 | |
194 **Paired-End (one interleaved input file)** | |
195 | |
196 This option takes as input one interleaved paired-end file. If you then | |
197 check the "Output only one file with all reads" checkbox, it will output | |
198 one interleaved file where any read that did not pass filter will be replaced | |
199 with a FastQ record where the sequence is a single "N" and the quality is the | |
200 lowest quality possible for that quality type. This will preserve the paired | |
201 nature of the data. If you leave the checkbox unchecked, it will output two files, | |
202 one interleaved file with all the passed pairs and one singletons file where only | |
203 one of the pair passed filter. | |
204 | |
205 **Paired-End (two separate input files)** | |
206 | |
207 This option takes two separate (forward and reverse) paired-end files as input. | |
208 The output is three files: Two paired-end files with pairs that passed filter and | |
209 a singletons file where only one of the pair passed filter. | |
210 | |
211 **Quality threshold** | |
212 | |
213 Input your desired quality threshold. This threshold is phred-scaled; phred scores | |
214 typically range from 0 to 41 for FastQ data. | |
215 | |
216 **Length threshold** | |
217 | |
218 Input your desired length threshold. This is the threshold to determine if a read is kept | |
219 after all the trimming steps are done. | |
220 | |
221 **Disable 5-prime trimming** | |
222 | |
223 An option to disable trimming the read on the 5-prime end. This trimming trims the read | |
224 if the average quality values dip below the quality threshold at the 5-prime end. | |
225 | |
226 **Truncate sequences with Ns** | |
227 | |
228 This option will trim a read at the first "N" base in the read after doing quality trimming. | |
229 It is then still subject to the length threshold. | |
230 | |
231 ----- | |
232 | |
233 .. class:: infomark | |
234 | |
235 **Citation** | |
236 | |
237 Sickle doesn't have a paper, but you can cite it like this:: | |
238 | |
239 Joshi NA, Fass JN. (2011). Sickle: A sliding-window, adaptive, quality-based trimming tool for FastQ files | |
240 (Version 1.33) [Software]. Available at https://github.com/najoshi/sickle. | |
241 | |
242 ----- | |
243 | |
244 Copyright: Nikhil Joshi | |
245 | |
246 http://bioinformatics.ucdavis.edu | |
247 | |
248 http://github.com/ucdavis-bioinformatics | |
249 | |
250 http://github.com/najoshi | |
251 | |
252 </help> | |
253 | |
254 </tool> |