comparison trimmomatic.xml @ 0:3358c3d30143 draft

Uploaded initial version.
author pjbriggs
date Mon, 01 Dec 2014 10:40:07 -0500
parents
children 2bd7cdbb6228
comparison
equal deleted inserted replaced
-1:000000000000 0:3358c3d30143
1 <tool id="trimmomatic" name="Trimmomatic" version="0.32.1">
2 <description>flexible read trimming tool for Illumina NGS data</description>
3 <command interpreter="bash">trimmomatic.sh
4 -mx8G
5 -jar \$TRIMMOMATIC_DIR/trimmomatic-0.32.jar
6 #if $paired_end.is_paired_end
7 PE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_r1_in $paired_end.fastq_r2_in $fastq_out_r1_paired $fastq_out_r1_unpaired $fastq_out_r2_paired $fastq_out_r2_unpaired
8 #else
9 SE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_in $fastq_out
10 #end if
11 ## ILLUMINACLIP (optional): when enabled it is emitted before every other operation
12 #if $illuminaclip.do_illuminaclip
13 ILLUMINACLIP:\$TRIMMOMATIC_ADAPTERS_DIR/$illuminaclip.adapter_fasta:$illuminaclip.seed_mismatches:$illuminaclip.palindrome_clip_threshold:$illuminaclip.simple_clip_threshold
14 #end if
15 ## Remaining operations: one argument per entry of the "operations" repeat, in the order entered
16 #for $op in $operations
17 ## SLIDINGWINDOW:window_size:required_quality
18 #if str( $op.operation.name ) == "SLIDINGWINDOW"
19 SLIDINGWINDOW:$op.operation.window_size:$op.operation.required_quality
20 #end if
21 ## MINLEN:minlen (remaining operations below each take one integer in the same way)
22 #if str( $op.operation.name ) == "MINLEN"
23 MINLEN:$op.operation.minlen
24 #end if
25 #if str( $op.operation.name ) == "LEADING"
26 LEADING:$op.operation.leading
27 #end if
28 #if str( $op.operation.name ) == "TRAILING"
29 TRAILING:$op.operation.trailing
30 #end if
31 #if str( $op.operation.name ) == "CROP"
32 CROP:$op.operation.crop
33 #end if
34 #if str( $op.operation.name ) == "HEADCROP"
35 HEADCROP:$op.operation.headcrop
36 #end if
37 #end for
38 </command>
39 <requirements>
40 <requirement type="package" version="0.32">trimmomatic</requirement>
41 </requirements>
42 <inputs>
43 <conditional name="paired_end"> <!-- Switches the requested inputs between a single FASTQ file and an R1/R2 pair -->
44 <param name="is_paired_end" type="boolean" label="Paired end data?" truevalue="yes" falsevalue="no" checked="on" />
45 <when value="no"> <!-- single-end: one input dataset (fastq_in) -->
46 <param name="fastq_in" type="data" format="fastqsanger" label="Input FASTQ file" />
47 </when>
48 <when value="yes"> <!-- paired-end: two input datasets (fastq_r1_in, fastq_r2_in) -->
49 <param name="fastq_r1_in" type="data" format="fastqsanger"
50 label="Input FASTQ file (R1/first of pair)" />
51 <param name="fastq_r2_in" type="data" format="fastqsanger"
52 label="Input FASTQ file (R2/second of pair)" />
53 </when>
54 </conditional>
55 <conditional name="illuminaclip"> <!-- Optional adapter clipping; its settings are declared only in the "yes" branch -->
56 <param name="do_illuminaclip" type="boolean" label="Perform initial ILLUMINACLIP step?" help="Cut adapter and other illumina-specific sequences from the read" truevalue="yes" falsevalue="no" checked="off" />
57 <when value="yes">
58 <param name="adapter_fasta" type="select" label="Adapter sequences to use"> <!-- option values are file names; the command prefixes them with TRIMMOMATIC_ADAPTERS_DIR -->
59 <option value="TruSeq2-SE.fa">TruSeq2 (single-ended, for Illumina GAII)</option>
60 <option value="TruSeq3-SE.fa">TruSeq3 (single-ended, for MiSeq and HiSeq)</option>
61 <option value="TruSeq2-PE.fa">TruSeq2 (paired-ended, for Illumina GAII)</option>
62 <option value="TruSeq3-PE.fa">TruSeq3 (paired-ended, for MiSeq and HiSeq)</option>
63 <option value="TruSeq3-PE-2.fa">TruSeq3 (additional seqs) (paired-ended, for MiSeq and HiSeq)</option>
64 <option value="NexteraPE-PE.fa">Nextera (paired-ended)</option>
65 </param>
66 <param name="seed_mismatches" type="integer" label="Maximum mismatch count which will still allow a full match to be performed" value="2" />
67 <param name="palindrome_clip_threshold" type="integer" label="How accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment" value="30" />
68 <param name="simple_clip_threshold" type="integer" label="How accurate the match between any adapter etc. sequence must be against a read" value="10" />
69 </when>
70 </conditional>
71 <repeat name="operations" title="Trimmomatic Operation" min="1"> <!-- Repeatable list of trimming steps; min="1" so at least one entry is always present -->
72 <conditional name="operation"> <!-- Each when-branch below declares the parameters of one operation -->
73 <param name="name" type="select" label="Select Trimmomatic operation to perform">
74 <option selected="true" value="SLIDINGWINDOW">Sliding window trimming (SLIDINGWINDOW)</option>
75 <option value="MINLEN">Drop reads below a specified length (MINLEN)</option>
76 <option value="LEADING">Cut bases off the start of a read, if below a threshold quality (LEADING)</option>
77 <option value="TRAILING">Cut bases off the end of a read, if below a threshold quality (TRAILING)</option>
78 <option value="CROP">Cut the read to a specified length (CROP)</option>
79 <option value="HEADCROP">Cut the specified number of bases from the start of the read (HEADCROP)</option>
80 </param>
81 <when value="SLIDINGWINDOW">
82 <param name="window_size" type="integer" label="Number of bases to average across" value="4" />
83 <param name="required_quality" type="integer" label="Average quality required" value="20" />
84 </when>
85 <when value="MINLEN">
86 <param name="minlen" type="integer" label="Minimum length of reads to be kept" value="20" />
87 </when>
88 <when value="LEADING">
89 <param name="leading" type="integer" label="Minimum quality required to keep a base" value="3" help="Bases at the start of the read with quality below the threshold will be removed" />
90 </when>
91 <when value="TRAILING">
92 <param name="trailing" type="integer" label="Minimum quality required to keep a base" value="3" help="Bases at the end of the read with quality below the threshold will be removed" />
93 </when>
94 <when value="CROP"> <!-- NOTE(review): value is empty, so no default is offered; presumably the user must supply one - confirm -->
95 <param name="crop" type="integer" label="Number of bases to keep from the start of the read" value="" />
96 </when>
97 <when value="HEADCROP"> <!-- NOTE(review): value is empty here as well, so no default is offered - confirm this is intended -->
98 <param name="headcrop" type="integer" label="Number of bases to remove from the start of the read" value="" />
99 </when>
100 </conditional>
101 </repeat>
102 </inputs>
103 <outputs> <!-- Four R1/R2 paired/unpaired datasets are filtered on is_paired_end; fastq_out is filtered on its negation -->
104 <data format="fastqsanger" name="fastq_out_r1_paired" label="${tool.name} on ${on_string} (R1 paired)">
105 <filter>paired_end['is_paired_end']</filter>
106 </data>
107 <data format="fastqsanger" name="fastq_out_r1_unpaired" label="${tool.name} on ${on_string} (R1 unpaired)">
108 <filter>paired_end['is_paired_end']</filter>
109 </data>
110 <data format="fastqsanger" name="fastq_out_r2_paired" label="${tool.name} on ${on_string} (R2 paired)">
111 <filter>paired_end['is_paired_end']</filter>
112 </data>
113 <data format="fastqsanger" name="fastq_out_r2_unpaired" label="${tool.name} on ${on_string} (R2 unpaired)">
114 <filter>paired_end['is_paired_end']</filter>
115 </data>
116 <data format="fastqsanger" name="fastq_out" label="${tool.name} on ${on_string}"> <!-- single-end output -->
117 <filter>not paired_end['is_paired_end']</filter>
118 </data>
119 </outputs>
120 <tests>
121 <test>
122 <!-- Single-end example: SLIDINGWINDOW selected, no window parameters overridden -->
123 <param name="is_paired_end" value="no" />
124 <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
125 <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
126 <!--
127 **NB** outputs have to be specified in the order that they appear in the
128 tool (which is the order they will be written to the history) - the
129 test framework seems to use the order and to ignore the "name" attribute
130 -->
131 <output name="fastq_out" file="trimmomatic_se_out1.fastq" />
132 </test>
133 <test>
134 <!-- Paired-end example: SLIDINGWINDOW selected, no window parameters overridden -->
135 <param name="is_paired_end" value="yes" />
136 <param name="fastq_r1_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
137 <param name="fastq_r2_in" value="Illumina_SG_R2.fastq" ftype="fastqsanger" />
138 <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
139 <!--
140 **NB** outputs have to be specified in the order that they appear in the
141 tool (which is the order they will be written to the history) - the
142 test framework seems to use the order and to ignore the "name" attribute
143 -->
144 <output name="fastq_out_r1_paired" file="trimmomatic_pe_r1_paired_out1.fastq" />
145 <output name="fastq_out_r1_unpaired" file="trimmomatic_pe_r1_unpaired_out1.fastq" />
146 <output name="fastq_out_r2_paired" file="trimmomatic_pe_r2_paired_out1.fastq" />
147 <output name="fastq_out_r2_unpaired" file="trimmomatic_pe_r2_unpaired_out1.fastq" />
148 </test>
149 <test>
150 <!-- Single-end example (cropping): CROP with crop set to 10 -->
151 <param name="is_paired_end" value="no" />
152 <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
153 <param name="operations_0|operation|name" value="CROP" />
154 <param name="operations_0|operation|crop" value="10" />
155 <!--
156 **NB** outputs have to be specified in the order that they appear in the
157 tool (which is the order they will be written to the history) - the
158 test framework seems to use the order and to ignore the "name" attribute
159 -->
160 <output name="fastq_out" file="trimmomatic_se_out2.fastq" />
161 </test>
162 </tests>
163 <help>
164 .. class:: infomark
165
166 **What it does**
167
168 Trimmomatic performs a variety of useful trimming tasks for Illumina paired-end and
169 single-end data.
170
171 This tool allows the following trimming steps to be performed:
172
173 * **ILLUMINACLIP:** Cut adapter and other Illumina-specific sequences from the read
174 * **SLIDINGWINDOW:** Perform a sliding window trimming, cutting once the average
175 quality within the window falls below a threshold
176 * **MINLEN:** Drop the read if it is below a specified length
177 * **LEADING:** Cut bases off the start of a read, if below a threshold quality
178 * **TRAILING:** Cut bases off the end of a read, if below a threshold quality
179 * **CROP:** Cut the read to a specified length
180 * **HEADCROP:** Cut the specified number of bases from the start of the read
181
182 If ILLUMINACLIP is requested then it is always performed first; subsequent options
183 can be mixed and matched and will be performed in the order that they have been
184 specified.
185
186 .. class:: warningmark
187
188 Note that trimming operation order is important.
189
190 -------------
191
192 .. class:: infomark
193
194 **Outputs**
195
196 For paired-end data a particular strength of Trimmomatic is that it retains the
197 pairing of reads (from R1 and R2) in the filtered output files:
198
199 * Two FASTQ files (R1-paired and R2-paired) contain one read from each pair where
200 both have survived filtering.
201 * Additionally two FASTQ files (R1-unpaired and R2-unpaired) contain reads where
202 one of the pair failed the filtering steps.
203
204 Retaining the same order and number of reads in the filtered output fastq files is
205 essential for many downstream analysis tools.
206
207 For single-end data the output is a single FASTQ file containing just the filtered
208 reads.
209
210 -------------
211
212 .. class:: infomark
213
214 **Credits**
215
216 This Galaxy tool has been developed within the Bioinformatics Core Facility at the
217 University of Manchester. It runs the Trimmomatic program which has been developed
218 within Bjorn Usadel's group at RWTH Aachen university.
219
220 Trimmomatic website (including documentation):
221
222 * http://www.usadellab.org/cms/index.php?page=trimmomatic
223
224 The reference for Trimmomatic is:
225
226 * Lohse M, Bolger AM, Nagel A, Fernie AR, Lunn JE, Stitt M, Usadel B. RobiNA: a
227 user-friendly, integrated software solution for RNA-Seq-based transcriptomics.
228 Nucleic Acids Res. 2012 Jul;40(Web Server issue):W622-7.
229
230 Please kindly acknowledge both this Galaxy tool and the Trimmomatic program if you
231 use it.
232 </help>
233 </tool>