Mercurial > repos > lparsons > cutadapt
comparison cutadapt.xml @ 11:8665bcc8b847 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cutadapt commit 07c4e473990f522bbe8254ddeded47ed5f3b2fe4
author | iuc |
---|---|
date | Mon, 05 Mar 2018 11:02:48 -0500 |
parents | 01d94df2e32a |
children | 78e1cf88d133 |
comparison
equal
deleted
inserted
replaced
10:01d94df2e32a | 11:8665bcc8b847 |
---|---|
1 <tool id="cutadapt" name="Cutadapt" version="1.6"> | 1 <tool id="cutadapt" name="Cutadapt" version="1.16"> |
2 <description>Remove adapter sequences from Fastq/Fasta</description> | 2 <description>Remove adapter sequences from Fastq/Fasta</description> |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
3 <requirements> | 6 <requirements> |
4 <requirement type="package" version="1.6">cutadapt</requirement> | 7 <requirement type="package" version="1.16">cutadapt</requirement> |
5 </requirements> | 8 </requirements> |
6 | 9 |
7 <stdio> | |
8 <exit_code range="1" level="fatal" description="IOError, FormatError, or Interrupt" /> | |
9 <exit_code range="2" level="fatal" description="Invalid options specified" /> | |
10 <exit_code range="3:" level="fatal" description="Unknown error" /> | |
11 </stdio> | |
12 | |
13 <version_command>cutadapt --version</version_command> | 10 <version_command>cutadapt --version</version_command> |
14 | 11 |
15 <command>cutadapt | 12 <command detect_errors="exit_code"><![CDATA[ |
16 #if $input.extension.startswith( "fastq"): | 13 |
17 --format=fastq | 14 ## Link in the input and output files, so Cutadapt can tell their type |
18 #if $input.extension == "fastqillumina": | 15 |
19 --quality-base=64 | 16 #set compressed="False" |
17 #set format = "fastq" | |
18 | |
19 #if str($library.type) == 'paired': | |
20 | |
21 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): | |
22 #set read1 = "input_f.fastq.gz" | |
23 #set compressed = "GZ" | |
24 #set out1 = "out1.gz" | |
25 #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
26 #set read1 = "input_f.fastq.bz2" | |
27 #set compressed = "BZ2" | |
28 #set out1 = "out1.bz2" | |
29 #else if $library.input_1.is_of_type('fasta'): | |
30 #set format = "fasta" | |
31 #set read1 = "input_f.fasta" | |
32 #set out1 = "out1.fa" | |
33 #else: | |
34 #set read1 = "input_f.fastq" | |
35 #set out1 = "out1.fq" | |
20 #end if | 36 #end if |
21 #if $input.extension == "fastqsolexa": | 37 ln -f -s '${library.input_1}' ${read1} && |
22 --quality-base=64 | 38 |
39 #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"): | |
40 #set read2 = "input_r.fastq.gz" | |
41 #set compressed = "GZ" | |
42 #set out2 = "out2.gz" | |
43 #else if $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
44 #set read2 = "input_r.fastq.bz2" | |
45 #set compressed = "BZ2" | |
46 #set out2 = "out2.bz2" | |
47 #else if $library.input_2.is_of_type('fasta'): | |
48 #set read2 = "input_r.fasta" | |
49 #set out2 = "out2.fa" | |
50 #set format = "fasta" | |
51 #else: | |
52 #set read2 = "input_r.fastq" | |
53 #set out2 = "out2.fq" | |
23 #end if | 54 #end if |
24 #else | 55 ln -f -s '${library.input_2}' ${read2} && |
25 --format=$input.extension | 56 |
26 #end if | 57 |
27 #for $a in $adapters | 58 #else if str($library.type) == 'paired_collection': |
28 #if $a.adapter_source.adapter_source_list == 'prebuilt': | 59 |
29 --adapter="${a.adapter_source.adapter.fields.name}"='${a.adapter_source.adapter}' | 60 #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"): |
30 #else if str($a.adapter_source.adapter_name) != "": | 61 #set read1 = "input_f.fastq.gz" |
31 --adapter='${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}' | 62 #set compressed = "GZ" |
32 #else | 63 #set out1 = "out1.gz" |
33 --adapter='${a.adapter_source.adapter}' | 64 #else if $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
65 #set read1 = "input_f.fastq.bz2" | |
66 #set compressed = "BZ2" | |
67 #set out1 = "out1.bz2" | |
68 #else if $library.input_1.forward.is_of_type('fasta'): | |
69 #set format = "fasta" | |
70 #set read1 = "input_f.fasta" | |
71 #set out1 = "out1.fa" | |
72 #else: | |
73 #set read1 = "input_f.fastq" | |
74 #set out1 = "out1.fq" | |
34 #end if | 75 #end if |
35 #end for | 76 ln -s '${library.input_1.forward}' ${read1} && |
36 #for $aa in $anywhere_adapters | 77 |
37 #if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'prebuilt': | 78 #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"): |
38 --anywhere="${aa.anywhere_adapter_source.anywhere_adapter.fields.name}"='${aa.anywhere_adapter_source.anywhere_adapter}' | 79 #set read2 = "input_r.fastq.gz" |
39 #else if str($aa.anywhere_adapter_source.anywhere_adapter_name) != "": | 80 #set compressed = "GZ" |
40 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter_name}'='${aa.anywhere_adapter_source.anywhere_adapter}' | 81 #set out2 = "out2.gz" |
41 #else | 82 #else if $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
42 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}' | 83 #set read2 = "input_r.fastq.bz2" |
84 #set compressed = "BZ2" | |
85 #set out2 = "out2.bz2" | |
86 #else if $library.input_1.reverse.is_of_type("fasta"): | |
87 #set format = "fasta" | |
88 #set read2 = "input_r.fasta" | |
89 #set out2 = "out2.fa" | |
90 #else: | |
91 #set read2 = "input_r.fastq" | |
92 #set out2 = "out2.fq" | |
43 #end if | 93 #end if |
44 #end for | 94 ln -s '${library.input_1.reverse}' ${read2} && |
45 #for $fa in $front_adapters | 95 |
46 #if $fa.front_adapter_source.front_adapter_source_list == 'prebuilt': | 96 #else: |
47 --front="${fa.front_adapter_source.front_adapter.fields.name}"='${fa.front_adapter_source.front_adapter}' | 97 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): |
48 #else if str($fa.front_adapter_source.front_adapter_name) != "": | 98 #set read1 = "input_f.fastq.gz" |
49 --front='${fa.front_adapter_source.front_adapter_name}'='${fa.front_adapter_source.front_adapter}' | 99 #set compressed = "GZ" |
50 #else | 100 #set out1 = "out1.gz" |
51 --front='${fa.front_adapter_source.front_adapter}' | 101 #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
102 #set read1 = "input_f.fastq.bz2" | |
103 #set compressed = "BZ2" | |
104 #set out1 = "out1.bz2" | |
105 #else if $library.input_1.is_of_type('fasta'): | |
106 #set format = "fasta" | |
107 #set read1 = "input_f.fasta" | |
108 #set out1 = "out1.fa" | |
109 #else: | |
110 #set read1 = "input_f.fastq" | |
111 #set out1 = "out1.fq" | |
52 #end if | 112 #end if |
53 #end for | 113 ln -f -s '${library.input_1}' ${read1} && |
54 --error-rate=$error_rate | 114 #end if |
55 --times=$count | 115 |
56 --overlap=$overlap | 116 |
57 $no_indels | 117 ## Run Cutadapt |
58 $match_read_wildcards | 118 |
59 | 119 cutadapt |
60 #if str( $output_filtering_options.output_filtering) == "filter": | 120 |
61 $output_filtering_options.discard | 121 -j \${GALAXY_SLOTS:-4} |
62 $output_filtering_options.discard_untrimmed | 122 |
63 $output_filtering_options.no_trim | 123 --format=$format |
64 $output_filtering_options.mask_adapter | 124 |
65 #if str($output_filtering_options.min) != '0': | 125 #if str( $library.type ) == "single": |
66 --minimum-length=$output_filtering_options.min | 126 @read1_options@ |
127 --output='$out1' | |
128 #else: | |
129 @read1_options@ | |
130 @read2_options@ | |
131 --output='$out1' | |
132 --paired-output='$out2' | |
133 #end if | |
134 | |
135 --error-rate=$adapter_options.error_rate | |
136 --times=$adapter_options.count | |
137 --overlap=$adapter_options.overlap | |
138 $adapter_options.no_indels | |
139 $adapter_options.match_read_wildcards | |
140 | |
141 | |
142 $filter_options.discard | |
143 $filter_options.discard_untrimmed | |
144 $filter_options.no_trim | |
145 $filter_options.mask_adapter | |
146 #if str($filter_options.min) != '0': | |
147 --minimum-length=$filter_options.min | |
148 #end if | |
149 #if str($filter_options.max) != '0': | |
150 --maximum-length=$filter_options.max | |
151 #end if | |
152 #if $filter_options.max_n: | |
153 --max-n=$filter_options.max_n | |
154 #end if | |
155 #if str( $library.type ) != "single": | |
156 #if $filter_options.pair_filter: | |
157 --pair-filter=$filter_options.pair_filter | |
67 #end if | 158 #end if |
68 #if str($output_filtering_options.max) != '0': | 159 #end if |
69 --maximum-length=$output_filtering_options.max | 160 |
70 #end if | 161 |
71 #end if | 162 #if str($read_mod_options.quality_cutoff) != '0': |
72 | 163 --quality-cutoff=$read_mod_options.quality_cutoff |
73 --output='$output' | 164 #end if |
74 | 165 #if str($read_mod_options.nextseq_trim) != '0': |
75 #if $paired_end.paired_end_boolean: | 166 --nextseq-trim=$read_mod_options.nextseq_trim |
76 --paired-output='$paired_output' | 167 #end if |
77 #end if | 168 $read_mod_options.trim_n |
78 | 169 #if $read_mod_options.prefix != '': |
79 #if str( $output_params.output_type ) == "additional": | 170 --prefix="$read_mod_options.prefix" |
80 #if $output_params.rest_file: | 171 #end if |
81 --rest-file=$rest_output | 172 #if $read_mod_options.suffix != '': |
82 #end if | 173 --suffix="$read_mod_options.suffix" |
83 #if $output_params.wildcard_file: | 174 #end if |
84 --wildcard-file=$wild_output | 175 #if str($read_mod_options.length) != '0': |
85 #end if | 176 --length=$read_mod_options.length |
86 #if $output_params.too_short_file: | 177 #end if |
87 --too-short-output=$too_short_output | 178 #if $read_mod_options.length_tag != '': |
88 #end if | 179 --length-tag="$read_mod_options.length_tag" |
89 #if $output_params.too_long_file: | 180 #end if |
90 --too-long-output=$too_long_output | 181 |
91 #end if | 182 #if str( $library.type ) == "single": |
92 #if $output_params.untrimmed_file: | 183 '${read1}' |
93 --untrimmed-output=$untrimmed_output | 184 #else: |
94 #if $paired_end.paired_end_boolean: | 185 '${read1}' |
95 --untrimmed-paired-output=$untrimmed_paired_output | 186 '${read2}' |
96 #end if | 187 #end if |
97 #end if | 188 |
98 #if $output_params.info_file: | 189 #if $output_options.report: |
99 --info-file=$info_file | 190 > report.txt |
100 #end if | 191 #end if |
101 | 192 |
102 #end if | 193 ]]></command> |
103 | 194 |
104 #if str( $read_modification_params.read_modification) == "modify": | |
105 #if str($read_modification_params.quality_cutoff) != '0': | |
106 --quality-cutoff=$read_modification_params.quality_cutoff | |
107 #end if | |
108 #if str($read_modification_params.cut) != '0': | |
109 --cut=$read_modification_params.cut | |
110 #end if | |
111 #if $read_modification_params.prefix != '': | |
112 --prefix="$read_modification_params.prefix" | |
113 #end if | |
114 #if $read_modification_params.suffix != '': | |
115 --suffix="$read_modification_params.suffix" | |
116 #end if | |
117 #if $read_modification_params.length_tag != '': | |
118 --length-tag="$read_modification_params.length_tag" | |
119 #end if | |
120 $read_modification_params.zero_cap | |
121 #end if | |
122 | |
123 '$input' | |
124 | |
125 #if $paired_end.paired_end_boolean: | |
126 '$input2' | |
127 #end if | |
128 | |
129 > $report | |
130 </command> | |
131 | |
132 <inputs> | 195 <inputs> |
133 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/> | 196 |
134 <conditional name="paired_end"> | 197 <!-- Reads --> |
135 <param name="paired_end_boolean" type="boolean" value="false" label="Track Paired Reads" help="This option will keep a second file synchronized if you use one of the filtering options that discards reads. It will NOT trim adapters off of the second read. You must run Cutadapt a second time on the output of the first run to trim adapters from both reads (see Cutadapt documentation for details)." /> | 198 <conditional name="library"> |
136 <when value="true"> | 199 <param name="type" type="select" label="Single-end or Paired-end reads?"> |
137 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input2" type="data" optional="false" label="Paired fastq file (NOT trimmed)" length="100"/> | 200 <option value="single">Single-end</option> |
201 <option value="paired">Paired-end</option> | |
202 <option value="paired_collection">Paired-end Collection</option> | |
203 </param> | |
204 | |
205 <when value="single"> | |
206 <param name="input_1" format="fastq.gz,fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTQ/A file" help="Should be of datatype "fastq.gz" or "fasta"" /> | |
207 <expand macro="single_end_options" /> | |
138 </when> | 208 </when> |
139 <when value="false" /> | 209 |
140 </conditional> | 210 <when value="paired"> |
141 | 211 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTQ/A file #1" help="Should be of datatype "fastq.gz" or "fasta"" /> |
142 <repeat name="adapters" title="3' Adapters" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed."> | 212 <param name="input_2" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTQ/A file #2" help="Should be of datatype "fastq.gz" or "fasta"" /> |
143 <conditional name="adapter_source"> | 213 <expand macro="paired_end_options" /> |
144 <param name="adapter_source_list" type="select" label="Source" > | 214 </when> |
145 <option value="prebuilt" selected="true">Standard (select from the list below)</option> | 215 |
146 <option value="user">Enter custom sequence</option> | 216 <when value="paired_collection"> |
147 </param> | 217 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Should be of datatype "fastq.gz" or "fasta"" /> |
148 | 218 <expand macro="paired_end_options" /> |
149 <when value="user"> | |
150 <param name="adapter_name" size="30" label="Enter custom 3' adapter name (Optional)" type="text" value="" /> | |
151 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" /> | |
152 </when> | |
153 | |
154 <when value="prebuilt"> | |
155 <param name="adapter" type="select" label="Choose 3' adapter"> | |
156 <options from_file="cutadapt_adapters.txt"> | |
157 <column name="name" index="1"/> | |
158 <column name="value" index="0"/> | |
159 </options> | |
160 </param> | |
161 </when> | |
162 </conditional> | |
163 </repeat> | |
164 | |
165 <repeat name="anywhere_adapters" title="5' or 3' (Anywhere) Adapters" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed."> | |
166 <conditional name="anywhere_adapter_source"> | |
167 <param name="anywhere_adapter_source_list" type="select" label="Source"> | |
168 <option value="prebuilt" selected="true">Standard (select from the list below)</option> | |
169 <option value="user">Enter custom sequence</option> | |
170 </param> | |
171 | |
172 <when value="user"> | |
173 <param name="anywhere_adapter_name" size="30" label="Enter custom 5' or 3' adapter name (Optional)" type="text" value="" /> | |
174 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" /> | |
175 </when> | |
176 <when value="prebuilt"> | |
177 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter"> | |
178 <options from_file="cutadapt_adapters.txt"> | |
179 <column name="name" index="1"/> | |
180 <column name="value" index="0"/> | |
181 </options> | |
182 </param> | |
183 </when> | |
184 </conditional> | |
185 </repeat> | |
186 | |
187 <repeat name="front_adapters" title="5' (Front) Adapters" help="Sequence of an adapter that was ligated to the 5' end. If the adapter sequence starts with the character '^', the adapter is 'anchored'. An anchored adapter must appear in its entirety at the 5' end of the read (it is a prefix of the read). A non-anchored adapter may appear partially at the 5' end, or it may occur within the read. If it is found within a read, the sequence preceding the adapter is also trimmed. In all cases the adapter itself is trimmed."> | |
188 <conditional name="front_adapter_source"> | |
189 <param name="front_adapter_source_list" type="select" label="Source"> | |
190 <option value="prebuilt" selected="true">Standard (select from the list below)</option> | |
191 <option value="user">Enter custom sequence</option> | |
192 </param> | |
193 | |
194 <when value="user"> | |
195 <param name="front_adapter_name" size="30" label="Enter custom 5' adapter name (Optional)" type="text" value="" /> | |
196 <param name="front_adapter" size="30" label="Enter custom 5' adapter sequence" type="text" value="AATTGGCC" /> | |
197 </when> | |
198 <when value="prebuilt"> | |
199 <param name="front_adapter" type="select" label="Choose 5' adapter"> | |
200 <options from_file="cutadapt_adapters.txt"> | |
201 <column name="name" index="1"/> | |
202 <column name="value" index="0"/> | |
203 </options> | |
204 </param> | |
205 </when> | |
206 </conditional> | |
207 </repeat> | |
208 | |
209 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> | |
210 <param name="no_indels" type="boolean" value="false" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." /> | |
211 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> | |
212 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." /> | |
213 <param name="match_read_wildcards" type="boolean" value="false" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." /> | |
214 | |
215 <conditional name="output_filtering_options"> | |
216 <param name="output_filtering" type="select" label="Output filtering options" help="Options for filtering processed reads by those that contain the adapter or by minimum or maximum length"> | |
217 <option value="default">Default (no filtering)</option> | |
218 <option value="filter">Set Filters</option> | |
219 </param> | |
220 <when value="default" /> | |
221 <when value="filter"> | |
222 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> | |
223 <param name="discard_untrimmed" type="boolean" value="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." /> | |
224 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." /> | |
225 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." /> | |
226 <param name="no_trim" type="boolean" value="false" truevalue="--no-trim" falsevalue="" label="Do not trim adapters" help="Match and redirect reads to output/untrimmed-output as usual, but don't remove the adapters (default: trim the adapters)." /> | |
227 <param name="mask_adapter" type="boolean" value="false" truevalue="--mask-adapter" falsevalue="" label="Mask Adapters" help="Mask adapter bases with 'N' instead of trimming them (default: trim adapters)." /> | |
228 </when> | 219 </when> |
229 </conditional> | 220 </conditional> |
230 | 221 |
231 <conditional name="output_params"> | 222 <!-- Adapter Options --> |
232 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files."> | 223 <section name="adapter_options" title="Adapter Options"> |
233 <option value="default">Default</option> | 224 <param name="error_rate" argument="--error-rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> |
234 <option value="additional">Additional output files</option> | 225 <param name="no_indels" argument="--no-indels" type="boolean" value="False" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." /> |
226 <param name="count" argument="--times" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> | |
227 <param name="overlap" argument="--overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." /> | |
228 <param name="match_read_wildcards" argument="--match-read-wildcards" type="boolean" value="False" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." /> | |
229 </section> | |
230 | |
231 <!-- Filter Options --> | |
232 <section name="filter_options" title="Filter Options"> | |
233 <param name="discard" argument="--discard-trimmed" type="boolean" value="False" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> | |
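223 <param name="discard_untrimmed" type="boolean" value="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." /> | 234 <param name="discard_untrimmed" argument="--discard-untrimmed" type="boolean" value="False" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." /> |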
235 <param name="min" argument="--minimum-length" type="integer" min="0" optional="True" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." /> | |
236 <param name="max" argument="--maximum-length" type="integer" min="0" optional="True" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." /> | |
237 <param name="no_trim" argument="--no-trim" type="boolean" value="False" truevalue="--no-trim" falsevalue="" label="Do not trim adapters" help="Match and redirect reads to output/untrimmed-output as usual, but don't remove the adapters (default: trim the adapters)." /> | |
238 <param name="mask_adapter" argument="--mask-adapter" type="boolean" value="False" truevalue="--mask-adapter" falsevalue="" label="Mask Adapters" help="Mask adapter bases with 'N' instead of trimming them (default: trim adapters)." /> | |
239 <param name="max_n" argument="--max-n" type="float" min="0" optional="True" label="Max N" help="Discard reads with more than this number of 'N' bases. A number between 0 and 1 is interpreted as a fraction of the read length." /> | |
240 <param name="pair_filter" argument="--pair-filter" type="select" optional="True" label="Pair filter" help="Which of the reads in a paired-end read have to match the filtering criterion in order for the pair to be filtered. Default: any"> | |
241 <option value="any" selected="True">any</option> | |
242 <option value="both">both</option> | |
235 </param> | 243 </param> |
236 <when value="default" /> | 244 </section> |
237 <when value="additional"> | 245 |
238 <param name="info_file" type="boolean" value="false" label="Info File" help="Write information about each read and its adapter matches to a file."/> | 246 <!-- Read Modification Options --> |
239 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/> | 247 <section name="read_mod_options" title="Read Modification Options"> |
240 <param name="wildcard_file" type="boolean" value="false" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/> | 248 <param name="quality_cutoff" argument="--quality-cutoff" type="text" value="0" label="Quality cutoff" help=" Trim low-quality bases from 5' and/or 3' ends of each read before adapter removal. Applied to both reads if data is paired. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second."> |
241 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/> | 249 <sanitizer> |
242 <param name="too_long_file" type="boolean" value="false" label="Too Long Reads" help="Write reads that are too long (according to maximum length specified) to a file. (default: discard reads)"/> | 250 <valid initial="string.digits"><add value="," /></valid> |
243 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/> | 251 </sanitizer> |
244 </when> | |
245 </conditional> | |
246 | |
247 <conditional name="read_modification_params"> | |
248 <param name="read_modification" type="select" label="Additional modifications to reads" help="Various options to trim reads based on quality, modify read names and quality scores"> | |
249 <option value="none">No Read Modifications</option> | |
250 <option value="modify">Set Modification Options</option> | |
251 </param> | 252 </param> |
252 <when value="none" /> | 253 <param name="nextseq_trim" argument="--nextseq-trim" type="integer" value="0" label="NextSeq trimming" help="Experimental option for quality trimming of NextSeq data. This is necessary because that machine cannot distinguish between G and reaching the end of the fragment (it encodes G as ‘black’). This option works like regular quality trimming (where one would use -q 20 instead), except that the qualities of G bases are ignored." /> |
253 <when value="modify"> | 254 <param name="trim_n" argument="--trim-n" type="boolean" truevalue="--trim-n" falsevalue="" checked="False" label="Trim Ns" help="Trim N's on ends of reads." /> |
254 <param name="cut" type="integer" optional="true" value="0" label="Cut bases from reads before adapter trimming" help="Remove bases from the beginning or end of each read before trimming adapters. If positive, the bases are removed from the beginning of each read. If negative, the bases are removed from the end of each read." /> | 255 <param name="prefix" argument="--prefix" label="Prefix" type="text" help="Add this prefix to read names" /> |
255 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." /> | 256 <param name="suffix" argument="--suffix" label="Suffix" type="text" help="Add this suffix to read names" /> |
256 <param name="prefix" label="Prefix" type="text" help="Add this prefix to read names" /> | 257 <param name="strip_suffix" argument="--strip-suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." /> |
257 <param name="suffix" label="Suffix" type="text" help="Add this suffix to read names" /> | 258 <param name="length" argument="--length" type="integer" value="0" label="Length" help="Shorten reads to this length. This modification is applied after adapter trimming." /> |
258 <param name="strip_suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." /> | 259 <param name="length_tag" argument="--length-tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." /> |
259 <param name="length_tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." /> | 260 </section> |
260 <param name="zero_cap" type="boolean" value="false" label="Change negative quality values to zero (0)" truevalue="--zero-cap" falsevalue="" help="Workaround to avoid segmentation faults in BWA" /> | 261 |
261 </when> | 262 <!-- Output Options --> |
262 </conditional> | 263 <section name="output_options" title="Output Options"> |
264 <param name="report" type="boolean" value="False" label="Report" help="Cutadapt's per-adapter statistics."/> | |
265 <param name="info_file" argument="--info-file" type="boolean" value="False" label="Info File" help="Write information about each read and its adapter matches to a file."/> | |
266 <param name="rest_file" argument="--rest-file" type="boolean" value="False" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/> | |
267 <param name="wildcard_file" argument="--wildcard-file" type="boolean" value="False" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/> | |
268 <param name="too_short_file" argument="--too-short-output" type="boolean" value="False" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/> | |
269 <param name="too_long_file" argument="--too-long-output" type="boolean" value="False" label="Too Long Reads" help="Write reads that are too long (according to maximum length specified) to a file. (default: discard reads)"/> | |
270 <param name="untrimmed_file" argument="--untrimmed-output" type="boolean" value="False" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/> | |
271 </section> | |
272 | |
263 </inputs> | 273 </inputs> |
264 | 274 |
265 <outputs> | 275 <outputs> |
266 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" /> | 276 <data name="out1" format_source="input_1" metadata_source="input_1" from_work_dir="out1*" label="${tool.name} on ${on_string}: Read 1 Output"/> |
267 <data format_source="input" name="output" metadata_source="input" label="${tool.name} on ${on_string} (Reads)"/> | 277 <data name="out2" format_source="input_2" metadata_source="input_2" from_work_dir="out2*" label="${tool.name} on ${on_string}: Read 2 Output" > |
268 <data format_source="input" name="paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Paired Reads)" > | 278 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> |
269 <filter>(paired_end['paired_end_boolean'] is True)</filter> | 279 </data> |
270 </data> | 280 |
271 <data format_source="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" > | 281 <data name="report" format="txt" from_work_dir="report.txt" label="${tool.name} on ${on_string}: Report"> |
272 <filter>(output_params['output_type'] == "additional")</filter> | 282 <filter>(output_options['report'] is True)</filter> |
273 <filter>(output_params['rest_file'] is True)</filter> | 283 </data> |
274 </data> | 284 |
275 <data format_source="txt" name="wild_output" metadata_source="input" label="${tool.name} on ${on_string} (Wildcard File)" > | 285 <data name="info_file" format_source="txt" metadata_source="input_1" label="${tool.name} on ${on_string}: Info File" > |
276 <filter>(output_params['output_type'] == "additional")</filter> | 286 <filter>(output_options['info_file'] is True)</filter> |
277 <filter>(output_params['wildcard_file'] is True)</filter> | 287 </data> |
278 </data> | 288 |
279 <data format_source="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" > | 289 <data name="rest_output" format_source="input_1" metadata_source="input_1" label="${tool.name} on ${on_string}: Rest of Reads (R1 only)" > |
280 <filter>(output_params['output_type'] == "additional")</filter> | 290 <filter>(output_options['rest_file'] is True)</filter> |
281 <filter>(output_params['too_short_file'] is True)</filter> | 291 </data> |
282 </data> | 292 |
283 <data format_source="input" name="too_long_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Long Reads)" > | 293 <data name="wild_output" format_source="txt" metadata_source="input_1" label="${tool.name} on ${on_string}: Wildcard File" > |
284 <filter>(output_params['output_type'] == "additional")</filter> | 294 <filter>(output_options['wildcard_file'] is True)</filter> |
285 <filter>(output_params['too_long_file'] is True)</filter> | 295 </data> |
286 </data> | 296 |
287 <data format_source="input" name="untrimmed_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Reads)" > | 297 <data name="untrimmed_output" format_source="input_1" metadata_source="input_1" label="${tool.name} on ${on_string}: Untrimmed Read 1" > |
288 <filter>(output_params['output_type'] == "additional")</filter> | 298 <filter>(output_options['untrimmed_file'] is True)</filter> |
289 <filter>(output_params['untrimmed_file'] is True)</filter> | 299 </data> |
290 </data> | 300 <data name="untrimmed_paired_output" format_source="input_2" metadata_source="input_2" label="${tool.name} on ${on_string}: Untrimmed Read 2" > |
291 <data format_source="input" name="untrimmed_paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Paired Reads)" > | 301 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> |
292 <filter>(paired_end['paired_end_boolean'] is True)</filter> | 302 <filter>(output_options['untrimmed_file'] is True)</filter> |
293 <filter>(output_params['output_type'] == "additional")</filter> | 303 </data> |
294 <filter>(output_params['untrimmed_file'] is True)</filter> | 304 |
295 </data> | 305 <data name="too_short_output" format_source="input_1" metadata_source="input_1" label="${tool.name} on ${on_string}: Too Short Read 1" > |
296 <data format_source="txt" name="info_file" metadata_source="input" label="${tool.name} on ${on_string} (Info File)" > | 306 <filter>(output_options['too_short_file'] is True)</filter> |
297 <filter>(output_params['output_type'] == "additional")</filter> | 307 </data> |
298 <filter>(output_params['info_file'] is True)</filter> | 308 <data name="too_short_paired_output" format_source="input_2" metadata_source="input_2" label="${tool.name} on ${on_string}: Too Short Read 2" > |
309 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> | |
310 <filter>(output_options['too_short_file'] is True)</filter> | |
311 </data> | |
312 | |
313 <data name="too_long_output" format_source="input_1" metadata_source="input_1" label="${tool.name} on ${on_string}: Too Long Read 1" > | |
314 <filter>(output_options['too_long_file'] is True)</filter> | |
315 </data> | |
316 <data name="too_long_paired_output" format_source="input_2" metadata_source="input_2" label="${tool.name} on ${on_string}: Too Long Read 2" > | |
317 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> | |
318 <filter>(output_options['too_long_file'] is True)</filter> | |
299 </data> | 319 </data> |
300 </outputs> | 320 </outputs> |
301 | 321 |
302 <tests> | 322 <tests> |
303 <test> | 323 <!-- Ensure fastq works --> |
304 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> | 324 <test> |
305 <param name="anywhere_adapter_source_list" value="user"/> | 325 <param name="type" value="single" /> |
306 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/> | 326 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> |
307 <param name="output_filtering" value="default"/> | 327 <param name="adapter_source_list" value="user"/> |
308 <param name="read_modification" value="none"/> | 328 <param name="adapter" value="AGATCGGAAGAGC"/> |
309 <param name="output_type" value="default"/> | 329 <output name="out1" file="cutadapt_small.out"/> |
310 <output name="output" file="cutadapt_small.out"/> | 330 </test> |
311 </test> | 331 <!-- Ensure single end fastq.gz works --> |
312 <test> | 332 <test> |
313 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> | 333 <param name="type" value="single" /> |
334 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" /> | |
335 <param name="adapter_source_list" value="user"/> | |
336 <param name="adapter" value="AGATCGGAAGAGC"/> | |
337 <output name="out1" decompress="True" file="cutadapt_out1.fq.gz"/> | |
338 </test> | |
339 <!-- Ensure paired end fastq.gz works --> | |
340 <test> | |
341 <param name="type" value="paired" /> | |
342 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" /> | |
343 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2.fq.gz" /> | |
344 <param name="adapter_source_list" value="user"/> | |
345 <param name="adapter" value="AGATCGGAAGAGC"/> | |
346 <param name="adapter_source_list2" value="user"/> | |
347 <param name="adapter2" value="AGATCGGAAGAGC"/> | |
348 <output name="out1" decompress="True" file="cutadapt_out1.fq.gz"/> | |
349 <output name="out2" decompress="True" file="cutadapt_out2.fq.gz"/> | |
350 </test> | |
351 <!-- Ensure built-in adapters work --> | |
352 <test> | |
353 <param name="type" value="single" /> | |
354 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> | |
355 <param name="adapter_source_list" value="builtin"/> | |
356 <param name="adapter" value="TGTAGGCC"/> | |
357 <output name="out1" file="cutadapt_builtin.out"/> | |
358 </test> | |
359 <!-- Ensure discard file output works --> | |
360 <test> | |
361 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> | |
314 <param name="adapter_source_list" value="user"/> | 362 <param name="adapter_source_list" value="user"/> |
315 <param name="adapter" value="TTAGACATATCTCCGTCG"/> | 363 <param name="adapter" value="TTAGACATATCTCCGTCG"/> |
316 <param name="output_filtering" value="filter"/> | 364 <param name="output_filtering" value="filter"/> |
317 <param name="discard" value="true"/> | 365 <param name="discard" value="True"/> |
318 <param name="read_modification" value="none"/> | 366 <param name="read_modification" value="none"/> |
319 <param name="output_type" value="default"/> | 367 <param name="output_type" value="default"/> |
320 <output name="output" file="cutadapt_discard.out"/> | 368 <output name="out1" file="cutadapt_discard.out"/> |
321 </test> | 369 </test> |
322 <test> | 370 <!-- Ensure rest file output works --> |
323 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/> | 371 <test> |
372 <param name="input_1" ftype="fasta" value="cutadapt_rest.fa" /> | |
324 <param name="adapter_source_list" value="user"/> | 373 <param name="adapter_source_list" value="user"/> |
325 <param name="adapter" value="ADAPTER"/> | 374 <param name="adapter" value="AAAGATG"/> |
326 <param name="output_filtering" value="default"/> | 375 <param name="output_filtering" value="default"/> |
327 <param name="read_modification" value="none"/> | 376 <param name="read_modification" value="none"/> |
328 <param name="output_type" value="additional"/> | 377 <param name="output_type" value="additional"/> |
329 <param name="rest_file" value="true"/> | 378 <param name="rest_file" value="True"/> |
330 <output name="output" file="cutadapt_rest.out"/> | 379 <output name="out1" file="cutadapt_rest.out"/> |
331 <output name="rest_output" file="cutadapt_rest2.out"/> | 380 <output name="rest_output" file="cutadapt_rest2.out"/> |
332 </test> | 381 </test> |
382 <!-- Ensure nextseq-trim option works --> | |
383 <test> | |
384 <param name="type" value="single" /> | |
385 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" /> | |
386 <param name="adapter_source_list" value="user"/> | |
387 <param name="adapter" value="AGATCGGAAGAGC"/> | |
388 <param name="read_modification" value="modify"/> | |
389 <param name="nextseq_trim" value="20" /> | |
390 <output name="out1" decompress="True" file="cutadapt_nextseq_out.fq.gz"/> | |
391 </test> | |
392 <!-- Ensure Report and Info file output work --> | |
393 <test> | |
394 <param name="type" value="single" /> | |
395 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> | |
396 <param name="adapter_source_list" value="user"/> | |
397 <param name="adapter" value="AGATCGGAAGAGC"/> | |
398 <param name="report" value="True" /> | |
399 <param name="info_file" value="True" /> | |
400 <output name="out1" value="cutadapt_small.out"/> | |
401 <output name="report"> | |
402 <assert_contents> | |
403 <has_text text="Summary"/> | |
404 </assert_contents> | |
405 </output> | |
406 <output name="info_file" value="cutadapt_info_out.txt"/> | |
407 </test> | |
333 </tests> | 408 </tests> |
334 | 409 |
335 <help> | 410 <help><![CDATA[ |
336 Summary | 411 |
337 ------- | 412 .. class:: infomark |
338 This tool removes adapter sequences from DNA high-throughput | 413 |
339 sequencing data. This is usually necessary when the read length of the | 414 **What it does** |
340 machine is longer than the molecule that is sequenced, such as in | 415 |
341 microRNA data. | 416 ------------------- |
342 | 417 |
343 The tool is based on the opensource `cutadapt | 418 **Cutadapt** finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. |
344 <http://code.google.com/p/cutadapt/>`_ tool. See the `complete cutadapt | 419 |
345 documentation <https://cutadapt.readthedocs.org/en/latest/index.html>`_ for additional details. | 420 Cleaning your data in this way is often required. Reads from small-RNA sequencing (for example microRNA or CRISPR data) contain the 3’ sequencing adapter because the read is longer than the molecule that is sequenced. Poly-A tails are useful for pulling out RNA from your sample, but you often don’t want them to be in your reads. |
346 | 421 |
347 ----- | 422 Cutadapt_ helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above. |
348 | 423 |
349 Algorithm | 424 The tool is based on the **Open Source** Cutadapt_ tool. See the complete `Cutadapt documentation`_ for additional details. If you use Cutadapt, please cite *Martin, 2011* under **Citations** below. |
350 --------- | 425 |
351 | 426 ------------------- |
352 cutadapt uses a simple semi-global alignment algorithm, without any special optimizations. | 427 |
353 For speed, the algorithm is implemented as a Python extension module in ``calignmodule.c``. | 428 **Inputs** |
354 | 429 |
355 | 430 ------------------- |
356 Partial adapter matches | 431 |
357 ----------------------- | 432 Input files for Cutadapt need to be: |
358 | 433 |
359 Cutadapt correctly deals with partial adapter matches. As an example, suppose | 434 - FASTQ.GZ, FASTQ.BZ2, FASTQ or FASTA |
360 your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter). | 435 |
361 If you have these input sequences:: | 436 To trim an adapter, input the ADAPTER sequence e.g. AACCGGTT (with the characters: **$**, **^**, **...**, if anchored or linked). |
362 | 437 |
363 MYSEQUENCEADAPTER | 438 ============================================= =================== |
364 MYSEQUENCEADAP | 439 **Option** **Sequence** |
365 MYSEQUENCEADAPTERSOMETHINGELSE | 440 --------------------------------------------- ------------------- |
366 | 441 3’ (End) Adapter ADAPTER |
367 All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an | 442 Anchored 3’ Adapter ADAPTER$ |
368 adapter, like this:: | 443 |
369 | 444 5’ (Front) Adapter ADAPTER |
370 ADAPTERSOMETHING | 445 Anchored 5’ Adapter ^ADAPTER |
371 | 446 |
372 It will be empty after trimming. | 447 5’ or 3’ (Both possible) ADAPTER |
373 | 448 |
374 When the allowed error rate is sufficiently high, errors in | 449 Linked Adapter - 3' (End) only ADAPTER1...ADAPTER2 |
375 the adapter sequence are allowed. For example, ``ADABTER`` (1 mismatch), ``ADAPTR`` (1 deletion), | 450 Non-anchored Linked Adapter - 5' (Front) only ADAPTER1...ADAPTER2 |
376 and ``ADAPPTER`` (1 insertion) will all be recognized if the error rate is set to 0.15. | 451 ============================================= =================== |
377 | 452 |
378 | 453 Below is an illustration of the allowed adapter locations relative to the read and depending on the adapter type: |
379 Anchoring 5' adapters | 454 |
380 --------------------- | 455 .. image:: $PATH_TO_IMAGES/adapters.svg |
381 | 456 |
382 If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or | 457 |
383 occur anywhere within it. If it appears within the read, the sequence that precedes it | 458 ------------------- |
384 will also be trimmed in addition to the adapter. For example when the adapter sequence is | 459 |
385 ``ADAPTER``:: | 460 *Example: Illumina TruSeq Adapters* |
386 | 461 |
387 HELLOADAPTERTHERE | 462 ------------------- |
388 APTERTHERE | 463 |
389 | 464 If you have reads containing Illumina TruSeq adapters, for example, follow these steps. |
390 will both be trimmed to ``THERE``. To avoid this, you can prefix the adapter with the character | 465 |
391 ``^``. This will restrict the search, forcing the adapter to be a prefix of the read. With | 466 |
392 the adapter sequence set to ``^ADAPTER``, only reads like this will be trimmed:: | 467 For Single-end reads as well as the first reads of Paired-end data: |
393 | 468 |
394 ADAPTERHELLO | 469 **Read 1** |
395 | 470 |
396 | 471 In the **3' (End) Adapters** option above, insert A + the “TruSeq Indexed Adapter” prefix that is common to all Indexed Adapter sequences, e.g. insert: |
397 Allowing adapters anywhere | 472 |
398 -------------------------- | 473 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC |
399 | 474 |
400 Cutadapt assumes that any adapter specified via the 3' Adapter parameter | 475 |
401 was ligated to the 3\' end of the sequence. This is the correct assumption for | 476 For the second reads of Paired-end data: |
402 at least the SOLiD and Illumina small RNA protocols and probably others. | 477 |
403 The assumption is enforced by the alignment algorithm, which only finds the adapter | 478 **Read 2** |
404 when its starting position is within the read. In other words, the 5' base of | 479 |
405 the adapter must appear within the read. The adapter and all bases following | 480 In the **3' (End) Adapters** option above, insert the reverse complement of the “TruSeq Universal Adapter”: |
406 it are removed. | 481 |
407 | 482 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT |
408 If, on the other hand, your adapter can also be ligated to the 5' end (on | 483 |
409 purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter | 484 The adapter sequences can be found in the document `Illumina TruSeq Adapters De-Mystified`_. |
410 parameter. It will then use a slightly different alignment algorithm | 485 |
411 (so-called semiglobal alignment), which allows any type of overlap between the | 486 ----------- |
412 adapter and the sequence. In particular, the adapter may appear only partially | 487 |
413 in the beginning of the read, like this:: | 488 **Outputs** |
414 | 489 |
415 PTERMYSEQUENCE | 490 ----------- |
416 | 491 |
417 The decision which part of the read to remove is made as follows: If there is at | 492 - Trimmed reads |
418 least one base before the found adapter, then the adapter is considered to be | 493 |
419 a 3' adapter and the adapter itself and everything following it is removed. | 494 Optionally, under **Output Options** you can choose to output |
420 Otherwise, the adapter is considered to be a 5' adapter and it is removed from | 495 |
421 the read. | 496 * Report |
422 | 497 * Info file |
423 Here are some examples, which may make this clearer (left: read, right: trimmed | 498 |
424 read):: | 499 |
425 | 500 **Report** |
426 MYSEQUENCEADAPTER -> MYSEQUENCE (3' adapter) | 501 |
427 MADAPTER -> M (3' adapter) | 502 Cutadapt can output per-adapter statistics if you select to output the report above. |
428 ADAPTERMYSEQUENCE -> MYSEQUENCE (5' adapter) | 503 |
429 PTERMYSEQUENCE -> MYSEQUENCE (5' adapter) | 504 Example: |
430 | 505 |
431 The regular algorithm (3' Adapter) would trim the first two examples in the same way, | 506 *This is cutadapt 1.16 with Python 3.6.4* |
432 but trim the third to an empty sequence and trim the fourth not at all. | 507 |
433 | 508 *Command line parameters: -j 1 --format=fastq -a AGATCGGAAGAGC --info-file=/tmp/tmpX0DlY1/files/000/dataset_21.dat --output=out1.fq --error-rate=0.1 --times=1 --overlap=3 input_f.fastq* |
434 | 509 *Running on 1 core* |
435 Format of the info file | 510 *Trimming 1 adapter with at most 10.0% errors in single-end mode ...* |
436 ----------------------- | 511 *Finished in 0.00 s (1426 us/read; 0.04 M reads/minute).* |
437 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. The fields are: | 512 |
438 | 513 *=== Summary ===* |
439 1. Read name | 514 |
440 2. Number of errors | 515 * Total reads processed: 3* |
441 3. 0-based start coordinate of the adapter match | 516 * Reads with adapters: 0 (0.0%)* |
442 4. 0-based end coordinate of the adapter match | 517 * Reads written (passing filters): 3 (100.0%)* |
443 5. Sequence of the read to the left of the adapter match (can be empty) | 518 |
444 6. Sequence of the read that was matched to the adapter | 519 * Total basepairs processed: 102 bp* |
445 7. Sequence of the read to the right of the adapter match (can be empty) | 520 * Total written (filtered): 102 bp (100.0%)* |
446 8. Name of the found adapter. | 521 |
447 | 522 *=== Adapter 1 ===* |
448 The concatenation of the fields 5-7 yields the full read sequence. In column 8, adapters without a name are numbered starting from 1. | 523 |
524 *Sequence: AGATCGGAAGAGC; Type: regular 3'; Length: 13; Trimmed: 0 times.* | |
525 | |
526 | |
527 **Info file** | |
528 | |
529 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. | |
530 | |
531 Columns contain the following data: | |
532 | |
533 * **1st**: Read name | |
534 * **2nd**: Number of errors | |
535 * **3rd**: 0-based start coordinate of the adapter match | |
536 * **4th**: 0-based end coordinate of the adapter match | |
537 * **5th**: Sequence of the read to the left of the adapter match (can be empty) | |
538 * **6th**: Sequence of the read that was matched to the adapter | |
539 * **7th**: Sequence of the read to the right of the adapter match (can be empty) | |
540 * **8th**: Name of the found adapter | |
541 * **9th**: Quality values corresponding to sequence left of the adapter match (can be empty) | |
542 * **10th**: Quality values corresponding to sequence matched to the adapter (can be empty) | |
543 * **11th**: Quality values corresponding to sequence to the right of the adapter (can be empty) | |
544 | |
545 The concatenation of columns 5-7 yields the full read sequence. Column 8 identifies the found adapter. Adapters without a name are numbered starting from 1. Fields 9-11 are empty if quality values are not available. Concatenating them yields the full sequence of quality values. | |
449 | 546 |
450 If no adapter was found, the format is as follows: | 547 If no adapter was found, the format is as follows: |
451 | 548 |
452 1. Read name | 549 #. Read name |
453 2. The value -1 | 550 #. The value -1 |
454 3. The read sequence | 551 #. The read sequence |
455 | 552 #. Quality values |
456 When parsing that file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. Also, in the current version, when the *Match times* option is set to a value other than 1 (the default value), multiple lines are written to the info file for each read. | 553 |
457 | 554 When parsing the file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. |
458 .. _cutadapt: http://code.google.com/p/cutadapt/ | 555 |
459 </help> | 556 If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the shown sequences are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter. |
557 | |
558 -------------------- | |
559 | |
560 **More Information** | |
561 | |
562 -------------------- | |
563 | |
564 See the excellent `Cutadapt documentation`_ | |
565 | |
566 .. _Cutadapt: https://cutadapt.readthedocs.io/en/stable/ | |
567 .. _`Cutadapt documentation`: https://cutadapt.readthedocs.io/en/latest/index.html | |
568 .. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf | |
569 | |
570 | |
571 -------------------- | |
572 | |
573 **Galaxy Wrapper Development** | |
574 | |
575 -------------------- | |
576 | |
577 Author: Lance Parsons <lparsons@princeton.edu> | |
578 | |
579 ]]></help> | |
460 | 580 |
461 <citations> | 581 <citations> |
462 <citation type="bibtex"> | 582 <citation type="bibtex"> |
463 @article{marcel_cutadapt_2011, | 583 @article{marcel_cutadapt_2011, |
464 title = {Cutadapt removes adapter sequences from high-throughput sequencing reads}, | 584 title = {Cutadapt removes adapter sequences from high-throughput sequencing reads}, |