comparison cutadapt.xml @ 9:93d58ffe39f1 draft

Updated to version 1.6
author lparsons
date Mon, 06 Oct 2014 14:01:06 -0400
parents 2d6671b10919
children 01d94df2e32a
comparison
equal deleted inserted replaced
8:2d6671b10919 9:93d58ffe39f1
1 <tool id="cutadapt" name="Cutadapt" version="1.1.a"> 1 <tool id="cutadapt" name="Cutadapt" version="1.6">
2 <description>Remove adapter sequences from Fastq/Fasta</description> 2 <description>Remove adapter sequences from Fastq/Fasta</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.1">cutadapt</requirement> 4 <requirement type="package" version="1.6">cutadapt</requirement>
5 </requirements> 5 </requirements>
6 <version_command>cutadapt --version</version_command> 6 <version_command>cutadapt --version</version_command>
7 7
8 <command>cutadapt 8 <command>cutadapt
9 #if $input.extension.startswith( "fastq"): 9 #if $input.extension.startswith( "fastq"):
10 --format=fastq 10 --format=fastq
11 #if $input.extension == "fastqillumina": 11 #if $input.extension == "fastqillumina":
12 --quality-base=64 12 --quality-base=64
13 #end if 13 #end if
14 #if $input.extension == "fastqsolexa": 14 #if $input.extension == "fastqsolexa":
15 --quality-base=64 15 --quality-base=64
16 #end if 16 #end if
17 #else 17 #else
18 --format=$input.extension 18 --format=$input.extension
19 #end if 19 #end if
20 #for $a in $adapters 20 #for $a in $adapters
21 --adapter='${a.adapter_source.adapter}' 21 #if $a.adapter_source.adapter_source_list == 'prebuilt':
22 #end for 22 --adapter="${a.adapter_source.adapter.fields.name}"='${a.adapter_source.adapter}'
23 #for $aa in $anywhere_adapters 23 #else if str($a.adapter_source.adapter_name) != "":
24 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}' 24 --adapter='${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}'
25 #end for 25 #else
26 #for $fa in $front_adapters 26 --adapter='${a.adapter_source.adapter}'
27 --front='${fa.front_adapter_source.front_adapter}' 27 #end if
28 #end for 28 #end for
29 --error-rate=$error_rate 29 #for $aa in $anywhere_adapters
30 --times=$count 30 #if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'prebuilt':
31 --overlap=$overlap 31 --anywhere="${aa.anywhere_adapter_source.anywhere_adapter.fields.name}"='${aa.anywhere_adapter_source.anywhere_adapter}'
32 #else if str($aa.anywhere_adapter_source.anywhere_adapter_name) != "":
33 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter_name}'='${aa.anywhere_adapter_source.anywhere_adapter}'
34 #else
35 --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}'
36 #end if
37 #end for
38 #for $fa in $front_adapters
39 #if $fa.front_adapter_source.front_adapter_source_list == 'prebuilt':
40 --front="${fa.front_adapter_source.front_adapter.fields.name}"='${fa.front_adapter_source.front_adapter}'
41 #else if str($fa.front_adapter_source.front_adapter_name) != "":
42 --front='${fa.front_adapter_source.front_adapter_name}'='${fa.front_adapter_source.front_adapter}'
43 #else
44 --front='${fa.front_adapter_source.front_adapter}'
45 #end if
46 #end for
47 --error-rate=$error_rate
48 --times=$count
49 --overlap=$overlap
50 $no_indels
32 $match_read_wildcards 51 $match_read_wildcards
33 $no_match_adapters_wildcards
34 52
35 #if str( $output_filtering_options.output_filtering) == "filter": 53 #if str( $output_filtering_options.output_filtering) == "filter":
36 $output_filtering_options.discard 54 $output_filtering_options.discard
37 #if str($output_filtering_options.min) != '0': 55 $output_filtering_options.discard_untrimmed
38 --minimum-length=$output_filtering_options.min 56 $output_filtering_options.no_trim
39 #end if 57 $output_filtering_options.mask_adapter
40 #if str($output_filtering_options.max) != '0': 58 #if str($output_filtering_options.min) != '0':
41 --maximum-length=$output_filtering_options.max 59 --minimum-length=$output_filtering_options.min
42 #end if 60 #end if
43 #end if 61 #if str($output_filtering_options.max) != '0':
44 62 --maximum-length=$output_filtering_options.max
45 --output='$output' 63 #end if
46 #if str( $output_params.output_type ) == "additional": 64 #end if
47 #if $output_params.rest_file: 65
48 --rest-file=$rest_output 66 --output='$output'
49 #end if 67
50 #if $output_params.wildcard_file: 68 #if $paired_end.paired_end_boolean:
51 --wildcard-file=$wild_output 69 --paired-output='$paired_output'
52 #end if 70 #end if
53 #if $output_params.too_short_file: 71
54 --too-short-output=$too_short_output 72 #if str( $output_params.output_type ) == "additional":
55 #end if 73 #if $output_params.rest_file:
56 #if $output_params.untrimmed_file: 74 --rest-file=$rest_output
57 --untrimmed-output=$untrimmed_output 75 #end if
58 #end if 76 #if $output_params.wildcard_file:
59 #end if 77 --wildcard-file=$wild_output
60 78 #end if
61 #if str( $read_modification_params.read_modification) == "modify": 79 #if $output_params.too_short_file:
62 #if str($read_modification_params.quality_cutoff) != '0': 80 --too-short-output=$too_short_output
63 --quality-cutoff=$read_modification_params.quality_cutoff 81 #end if
64 #end if 82 #if $output_params.too_long_file:
83 --too-long-output=$too_long_output
84 #end if
85 #if $output_params.untrimmed_file:
86 --untrimmed-output=$untrimmed_output
87 #if $paired_end.paired_end_boolean:
88 --untrimmed-paired-output=$untrimmed_paired_output
89 #end if
90 #end if
91 #if $output_params.info_file:
92 --info-file=$info_file
93 #end if
94
95 #end if
96
97 #if str( $read_modification_params.read_modification) == "modify":
98 #if str($read_modification_params.quality_cutoff) != '0':
99 --quality-cutoff=$read_modification_params.quality_cutoff
100 #end if
101 #if str($read_modification_params.cut) != '0':
102 --cut=$read_modification_params.cut
103 #end if
65 #if $read_modification_params.prefix != '': 104 #if $read_modification_params.prefix != '':
66 --prefix="$read_modification_params.prefix" 105 --prefix="$read_modification_params.prefix"
67 #end if 106 #end if
68 #if $read_modification_params.suffix != '': 107 #if $read_modification_params.suffix != '':
69 --suffix="$read_modification_params.suffix" 108 --suffix="$read_modification_params.suffix"
72 --length-tag="$read_modification_params.length_tag" 111 --length-tag="$read_modification_params.length_tag"
73 #end if 112 #end if
74 $read_modification_params.zero_cap 113 $read_modification_params.zero_cap
75 #end if 114 #end if
76 115
77 '$input' 116 '$input'
78 > $report 117
79 </command> 118 #if $paired_end.paired_end_boolean:
80 <inputs> 119 '$input2'
81 <param format="fastqsanger, fastqillumina, fastqsolexa, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/> 120 #end if
82 121
83 <repeat name="adapters" title="3' Adapters" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed."> 122 > $report
84 <conditional name="adapter_source"> 123 </command>
85 <param name="adapter_source_list" type="select" label="Source" > 124 <inputs>
86 <option value="prebuilt" selected="true">Standard (select from the list below)</option> 125 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/>
87 <option value="user">Enter custom sequence</option> 126 <conditional name="paired_end">
88 </param> 127 <param name="paired_end_boolean" type="boolean" value="false" label="Track Paired Reads" help="This option will keep a second file synchronized if you use one of the filtering options that discards reads. It will NOT trim adapters off of the second read. You must run Cutadapt a second time on the output of the first run to trim adapters from both reads (see Cutadapt documentation for details)." />
89 128 <when value="true">
90 <when value="user"> 129 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa, fasta" name="input2" type="data" optional="false" label="Paired fastq file (NOT trimmed)" length="100"/>
91 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" /> 130 </when>
92 </when> 131 </conditional>
93 132
94 <when value="prebuilt"> 133 <repeat name="adapters" title="3' Adapters" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed.">
95 <param name="adapter" type="select" label="Choose 3' adapter"> 134 <conditional name="adapter_source">
96 <options from_file="fastx_clipper_sequences.txt"> 135 <param name="adapter_source_list" type="select" label="Source" >
97 <column name="name" index="1"/> 136 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
98 <column name="value" index="0"/> 137 <option value="user">Enter custom sequence</option>
99 </options> 138 </param>
100 </param> 139
101 </when> 140 <when value="user">
102 </conditional> 141 <param name="adapter_name" size="30" label="Enter custom 3' adapter name (Optional)" type="text" value="" />
103 </repeat> 142 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" />
104 143 </when>
105 <repeat name="anywhere_adapters" title="5' or 3' (Anywhere) Adapters" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed."> 144
106 <conditional name="anywhere_adapter_source"> 145 <when value="prebuilt">
107 <param name="anywhere_adapter_source_list" type="select" label="Source"> 146 <param name="adapter" type="select" label="Choose 3' adapter">
108 <option value="prebuilt" selected="true">Standard (select from the list below)</option> 147 <options from_file="cutadapt_adapters.txt">
109 <option value="user">Enter custom sequence</option> 148 <column name="name" index="1"/>
110 </param> 149 <column name="value" index="0"/>
111 150 </options>
112 <when value="user"> 151 </param>
113 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" /> 152 </when>
114 </when> 153 </conditional>
115 <when value="prebuilt"> 154 </repeat>
116 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter"> 155
117 <options from_file="fastx_clipper_sequences.txt"> 156 <repeat name="anywhere_adapters" title="5' or 3' (Anywhere) Adapters" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed.">
118 <column name="name" index="1"/> 157 <conditional name="anywhere_adapter_source">
119 <column name="value" index="0"/> 158 <param name="anywhere_adapter_source_list" type="select" label="Source">
120 </options> 159 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
121 </param> 160 <option value="user">Enter custom sequence</option>
122 </when> 161 </param>
123 </conditional> 162
124 </repeat> 163 <when value="user">
125 164 <param name="anywhere_adapter_name" size="30" label="Enter custom 5' or 3' adapter name (Optional)" type="text" value="" />
126 <repeat name="front_adapters" title="5' (Front) Adapters" help="Sequence of an adapter that was ligated to the 5' end. If the adapter sequence starts with the character '^', the adapter is 'anchored'. An anchored adapter must appear in its entirety at the 5' end of the read (it is a prefix of the read). A non-anchored adapter may appear partially at the 5' end, or it may occur within the read. If it is found within a read, the sequence preceding the adapter is also trimmed. In all cases the adapter itself is trimmed."> 165 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" />
127 <conditional name="front_adapter_source"> 166 </when>
128 <param name="front_adapter_source_list" type="select" label="Source"> 167 <when value="prebuilt">
129 <option value="prebuilt" selected="true">Standard (select from the list below)</option> 168 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter">
130 <option value="user">Enter custom sequence</option> 169 <options from_file="cutadapt_adapters.txt">
131 </param> 170 <column name="name" index="1"/>
132 171 <column name="value" index="0"/>
133 <when value="user"> 172 </options>
134 <param name="front_adapter" size="30" label="Enter custom 5' adapter sequence" type="text" value="AATTGGCC" /> 173 </param>
135 </when> 174 </when>
136 <when value="prebuilt"> 175 </conditional>
137 <param name="front_adapter" type="select" label="Choose 5' adapter"> 176 </repeat>
138 <options from_file="fastx_clipper_sequences.txt"> 177
139 <column name="name" index="1"/> 178 <repeat name="front_adapters" title="5' (Front) Adapters" help="Sequence of an adapter that was ligated to the 5' end. If the adapter sequence starts with the character '^', the adapter is 'anchored'. An anchored adapter must appear in its entirety at the 5' end of the read (it is a prefix of the read). A non-anchored adapter may appear partially at the 5' end, or it may occur within the read. If it is found within a read, the sequence preceding the adapter is also trimmed. In all cases the adapter itself is trimmed.">
140 <column name="value" index="0"/> 179 <conditional name="front_adapter_source">
141 </options> 180 <param name="front_adapter_source_list" type="select" label="Source">
142 </param> 181 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
143 </when> 182 <option value="user">Enter custom sequence</option>
144 </conditional> 183 </param>
145 </repeat> 184
146 185 <when value="user">
147 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> 186 <param name="front_adapter_name" size="30" label="Enter custom 5' adapter name (Optional)" type="text" value="" />
148 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> 187 <param name="front_adapter" size="30" label="Enter custom 5' adapter sequence" type="text" value="AATTGGCC" />
149 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." /> 188 </when>
150 189 <when value="prebuilt">
151 <param name="match_read_wildcards" type="boolean" value="false" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." /> 190 <param name="front_adapter" type="select" label="Choose 5' adapter">
152 <param name="no_match_adapters_wildcards" type="boolean" value="false" truevalue="--no-match-adapter-wildcards" falsevalue="" label="Do Not Match Adapter Wildcards" help="Do not treat 'N' in the adapter sequence as wildcards. This is needed when you want to search for literal 'N' characters." /> 191 <options from_file="cutadapt_adapters.txt">
192 <column name="name" index="1"/>
193 <column name="value" index="0"/>
194 </options>
195 </param>
196 </when>
197 </conditional>
198 </repeat>
199
200 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
201 <param name="no_indels" type="boolean" value="false" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." />
202 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
203 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." />
204 <param name="match_read_wildcards" type="boolean" value="false" truevalue="--match-read-wildcards" falsevalue="" label="Match Read Wildcards" help="Allow 'N's in the read as matches to the adapter." />
153 205
154 <conditional name="output_filtering_options"> 206 <conditional name="output_filtering_options">
155 <param name="output_filtering" type="select" label="Output filtering options" help="Options for filtering processed reads by those that contain the adapter or by minimum or maximum length"> 207 <param name="output_filtering" type="select" label="Output filtering options" help="Options for filtering processed reads by those that contain the adapter or by minimum or maximum length">
156 <option value="default">Default (no filtering)</option> 208 <option value="default">Default (no filtering)</option>
157 <option value="filter">Set Filters</option> 209 <option value="filter">Set Filters</option>
158 </param> 210 </param>
159 <when value="default" /> 211 <when value="default" />
160 <when value="filter"> 212 <when value="filter">
161 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> 213 <param name="discard" type="boolean" value="false" truevalue="--discard" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />
162 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." /> 214 <param name="discard_untrimmed" type="boolean" value="false" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." />
163 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." /> 215 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." />
216 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." />
217 <param name="no_trim" type="boolean" value="false" truevalue="--no-trim" falsevalue="" label="Do not trim adapters" help="Match and redirect reads to output/untrimmed-output as usual, but don't remove the adapters (default: trim the adapters)." />
218 <param name="mask_adapter" type="boolean" value="false" truevalue="--mask-adapter" falsevalue="" label="Mask Adapters" help="Mask adapter bases with 'N' instead of trimming them (default: trim adapters)." />
164 </when> 219 </when>
165 </conditional> 220 </conditional>
166 221
167 <conditional name="output_params"> 222 <conditional name="output_params">
168 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files."> 223 <param name="output_type" type="select" label="Additional output options" help="By default all reads will be put in the same file. However, reads with adapters matching in the middle, unmatched reads, and too-short reads can be saved in separate files.">
169 <option value="default">Default</option> 224 <option value="default">Default</option>
170 <option value="additional">Additional output files</option> 225 <option value="additional">Additional output files</option>
171 </param> 226 </param>
172 <when value="default" /> 227 <when value="default" />
173 <when value="additional"> 228 <when value="additional">
174 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/> 229 <param name="info_file" type="boolean" value="false" label="Info File" help="Write information about each read and its adapter matches to a file."/>
175 <param name="wildcard_file" type="boolean" value="false" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/> 230 <param name="rest_file" type="boolean" value="false" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/>
176 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/> 231 <param name="wildcard_file" type="boolean" value="false" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/>
177 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/> 232 <param name="too_short_file" type="boolean" value="false" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/>
178 </when> 233 <param name="too_long_file" type="boolean" value="false" label="Too Long Reads" help="Write reads that are too long (according to maximum length specified) to a file. (default: discard reads)"/>
179 </conditional> 234 <param name="untrimmed_file" type="boolean" value="false" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/>
235 </when>
236 </conditional>
180 237
181 <conditional name="read_modification_params"> 238 <conditional name="read_modification_params">
182 <param name="read_modification" type="select" label="Additional modifications to reads" help="Various options to trim reads based on quality, modify read names and quality scores"> 239 <param name="read_modification" type="select" label="Additional modifications to reads" help="Various options to trim reads based on quality, modify read names and quality scores">
183 <option value="none">No Read Modifications</option> 240 <option value="none">No Read Modifications</option>
184 <option value="modify">Set Modification Options</option> 241 <option value="modify">Set Modification Options</option>
185 </param> 242 </param>
186 <when value="none" /> 243 <when value="none" />
187 <when value="modify"> 244 <when value="modify">
245 <param name="cut" type="integer" optional="true" value="0" label="Cut bases from reads before adapter trimming" help="Remove bases from the beginning or end of each read before trimming adapters. If positive, the bases are removed from the beginning of each read. If negative, the bases are removed from the end of each read." />
188 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." /> 246 <param name="quality_cutoff" type="integer" min="0" optional="true" value="0" label="Quality cutoff" help="Trim low-quality ends from reads before adapter removal. The algorithm is the same as the one used by BWA (Subtract CUTOFF from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). Value of 0 means no quality trimming." />
189 <param name="prefix" label="Prefix" type="text" help="Add this prefix to read names" /> 247 <param name="prefix" label="Prefix" type="text" help="Add this prefix to read names" />
190 <param name="suffix" label="Suffix" type="text" help="Add this suffix to read names" /> 248 <param name="suffix" label="Suffix" type="text" help="Add this suffix to read names" />
249 <param name="strip_suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." />
191 <param name="length_tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." /> 250 <param name="length_tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." />
192 <param name="zero_cap" type="boolean" value="false" label="Change negative quality values to zero (0)" truevalue="--zero-cap" falsevalue="" help="Workaround to avoid segmentation faults in BWA" /> 251 <param name="zero_cap" type="boolean" value="false" label="Change negative quality values to zero (0)" truevalue="--zero-cap" falsevalue="" help="Workaround to avoid segmentation faults in BWA" />
193 </when> 252 </when>
194 </conditional> 253 </conditional>
195 </inputs> 254 </inputs>
196 255
197 <outputs> 256 <outputs>
198 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" /> 257 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" />
199 <data format="input" name="output" metadata_source="input"/> 258 <data format="input" name="output" metadata_source="input" label="${tool.name} on ${on_string} (Reads)"/>
200 <data format="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" > 259 <data format="input" name="paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Paired Reads)" >
201 <filter>(output_params['output_type'] == "additional")</filter> 260 <filter>(paired_end['paired_end_boolean'] is True)</filter>
202 <filter>(output_params['rest_file'] is True)</filter> 261 </data>
203 </data> 262 <data format="input" name="rest_output" metadata_source="input" label="${tool.name} on ${on_string} (Rest of Reads)" >
204 <data format="txt" name="wild_output" metadata_source="input" label="${tool.name} on ${on_string} (Wildcard File)" > 263 <filter>(output_params['output_type'] == "additional")</filter>
205 <filter>(output_params['output_type'] == "additional")</filter> 264 <filter>(output_params['rest_file'] is True)</filter>
206 <filter>(output_params['wild_file'] is True)</filter> 265 </data>
207 </data> 266 <data format="txt" name="wild_output" metadata_source="input" label="${tool.name} on ${on_string} (Wildcard File)" >
208 <data format="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" > 267 <filter>(output_params['output_type'] == "additional")</filter>
209 <filter>(output_params['output_type'] == "additional")</filter> 268 <filter>(output_params['wildcard_file'] is True)</filter>
210 <filter>(output_params['too_short_file'] is True)</filter> 269 </data>
211 </data> 270 <data format="input" name="too_short_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Short Reads)" >
212 <data format="input" name="untrimmed_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Reads)" > 271 <filter>(output_params['output_type'] == "additional")</filter>
213 <filter>(output_params['output_type'] == "additional")</filter> 272 <filter>(output_params['too_short_file'] is True)</filter>
214 <filter>(output_params['untrimmed_file'] is True)</filter> 273 </data>
215 </data> 274 <data format="input" name="too_long_output" metadata_source="input" label="${tool.name} on ${on_string} (Too Long Reads)" >
216 </outputs> 275 <filter>(output_params['output_type'] == "additional")</filter>
217 276 <filter>(output_params['too_long_file'] is True)</filter>
218 <tests> 277 </data>
219 <test> 278 <data format="input" name="untrimmed_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Reads)" >
220 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> 279 <filter>(output_params['output_type'] == "additional")</filter>
221 <param name="adapter_source_list" value="user"/> 280 <filter>(output_params['untrimmed_file'] is True)</filter>
222 <param name="adapter" value=""/> 281 </data>
223 <param name="anywhere_adapter_source_list" value="user"/> 282 <data format="input" name="untrimmed_paired_output" metadata_source="input" label="${tool.name} on ${on_string} (Untrimmed Paired Reads)" >
224 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/> 283 <filter>(paired_end['paired_end_boolean'] is True)</filter>
225 <param name="front_adapter_source_list" value="user"/> 284 <filter>(output_params['output_type'] == "additional")</filter>
226 <param name="front_adapter" value=""/> 285 <filter>(output_params['untrimmed_file'] is True)</filter>
227 <param name="output_filtering" value="default"/> 286 </data>
228 <param name="read_modification" value="none"/> 287 <data format="txt" name="info_file" metadata_source="input" label="${tool.name} on ${on_string} (Info File)" >
229 <param name="output_type" value="default"/> 288 <filter>(output_params['output_type'] == "additional")</filter>
230 <output name="output" file="cutadapt_small.out"/> 289 <filter>(output_params['info_file'] is True)</filter>
231 </test> 290 </data>
291 </outputs>
292
293 <stdio>
294 <exit_code range="1" level="fatal" description="IOError, FormatError, or Interrupt" />
295 <exit_code range="2" level="fatal" description="Invalid options specified" />
296 <exit_code range="3:" level="fatal" description="Unknown error" />
297 </stdio>
298
299 <tests>
300 <test>
301 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/>
302 <param name="adapter_source_list" value="user"/>
303 <param name="adapter" value=""/>
304 <param name="anywhere_adapter_source_list" value="user"/>
305 <param name="anywhere_adapter" value="TTAGACATATCTCCGTCG"/>
306 <param name="front_adapter_source_list" value="user"/>
307 <param name="front_adapter" value=""/>
308 <param name="output_filtering" value="default"/>
309 <param name="read_modification" value="none"/>
310 <param name="output_type" value="default"/>
311 <output name="output" file="cutadapt_small.out"/>
312 </test>
232 <!-- Unable to get tests to function with conditional parameters 313 <!-- Unable to get tests to function with conditional parameters
233 <test> 314 <test>
234 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/> 315 <param name="input" value="cutadapt_small.fastq" ftype="fastqsanger"/>
235 <param name="adapter_source_list" value="user"/> 316 <param name="adapter_source_list" value="user"/>
236 <param name="adapter" value="TTAGACATATCTCCGTCG"/> 317 <param name="adapter" value="TTAGACATATCTCCGTCG"/>
237 <param name="anywhere_adapter_source_list" value="user"/> 318 <param name="anywhere_adapter_source_list" value="user"/>
238 <param name="anywhere_adapter" value=""/> 319 <param name="anywhere_adapter" value=""/>
239 <param name="front_adapter_source_list" value="user"/> 320 <param name="front_adapter_source_list" value="user"/>
240 <param name="front_adapter" value=""/> 321 <param name="front_adapter" value=""/>
241 <param name="output_filtering" value="filter"/> 322 <param name="output_filtering" value="filter"/>
242 <param name="discard" value="true"/> 323 <param name="discard" value="true"/>
243 <param name="read_modification" value="none"/> 324 <param name="read_modification" value="none"/>
244 <param name="output_type" value="default"/> 325 <param name="output_type" value="default"/>
245 <output name="output" file="cutadapt_discard.out"/> 326 <output name="output" file="cutadapt_discard.out"/>
246 </test> 327 </test>
247 <test> 328 <test>
248 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/> 329 <param name="input" value="cutadapt_rest.fa" ftype="fasta"/>
249 <param name="adapter_source_list" value="user"/> 330 <param name="adapter_source_list" value="user"/>
250 <param name="adapter" value="ADAPTER"/> 331 <param name="adapter" value="ADAPTER"/>
251 <param name="anywhere_adapter_source_list" value="user"/> 332 <param name="anywhere_adapter_source_list" value="user"/>
252 <param name="anywhere_adapter" value=""/> 333 <param name="anywhere_adapter" value=""/>
253 <param name="front_adapter_source_list" value="user"/> 334 <param name="front_adapter_source_list" value="user"/>
254 <param name="front_adapter" value=""/> 335 <param name="front_adapter" value=""/>
255 <param name="output_filtering" value="default"/> 336 <param name="output_filtering" value="default"/>
256 <param name="read_modification" value="none"/> 337 <param name="read_modification" value="none"/>
257 <param name="output_type" value="additional"/> 338 <param name="output_type" value="additional"/>
258 <param name="rest_file" value="true"/> 339 <param name="rest_file" value="true"/>
259 <output name="output" file="cutadapt_rest.out"/> 340 <output name="output" file="cutadapt_rest.out"/>
260 <output name="rest_output" file="cutadapt_rest2.out"/> 341 <output name="rest_output" file="cutadapt_rest2.out"/>
261 </test> 342 </test>
262 --> 343 -->
263 </tests> 344 </tests>
264 345
265 <help> 346 <help>
266 Summary 347 Summary
267 ------- 348 -------
268 This tool removes adapter sequences from DNA high-throughput 349 This tool removes adapter sequences from DNA high-throughput
269 sequencing data. This is usually necessary when the read length of the 350 sequencing data. This is usually necessary when the read length of the
270 machine is longer than the molecule that is sequenced, such as in 351 machine is longer than the molecule that is sequenced, such as in
271 microRNA data. 352 microRNA data.
272 353
273 The tool is based on the open-source cutadapt_ tool. 354 The tool is based on the open-source `cutadapt
355 &lt;http://code.google.com/p/cutadapt/>`_ tool. See the `complete cutadapt
356 documentation &lt;https://cutadapt.readthedocs.org/en/latest/index.html>`_ for additional details.
274 357
275 ----- 358 -----
276 359
277 Algorithm 360 Algorithm
278 --------- 361 ---------
286 369
287 Cutadapt correctly deals with partial adapter matches. As an example, suppose 370 Cutadapt correctly deals with partial adapter matches. As an example, suppose
288 your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter). 371 your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter).
289 If you have these input sequences:: 372 If you have these input sequences::
290 373
291 MYSEQUENCEADAPTER 374 MYSEQUENCEADAPTER
292 MYSEQUENCEADAP 375 MYSEQUENCEADAP
293 MYSEQUENCEADAPTERSOMETHINGELSE 376 MYSEQUENCEADAPTERSOMETHINGELSE
294 377
295 All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an 378 All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an
296 adapter, like this:: 379 adapter, like this::
297 380
298 ADAPTERSOMETHING 381 ADAPTERSOMETHING
299 382
300 It will be empty after trimming. 383 It will be empty after trimming.
301 384
302 When the allowed error rate is sufficiently high, errors in 385 When the allowed error rate is sufficiently high, errors in
303 the adapter sequence are allowed. For example, ``ADABTER`` (1 mismatch), ``ADAPTR`` (1 deletion), 386 the adapter sequence are allowed. For example, ``ADABTER`` (1 mismatch), ``ADAPTR`` (1 deletion),
306 389
307 Anchoring 5' adapters 390 Anchoring 5' adapters
308 --------------------- 391 ---------------------
309 392
310 If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or 393 If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or
311 occur anywhere within it. If it appears within the read, the sequence that precedes it 394 occur anywhere within it. If it appears within the read, the sequence that precedes it
312 will also be trimmed in addition to the adapter. For example when the adapter sequence is 395 will also be trimmed in addition to the adapter. For example when the adapter sequence is
313 ``ADAPTER``:: 396 ``ADAPTER``::
314 397
315 HELLOADAPTERTHERE 398 HELLOADAPTERTHERE
316 APTERTHERE 399 APTERTHERE
329 was ligated to the 3\' end of the sequence. This is the correct assumption for 412 was ligated to the 3\' end of the sequence. This is the correct assumption for
330 at least the SOLiD and Illumina small RNA protocols and probably others. 413 at least the SOLiD and Illumina small RNA protocols and probably others.
331 The assumption is enforced by the alignment algorithm, which only finds the adapter 414 The assumption is enforced by the alignment algorithm, which only finds the adapter
332 when its starting position is within the read. In other words, the 5' base of 415 when its starting position is within the read. In other words, the 5' base of
333 the adapter must appear within the read. The adapter and all bases following 416 the adapter must appear within the read. The adapter and all bases following
334 it are remved. 417 it are removed.
335 418
336 If, on the other hand, your adapter can also be ligated to the 5' end (on 419 If, on the other hand, your adapter can also be ligated to the 5' end (on
337 purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter 420 purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter
338 parameter. It will then use a slightly different alignment algorithm 421 parameter. It will then use a slightly different alignment algorithm
339 (so-called semiglobal alignment), which allows any type of overlap between the 422 (so-called semiglobal alignment), which allows any type of overlap between the
358 441
359 The regular algorithm (3' Adapter) would trim the first two examples in the same way, 442 The regular algorithm (3' Adapter) would trim the first two examples in the same way,
360 but trim the third to an empty sequence and trim the fourth not at all. 443 but trim the third to an empty sequence and trim the fourth not at all.
361 444
362 445
446 Format of the info file
447 -----------------------
448 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. The fields are:
449
450 1. Read name
451 2. Number of errors
452 3. 0-based start coordinate of the adapter match
453 4. 0-based end coordinate of the adapter match
454 5. Sequence of the read to the left of the adapter match (can be empty)
455 6. Sequence of the read that was matched to the adapter
456 7. Sequence of the read to the right of the adapter match (can be empty)
457 8. Name of the found adapter.
458
459 The concatenation of the fields 5-7 yields the full read sequence. In column 8, adapters without a name are numbered starting from 1.
460
461 If no adapter was found, the format is as follows:
462
463 1. Read name
464 2. The value -1
465 3. The read sequence
466
467 When parsing that file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. Also, in the current version, when the *Match times* option is set to a value other than 1 (the default value), multiple lines are written to the info file for each read.
468
363 .. _cutadapt: http://code.google.com/p/cutadapt/ 469 .. _cutadapt: http://code.google.com/p/cutadapt/
364 </help> 470 </help>
365 471
366 </tool> 472 </tool>