comparison cutadapt.xml @ 24:288f97432497 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cutadapt commit b26a2d90a1c70476fd2de33cd9fd739100f54fcf"
author iuc
date Wed, 02 Jun 2021 22:47:34 +0000
parents c4b82dce8335
children 4e5056fefd88
comparison
equal deleted inserted replaced
23:c4b82dce8335 24:288f97432497
1 <tool id="cutadapt" name="Cutadapt" version="1.16.9" profile="17.09"> 1 <tool id="cutadapt" name="Cutadapt" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@" profile="20.01">
2 <description>Remove adapter sequences from Fastq/Fasta</description> 2 <description>Remove adapter sequences from FASTQ/FASTA</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <expand macro='edam_ontology' />
7 <requirement type="package" version="1.16">cutadapt</requirement> 7 <expand macro='requirements' />
8 </requirements> 8 <expand macro='xrefs'/>
9
10 <version_command>cutadapt --version</version_command> 9 <version_command>cutadapt --version</version_command>
11 10
12 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
13 ## Link in the input and output files, so Cutadapt can tell their type 12 ## Link in the input and output files, so Cutadapt can tell their type
14 13
16 #set read1 = "input_f" 15 #set read1 = "input_f"
17 #set read2 = "input_r" 16 #set read2 = "input_r"
18 #set paired = False 17 #set paired = False
19 #set library_type = str($library.type) 18 #set library_type = str($library.type)
20 #if $library_type == 'paired': 19 #if $library_type == 'paired':
21 #set paired = True 20 #set paired = True
22 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier)) 21 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier))
23 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_2.element_identifier)) 22 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_2.element_identifier))
24 #set input_1 = $library.input_1 23 #set input_1 = $library.input_1
25 #set input_2 = $library.input_2 24 #set input_2 = $library.input_2
26 #else if $library_type == 'paired_collection' 25 #else if $library_type == 'paired_collection'
27 #set paired = True 26 #set paired = True
28 #set input_1 = $library.input_1.forward 27 #set input_1 = $library.input_1.forward
29 #set input_2 = $library.input_1.reverse 28 #set input_2 = $library.input_1.reverse
30 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_1" 29 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_1"
31 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_2" 30 #set read2 = re.sub('[^\w\-\s]', '_', str($library.input_1.name)) + "_2"
32 #else 31 #else
33 #set input_1 = $library.input_1 32 #set input_1 = $library.input_1
34 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier)) 33 #set read1 = re.sub('[^\w\-\s]', '_', str($library.input_1.element_identifier))
35 #end if 34 #end if
36 35
37 #if $input_1.is_of_type("fastq.gz", "fastqsanger.gz"): 36 #if $input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
38 #set ext = ".fq.gz" 37 #set ext = ".fq.gz"
39 #else if $input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): 38 #else if $input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
70 ln -f -s '${input_2}' '$read2' && 69 ln -f -s '${input_2}' '$read2' &&
71 #end if 70 #end if
72 71
73 ## Run Cutadapt 72 ## Run Cutadapt
74 73
75 #if $output_options.multiple_output: 74 #if 'multiple_output' in $output_selector:
76 mkdir split && 75 mkdir split &&
77 #end if 76 #end if
78 77
79 cutadapt 78 cutadapt
80 79
81 ## cutadapt (up to version 1.16) can't be run in multicore mode with these options 80 -j=\${GALAXY_SLOTS:-4}
82 #if not any(($output_options.info_file, $output_options.rest_file, $output_options.wildcard_file, $output_options.too_short_file, $output_options.too_long_file, $output_options.untrimmed_file))
83 -j \${GALAXY_SLOTS:-1}
84 #end if
85 81
86 #if str( $library.type ) == "single": 82 #if str( $library.type ) == "single":
87 @read1_options@ 83 @read1_options@
88 #if $output_options.multiple_output: 84 #if 'multiple_output' in $output_selector:
89 --output='split/{name}.${input_1.ext}' 85 --output='split/{name}.${input_1.ext}'
90 #else: 86 #else:
91 --output='$out1' 87 --output='$out1'
92 #end if 88 #end if
93 #else: 89 #else:
101 --paired-output='$out_pairs.reverse' 97 --paired-output='$out_pairs.reverse'
102 #end if 98 #end if
103 #end if 99 #end if
104 100
105 --error-rate=$adapter_options.error_rate 101 --error-rate=$adapter_options.error_rate
106 --times=$adapter_options.count 102 --times=$adapter_options.times
107 --overlap=$adapter_options.overlap 103 --overlap=$adapter_options.overlap
108 $adapter_options.no_indels 104 $adapter_options.no_indels
109 $adapter_options.match_read_wildcards 105 $adapter_options.match_read_wildcards
110 $adapter_options.no_trim 106 --action=$adapter_options.action
111 $adapter_options.mask_adapter 107 $adapter_options.revcomp
112 108
113 $filter_options.discard 109 $filter_options.discard_trimmed
114 $filter_options.discard_untrimmed 110 $filter_options.discard_untrimmed
115 #if str($filter_options.min): 111
116 --minimum-length=$filter_options.min 112 #if str($filter_options.minimum_length) and str($filter_options.length_R2_options.length_R2_status) == 'True':
117 #end if 113 --minimum-length=$filter_options.minimum_length:$filter_options.length_R2_options.R2_minimum
118 #if str($filter_options.max): 114 #else if str($filter_options.minimum_length):
119 --maximum-length=$filter_options.max 115 --minimum-length=$filter_options.minimum_length
116 #end if
117 #if str($filter_options.maximum_length) and str($filter_options.length_R2_options.length_R2_status) == 'True':
118 --maximum-length=$filter_options.maximum_length:$filter_options.length_R2_options.R2_maximum
119 #else if str($filter_options.maximum_length):
120 --maximum-length=$filter_options.maximum_length
120 #end if 121 #end if
121 #if str($filter_options.max_n): 122 #if str($filter_options.max_n):
122 --max-n=$filter_options.max_n 123 --max-n=$filter_options.max_n
123 #end if 124 #end if
124 #if str( $library.type ) != "single": 125 #if str( $library.type ) != "single":
125 #if $filter_options.pair_filter: 126 #if $filter_options.pair_filter:
126 --pair-filter=$filter_options.pair_filter 127 --pair-filter=$filter_options.pair_filter
127 #end if 128 #end if
128 #end if 129 #end if
130 #if str($filter_options.max_expected_errors):
131 --max-expected-errors=$filter_options.max_expected_errors
132 #end if
129 133
130 134
131 #if str($read_mod_options.quality_cutoff) != '0': 135 #if str($read_mod_options.quality_cutoff) != '0':
132 --quality-cutoff=$read_mod_options.quality_cutoff 136 --quality-cutoff=$read_mod_options.quality_cutoff
133 #end if 137 #end if
134 #if str($read_mod_options.nextseq_trim) != '0': 138 #if str($read_mod_options.nextseq_trim) != '0':
135 --nextseq-trim=$read_mod_options.nextseq_trim 139 --nextseq-trim=$read_mod_options.nextseq_trim
136 #end if 140 #end if
137 $read_mod_options.trim_n 141 $read_mod_options.trim_n
138 #if $read_mod_options.prefix != '': 142 #if str($read_mod_options.shorten_options.shorten_values) == 'True':
139 --prefix='$read_mod_options.prefix' 143 #if str($read_mod_options.shorten_options.shorten_end) == '3prime'
140 #end if 144 --length=$read_mod_options.shorten_options.length
141 #if $read_mod_options.suffix != '': 145 #else
142 --suffix='$read_mod_options.suffix' 146 --length=-$read_mod_options.shorten_options.length
143 #end if 147 #end if
144 #if str($read_mod_options.length) != '0': 148 #end if
145 --length=$read_mod_options.length 149 #if str($read_mod_options.length_tag) != '':
146 #end if
147 #if $read_mod_options.length_tag != '':
148 --length-tag='$read_mod_options.length_tag' 150 --length-tag='$read_mod_options.length_tag'
149 #end if 151 #end if
152 #if str($read_mod_options.cut) != '0':
153 --cut=$read_mod_options.cut
154 #end if
155 #if str($read_mod_options.rename) != '':
156 --rename='$read_mod_options.rename'
157 #end if
158 $read_mod_options.zero_cap
159
150 160
151 '${read1}' 161 '${read1}'
152 #if $paired: 162 #if $paired:
153 '${read2}' 163 '${read2}'
154 #end if 164 #end if
155 165
156 #if $output_options.report: 166 #if 'report' in $output_selector:
157 > report.txt 167 > report.txt
158 #end if 168 #end if
159 ]]></command> 169 ]]></command>
160 <inputs> 170 <inputs>
161 171
185 195
186 </conditional> 196 </conditional>
187 197
188 <!-- Adapter Options --> 198 <!-- Adapter Options -->
189 <section name="adapter_options" title="Adapter Options"> 199 <section name="adapter_options" title="Adapter Options">
190 <param name="error_rate" argument="--error-rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." /> 200 <param name="action" type="select" label="What to do if a match is found">
191 <param name="no_indels" argument="--no-indels" type="boolean" value="False" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." /> 201 <option value="trim" selected="True">Trim: trim adapter and upstream or downstream sequence</option>
192 <param name="count" argument="--times" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." /> 202 <option value="retain">Retain: the read is trimmed, but the adapter sequence is not removed</option>
193 <param name="overlap" argument="--overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." /> 203 <option value="mask">Mask: mask adapters with 'N' characters instead of trimming them</option>
194 <param name="match_read_wildcards" type="select" label="Match Wildcards" help="Allow 'N's as matches. Default: In the adapters but not in the reads"> 204 <option value="lowercase">Lowercase: convert to lowercase</option>
205 <option value="none">None: leave unchanged</option>
206 </param>
207 <param name="internal" type="select" label="Disallow internal adapter occurrences" help="The non-internal 5’ and 3’ adapter types disallow internal occurrences of the adapter sequence. The adapter must always be at one of the ends of the read, but partial occurrences are also ok.">
208 <option value="X">Enabled</option>
209 <option value="" selected="True">Disabled</option>
210 </param>
211 <param argument="--error-rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
212 <param argument="--no-indels" type="boolean" value="False" truevalue="--no-indels" falsevalue="" label="Do not allow indels (Use ONLY with anchored 5' (front) adapters)." help="Do not allow indels in the alignments. That is, allow only mismatches. This option is currently only supported for anchored 5' adapters ('^ADAPTER') (default: both mismatches and indels are allowed)." />
213 <param argument="--times" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
214 <param argument="--overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified. This reduces the number of bases trimmed purely due to short random adapter matches." />
215 <param name="match_read_wildcards" type="select" label="Match wildcards" help="Allow 'N's as matches. Default: in the adapters but not in the reads">
195 <option value=" " selected="True">In the adapters but not in the reads</option> 216 <option value=" " selected="True">In the adapters but not in the reads</option>
196 <option value="--match-read-wildcards">In the adapters and in the reads</option> 217 <option value="--match-read-wildcards">In the adapters and in the reads</option>
197 <option value="--no-match-adapter-wildcards">Nowhere</option> 218 <option value="--no-match-adapter-wildcards">Nowhere</option>
198 </param> 219 </param>
199 <param name="no_trim" argument="--no-trim" type="boolean" value="False" truevalue="--no-trim" falsevalue="" label="Do not trim adapters" help="Match and redirect reads to output/untrimmed-output as usual, but don't remove the adapters (default: trim the adapters)." /> 220 <param argument="--revcomp" type="boolean" value="False" truevalue="--revcomp" falsevalue="" label="Look for adapters in the reverse complement" help="Check both the read and its reverse complement for adapter matches. If match is on reverse-complemented version, output that one. Default: check only read." />
200 <param name="mask_adapter" argument="--mask-adapter" type="boolean" value="False" truevalue="--mask-adapter" falsevalue="" label="Mask Adapters" help="Mask adapter bases with 'N' instead of trimming them (default: trim adapters)." />
201 </section> 221 </section>
202 222
203 <!-- Filter Options --> 223 <!-- Filter Options -->
204 <section name="filter_options" title="Filter Options"> 224 <section name="filter_options" title="Filter Options">
205 <param name="discard" argument="--discard-trimmed" type="boolean" value="False" truevalue="--discard-trimmed" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" /> 225 <param argument="--discard-trimmed" type="boolean" value="False" truevalue="--discard-trimmed" falsevalue="" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />
206 <param name="discard_untrimmed" argument="--discard_untrimmed" type="boolean" value="False" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." /> 226 <param argument="--discard_untrimmed" type="boolean" value="False" truevalue="--discard-untrimmed" falsevalue="" label="Discard Untrimmed Reads" help="Discard reads that do not contain the adapter." />
207 <param name="min" argument="--minimum-length" type="integer" min="0" optional="True" value="" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted." /> 227 <param argument="--minimum-length" type="integer" min="0" optional="True" value="" label="Minimum length (R1)" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded." />
208 <param name="max" argument="--maximum-length" type="integer" min="0" optional="True" value="" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted." /> 228 <param argument="--maximum-length" type="integer" min="0" optional="True" value="" label="Maximum length (R1)" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded." />
209 <param name="max_n" argument="--max-n" type="float" min="0" optional="True" label="Max N" help="Discard reads with more than this number of 'N' bases. A number between 0 and 1 is interpreted as a fraction of the read length." /> 229 <conditional name="length_R2_options">
210 <param name="pair_filter" argument="--pair-filter" type="select" optional="True" label="Pair filter" help="Which of the reads in a paired-end read have to match the filtering criterion in order for the pair to be filtered. Default: any"> 230 <param name="length_R2_status" type="select" label="Specify a minimum/maximum length for reverse reads (R2)" help="When trimming paired-end reads, the minimum/maximum lengths for R1 and R2 can be specified separately. If not provided, the same minimum length applies to both reads.">
211 <option value="any" selected="True">any</option> 231 <option value="True">Enabled</option>
212 <option value="both">both</option> 232 <option value="False" selected="True">Disabled</option>
233 </param>
234 <when value="True">
235 <param name="R2_minimum" type="integer" min="0" value="" optional="True" label="Minimum length (R2)" />
236 <param name="R2_maximum" type="integer" min="0" value="" optional="True" label="Maximum length (R2)" />
237 </when>
238 <when value="False">
239 </when>
240 </conditional>
241 <param argument="--max-n" type="float" min="0" optional="True" label="Max N" help="Discard reads with more than this number of 'N' bases. A number between 0 and 1 is interpreted as a fraction of the read length." />
242 <param argument="--pair-filter" type="select" optional="True" label="Pair filter" help="Which of the reads in a paired-end read have to match the filtering criterion in order for the pair to be filtered. Default: any">
243 <option value="any" selected="True">Any: a read pair is discarded (or redirected) if one of the reads (R1 or R2) fulfills the filtering criterion. </option>
244 <option value="both">Both: filtering criteria must apply to both reads in order for a read pair to be discarded. </option>
245 <option value="first">First: will make a decision about the read pair by inspecting whether the filtering criterion applies to the first read, ignoring the second read.</option>
246
213 </param> 247 </param>
248 <param argument="--max-expected-errors" type="integer" min="0" optional="True" value="" label="Max expected errors" help="Discard reads whose expected number of errors (computed from quality values) exceeds this value." />
249 <param argument="--discard-cassava" type="boolean" truevalue="--discard-cassava" falsevalue="" checked="False" label="Discard CASAVA filtering" help="Discard reads that did not pass CASAVA filtering (header has :Y:)." />
214 </section> 250 </section>
215 251
216 <!-- Read Modification Options --> 252 <!-- Read Modification Options -->
217 <section name="read_mod_options" title="Read Modification Options"> 253 <section name="read_mod_options" title="Read Modification Options">
218 <param name="quality_cutoff" argument="--quality-cutoff" type="text" value="0" label="Quality cutoff" help=" Trim low-quality bases from 5' and/or 3' ends of each read before adapter removal. Applied to both reads if data is paired. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second."> 254 <param argument="--quality-cutoff" type="text" value="0" label="Quality cutoff" help=" Trim low-quality bases from 5' and/or 3' ends of each read before adapter removal. Applied to both reads if data is paired. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second.">
219 <sanitizer> 255 <sanitizer>
220 <valid initial="string.digits"><add value="," /></valid> 256 <valid initial="string.digits"><add value="," /></valid>
221 </sanitizer> 257 </sanitizer>
258 <validator type="regex">[0-9,]+</validator>
222 </param> 259 </param>
223 <param name="nextseq_trim" argument="--nextseq-trim" type="integer" value="0" label="NextSeq trimming" help="Experimental option for quality trimming of NextSeq data. This is necessary because that machine cannot distinguish between G and reaching the end of the fragment (it encodes G as ‘black’). This option works like regular quality trimming (where one would use -q 20 instead), except that the qualities of G bases are ignored." /> 260 <param argument="--nextseq-trim" type="integer" value="0" label="NextSeq trimming" help="Experimental option for quality trimming of NextSeq data. This is necessary because that machine cannot distinguish between G and reaching the end of the fragment (it encodes G as ‘black’). This option works like regular quality trimming (where one would use -q 20 instead), except that the qualities of G bases are ignored." />
224 <param name="trim_n" argument="--trim-n" type="boolean" truevalue="--trim-n" falsevalue="" checked="False" label="Trim Ns" help="Trim N's on ends of reads." /> 261 <param argument="--trim-n" type="boolean" truevalue="--trim-n" falsevalue="" checked="False" label="Trim Ns" help="Trim N's on ends of reads." />
225 <param name="prefix" argument="--prefix" label="Prefix" type="text" help="Add this prefix to read names" /> 262 <param argument="--strip-suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." />
226 <param name="suffix" argument="--suffix" label="Suffix" type="text" help="Add this suffix to read names" /> 263 <conditional name="shorten_options">
227 <param name="strip_suffix" argument="--strip-suffix" label="Strip suffix" type="text" help="Remove this suffix from read names if present." /> 264 <param name="shorten_values" type="select" label="Shortening reads to a fixed length" help="If you want to remove a fixed number of bases from each read, use the --cut option instead.">
228 <param name="length" argument="--length" type="integer" value="0" label="Length" help="Shorten reads to this length. This modification is applied after adapter trimming." /> 265 <option value="True">Enabled</option>
229 <param name="length_tag" argument="--length-tag" label="Length Tag" type="text" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." /> 266 <option value="False" selected="True">Disabled</option>
267 </param>
268 <when value="True">
269 <param argument="--length" type="integer" value="0" label="Length" help="Shorten reads to this length. This modification is applied after adapter trimming." />
270 <param name="shorten_end" type="select" label="End at which to apply the slice">
271 <option value="3prime">3' ends</option>
272 <option value="5prime">5' ends</option>
273 </param>
274 </when>
275 <when value="False">
276 </when>
277 </conditional>
278 <param argument="--length-tag" label="Length tag" type="text" optional="True" help="Search for TAG followed by a decimal number in the name of the read (description/comment field of the FASTA or FASTQ file). Replace the decimal number with the correct length of the trimmed read. For example, use --length-tag 'length=' to search for fields like 'length=123'." >
279 <sanitizer invalid_char="">
280 <valid initial="string.letters,string.digits">
281 <add value="/" />
282 <add value="+" />
283 <add value="-" />
284 <add value="=" />
285 <add value=" " />
286 <add value="_" />
287 </valid>
288 </sanitizer>
289 <validator type="regex">[A-Za-z0-9 =-_/+]+</validator>
290 </param>
291 <param argument="--rename" label="Rename reads" type="text" optional="True" help="This option can be used to rename both single-end and paired-end reads. ">
292 <sanitizer invalid_char="">
293 <valid initial="string.letters,string.digits">
294 <add value="{" />
295 <add value="}" />
296 <add value="_" />
297 <add value="=" />
298 <add value=" " />
299 </valid>
300 </sanitizer>
301 <validator type="regex">[A-Za-z0-9 {}=_]+</validator>
302 </param>
303 <param argument="--cut" label="Remove a fixed number of bases" type="integer" value="0" help="This option unconditionally removes bases from the beginning or end of each read. If the given length is positive, the bases are removed from the beginning of each read. If it is negative, the bases are removed from the end." />
304 <param argument="--zero-cap" type="boolean" truevalue="--zero-cap" falsevalue="" checked="False" label="Change negative quality values to zero" />
230 </section> 305 </section>
231 306
232 <!-- Output Options --> 307 <!-- Output Options -->
233 <section name="output_options" title="Output Options"> 308 <param name="output_selector" type="select" multiple="True" display="checkboxes" label="Outputs selector">
234 <param name="report" type="boolean" value="False" label="Report" help="Cutadapt's per-adapter statistics. You can use this file with MultiQC."/> 309 <option value="report">Report: Cutadapt's per-adapter statistics. You can use this file with MultiQC.</option>
235 <param name="info_file" argument="--info-file" type="boolean" value="False" label="Info File" help="Write information about each read and its adapter matches to a file."/> 310 <option value="info_file">Info file: write information about each read and its adapter matches.</option>
236 <param name="rest_file" argument="--rest-file" type="boolean" value="False" label="Rest of Read" help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file."/> 311 <option value="rest_file">Rest of read: when the adapter matches in the middle of a read, write the rest (after the adapter).</option>
237 <param name="wildcard_file" argument="--wildcard-file" type="boolean" value="False" label="Wildcard File" help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard positions to file."/> 312 <option value="wildcard_file">Wildcard file: when the adapter has wildcard bases (Ns) write adapter bases matching wildcard positions.</option>
238 <param name="too_short_file" argument="--too-short-output" type="boolean" value="False" label="Too Short Reads" help="Write reads that are too short (according to minimum length specified) to a file. (default: discard reads)"/> 313 <option value="too_short_file">Too short reads: write reads that are too short according to minimum length specified (default: discard reads).</option>
239 <param name="too_long_file" argument="--too-long-output" type="boolean" value="False" label="Too Long Reads" help="Write reads that are too long (according to maximum length specified) to a file. (default: discard reads)"/> 314 <option value="too_long_file">Too long reads: write reads that are too long (according to maximum length specified)</option>
240 <param name="untrimmed_file" argument="--untrimmed-output" type="boolean" value="False" label="Untrimmed Reads" help="Write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file. (default: output to same file as trimmed)"/> 315 <option value="untrimmed_file">Untrimmed reads: write reads that do not contain the adapter to a separate file, instead of writing them to the regular output file (default: output to same file as trimmed)</option>
241 <param name="multiple_output" argument="" type="boolean" value="False" label="Multiple output" help="Create a separate file for each adapter trimmed (default: all trimmed reads are in a single file)"/> 316 <option value="multiple_output">Multiple output: create a separate file for each adapter trimmed (default: all trimmed reads are in a single file)</option>
242 </section> 317 </param>
243
244 </inputs> 318 </inputs>
245 319
246 <outputs> 320 <outputs>
247 <data name="out1" format="fastqsanger" metadata_source="input_1" from_work_dir="out1*" label="${tool.name} on ${on_string}: Read 1 Output"> 321 <data name="out1" format="fastqsanger" metadata_source="input_1" from_work_dir="out1*" label="${tool.name} on ${on_string}: Read 1 Output">
248 <filter>(output_options['multiple_output'] is False and library['type'] != 'paired_collection')</filter> 322 <filter>library['type'] != 'paired_collection' and 'multiple_output' not in output_selector</filter>
249 <expand macro="inherit_format_1" /> 323 <expand macro="inherit_format_1" />
250 </data> 324 </data>
325
251 <data name="out2" format="fastqsanger" metadata_source="input_2" from_work_dir="out2*" label="${tool.name} on ${on_string}: Read 2 Output" > 326 <data name="out2" format="fastqsanger" metadata_source="input_2" from_work_dir="out2*" label="${tool.name} on ${on_string}: Read 2 Output" >
252 <filter>(output_options['multiple_output'] is False and library['type'] == 'paired')</filter> 327 <filter>library['type'] == 'paired' and 'multiple_output' not in output_selector</filter>
253 <expand macro="inherit_format_2" /> 328 <expand macro="inherit_format_2" />
254 </data> 329 </data>
255 330
256 <collection name="out_pairs" type="paired" format_source="input_1" label="${tool.name} on ${on_string}: Reads"> 331 <collection name="out_pairs" type="paired" format_source="input_1" label="${tool.name} on ${on_string}: Reads">
257 <filter>(output_options['multiple_output'] is False and library['type'] == 'paired_collection')</filter> 332 <filter>library['type'] == 'paired_collection' and 'multiple_output' not in output_selector</filter>
258 </collection> 333 </collection>
259 334
260 <data name="report" format="txt" from_work_dir="report.txt" label="${tool.name} on ${on_string}: Report"> 335 <data name="report" format="txt" from_work_dir="report.txt" label="${tool.name} on ${on_string}: Report">
261 <filter>(output_options['report'] is True)</filter> 336 <filter>output_selector and 'report' in output_selector</filter>
262 </data> 337 </data>
263
264 <data name="info_file" format="txt" metadata_source="input_1" label="${tool.name} on ${on_string}: Info File" > 338 <data name="info_file" format="txt" metadata_source="input_1" label="${tool.name} on ${on_string}: Info File" >
265 <filter>(output_options['info_file'] is True)</filter> 339 <filter>output_selector and 'info_file' in output_selector</filter>
266 </data> 340 </data>
267 341
268 <data name="rest_output" format="fastqsanger" metadata_source="input_1" from_work_dir="rest_output*" label="${tool.name} on ${on_string}: Rest of Reads (R1 only)" > 342 <data name="rest_output" format="fastqsanger" metadata_source="input_1" from_work_dir="rest_output*" label="${tool.name} on ${on_string}: Rest of Reads (R1 only)" >
269 <filter>(output_options['rest_file'] is True)</filter> 343 <filter>output_selector and 'rest_file' in output_selector</filter>
270 <expand macro="inherit_format_1" /> 344 <expand macro="inherit_format_1" />
271 </data> 345 </data>
272 346
273 <data name="wild_output" format="txt" metadata_source="input_1" from_work_dir="wild_output*" label="${tool.name} on ${on_string}: Wildcard File" > 347 <data name="wild_output" format="txt" metadata_source="input_1" from_work_dir="wild_output*" label="${tool.name} on ${on_string}: Wildcard File" >
274 <filter>(output_options['wildcard_file'] is True)</filter> 348 <filter>output_selector and 'wildcard_file' in output_selector</filter>
275 </data> 349 </data>
276 350
277 <data name="untrimmed_output" format="fastqsanger" metadata_source="input_1" from_work_dir="untrimmed_output*" label="${tool.name} on ${on_string}: Untrimmed Read 1" > 351 <data name="untrimmed_output" format="fastqsanger" metadata_source="input_1" from_work_dir="untrimmed_output*" label="${tool.name} on ${on_string}: Untrimmed Read 1" >
278 <filter>(output_options['untrimmed_file'] is True)</filter> 352 <filter>output_selector and 'untrimmed_file' in output_selector</filter>
279 <expand macro="inherit_format_1" /> 353 <expand macro="inherit_format_1" />
280 </data> 354 </data>
281 <data name="untrimmed_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="untrimmed_paired_output*" label="${tool.name} on ${on_string}: Untrimmed Read 2" > 355 <data name="untrimmed_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="untrimmed_paired_output*" label="${tool.name} on ${on_string}: Untrimmed Read 2" >
282 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> 356 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
283 <filter>(output_options['untrimmed_file'] is True)</filter> 357 <filter>output_selector and 'untrimmed_file' in output_selector </filter>
284 <expand macro="inherit_format_2" /> 358 <expand macro="inherit_format_2" />
285 </data> 359 </data>
286 360
287 <data name="too_short_output" format="fastqsanger" metadata_source="input_1" from_work_dir="too_short_output*" label="${tool.name} on ${on_string}: Too Short Read 1" > 361 <data name="too_short_output" format="fastqsanger" metadata_source="input_1" from_work_dir="too_short_output*" label="${tool.name} on ${on_string}: Too Short Read 1" >
288 <filter>(output_options['too_short_file'] is True)</filter> 362 <filter>output_selector and 'too_short_file' in output_selector</filter>
289 <expand macro="inherit_format_1" /> 363 <expand macro="inherit_format_1" />
290 </data> 364 </data>
291 <data name="too_short_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="too_short_paired_output*" label="${tool.name} on ${on_string}: Too Short Read 2" > 365 <data name="too_short_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="too_short_paired_output*" label="${tool.name} on ${on_string}: Too Short Read 2" >
292 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> 366 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
293 <filter>(output_options['too_short_file'] is True)</filter> 367 <filter>output_selector and 'too_short_file' in output_selector</filter>
294 <expand macro="inherit_format_2" /> 368 <expand macro="inherit_format_2" />
295 </data> 369 </data>
296
297 <data name="too_long_output" format="fastqsanger" metadata_source="input_1" from_work_dir="too_long_output*" label="${tool.name} on ${on_string}: Too Long Read 1" > 370 <data name="too_long_output" format="fastqsanger" metadata_source="input_1" from_work_dir="too_long_output*" label="${tool.name} on ${on_string}: Too Long Read 1" >
298 <filter>(output_options['too_long_file'] is True)</filter> 371 <filter>output_selector and 'too_long_file' in output_selector</filter>
299 <expand macro="inherit_format_1" /> 372 <expand macro="inherit_format_1" />
300 </data> 373 </data>
301 <data name="too_long_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="too_long_paired_output*" label="${tool.name} on ${on_string}: Too Long Read 2" > 374 <data name="too_long_paired_output" format="fastqsanger" metadata_source="input_2" from_work_dir="too_long_paired_output*" label="${tool.name} on ${on_string}: Too Long Read 2" >
302 <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter> 375 <filter>library['type'] == 'paired' or library['type'] == 'paired_collection'</filter>
303 <filter>(output_options['too_long_file'] is True)</filter> 376 <filter>output_selector and 'too_long_file' in output_selector</filter>
304 <expand macro="inherit_format_2" /> 377 <expand macro="inherit_format_2" />
305 </data> 378 </data>
306 379
307 <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Split outputs" > 380 <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Split outputs" >
308 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fastq.*)" directory="split" /> 381 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fastq.*)" directory="split" />
309 <filter>(output_options['multiple_output'] is True)</filter> 382 <filter>output_selector and 'multiple_output' in output_selector</filter>
310 </collection> 383 </collection>
311
312 </outputs> 384 </outputs>
313 385
314 <tests> 386 <tests>
315 <!-- Ensure fastq works --> 387 <!-- Ensure fastq works -->
316 <test expect_num_outputs="1"> 388 <test expect_num_outputs="1">
379 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> 451 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
380 <param name="adapter_source_list" value="user"/> 452 <param name="adapter_source_list" value="user"/>
381 <param name="adapter" value="TTAGACATATCTCCGTCG"/> 453 <param name="adapter" value="TTAGACATATCTCCGTCG"/>
382 <param name="output_filtering" value="filter"/> 454 <param name="output_filtering" value="filter"/>
383 <section name="filter_options"> 455 <section name="filter_options">
384 <param name="discard" value="True"/> 456 <param name="discard_trimmed" value="True"/>
385 </section> 457 </section>
386 <param name="read_modification" value="none"/> 458 <param name="read_modification" value="none"/>
387 <param name="output_type" value="default"/>
388 <output name="out1" file="cutadapt_discard.out" ftype="fastq"/> 459 <output name="out1" file="cutadapt_discard.out" ftype="fastq"/>
389 <assert_command> 460 <assert_command>
390 <has_text text="--discard-trimmed"/> 461 <has_text text="--discard-trimmed"/>
391 </assert_command> 462 </assert_command>
392 </test> 463 </test>
395 <param name="input_1" ftype="fasta" value="cutadapt_rest.fa" /> 466 <param name="input_1" ftype="fasta" value="cutadapt_rest.fa" />
396 <param name="adapter_source_list" value="user"/> 467 <param name="adapter_source_list" value="user"/>
397 <param name="adapter" value="AAAGATG"/> 468 <param name="adapter" value="AAAGATG"/>
398 <param name="output_filtering" value="default"/> 469 <param name="output_filtering" value="default"/>
399 <param name="read_modification" value="none"/> 470 <param name="read_modification" value="none"/>
400 <param name="output_type" value="additional"/> 471 <param name="output_selector" value="rest_file"/>
401 <param name="rest_file" value="True"/>
402 <output name="out1" file="cutadapt_rest.out" ftype="fasta"/> 472 <output name="out1" file="cutadapt_rest.out" ftype="fasta"/>
403 <output name="rest_output" file="cutadapt_rest2.out" ftype="fasta"/> 473 <output name="rest_output" file="cutadapt_rest2.out" ftype="fasta"/>
404 </test> 474 </test>
405 <!-- Ensure nextseq-trim option works --> 475 <!-- Ensure nextseq-trim option works -->
406 <test expect_num_outputs="1"> 476 <test expect_num_outputs="1">
416 <test expect_num_outputs="3"> 486 <test expect_num_outputs="3">
417 <param name="type" value="single" /> 487 <param name="type" value="single" />
418 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> 488 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
419 <param name="adapter_source_list" value="user"/> 489 <param name="adapter_source_list" value="user"/>
420 <param name="adapter" value="AGATCGGAAGAGC"/> 490 <param name="adapter" value="AGATCGGAAGAGC"/>
421 <param name="report" value="True" /> 491 <param name="output_selector" value="report,info_file" />
422 <param name="info_file" value="True" />
423 <output name="out1" value="cutadapt_small.out" ftype="fastq"/> 492 <output name="out1" value="cutadapt_small.out" ftype="fastq"/>
424 <output name="report"> 493 <output name="report">
425 <assert_contents> 494 <assert_contents>
426 <has_text text="Summary"/> 495 <has_text text="Summary"/>
427 </assert_contents> 496 </assert_contents>
449 <param name="front_adapter" value="^AGGTCACT" /> 518 <param name="front_adapter" value="^AGGTCACT" />
450 </conditional> 519 </conditional>
451 </repeat> 520 </repeat>
452 </section> 521 </section>
453 </conditional> 522 </conditional>
454 <param name="report" value="False" /> 523 <param name="output_selector" value="multiple_output" />
455 <param name="info_file" value="False" />
456 <param name="multiple_output" value="True" />
457 <output_collection name="split_output" type="list" count="3"> 524 <output_collection name="split_output" type="list" count="3">
458 <element name="A1" value="A1.fastq" ftype="fastq"> 525 <element name="A1" value="A1.fastq" ftype="fastq">
459 </element> 526 </element>
460 <element name="A2" value="A2.fastq" ftype="fastq"> 527 <element name="A2" value="A2.fastq" ftype="fastq">
461 </element> 528 </element>
475 <param name="front_adapter_file" ftype="fasta" value="barcodes.fasta" /> 542 <param name="front_adapter_file" ftype="fasta" value="barcodes.fasta" />
476 </conditional> 543 </conditional>
477 </repeat> 544 </repeat>
478 </section> 545 </section>
479 </conditional> 546 </conditional>
480 <param name="report" value="False" /> 547 <param name="output_selector" value="multiple_output" />
481 <param name="info_file" value="False" />
482 <param name="multiple_output" value="True" />
483 <output_collection name="split_output" type="list" count="3"> 548 <output_collection name="split_output" type="list" count="3">
484 <element name="A1" decompress="True" file="A1.fastq.gz" ftype="fastq.gz"> 549 <element name="A1" decompress="True" file="A1.fastq.gz" ftype="fastq.gz">
485 </element> 550 </element>
486 <element name="A2" decompress="True" file="A2.fastq.gz" ftype="fastq.gz"> 551 <element name="A2" decompress="True" file="A2.fastq.gz" ftype="fastq.gz">
487 </element> 552 </element>
494 <test expect_num_outputs="2"> 559 <test expect_num_outputs="2">
495 <param name="type" value="single" /> 560 <param name="type" value="single" />
496 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" /> 561 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
497 <param name="adapter_source_list" value="user"/> 562 <param name="adapter_source_list" value="user"/>
498 <param name="adapter" value="AAAT"/> 563 <param name="adapter" value="AAAT"/>
499 <param name="untrimmed_file" value="True" /> 564 <param name="output_selector" value="untrimmed_file" />
500 <output name="out1" file="cutadapt_trimmed.out" ftype="fastq"/> 565 <output name="out1" file="cutadapt_trimmed.out" ftype="fastq"/>
501 <output name="untrimmed_output" file="cutadapt_untrimmed.out" ftype="fastq"/> 566 <output name="untrimmed_output" file="cutadapt_untrimmed.out" ftype="fastq"/>
502 </test> 567 </test>
503 <!-- Ensure untrimmed gzip file output works --> 568 <!-- Ensure untrimmed gzip file output works -->
504 <test expect_num_outputs="2"> 569 <test expect_num_outputs="2">
505 <param name="type" value="single" /> 570 <param name="type" value="single" />
506 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" /> 571 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1.fq.gz" />
507 <param name="adapter_source_list" value="user"/> 572 <param name="adapter_source_list" value="user"/>
508 <param name="adapter" value="AGATCGGAAGAGC"/> 573 <param name="adapter" value="AGATCGGAAGAGC"/>
509 <param name="untrimmed_file" value="True" /> 574 <param name="output_selector" value="untrimmed_file" />
510 <output name="out1" decompress="True" file="cutadapt_trimmed.out.gz" ftype="fastq.gz"/> 575 <output name="out1" decompress="True" file="cutadapt_trimmed.out.gz" ftype="fastq.gz"/>
511 <!-- 576 <!--
512 Do not use the decompress option for this assertion, since it does NOT test that the file is compressed 577 Do not use the decompress option for this assertion, since it does NOT test that the file is compressed
513 See discussion at https://github.com/galaxyproject/galaxy/issues/7671 578 See discussion at https://github.com/galaxyproject/galaxy/issues/7671
514 `delta="4000"` is more than the difference between gzip level 1 and gzip level 9, but much less than the 579 `delta="4000"` is more than the difference between gzip level 1 and gzip level 9, but much less than the
525 <param name="adapter" value="AGATCGGAAGAGC"/> 590 <param name="adapter" value="AGATCGGAAGAGC"/>
526 <param name="adapter_source_list2" value="user"/> 591 <param name="adapter_source_list2" value="user"/>
527 <param name="adapter2" value="AGATCGGAAGAGC"/> 592 <param name="adapter2" value="AGATCGGAAGAGC"/>
528 <section name="filter_options"> 593 <section name="filter_options">
529 <param name="discard_untrimmed" value="true"/> 594 <param name="discard_untrimmed" value="true"/>
530 <param name="min" value="1"/> 595 <param name="minimun_length" value="1"/>
531 <param name="max" value="1000"/> 596 <param name="maximum_length" value="1000"/>
532 <param name="max_n" value="0"/> 597 <param name="max_n" value="0"/>
533 <param name="pair_filter" value="both"/> 598 <param name="pair_filter" value="both"/>
534 </section> 599 </section>
535 <assert_command> 600 <assert_command>
536 <has_text text="--discard-untrimmed"/> 601 <has_text text="--discard-untrimmed"/>
537 <has_text text="--minimum-length=1"/>
538 <has_text text="--maximum-length=1000"/> 602 <has_text text="--maximum-length=1000"/>
539 <has_text text="--max-n=0"/> 603 <has_text text="--max-n=0"/>
540 <has_text text="--pair-filter=both"/> 604 <has_text text="--pair-filter=both"/>
605 </assert_command>
606 </test>
607 <!-- Test cut option -->
608 <test expect_num_outputs="1">
609 <param name="type" value="single" />
610 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
611 <param name="adapter_source_list" value="user"/>
612 <param name="adapter" value="AGATCGGAAGAGC"/>
613 <section name="read_mod_options">
614 <param name="cut" value="5"/>
615 </section>
616 <output name="out1" file="cutadapt_small_cut.out" ftype="fastq"/>
617 </test>
618 <!-- Test rename options -->
619 <test expect_num_outputs="1">
620 <param name="type" value="single" />
621 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
622 <param name="adapter_source_list" value="user"/>
623 <param name="adapter" value="AGATCGGAAGAGC"/>
624 <section name="read_mod_options">
625 <param name="cut" value="5"/>
626 <param name="rename" value="{id} barcode={cut_prefix}"/>
627 </section>
628 <output name="out1" file="cutadapt_small_rename.out" ftype="fastq">
629 <assert_contents>
630 <has_text text="@prefix:1_13_1259/1 barcode=AGCCG"/>
631 </assert_contents>
632 </output>
633 </test>
634 <!-- Test action options -->
635 <test expect_num_outputs="1">
636 <param name="type" value="single" />
637 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
638 <param name="adapter_source_list" value="user"/>
639 <param name="adapter" value="CGTCCGAANTAG"/>
640 <section name="adapter_options">
641 <param name="action" value="retain"/>
642 </section>
643 <output name="out1" file="cutadapt_action_retain.out" ftype="fastq"/>
644 </test>
645 <test expect_num_outputs="1">
646 <param name="type" value="single" />
647 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
648 <param name="adapter_source_list" value="user"/>
649 <param name="adapter" value="CGTCCGAANTAG"/>
650 <section name="adapter_options">
651 <param name="action" value="mask"/>
652 </section>
653 <output name="out1" file="cutadapt_action_mask.out" ftype="fastq"/>
654 </test>
655 <test expect_num_outputs="1">
656 <param name="type" value="single" />
657 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
658 <param name="adapter_source_list" value="user"/>
659 <param name="adapter" value="CGTCCGAANTAG"/>
660 <section name="adapter_options">
661 <param name="action" value="lowercase"/>
662 </section>
663 <output name="out1" file="cutadapt_action_lowercase.out" ftype="fastq"/>
664 </test>
665 <test expect_num_outputs="1">
666 <param name="type" value="single" />
667 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
668 <param name="adapter_source_list" value="user"/>
669 <param name="adapter" value="CGTCCGAANTAG"/>
670 <section name="adapter_options">
671 <param name="action" value="none"/>
672 </section>
673 <output name="out1" file="cutadapt_action_none.out" ftype="fastq"/>
674 </test>
675 <!-- Test revcomp options -->
676 <test expect_num_outputs="1">
677 <param name="type" value="single" />
678 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
679 <param name="adapter_source_list" value="user"/>
680 <param name="adapter" value="TAAACAGATTAGT"/>
681 <section name="adapter_options">
682 <param name="revcomp" value="true"/>
683 </section>
684 <output name="out1" file="cutadapt_revcomp.out" ftype="fastq"/>
685 </test>
686 <!-- Test minimum length paired-reads -->
687 <test expect_num_outputs="2">
688 <param name="type" value="paired" />
689 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
690 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
691 <param name="adapter_source_list" value="user"/>
692 <param name="adapter" value="ATCTGGTTCC"/>
693 <param name="adapter_source_list2" value="user"/>
694 <param name="adapter2" value="CTACAAG"/>
695 <section name="filter_options">
696 <param name="minimum_length" value="30"/>
697 <param name="pair_filter" value="both"/>
698 <conditional name="length_R2_options">
699 <param name="length_R2_status" value="True"/>
700 <param name="R2_minimum" value="10"/>
701 </conditional>
702 </section>
703 <output name="out1" decompress="True" file="cutadapt_out1_min_length.fq.gz" ftype="fastq.gz"/>
704 <output name="out2" decompress="True" file="cutadapt_out2_min_length.fq.gz" ftype="fastq.gz"/>
705 <assert_command>
706 <has_text text="--minimum-length=30:10"/>
707 <has_text text="--pair-filter=both"/>
708 </assert_command>
709 </test>
710 <!-- Test maximum length paired-reads -->
711 <test expect_num_outputs="2">
712 <param name="type" value="paired" />
713 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
714 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
715 <param name="adapter_source_list" value="user"/>
716 <param name="adapter" value="AGATCGGAAGAGC"/>
717 <param name="adapter_source_list2" value="user"/>
718 <param name="adapter2" value="AGATCGGAAGAGC"/>
719 <section name="filter_options">
720 <param name="pair_filter" value="both"/>
721 <param name="maximum_length" value="50"/>
722 <conditional name="length_R2_options">
723 <param name="length_R2_status" value="True"/>
724 <param name="R2_maximum" value="30"/>
725 </conditional>
726 </section>
727 <output name="out1" decompress="True" file="cutadapt_out1_max_length.fq.gz" ftype="fastq.gz"/>
728 <output name="out2" decompress="True" file="cutadapt_out2_max_length.fq.gz" ftype="fastq.gz"/>
729 <assert_command>
730 <has_text text="--maximum-length=50:30"/>
731 <has_text text="--pair-filter=both"/>
732 </assert_command>
733 </test>
734 <!-- Test combination maximum and minimum length paired reads -->
735 <test expect_num_outputs="2">
736 <param name="type" value="paired" />
737 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
738 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
739 <param name="adapter_source_list" value="user"/>
740 <param name="adapter" value="AGATCGGAAGAGC"/>
741 <param name="adapter_source_list2" value="user"/>
742 <param name="adapter2" value="AGATCGGAAGAGC"/>
743 <section name="filter_options">
744 <param name="pair_filter" value="both"/>
745 <param name="minimum_length" value="10"/>
746 <param name="maximum_length" value="50"/>
747 <conditional name="length_R2_options">
748 <param name="length_R2_status" value="True"/>
749 <param name="R2_maximum" value="30"/>
750 </conditional>
751 </section>
752 <output name="out1" decompress="True" file="cutadapt_out1_max_min_01.fq.gz" ftype="fastq.gz"/>
753 <output name="out2" decompress="True" file="cutadapt_out2_max_min_01.fq.gz" ftype="fastq.gz"/>
754 <assert_command>
755 <has_text text="--minimum-length=10:"/>
756 <has_text text="--maximum-length=50:30"/>
757 <has_text text="--pair-filter=both"/>
758 </assert_command>
759 </test>
760 <test expect_num_outputs="2">
761 <param name="type" value="paired" />
762 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
763 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
764 <param name="adapter_source_list" value="user"/>
765 <param name="adapter" value="AGATCGGAAGAGC"/>
766 <param name="adapter_source_list2" value="user"/>
767 <param name="adapter2" value="AGATCGGAAGAGC"/>
768 <section name="filter_options">
769 <param name="pair_filter" value="both"/>
770 <param name="minimum_length" value="10"/>
771 <param name="maximum_length" value="50"/>
772 <conditional name="length_R2_options">
773 <param name="length_R2_status" value="True"/>
774 <param name="R2_minimum" value="10"/>
775 </conditional>
776 </section>
777 <output name="out1" decompress="True" file="cutadapt_out1_max_min_02.fq.gz" ftype="fastq.gz"/>
778 <output name="out2" decompress="True" file="cutadapt_out2_max_min_02.fq.gz" ftype="fastq.gz"/>
779 <assert_command>
780 <has_text text="--minimum-length=10:10"/>
781 <has_text text="--maximum-length=50:"/>
782 <has_text text="--pair-filter=both"/>
783 </assert_command>
784 </test>
785 <!-- Test length options -->
786 <test expect_num_outputs="1">
787 <param name="type" value="single" />
788 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
789 <param name="adapter_source_list" value="user"/>
790 <param name="adapter" value="AGCCGCTANGACG"/>
791 <section name="read_mod_options">
792 <conditional name="shorten_options">
793 <param name="shorten_values" value="True"/>
794 <param name="shorten_end" value="3prime"/>
795 <param name="length" value="10"/>
796 </conditional>
797 </section>
798 <output name="out1" file="cutadapt_shorten_3prime.out" ftype="fastq"/>
799 </test>
800 <test expect_num_outputs="1">
801 <param name="type" value="single" />
802 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
803 <param name="adapter_source_list" value="user"/>
804 <param name="adapter" value="AGCCGCTANGACG"/>
805 <section name="read_mod_options">
806 <conditional name="shorten_options">
807 <param name="shorten_values" value="True"/>
808 <param name="shorten_end" value="5prime"/>
809 <param name="length" value="10"/>
810 </conditional>
811 </section>
812 <output name="out1" file="cutadapt_shorten_5prime.out" ftype="fastq"/>
813 </test>
814 <!-- Test max expected errors options -->
815 <test expect_num_outputs="1">
816 <param name="type" value="single" />
817 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
818 <param name="adapter_source_list" value="user"/>
819 <param name="adapter" value="AGCGGCTTAGACG"/>
820 <section name="filter_options">
821 <param name="max_expected_errors" value="10"/>
822 </section>
823 <output name="out1" file="cutadapt_shorten_expected_errors.out" ftype="fastq"/>
824 </test>
825 <!-- Test disallow internal adaptors option -->
826 <test expect_num_outputs="1">
827 <param name="type" value="single" />
828 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
829 <param name="adapter_source_list" value="user"/>
830 <param name="adapter" value="GAANTAGCTACCAC"/>
831 <section name="adapter_options">
832 <param name="internal" value="X"/>
833 </section>
834 <output name="out1" file="cutadapt_shorten_internal_adapters.out" ftype="fastq"/>
835 <assert_command>
836 <has_text text="GAANTAGCTACCACX"/>
837 </assert_command>
838 </test>
839 <test expect_num_outputs="2">
840 <param name="type" value="paired" />
841 <param name="input_1" ftype="fastq.gz" value="bwa-mem-fastq1_assimetric.fq.gz" />
842 <param name="input_2" ftype="fastq.gz" value="bwa-mem-fastq2_assimetric.fq.gz" />
843 <param name="adapter_source_list" value="user"/>
844 <param name="adapter" value="AGATCGGAAGAGC"/>
845 <param name="adapter_source_list2" value="user"/>
846 <param name="adapter2" value="AGATCGGAAGAGC"/>
847 <section name="adapter_options">
848 <param name="internal" value="X"/>
849 </section>
850 <output name="out1" decompress="True" file="cutadapt_out1_internal_adapter.fq.gz" ftype="fastq.gz"/>
851 <output name="out2" decompress="True" file="cutadapt_out2_internal_adapter.fq.gz" ftype="fastq.gz"/>
852 <assert_command>
853 <has_text text="AGATCGGAAGAGCX"/>
854 </assert_command>
855 </test>
856 <test expect_num_outputs="1">
857 <param name="type" value="single" />
858 <param name="input_1" ftype="fastq" value="cutadapt_small.fastq" />
859 <param name="adapter_source_list" value="builtin"/>
860 <param name="adapter" value="TGTAGGCC"/>
861 <section name="adapter_options">
862 <param name="internal" value="X"/>
863 </section>
864 <output name="out1" file="cutadapt_builtin_internal_adapter.out" ftype="fastq"/>
865 <assert_command>
866 <has_text text="TGTAGGCCX"/>
541 </assert_command> 867 </assert_command>
542 </test> 868 </test>
543 </tests> 869 </tests>
544 870
545 <help><![CDATA[ 871 <help><![CDATA[
636 962
637 Cutadapt can output per-adapter statistics if you select to output the report above. 963 Cutadapt can output per-adapter statistics if you select to output the report above.
638 964
639 Example: 965 Example:
640 966
641 *This is cutadapt 1.16 with Python 3.6.4* 967 ::
642 968
643 *Command line parameters: -j 1 --format=fastq -a AGATCGGAAGAGC --info-file=/tmp/tmpX0DlY1/files/000/dataset_21.dat --output=out1.fq --error-rate=0.1 --times=1 --overlap=3 input_f.fastq* 969 This is cutadapt 3.4 with Python 3.9.2
644 *Running on 1 core* 970
645 *Trimming 1 adapter with at most 10.0% errors in single-end mode ...* 971 Command line parameters: -j=1 -a AGATCGGAAGAGC -A AGATCGGAAGAGC --output=out1.fq.gz --paired-output=out2.fq.gz --error-rate=0.1 --times=1
646 *Finished in 0.00 s (1426 us/read; 0.04 M reads/minute).* 972 --overlap=3 --action=trim --minimum-length=30:40 --pair-filter=both --cut=0 bwa-mem-fastq1_assimetric_fq_gz.fq.gz bwa-mem-fastq2_assimetric_fq_gz.fq.gz
647 973
648 *=== Summary ===* 974 Processing reads on 1 core in paired-end mode ...
649 975 Finished in 0.01 s (129 µs/read; 0.46 M reads/minute).
650 * Total reads processed: 3* 976
651 * Reads with adapters: 0 (0.0%)* 977 === Summary ===
652 * Reads written (passing filters): 3 (100.0%)* 978
653 979 Total read pairs processed: 99
654 * Total basepairs processed: 102 bp* 980 Read 1 with adapter: 2 (2.0%)
655 * Total written (filtered): 102 bp (100.0%)* 981 Read 2 with adapter: 4 (4.0%)
656 982 Pairs that were too short: 3 (3.0%)
657 *=== Adapter 1 ===* 983 Pairs written (passing filters): 96 (97.0%)
658 984
659 *Sequence: AGATCGGAAGAGC; Type: regular 3'; Length: 13; Trimmed: 0 times.* 985 Total basepairs processed: 48,291 bp
986 Read 1: 24,147 bp
987 Read 2: 24,144 bp
988 Total written (filtered): 48,171 bp (99.8%)
989 Read 1: 24,090 bp
990 Read 2: 24,081 bp
660 991
661 992
662 **Info file** 993 **Info file**
663 994
664 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. 995 The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file.
688 1019
689 When parsing the file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. 1020 When parsing the file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line.
690 1021
691 If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the sequences shown are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter. 1022 If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the sequences shown are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter.
692 1023
1024
1025 --------------------
1026
1027 **Rename Reads**
1028
1029 --------------------
1030
1031
1032 The --rename option expects a template string such as {id} extra_info {adapter_name} as a parameter. It can contain regular text and placeholders that consist of a name enclosed in curly braces ({placeholdername}).
1033
1034 The read name will be set to the template string in which the placeholders are replaced with the actual values relevant for the current read.
1035
1036 The following placeholders are currently available for single-end reads:
1037
1038 * {header} – the full, unchanged header
1039 * {id} – the read ID, that is, the part of the header before the first whitespace
1040 * {comment} – the part of the header after the whitespace following the ID
1041 * {adapter_name} – the name of the adapter that was found in this read, or no_adapter if there was no adapter match. If you use --times to do multiple rounds of adapter matching, this is the name of the last found adapter.
1042 * {cut_prefix} – the prefix removed by the --cut (or -u) option (that is, when used with a positive length argument)
1043 * {cut_suffix} – the suffix removed by the --cut (or -u) option (that is, when used with a negative length argument)
1044
1045 If the --rename option is used with paired-end data, the template is applied separately to both R1 and R2. That is, for R1, the placeholders are replaced with values from R1, and for R2, the placeholders are replaced with values from R2. For example, {comment} becomes R1’s comment in R1 and it becomes R2’s comment in R2.
1046
1047
693 -------------------- 1048 --------------------
694 1049
695 **More Information** 1050 **More Information**
696 1051
697 -------------------- 1052 --------------------
710 -------------------- 1065 --------------------
711 1066
712 Author: Lance Parsons <lparsons@princeton.edu> 1067 Author: Lance Parsons <lparsons@princeton.edu>
713 1068
714 ]]></help> 1069 ]]></help>
715 1070 <expand macro="citations" />
716 <citations>
717 <citation type="bibtex">
718 @article{marcel_cutadapt_2011,
719 title = {Cutadapt removes adapter sequences from high-throughput sequencing reads},
720 volume = {17},
721 copyright = {Authors who publish with this journal agree to the following terms: Authors retain copyright and grant the journal right of first publication with the work simultaneously licensed under a Creative Commons Attribution License that allows others to share the work with an acknowledgement of the work's authorship and initial publication in this journal. Authors are able to enter into separate, additional contractual arrangements for the non-exclusive distribution of the journal's published version of the work (e.g., post it to an institutional repository or publish it in a book), with an acknowledgement of its initial publication in this journal. Authors are permitted and encouraged to post their work online (e.g., in institutional repositories or on their website) prior to and during the submission process, as it can lead to productive exchanges, as well as earlier and greater citation of published work (See The Effect of Open Access ).},
722 url = {http://journal.embnet.org/index.php/embnetjournal/article/view/200},
723 abstract = {When small RNA is sequenced on current sequencing machines, the resulting reads are usually longer than the RNA and therefore contain parts of the 3' adapter. That adapter must be found and removed error-tolerantly from each read before read mapping. Previous solutions are either hard to use or do not offer required features, in particular support for color space data. As an easy to use alternative, we developed the command-line tool cutadapt, which supports 454, Illumina and SOLiD (color space) data, offers two adapter trimming algorithms, and has other useful features.
724
725 Cutadapt, including its MIT-licensed source code, is available for download at http://code.google.com/p/cutadapt/},
726 number = {1},
727 urldate = {2011-08-02},
728 journal = {EMBnet.journal},
729 author = {Marcel, Martin},
730 year = {2011},
731 note = {When small RNA is sequenced on current sequencing machines, the resulting reads are usually longer than the RNA and therefore contain parts of the 3' adapter. That adapter must be found and removed error-tolerantly from each read before read mapping. Previous solutions are either hard to use or do not offer required features, in particular support for color space data. As an easy to use alternative, we developed the command-line tool cutadapt, which supports 454, Illumina and SOLiD (color space) data, offers two adapter trimming algorithms, and has other useful features. Cutadapt, including its MIT-licensed source code, is available for download at http://code.google.com/p/cutadapt/},
732 keywords = {Adapter removal;, fastq, MicroRNA, Sequencing, Small RNA, software},
733 file = {Cutadapt removes adapter sequences from high-throughput sequencing reads | Martin | EMBnet.journal:/Users/lparsons/Library/Application Support/Firefox/Profiles/thd2t4je.default/zotero/storage/ZXZT4PSE/200.html:text/html}
734 }
735 </citation>
736 </citations>
737
738 </tool> 1071 </tool>