comparison macros.xml @ 15:27ac32a22ad2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
author iuc
date Mon, 13 Sep 2021 14:52:06 +0000
parents d5ff68d2d5ff
children 7accf7407811
comparison
equal deleted inserted replaced
14:9fa7803d1c51 15:27ac32a22ad2
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <macros> 2 <macros>
3
4 <!-- macros applying to all umi_tools -->
5
<!-- Version tokens for the umi_tools wrappers. TOOL_VERSION pins the package version in the "requirements" macro; VERSION_SUFFIX and PROFILE are presumably consumed by the individual tool files (not visible here). -->
6 <token name="@TOOL_VERSION@">1.1.2</token>
7 <token name="@VERSION_SUFFIX@">0</token>
8 <token name="@PROFILE@">21.01</token>
<!-- Declares the umi_tools package requirement at the TOOL_VERSION token; the yield lets an expanding tool append further requirement elements. -->
9 <xml name="requirements">
10 <requirements>
11 <requirement type="package" version="@TOOL_VERSION@">umi_tools</requirement>
12 <yield />
13 </requirements>
14 </xml>
<!-- Shared citations block: one DOI citation plus a BibTeX entry pointing at the UMI-tools GitHub repository. -->
15 <xml name="citations">
16 <citations>
17 <citation type="doi">10.1101/gr.209601.116</citation>
18 <citation type="bibtex">
19 @misc{githubUMI-tools,
20 title = {UMI-tools},
21 publisher = {GitHub},
22 journal = {GitHub repository},
23 url = {https://github.com/CGATOxford/UMI-tools},
24 }
25 </citation>
26 </citations>
27 </xml>
<!-- Collapsed "Extra parameters" section exposing an optional, non-negative integer random seed. -->
28 <xml name="advanced_options_macro">
29 <section name="advanced" title="Extra parameters" expanded="false">
30 <param argument="--random-seed" type="integer" min="0" optional="true" label="Random Seed" />
31 </section>
32 </xml>
<!-- Command-line fragment for advanced_options_macro: the random seed option is emitted only when the optional parameter renders as a non-empty string. -->
33 <token name="@ADVANCED_OPTIONS@"><![CDATA[
34 #if str($advanced.random_seed) != ''
35 --random-seed='$advanced.random_seed'
36 #end if
37 ]]></token>
38
39 <!-- macros for extract and whitelist-->
40
3 <macro name="barcode_sanitizer" > 41 <macro name="barcode_sanitizer" >
4 <sanitizer invalid_char=""> 42 <sanitizer invalid_char="">
5 <valid initial="string.letters,string.digits"> 43 <valid initial="string.letters,string.digits">
6 <add value="&#42;" /><!-- asterisk --> 44 <add value="&#42;" /><!-- asterisk -->
7 <add value="&#44;" /><!-- comma --> 45 <add value="&#44;" /><!-- comma -->
21 <add value="-"/> 59 <add value="-"/>
22 <add value="!"/> 60 <add value="!"/>
23 </valid> 61 </valid>
24 </sanitizer> 62 </sanitizer>
25 </macro> 63 </macro>
26 <macro name="barcode2_conditional" > 64 <xml name="sanitize_tag" >
27 <conditional name="barcode"> 65 <sanitizer invalid_char="">
28 <param name="barcode_select" argument="--split-barcode" type="select" label="Barcode on both reads?"> 66 <valid initial="string.letters,string.digits" />
29 <option value="first_read_only">Barcode on first read only</option> 67 </sanitizer>
30 <option value="both_reads">Barcode on both reads</option> 68 </xml>
31 </param> 69 <macro name="barcode1_macro" >
32 <when value="first_read_only"/> 70 <param argument="--bc-pattern" type="text" label="Barcode pattern for first read"
33 <when value="both_reads"> 71 help="Use this option to specify the format of the UMI/barcode. Use Ns to
34 <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read" 72 represent the random positions and Xs to indicate the bc positions.
35 help="Use this option to specify the format of the UMI/barcode for 73 Bases with Ns will be extracted and added to the read name. Remaining
36 the second read pair if required." > 74 bases, marked with an X will be reattached to the read">
37 <expand macro="barcode_sanitizer" /> 75 <validator type="empty_field" />
38 </param> 76 <expand macro="barcode_sanitizer" />
39 </when> 77 </param>
40 </conditional>
41 </macro> 78 </macro>
<!-- Text parameter for the second-read barcode pattern; it defaults to an empty value and is filtered through barcode_sanitizer. -->
79 <macro name="barcode2_macro" >
80 <param argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
81 help="Use this option to specify the format of the UMI/barcode for
82 the second read pair if required" >
83 <expand macro="barcode_sanitizer" />
84 </param>
85 </macro>
86 <!-- Explicit list of accepted FASTQ datatypes. Not just "fastq", because that would also allow fastqcssanger. -->
87 <token name="@FASTQ_FORMATS@">fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz,fastqsolexa,fastqsolexa.gz</token>
<!-- Cross-reference linking the wrappers to the bio.tools registry entry "umi-tools". -->
88 <xml name="bio_tools">
89 <xrefs>
90 <xref type="bio.tools">umi-tools</xref>
91 </xrefs>
92 </xml>
42 <xml name="input_types"> 93 <xml name="input_types">
43 <conditional name="input_type"> 94 <conditional name="input_type_cond">
44 <param name="type" type="select" label="Library type"> 95 <param name="input_type" type="select" label="Library type">
45 <option value="single">Single-end</option> 96 <option value="single">Single-end</option>
46 <option value="paired">Paired-end</option> 97 <option value="paired">Paired-end</option>
47 <option value="paired_collection">Paired-end Dataset Collection</option> 98 <option value="paired_collection">Paired-end Dataset Collection</option>
48 </param> 99 </param>
49 <when value="single"> 100 <when value="single">
50 <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> 101 <param name="input_read1" type="data" format="@FASTQ_FORMATS@" label="Reads in FASTQ format" />
102 <expand macro="barcode1_macro"/>
51 </when> 103 </when>
52 <when value="paired"> 104 <when value="paired">
53 <param name="input_read1" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> 105 <param name="input_read1" type="data" format="@FASTQ_FORMATS@" label="Reads in FASTQ format" />
54 <param name="input_read2" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> 106 <param name="input_read2" type="data" format="@FASTQ_FORMATS@" label="Reads in FASTQ format" />
55 <expand macro="barcode2_conditional" /> 107 <expand macro="barcode1_macro"/>
108 <expand macro="barcode2_macro"/>
109 <yield/>
56 </when> 110 </when>
57 <when value="paired_collection"> 111 <when value="paired_collection">
58 <param name="input_readpair" type="data_collection" collection_type="paired" format="fastq,fastq.gz" label="Reads in FASTQ format" /> 112 <param name="input_readpair" type="data_collection" collection_type="paired" format="@FASTQ_FORMATS@" label="Reads in FASTQ format" />
59 <expand macro="barcode2_conditional" /> 113 <expand macro="barcode1_macro"/>
114 <expand macro="barcode2_macro"/>
115 <yield/>
60 </when> 116 </when>
61 </conditional> 117 </conditional>
62 </xml> 118 </xml>
63 <xml name="citations">
64 <citations>
65 <citation type="doi">10.1101/gr.209601.116</citation>
66 <citation type="bibtex">
67 @misc{githubUMI-tools,
68 title = {UMI-tools},
69 publisher = {GitHub},
70 journal = {GitHub repository},
71 url = {https://github.com/CGATOxford/UMI-tools},
72 }
73 </citation>
74 </citations>
75 </xml>
76 <xml name="requirements">
77 <requirements>
78 <requirement type="package" version="@VERSION@">umi_tools</requirement>
79 <yield />
80 </requirements>
81 </xml>
82 <token name="@VERSION@">0.5.5</token>
83 <token name="@COMMAND_LINK@"><![CDATA[ 119 <token name="@COMMAND_LINK@"><![CDATA[
84 #set $gz = False 120 #set $gz = False
85 #if $input_type.type == 'single': 121 #if $input_type_cond.input_type == 'single':
86 #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"): 122 #if $input_type_cond.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
87 ln -s '$input_type.input_single' input_single.gz && 123 ln -s '$input_type_cond.input_read1' input_single.gz &&
88 #set $gz = True 124 #set $gz = True
89 #else 125 #else
90 ln -s '$input_type.input_single' input_single.txt && 126 ln -s '$input_type_cond.input_read1' input_single.txt &&
91 #end if 127 #end if
92 #elif $input_type.type == 'paired': 128 #elif $input_type_cond.input_type == 'paired':
93 #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"): 129 #if $input_type_cond.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
94 ln -s '$input_type.input_read1' input_read1.gz && 130 ln -s '$input_type_cond.input_read1' input_read1.gz &&
95 ln -s '$input_type.input_read2' input_read2.gz && 131 ln -s '$input_type_cond.input_read2' input_read2.gz &&
96 #set $gz = True 132 #set $gz = True
97 #else 133 #else
98 ln -s '$input_type.input_read1' input_read1.txt && 134 ln -s '$input_type_cond.input_read1' input_read1.txt &&
99 ln -s '$input_type.input_read2' input_read2.txt && 135 ln -s '$input_type_cond.input_read2' input_read2.txt &&
100 #end if 136 #end if
101 #else ## paired_collection 137 #else ## paired_collection
102 #if $input_type.input_readpair.forward.is_of_type("fastq.gz", "fastqsanger.gz"): 138 #if $input_type_cond.input_readpair.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
103 ln -s '$input_type.input_readpair.forward' input_read1.gz && 139 ln -s '$input_type_cond.input_readpair.forward' input_read1.gz &&
104 ln -s '$input_type.input_readpair.reverse' input_read2.gz && 140 ln -s '$input_type_cond.input_readpair.reverse' input_read2.gz &&
105 #set $gz = True 141 #set $gz = True
106 #else 142 #else
107 ln -s '$input_type.input_readpair.forward' input_read1.txt && 143 ln -s '$input_type_cond.input_readpair.forward' input_read1.txt &&
108 ln -s '$input_type.input_readpair.reverse' input_read2.txt && 144 ln -s '$input_type_cond.input_readpair.reverse' input_read2.txt &&
109 #end if 145 #end if
110 #end if 146 #end if
111 ]]></token> 147 ]]></token>
148
149 <!-- macros for count, dedup, and group -->
150
<!-- Stages the input as input.bam in the working directory. SAM input is sorted into BAM and indexed with samtools; any other input is symlinked together with its bam_index metadata file as input.bam.bai. Both branches set the Cheetah variable input_file to 'input.bam'. -->
151 <token name="@LINK_SAM_BAM_INPUT@"><![CDATA[
152 #if $input.is_of_type("sam"):
153 ## TODO dedup has problems with SAM input in some cases
154 ## https://github.com/CGATOxford/UMI-tools/issues/483
155 ## so convert it to sorted BAM for now
156 ## #set $input_file = $input
157 samtools sort --no-PG '$input' > 'input.bam' &&
158 samtools index -b 'input.bam' &&
159 #set $input_file = 'input.bam'
160 #else:
161 ln -sf '${input}' 'input.bam' &&
162 ln -sf '$input.metadata.bam_index' 'input.bam.bai' &&
163 #set $input_file = 'input.bam'
164 #end if
165 ]]></token>
<!-- Currently expands to nothing: every line in the body is a Cheetah comment, leaving the in-sam flag disabled (see the TODO in LINK_SAM_BAM_INPUT). -->
166 <token name="@SET_INPUT_TYPE@"><![CDATA[
167 ## TODO see comment in LINK_SAM_BAM_INPUT
168 ## #if $input.is_of_type("sam"):
169 ## --in-sam
170 ## #end if
171 ]]></token>
172
<!-- Inputs for barcode extraction: a conditional on the extraction method ("string" offers the 3prime switch, "regex" offers a boolean asking for the non-matching reads) followed by the ignore-read-pair-suffixes switch. -->
173 <xml name="fastq_barcode_extraction_options_macro">
174 <conditional name="extract_method_cond">
175 <param argument="--extract-method" type="select" label="Barcode Extraction Method"
176 help="If bracketed expressions are used in the above barcode pattern, then set this to 'regex'. Otherwise leave as 'string'" >
177 <option value="string" selected="true" />
178 <option value="regex" />
179 </param>
180 <when value="string">
181 <param argument="--3prime" name="prime3" type="boolean" label="Is barcode on 3' end of the read?"
182 truevalue="--3prime" falsevalue=""
183 help="By default the barcode is assumed to be on the 5' end of the read, but
184 use this option to specify that it is on the 3' end instead.
185 This option only works with ``--extract-method=string``
186 since 3' encoding can be specified explicitly with a regex, e.g
187 ``.*(?P&lt;umi_1&gt;.{5})$``" />
188 </when>
189 <when value="regex">
190 <param name="filtered_out_bool" type="boolean" label="Write out reads not matching regex pattern"/>
191 </when>
192 </conditional>
193 <param argument="--ignore-read-pair-suffixes" type="boolean" truevalue="--ignore-read-pair-suffixes" falsevalue="" label="Ignore '\1' and '\2' read name suffixes"/>
194 </xml>
<!-- Command-line fragment for barcode extraction. Always passes the extraction method and the first barcode pattern; adds the second pattern only for non-single input with a non-empty value. In "string" mode it emits the 3prime switch; otherwise, when filtered_out_bool is set, it emits the filtered-out path(s) chosen by library type. The filtered_out, filtered_out_paired and filtered_out_paired_collection outputs are not defined in this part of the file; presumably the including tool declares them. -->
195 <token name="@FASTQ_BARCODE_EXTRACTION_OPTIONS@"><![CDATA[
196 ## fastq barcode extraction options:
197 --extract-method='$extract_method_cond.extract_method'
198 --bc-pattern='$input_type_cond.bc_pattern'
199 #if $input_type_cond.input_type != 'single' and $input_type_cond.bc_pattern2 != ''
200 --bc-pattern2='$input_type_cond.bc_pattern2'
201 #end if
202 #if $extract_method_cond.extract_method == 'string'
203 $extract_method_cond.prime3
204 #else if $extract_method_cond.filtered_out_bool
205 #if $input_type_cond.input_type == 'single':
206 --filtered-out='$filtered_out'
207 #else if $input_type_cond.input_type == 'paired':
208 --filtered-out='$filtered_out'
209 --filtered-out2='$filtered_out_paired'
210 #else
211 --filtered-out='$filtered_out_paired_collection.forward'
212 --filtered-out2='$filtered_out_paired_collection.reverse'
213 #end if
214 #end if
215 $ignore_read_pair_suffixes
216 ]]></token>
217 <token name="@FASTQ_BARCODE_EXTRACTION_HELP@"><![CDATA[
218 There are two methods enabled to extract the umi barcode (+/-
219 cell barcode). For both methods, the patterns should be provided
220 using the ``--bc-pattern`` and ``--bc-pattern2`` options.
221
222 - ``string``
223 This should be used where the barcodes are always in the same
224 place in the read.
225
226 - N = UMI position (required)
227 - C = cell barcode position (optional)
228 - X = sample position (optional)
229
230 Bases with Ns and Cs will be extracted and added to the read
231 name. The corresponding sequence qualities will be removed from
232 the read. Bases with an X will be reattached to the read.
233
234 E.g. If the pattern is `NNNNCC`,
235 Then the read::
236
237 @HISEQ:87:00000000 read1
238 AAGGTTGCTGATTGGATGGGCTAG
239 +
240 DA1AEBFGGCG01DFH00B1FF0B
241
242 will become::
243
244 @HISEQ:87:00000000_TT_AAGG read1
245 GCTGATTGGATGGGCTAG
246 +
247 1AFGGCG01DFH00B1FF0B
248
249 where 'TT' is the cell barcode and 'AAGG' is the UMI.
250
251 - ``regex``
252 This method allows for more flexible barcode extraction and
253 should be used where the cell barcodes are variable in
254 length. Alternatively, the regex option can also be used to
255 filter out reads which do not contain an expected adapter
256 sequence. UMI-tools uses the regex module rather than the more
257 standard re module since the former also enables fuzzy matching
258
259 The regex must contain groups to define how the barcodes are
260 encoded in the read. The expected groups in the regex are:
261
262 umi_n = UMI positions, where n can be any value (required)
263 cell_n = cell barcode positions, where n can be any value (optional)
264 discard_n = positions to discard, where n can be any value (optional)
265
266 UMI positions and cell barcode positions will be extracted and
267 added to the read name. The corresponding sequence qualities
268 will be removed from the read.
269
270 Discard bases and the corresponding quality scores will be
271 removed from the read. All bases matched by other groups or
272 components of the regex will be reattached to the read sequence
273
274 For example, the following regex can be used to extract reads
275 from the Klein et al inDrop data::
276
277 (?P<cell_1>.{8,12})(?P<discard_1>GAGTGATTGCTTGTGACGCCTT)(?P<cell_2>.{8})(?P<umi_1>.{6})T{3}.*
278
279 Where only reads with a 3' T-tail and `GAGTGATTGCTTGTGACGCCTT` in
280 the correct position to yield two cell barcodes of 8-12 and 8bp
281 respectively, and a 6bp UMI will be retained.
282
283 You can also specify fuzzy matching to allow errors. For example if
284 the discard group above was specified as below this would enable
285 matches with up to 2 errors in the discard_1 group.
286
287 ::
288
289 (?P<discard_1>GAGTGATTGCTTGTGACGCCTT){s<=2}
290
291 Note that all UMIs must be the same length for downstream
292 processing with dedup, group or count commands]]></token>
293
<!-- Inputs describing where the UMI (and optionally the cell barcode) is stored. "read_id" asks for the separator between read id and UMI, "tag" asks for the UMI and cell-barcode tag names plus optional split and delimiter strings, and "umis" needs no further input. -->
294 <xml name="barcode_options_macro">
295 <conditional name="bc" >
296 <param argument="--extract-umi-method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" >
297 <option value="read_id" selected="true">Barcodes are contained at the end of the read separated by a delimiter</option>
298 <option value="tag" >Barcodes are contained in tags</option>
299 <option value="umis" >Barcodes were extracted using umis</option>
300 </param>
301 <when value="read_id" >
302 <param argument="--umi-separator" type="text" label="Delimiter between read id and the UMI" value="_" >
303 <sanitizer invalid_char="" >
<!-- The value is interpolated inside single quotes on the command line, so a literal single quote must not pass the whitelist. -->
304 <valid initial="string.punctuation" >
<remove value="&apos;" />
</valid>
305 </sanitizer>
306 </param>
307 </when>
308 <when value="tag" >
309 <param argument="--umi-tag" type="text" label="Tag which contains the UMI" value="RX" >
310 <expand macro="sanitize_tag" />
311 </param>
312 <param argument="--umi-tag-split" type="text" label="Separate the UMI in tag by SPLIT" help="and take the first element"/>
313 <param argument="--umi-tag-delimiter" type="text" label="Separate the UMI in tag by DELIMITER" help="and concatenate the elements"/>
314 <param argument="--cell-tag" type="text" label="Tag which contains the cell barcode" >
315 <expand macro="sanitize_tag" />
316 </param>
317 <param argument="--cell-tag-split" type="text" label="Separate the cell barcode in tag by SPLIT" help="and take the first element"/>
318 <param argument="--cell-tag-delimiter" type="text" label="Separate the cell barcode in tag by DELIMITER" help="and concatenate the elements"/>
319 </when>
320 <when value="umis"/>
321 </conditional>
322 </xml>
<!-- Command-line fragment for barcode_options_macro. Always passes the UMI extraction method; adds the separator in "read_id" mode, or the tag options in "tag" mode. Optional free-text values are passed only when non-empty. -->
323 <token name="@BARCODE_OPTIONS@"><![CDATA[
324 --extract-umi-method $bc.extract_umi_method
325 #if str($bc.extract_umi_method) == 'read_id':
326 --umi-separator '$bc.umi_separator'
327 #else if str($bc.extract_umi_method) == 'tag':
328 --umi-tag '$bc.umi_tag'
329 #if $bc.umi_tag_split != ''
330 --umi-tag-split '$bc.umi_tag_split'
331 #end if
332 #if $bc.umi_tag_delimiter != ''
333 --umi-tag-delimiter '$bc.umi_tag_delimiter'
334 #end if
## cell_tag has no default value; skip the option when it is empty instead of
## passing an empty string, matching the handling of the other optional tag fields.
#if str($bc.cell_tag) != ''
335 --cell-tag '$bc.cell_tag'
#end if
336 #if $bc.cell_tag_split != ''
337 --cell-tag-split '$bc.cell_tag_split'
338 #end if
339 #if $bc.cell_tag_delimiter != ''
340 --cell-tag-delimiter '$bc.cell_tag_delimiter'
341 #end if
342 #end if
343 ]]></token>
344 <token name="@BARCODE_HELP@"><![CDATA[
345 Extracting barcodes
346 -------------------
347
348 It is assumed that the FASTQ files were processed with ``umi_tools
349 extract`` before mapping and thus the UMI is the last word of the read
350 name. e.g:
351
352 @HISEQ:87:00000000_AATT
353
354 where ``AATT`` is the UMI sequence.
355
356 If you have used an alternative method which does not separate the
357 read id and UMI with a "_", such as bcl2fastq which uses ":", you can
358 specify the separator with the option ``--umi-separator=<sep>``,
359 replacing <sep> with e.g ":".
360
361 Alternatively, if your UMIs are encoded in a tag, you can specify this
362 by setting the option --extract-umi-method=tag and set the tag name
363 with the --umi-tag option. For example, if your UMIs are encoded in
364 the 'UM' tag, provide the following options:
365 ``--extract-umi-method=tag`` ``--umi-tag=UM``
366
367 Finally, if you have used umis to extract the UMI +/- cell barcode,
368 you can specify ``--extract-umi-method=umis``
369
370 The start position of a read is considered to be the start of its alignment
371 minus any soft clipped bases. A read aligned at position 500 with
372 cigar 2S98M will be assumed to start at position 498.]]></token>
373
374
<!-- "UMI grouping options" section: grouping method, edit distance threshold, spliced-is-unique switch, soft clip threshold and read-length switch. -->
<!-- NOTE(review): no option of the method select is marked selected, so the first one (unique) is presumably preselected, while the UMI grouping help text labels directional as the default. Confirm which default is intended. -->
375 <xml name="umi_grouping_options_macro">
376 <section name="umi" title="UMI grouping options">
377 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads" help="All methods start by identifying the reads with the same mapping position">
378 <option value="unique">Reads group share the exact same UMI</option>
379 <option value="percentile">Reads group share the exact same UMI. UMIs with counts less than 1% of the median counts for UMIs at the same position are ignored</option>
380 <option value="cluster">Identify clusters based on hamming distance</option>
381 <option value="adjacency">Identify clusters based on hamming distance and resolve networks by using the node counts</option>
382 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option>
383 </param>
384 <param argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" />
385 <param argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" />
386 <param argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced" />
387 <param argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" />
388 </section>
389 </xml>
<!-- Command-line fragment for the "umi" section: method and both thresholds are always passed; the two boolean parameters expand to their flag or to an empty string. -->
390 <token name="@UMI_GROUPING_OPTIONS@"><![CDATA[
391 --method $umi.method
392 --edit-distance-threshold $umi.edit_distance_threshold
393 $umi.spliced_is_unique
394 --soft-clip-threshold $umi.soft_clip_threshold
395 $umi.read_length
396 ]]></token>
397 <token name="@UMI_GROUPING_HELP@"><![CDATA[
398 UMI grouping options
399 --------------------
400
401 Grouping Method
402 ...............
403
404 What method to use to identify group of reads with the same (or
405 similar) UMI(s)?
406
407 All methods start by identifying the reads with the same mapping position.
408
409 The simplest methods, unique and percentile, group reads with
410 the exact same UMI. The network-based methods, cluster, adjacency and
411 directional, build networks where nodes are UMIs and edges connect UMIs
412 with an edit distance <= threshold (usually 1). The groups of reads
413 are then defined from the network in a method-specific manner. For all
414 the network-based methods, each read group is equivalent to one read
415 count for the gene.
416
417 - unique
418 Reads group share the exact same UMI
419
420 - percentile
421 Reads group share the exact same UMI. UMIs with counts < 1% of the
422 median counts for UMIs at the same position are ignored.
423
424 - cluster
425 Identify clusters of connected UMIs (based on hamming distance
426 threshold). Each network is a read group
427
428 - adjacency
429 Cluster UMIs as above. For each cluster, select the node (UMI)
430 with the highest counts. Visit all nodes one edge away. If all
431 nodes have been visited, stop. Otherwise, repeat with remaining
432 nodes until all nodes have been visited. Each step
433 defines a read group.
434
435 - directional (default)
436 Identify clusters of connected UMIs (based on hamming distance
437 threshold) and umi A counts >= (2* umi B counts) - 1. Each
438 network is a read group.
439
440 ]]></token>
441
<!-- "SAM/BAM options" section: mapping quality cutoff, handling of unmapped reads, optional handling of chimeric pairs and unpaired reads, ignore-umi and ignore-tlen switches, optional chromosome restriction, random subset fraction (0.0 to 1.0) and the paired-end switch. -->
442 <xml name="sambam_options_macro">
443 <section name="sambam" title="SAM/BAM options">
444 <param argument="--mapping-quality" type="integer" value="0" label="Minimum mapping quality for a read to be retained"/>
445 <param argument="--unmapped-reads" type="select" label="How to handle unmapped reads">
446 <option value="discard">discard</option>
447 <option value="use">use</option>
448 <option value="correct">correct</option>
449 </param>
450 <param argument="--chimeric-pairs" type="select" optional="true" label="How to handle chimeric read pairs (default: use)">
451 <option value="discard">discard</option>
452 <option value="use">use</option>
453 <option value="correct">correct</option>
454 </param>
455 <param argument="--unpaired-reads" type="select" optional="true" label="How to handle unpaired reads (default: use)">
456 <option value="discard">discard</option>
457 <option value="use">use</option>
458 <option value="correct">correct</option>
459 </param>
460 <param argument="--ignore-umi" type="boolean" truevalue="--ignore-umi" falsevalue="" label="Ignore UMI and dedup only on position"/>
461 <param argument="--ignore-tlen" type="boolean" truevalue="--ignore-tlen" falsevalue="" label="Dedup paired end reads based solely on read1" help="whether or not the template length is the same"/>
462 <param argument="--chrom" type="text" value="" label="Consider only chromosome" help="If a value is given only a single chromosome with the given name is considered"/>
463 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" />
464 <!--in-sam is set automatically-->
465 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates" />
466 </section>
467 </xml>
<!-- Command-line fragment for the "sambam" section. Mapping quality, unmapped-read handling and subset are always passed; the optional selects and the chromosome are passed only when set; boolean parameters expand to their flag or to an empty string. Ends by expanding the SET_INPUT_TYPE token. -->
468 <token name="@SAMBAM_OPTIONS@"><![CDATA[
469 --mapping-quality $sambam.mapping_quality
470 --unmapped-reads $sambam.unmapped_reads
471 #if $sambam.chimeric_pairs
472 --chimeric-pairs $sambam.chimeric_pairs
473 #end if
474 #if $sambam.unpaired_reads
475 --unpaired-reads $sambam.unpaired_reads
476 #end if
477 $sambam.ignore_umi
478 $sambam.ignore_tlen
479 #if str($sambam.chrom) != ''
480 --chrom '$sambam.chrom'
481 #end if
482 --subset $sambam.subset
483 $sambam.paired
484 @SET_INPUT_TYPE@
485 ]]></token>
486
487 <!-- per-gene is hard coded in count https://github.com/CGATOxford/UMI-tools/blob/c3ead0792ad590822ca72239ef01b8e559802da9/umi_tools/count.py#L92
488 hence we need a specialized macro here
489 TODO count used XF as default for gene-tag now I set it explicitly for the tests but we could as well parametrize the macro and set tool specific defaults
490 -->
491
<!-- Expands sc_options_macro and supplies the per-gene boolean as the content for its yield, so the parameter appears inside the "sc" section. -->
492 <xml name="fullsc_options_macro">
493 <expand macro="sc_options_macro">
494 <param argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene"
495 help="Must combine with either --gene-tag or --per-contig. As for --per-contig except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file" />
496 </expand>
497 </xml>
<!-- Command-line fragment for fullsc_options_macro: the per-gene flag (or an empty string) followed by everything the SC_OPTIONS token expands to. -->
498 <token name="@FULLSC_OPTIONS@"><![CDATA[
499 $sc.per_gene
500 @SC_OPTIONS@
501 ]]></token>
502
<!-- "Single-cell RNA-Seq options" section. The leading yield lets a wrapping macro inject extra parameters (fullsc_options_macro adds the per-gene switch there). The remaining inputs are the gene tag, assigned-status tag, skip-tags regex, per-contig switch, optional gene-to-transcript map and per-cell switch. -->
503 <xml name="sc_options_macro">
504 <section name="sc" title="Single-cell RNA-Seq options">
505 <yield/>
506 <param argument="--gene-tag" type="text" optional="true" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file">
507 <expand macro="sanitize_tag" />
508 </param>
509 <param argument="--assigned-status-tag" type="text" optional="true" label="Bam tag describing whether read is assigned to a gene" help="By default, this is set as the same tag as --gene-tag">
510 <expand macro="sanitize_tag" />
511 </param>
512 <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >
513 <expand macro="barcode_sanitizer" />
514 </param>
515 <param argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />
516 <param argument="--gene-transcript-map" type="data" format="tabular" optional="true" label="Tabular file mapping genes to transcripts" />
517 <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" label="Group reads only if they have the same cell barcode" />
518 </section>
519 </xml>
<!-- Command-line fragment for the "sc" section: the three text options are passed only when they render as non-empty strings, the gene-to-transcript map only when a dataset is set, and the two boolean parameters expand to their flag or to an empty string. -->
520 <token name="@SC_OPTIONS@"><![CDATA[
521 #if str($sc.gene_tag) != "":
522 --gene-tag '$sc.gene_tag'
523 #end if
524 #if str($sc.assigned_status_tag) != "":
525 --assigned-status-tag '$sc.assigned_status_tag'
526 #end if
527 #if str($sc.skip_tags_regex) != "":
528 --skip-tags-regex '$sc.skip_tags_regex'
529 #end if
530 $sc.per_contig
531 #if $sc.gene_transcript_map:
532 --gene-transcript-map '$sc.gene_transcript_map'
533 #end if
534 $sc.per_cell
535 ]]></token>
536
<!-- "group/dedup specific options" section: two contig-buffering switches and an optional select naming the BAM tag (NH, X0 or XT) used for multimapping detection. -->
537 <xml name="groupdedup_options_macro">
538 <section name="gd" title="group/dedup specific options">
539 <param argument="--buffer-whole-contig" type="boolean" truevalue="--buffer-whole-contig" falsevalue="" label="Read whole contig before outputting bundles" help="Guarantees that no reads are missed, but increases memory usage" />
540 <!-- TODO this option is hidden on the CLI. Should we expose it? -->
541 <param argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />
542 <param argument="--multimapping-detection-method" type="select" optional="true" label="BAM Tag indicating multimapping " help="Some aligners identify multimapping using bam tags. Setting this option to NH, X0 or XT will use these tags when selecting the best read amongst reads with the same position and umi">
543 <option value="NH">NH</option>
544 <option value="X0">X0</option>
545 <option value="XT">XT</option>
546 </param>
547 </section>
548 </xml>
<!-- Command-line fragment for the "gd" section: the two boolean parameters expand to their flag or to an empty string, and the multimapping detection tag is passed with its option name only when one was selected. -->
549 <token name="@GROUPDEDUP_OPTIONS@"><![CDATA[
550 $gd.buffer_whole_contig
551 $gd.whole_contig
## The select is optional and its values are bare tag names (NH, X0, XT), so the
## option name has to be written here; when nothing is selected, emit nothing.
#if $gd.multimapping_detection_method
552 --multimapping-detection-method $gd.multimapping_detection_method
#end if
553 ]]></token>
554
<!-- Boolean input named after the log argument; it asks whether a log file should be produced. -->
555 <xml name="log_input_macro">
556 <param argument="--log" type="boolean" label="Output log?" truevalue="--log" falsevalue="" help="Choose if you want to generate a text file containing logging information" />
557 </xml>
<!-- Text output dataset for the log file; its filter expression is the "log" input, so the dataset is only created when that boolean is set. -->
558 <xml name="log_output_macro">
559 <data name="out_log" format="txt" label="${tool.name} on ${on_string}: logfile" >
560 <filter>log</filter>
561 </data>
562 </xml>
<!-- Command-line fragment for logging: writes the log to the out_log dataset when the "log" input is set, and always appends the log2stderr flag. -->
563 <token name="@LOG@"><![CDATA[
564 #if $log:
565 --log='$out_log'
566 #end if
567 --log2stderr
568 ]]></token>
569
112 </macros> 570 </macros>