comparison umi-tools_counts.xml @ 0:8db56d2f8b72 draft

planemo upload commit c79a5f4a05156bb2a6035a844aa9ad8f0e59ecb5
author iuc
date Thu, 21 Jun 2018 15:20:14 -0400
parents
children 3c932ad4a174
comparison
equal deleted inserted replaced
-1:000000000000 0:8db56d2f8b72
1 <tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0">
2 <description>Count UMIs from BAM files</description>
3 <macros><!-- shared macros are imported; sanitize_tag is the only macro defined locally -->
4 <import>macros.xml</import>
5 <xml name="sanitize_tag"><!-- keeps letters and digits only; any other character is replaced by the empty string -->
6 <sanitizer invalid_char="">
7 <valid initial="string.letters,string.digits"/>
8 </sanitizer>
9 </xml>
10 </macros>
11 <expand macro="requirements" />
12 <command detect_errors="exit_code"><![CDATA[
13 ## Link the BAM and its index under fixed names in the working directory so the tool finds the index next to the BAM.
14 ln -s '${input_bam}' 'input.bam' &&
15 ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' &&
16 ## Boolean params expand to a flag or to nothing, so they stay unquoted: quoting them would pass an empty '' argument when unchecked.
17 umi_tools count
18 -I input.bam
19 $bam_paired
20 --extract-umi-method='$barcodes.extract_umi_method.value'
21 #if $barcodes.extract_umi_method == 'read_id':
22 --umi-separator='$barcodes.delimiter'
23 #else if $barcodes.extract_umi_method == 'tag':
24 --umi-tag='$barcodes.umi_tag'
25 --cell-tag='$barcodes.cell_tag'
26 #end if
27 --method='$grouping_method.value'
28 --edit-distance-threshold='$hamming_distance'
29 --mapping-quality='$advanced.mapping_quality'
30 --per-gene
31 $wide_format_cell_counts
32 $advanced.per_contig
33 $advanced.per_cell
34 #if $advanced.gene_tag:
35 --gene-tag='$advanced.gene_tag'
36 #end if
37 #if $advanced.skip_tags_regex.value:
38 --skip-tags-regex='$advanced.skip_tags_regex'
39 #end if
40 #if $advanced.random_seed != 0:
41 --random-seed='$advanced.random_seed'
42 #end if
43 -S '$out_counts'
44 -L '$out_log'
45 ]]></command>
46 <inputs>
47 <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" />
48 <!-- bam_paired expands to the paired flag when checked and to an empty string otherwise -->
49 <param name="bam_paired" type="boolean" truevalue="--paired" falsevalue="" checked="false"
50 label="Bam is paired-end"
51 help="both read pairs will be output. This will also force the use of the template length to determine
52 reads with the same mapping coordinates." />
53 <!-- barcodes: the selected branch decides which extra UMI options the command template emits -->
54 <conditional name="barcodes" >
55 <param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" >
56 <option value="read_id" selected="true">Barcodes are contained at the end of the read separated by a delimiter</option>
57 <option value="tag" >Barcodes are contained in tags</option>
58 <option value="umis" >Barcodes were extracted using umis</option>
59 </param>
60 <when value="read_id" >
61 <param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_" >
62 <sanitizer invalid_char=""><valid initial="string.letters,string.digits"><add value="_"/><add value=":"/><add value="-"/><add value="."/><add value="|"/></valid></sanitizer><!-- sanitize_tag keeps only letters and digits, which would blank the default "_" and the ":" separator mentioned in the help; common separator characters are allowed here instead -->
63 </param>
64 </when>
65 <when value="tag" >
66 <param name="umi_tag" type="text" label="Tag which contains the UMI" >
67 <expand macro="sanitize_tag" />
68 </param>
69 <param name="cell_tag" type="text" label="Tag which contains the cell barcode" >
70 <expand macro="sanitize_tag" />
71 </param>
72 </when>
73 <when value="umis"></when>
74 </conditional>
75 <!-- grouping_method: the selected value is passed to the command template as the method option -->
76 <param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical
77 UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with
78 counts &lt; 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly adjacent
79 UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." >
80 <option value="unique" >Unique</option>
81 <option value="percentile">Percentile</option>
82 <option value="cluster">Cluster</option>
83 <option value="adjacency">Adjacency</option>
84 <option value="directional" selected="true" >Directional</option>
85 </param>
86 <!-- hamming_distance feeds the edit distance threshold option; wide_format_cell_counts switches the output to a gene by cell matrix -->
87 <param name="hamming_distance" type="integer" label="Edit distance threshold" min="0" value="1" />
88 <param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="false" label="Output a matrix of genes and cells, instead of a flat file" />
89 <!-- advanced: optional settings; text fields left empty and a random seed of 0 are omitted from the command line -->
90 <section name="advanced" title="Extra parameters" >
91 <param name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" />
92 <!-- Currently hard-coded parameter. Leave here if useful to future wrapper -->
93 <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library
94 prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either
95 -\-gene-tag or -\-per-contig option" /> -->
96 <param name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="" >
97 <expand macro="sanitize_tag" />
98 </param>
99 <param name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >
100 <sanitizer invalid_char="">
101 <valid initial="string.letters,string.digits">
102 <add value="!="/>
103 <add value="-"/>
104 <add value="_"/>
105 <add value="."/>
106 <add value="?"/>
107 <add value="&lt;"/><!-- left triangle bracket -->
108 <add value="&gt;"/><!-- right triangle bracket -->
109 <add value="&#91;"/> <!-- left square bracket -->
110 <add value="&#93;"/> <!-- right square bracket -->
111 <add value="&#94;"/> <!-- caret -->
112 <add value="&#123;"/> <!-- left curly -->
113 <add value="&#125;"/> <!-- right curly -->
114 <add value="&#40;"/> <!-- left parenthesis -->
115 <add value="&#41;"/> <!-- right parenthesis -->
116 </valid>
117 </sanitizer>
118 </param>
119 <param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false"
120 label="Deduplicate per contig (field 3 in BAM; RNAME)"
121 help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." />
122 <param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false"
123 label="Group reads only if they have the same cell barcode." />
124 <param name="random_seed" type="integer" min="0" value="0" label="Random Seed" />
125 </section>
126 </inputs>
127 <outputs><!-- distinct labels so the counts table and the log can be told apart in the history -->
128 <data name="out_counts" format="tsv" label="${tool.name} on ${on_string}: counts" />
129 <data name="out_log" format="txt" label="${tool.name} on ${on_string}: log" />
130 </outputs>
131 <tests>
132 <test><!-- count_single_gene_tag: counts keyed on the XF gene tag, flat output -->
133 <param name="input_bam" value="chr19_gene_tags.bam" />
134 <param name="extract_umi_method" value="umis" />
135 <param name="grouping_method" value="directional" />
136 <param name="gene_tag" value="XF" />
137 <param name="skip_tags_regex" value="^[__|Unassigned]" />
138 <param name="random_seed" value="123456789" />
139 <output name="out_counts" value="count_single_gene_tag.tsv" />
140 </test>
141 <test><!-- count_single_cells_gene_tag: same as above with per_cell switched on -->
142 <param name="input_bam" value="chr19_gene_tags.bam" />
143 <param name="extract_umi_method" value="umis" />
144 <param name="grouping_method" value="directional" />
145 <param name="gene_tag" value="XF" />
146 <param name="skip_tags_regex" value="^[__|Unassigned]" />
147 <param name="per_cell" value="true" />
148 <param name="random_seed" value="123456789" />
149 <output name="out_counts" value="count_single_cells_gene_tag.tsv" />
150 </test>
151 <test><!-- count_single_cells_wide_gene_tag: per_cell plus the wide (matrix) output format -->
152 <param name="input_bam" value="chr19_gene_tags.bam" />
153 <param name="extract_umi_method" value="umis" />
154 <param name="grouping_method" value="directional" />
155 <param name="gene_tag" value="XF" />
156 <param name="skip_tags_regex" value="^[__|Unassigned]" />
157 <param name="per_cell" value="true" />
158 <param name="wide_format_cell_counts" value="true" />
159 <param name="random_seed" value="123456789" />
160 <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" />
161 </test>
162 </tests>
163 <help><![CDATA[
164
165 UMI Tools count - Count reads per gene from BAM using UMIs
166 ----------------------------------------------------------
167
168 Purpose
169 -------
170
171 The purpose of this command is to count the number of reads per gene based
172 on the mapping co-ordinate and the UMI attached to the read.
173
174
175 It is assumed that the FASTQ files were processed with extract_umi.py
176 before mapping and thus the UMI is the last word of the read name. e.g:
177
178 @HISEQ:87:00000000_AATT
179
180 where AATT is the UMI sequence.
181
182 If you have used an alternative method which does not separate the
183 read id and UMI with a "_", such as bcl2fastq which uses ":", you can
184 specify the separator, or if your UMIs are encoded in a tag you can also specify this.
185
186 ]]></help>
187 <expand macro="citations" />
188 </tool>