comparison umi-tools_counts.xml @ 1:3c932ad4a174 draft

planemo upload commit 9a3aeb2c588f9f67824ea5568923ce70b048499a
author iuc
date Sat, 14 Jul 2018 06:14:24 -0400
parents 8db56d2f8b72
children b557acca0b56
comparison
equal deleted inserted replaced
0:8db56d2f8b72 1:3c932ad4a174
1 <tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0"> 1 <tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.1">
2 <description>Count UMIs from BAM files</description> 2 <description>performs quantification of UMIs from BAM files</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <xml name="sanitize_tag" > 5 <xml name="sanitize_tag" >
6 <sanitizer invalid_char=""> 6 <sanitizer invalid_char="">
7 <valid initial="string.letters,string.digits" /> 7 <valid initial="string.letters,string.digits" />
8 </sanitizer> 8 </sanitizer>
9 </xml> 9 </xml>
10 </macros> 10 </macros>
11 <expand macro="requirements" /> 11 <expand macro="requirements" />
12 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13
14 ln -s '${input_bam}' 'input.bam' && 13 ln -s '${input_bam}' 'input.bam' &&
15 ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' && 14 ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' &&
16 15
17 umi_tools count 16 umi_tools count
18 -I input.bam 17 -I input.bam
19 '$bam_paired' 18 '$paired'
20 --extract-umi-method='$barcodes.extract_umi_method.value' 19 --extract-umi-method='$barcodes.extract_umi_method.value'
21 #if $barcodes.extract_umi_method == 'read_id': 20 #if str($barcodes.extract_umi_method) == 'read_id':
22 --umi-separator='$barcodes.delimiter' 21 --umi-separator='$barcodes.umi_separator.value'
23 #else if $barcodes.extract_umi_method == 'tag': 22 #else if str($barcodes.extract_umi_method) == 'tag':
24 --umi-tag='$barcodes.umi_tag' 23 --umi-tag='$barcodes.umi_tag.value'
25 --cell-tag='$barcodes.cell_tag' 24 --cell-tag='$barcodes.cell_tag.value'
26 #end if 25 #end if
27 --method='$grouping_method.value' 26 --method='$method.value'
28 --edit-distance-threshold='$hamming_distance' 27 --edit-distance-threshold='$edit_distance_threshold'
29 --mapping-quality='$advanced.mapping_quality' 28 --mapping-quality='$advanced.mapping_quality'
30 --per-gene 29 --per-gene
31 $wide_format_cell_counts 30 '$wide_format_cell_counts'
32 $advanced.per_contig 31 '$advanced.per_contig'
33 '$advanced.per_cell' 32 '$advanced.per_cell'
34 #if $advanced.gene_tag: 33 #if str($advanced.gene_tag) != "":
35 --gene-tag='$advanced.gene_tag' 34 --gene-tag='$advanced.gene_tag.value'
36 #end if 35 #end if
37 #if $advanced.skip_tags_regex.value: 36 #if str($advanced.skip_tags_regex) != "":
38 --skip-tags-regex='$advanced.skip_tags_regex' 37 --skip-tags-regex='$advanced.skip_tags_regex.value'
39 #end if 38 #end if
40 #if $advanced.random_seed != 0: 39 #if '$advanced.random_seed' != 0:
41 --random-seed='$advanced.random_seed' 40 --random-seed='$advanced.random_seed'
42 #end if 41 #end if
43 -S '$out_counts' 42 -S '$out_counts'
44 -L '$out_log'
45 ]]></command> 43 ]]></command>
46 <inputs> 44 <inputs>
47 <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" /> 45 <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" />
48 46 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" checked="false" label="Bam is paired-end" help="both read pairs will be output. This will also force the use of the template length to determine reads with the same mapping coordinates." />
49 <param name="bam_paired" type="boolean" truevalue="--paired" falsevalue="" checked="false"
50 label="Bam is paired-end"
51 help="both read pairs will be output. This will also force the use of the template length to determine
52 reads with the same mapping coordinates." />
53
54 <conditional name="barcodes" > 47 <conditional name="barcodes" >
55 <param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" > 48 <param argument="--extract-umi-method" name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" >
56 <option value="read_id" selected="true">Barcodes are contained at the end of the read seperated by a delimiter</option> 49 <option value="read_id" selected="true">Barcodes are contained at the end of the read seperated by a delimiter</option>
57 <option value="tag" >Barcodes are contained in tags</option> 50 <option value="tag" >Barcodes are contained in tags</option>
58 <option value="umis" >Barcodes were extracted using umis</option> 51 <option value="umis" >Barcodes were extracted using umis</option>
59 </param> 52 </param>
60 <when value="read_id" > 53 <when value="read_id" >
61 <param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_" > 54 <param argument="--umi-separator" name="umi_separator" type="text" label="Delimiter between read id and the UMI" value="_" >
62 <expand macro="sanitize_tag" /> 55 <sanitizer invalid_char="" >
56 <valid initial="string.punctuation" />
57 </sanitizer>
63 </param> 58 </param>
64 </when> 59 </when>
65 <when value="tag" > 60 <when value="tag" >
66 <param name="umi_tag" type="text" label="Tag which contains the UMI" > 61 <param argument="--umi-tag" name="umi_tag" type="text" label="Tag which contains the UMI" >
67 <expand macro="sanitize_tag" /> 62 <expand macro="sanitize_tag" />
68 </param> 63 </param>
69 <param name="cell_tag" type="text" label="Tag which contains the cell barcode" > 64 <param argument="--cell-tag" name="cell_tag" type="text" label="Tag which contains the cell barcode" >
70 <expand macro="sanitize_tag" /> 65 <expand macro="sanitize_tag" />
71 </param> 66 </param>
72 </when> 67 </when>
73 <when value="umis"></when> 68 <when value="umis"></when>
74 </conditional> 69 </conditional>
75 70 <param argument="--method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical
76 <param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical 71 UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with
77 UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with 72 counts &lt; 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly ajacent UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." >
78 counts &lt; 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly ajacent
79 UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." >
80 <option value="unique" >Unique</option> 73 <option value="unique" >Unique</option>
81 <option value="percentile">Percentile</option> 74 <option value="percentile">Percentile</option>
82 <option value="cluster">Cluster</option> 75 <option value="cluster">Cluster</option>
83 <option value="adjacency">Adjacency</option> 76 <option value="adjacency">Adjacency</option>
84 <option value="directional" selected="true" >Directional</option> 77 <option value="directional" selected="true" >Directional</option>
85 </param> 78 </param>
86 79 <param argument="--edit-distance-threshold" name="edit_distance_threshold" type="integer" label="Edit distance threshold" min="0" value="1" />
87 <param name="hamming_distance" type="integer" label="Edit distance threshold" min="0" value="1" /> 80 <param argument="--wide-format-cell-counts" name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="true" label="Output a matrix of genes and cells, instead of a flat file" />
88 <param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="false" label="Output a mtrix of genes and cells, instead of a flat file" />
89
90 <section name="advanced" title="Extra parameters" > 81 <section name="advanced" title="Extra parameters" >
91 <param name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" /> 82 <param argument="--mapping-quality" name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" />
92 <!-- Currently hard-coded parameter. Leave here if useful to future wrapper --> 83 <!-- Currently hard-coded parameter. Leave here if useful to future wrapper -->
93 <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library 84 <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library
94 prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either 85 prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either
95 -\-gene-tag or -\-per-contig option" /> --> 86 -\-gene-tag or -\-per-contig option" /> -->
96 <param name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="" > 87 <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="XT" >
97 <expand macro="sanitize_tag" /> 88 <expand macro="sanitize_tag" />
98 </param> 89 </param>
99 <param name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" > 90 <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >
100 <sanitizer invalid_char=""> 91 <sanitizer invalid_char="">
101 <valid initial="string.letters,string.digits"> 92 <valid initial="string.letters,string.digits">
102 <add value="!="/> 93 <add value="!="/>
103 <add value="-"/> 94 <add value="-"/>
104 <add value="_"/> 95 <add value="_"/>
114 <add value="&#40;"/> <!-- left parenthesis --> 105 <add value="&#40;"/> <!-- left parenthesis -->
115 <add value="&#41;"/> <!-- right parenthesis --> 106 <add value="&#41;"/> <!-- right parenthesis -->
116 </valid> 107 </valid>
117 </sanitizer> 108 </sanitizer>
118 </param> 109 </param>
119 <param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false" 110 <param argument="--per-contig" name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false" label="Deduplicate per contig (field 3 in BAM; RNAME)" help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." />
120 label="Deduplicate per contig (field 3 in BAM; RNAME)" 111 <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="true" label="Group reads only if they have the same cell barcode." />
121 help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." /> 112 <param argument="--random-seed" name="random_seed" type="integer" min="0" value="0" label="Random Seed" />
122 <param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false" 113 </section>
123 label="Group reads only if they have the same cell barcode." />
124 <param name="random_seed" type="integer" min="0" value="0" label="Random Seed" />
125 </section>
126 </inputs> 114 </inputs>
127 <outputs> 115 <outputs>
128 <data name="out_counts" format="tsv" /> 116 <data name="out_counts" format="tabular" />
129 <data name="out_log" format="txt" />
130 </outputs> 117 </outputs>
131 <tests> 118 <tests>
132 <test><!--count_single_gene_tag:--> 119 <test><!--count_single_gene_tag:-->
133 <param name="input_bam" value="chr19_gene_tags.bam" /> 120 <param name="input_bam" value="chr19_gene_tags.bam" />
134 <param name="random_seed" value="123456789" /> 121 <param name="random_seed" value="123456789" />
135 <param name="grouping_method" value="directional" /> 122 <param name="method" value="directional" />
136 <param name="gene_tag" value="XF" /> 123 <param name="gene_tag" value="XF" />
137 <param name="skip_tags_regex" value="^[__|Unassigned]" /> 124 <param name="skip_tags_regex" value="^[__|Unassigned]" />
138 <param name="extract_umi_method" value="umis" /> 125 <param name="extract_umi_method" value="umis" />
126 <param name="wide_format_cell_counts" value="false" />
127 <param name="per_cell" value="false" />
139 <output name="out_counts" value="count_single_gene_tag.tsv" /> 128 <output name="out_counts" value="count_single_gene_tag.tsv" />
140 </test> 129 </test>
141 <test><!--count_single_cells_gene_tag:--> 130 <test><!--count_single_cells_gene_tag:-->
142 <param name="input_bam" value="chr19_gene_tags.bam" /> 131 <param name="input_bam" value="chr19_gene_tags.bam" />
143 <param name="random_seed" value="123456789" /> 132 <param name="random_seed" value="123456789" />
144 <param name="grouping_method" value="directional" /> 133 <param name="method" value="directional" />
145 <param name="gene_tag" value="XF" /> 134 <param name="gene_tag" value="XF" />
146 <param name="skip_tags_regex" value="^[__|Unassigned]" /> 135 <param name="skip_tags_regex" value="^[__|Unassigned]" />
147 <param name="per_cell" value="true" /><!-- new --> 136 <param name="per_cell" value="true" />
148 <param name="extract_umi_method" value="umis" /> 137 <param name="extract_umi_method" value="umis" />
138 <param name="wide_format_cell_counts" value="false" />
149 <output name="out_counts" value="count_single_cells_gene_tag.tsv" /> 139 <output name="out_counts" value="count_single_cells_gene_tag.tsv" />
150 </test> 140 </test>
151 <test><!--count_single_cells_wide_gene_tag:--> 141 <test><!--count_single_cells_wide_gene_tag:-->
152 <param name="input_bam" value="chr19_gene_tags.bam" /> 142 <param name="input_bam" value="chr19_gene_tags.bam" />
153 <param name="random_seed" value="123456789" /> 143 <param name="random_seed" value="123456789" />
154 <param name="grouping_method" value="directional" /> 144 <param name="method" value="directional" />
155 <param name="gene_tag" value="XF" /> 145 <param name="gene_tag" value="XF" />
156 <param name="skip_tags_regex" value="^[__|Unassigned]" /> 146 <param name="skip_tags_regex" value="^[__|Unassigned]" />
157 <param name="per_cell" value="true" /><!-- new --> 147 <param name="per_cell" value="true" />
158 <param name="extract_umi_method" value="umis" /> 148 <param name="extract_umi_method" value="umis" />
159 <param name="wide_format_cell_counts" value="true" /> 149 <param name="wide_format_cell_counts" value="true" />
160 <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" /> 150 <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" />
151 </test>
152 <test><!-- count ENSDARG00000019692, with defaults -->
153 <param name="input_bam" value="fc.ENSDARG00000019692.bam" />
154 <param name="method" value="unique" />
155 <output name="out_counts" value="fc.ENSDARG00000019692.counts" />
161 </test> 156 </test>
162 </tests> 157 </tests>
163 <help><![CDATA[ 158 <help><![CDATA[
164 159
165 UMI Tools count - Count reads per gene from BAM using UMIs 160 UMI Tools count - Count reads per gene from BAM using UMIs