annotate umi-tools_dedup.xml @ 7:e9256e2e22e0 draft

planemo upload commit a7a086ce7d7d84f53d4a022fa1da25ef7b9a5b9a
author iuc
date Fri, 20 Jul 2018 03:49:09 -0400
parents a6477bafd522
children 1692b1acebfd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
1 <tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@.0">
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
2 <description>Deduplicate reads based on their UMI</description>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
3 <macros>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
4 <import>macros.xml</import>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
5 </macros>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
6 <expand macro="requirements">
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
7 <requirement type="package" version="1.6">samtools</requirement>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
8 </expand>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
9 <command detect_errors="exit_code"><![CDATA[
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
10 #if $input.is_of_type("sam"):
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
11 #set $input_file = $input
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
12 #else:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
13 ln -sf '${input}' 'input.bam' &&
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' &&
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
15 #set $input_file = 'input.bam'
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
16 #end if
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
17
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
18 umi_tools dedup
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
19 --random-seed 0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
20 --extract-umi-method $extract_umi_method
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
21 #if str($extract_umi_method) != 'read_id':
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
22 --umi-separator '$umi_separator' --umi-tag '$umi_tag'
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
23 #end if
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
24 --method $method --edit-distance-threshold $edit_distance_threshold
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
25 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
26 $read_length $whole_contig --subset $subset $per_contig $per_gene
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
27 #if $gene_transcript_map:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
28 --gene-transcript-map '$gene_transcript_map'
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
29 #end if
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
30 #if len(str($gene_tag)) > 0:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
31 --gene-tag '$gene_tag'
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
32 #end if
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
33 #if $input.is_of_type("sam"):
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
34 --in-sam
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
35 #end if
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
36 -I '$input_file' -S deduped.bam &&
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
37 samtools sort deduped.bam -@ \${GALAXY_SLOTS:-1} -o '$output' -O BAM
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
38 ]]></command>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
39 <inputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
40 <param name="input" type="data" format="sam,bam" label="Reads to deduplicate in SAM or BAM format" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
41 <param name="extract_umi_method" argument="--extract-umi-method" type="select">
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
42 <option value="read_id" selected="True">Read ID</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
43 <option value="tag">Tag</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
44 </param>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
45 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
46 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
47 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position">
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
48 <option value="unique">Reads group share the exact same UMI</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
49 <option value="percentile">Reads group share the exact same UMI. UMIs with counts less than 1% of the median counts for UMIs at the same position are ignored</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
50 <option value="cluster">Identify clusters based on hamming distance</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
51 <option value="adjacency">Identify clusters based on hamming distance and resolve networks by using the node counts</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
52 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
53 </param>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
54 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
55 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
56 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
57 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
58 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
59 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
60 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
61 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
62 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
63 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
64 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
65 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
66 </inputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
67 <outputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
68 <data format="bam" name="output" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
69 </outputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
70 <tests>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
71 <test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
72 <param name="input" value="group_in1.sam" ftype="sam" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
73 <param name="extract_umi_method" value="read_id" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
74 <param name="method" value="unique" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
75 <output name="output" file="dedup_out1.bam" ftype="bam" sort="True"/>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
76 </test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
77 <test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
78 <param name="input" value="group_in2.bam" ftype="bam" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
79 <param name="extract_umi_method" value="read_id" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
80 <param name="paired" value="True" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
81 <param name="method" value="unique" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
82 <output name="output" file="dedup_out2.bam" ftype="bam" sort="True" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
83 </test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
84 <test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
85 <param name="input" value="group_in3.bam" ftype="bam" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
86 <param name="extract_umi_method" value="read_id" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
87 <param name="method" value="unique" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
88 <output name="output" file="dedup_out3.bam" ftype="bam" sort="True" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
89 </test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
90 <test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
91 <param name="input" value="group_in4.bam" ftype="bam" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
92 <param name="extract_umi_method" value="tag" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
93 <param name="umi_tag" value="BX" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
94 <param name="method" value="unique" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
95 <output name="output" file="dedup_out4.bam" ftype="bam" sort="True" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
96 </test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
97 <test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
98 <param name="input" value="group_in5.bam" ftype="bam" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
99 <param name="extract_umi_method" value="read_id" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
100 <param name="umi_tag" value="BX" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
101 <param name="method" value="cluster" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
102 <output name="output" file="dedup_out5.bam" ftype="bam" sort="True" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
103 </test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
104 <test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
105 <param name="input" value="group_in6.bam" ftype="bam" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
106 <param name="extract_umi_method" value="read_id" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
107 <param name="umi_tag" value="BX" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
108 <param name="method" value="directional" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
109 <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
110 </test>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
111 </tests>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
112 <help><![CDATA[
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
113 umi_tools dedup - Deduplicate reads based on their UMI
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
114 ======================================================
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
115
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
116 Purpose
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
117 -------
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
118
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
119 The purpose of this command is to deduplicate BAM files based on the first
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
120 mapping co-ordinate and the UMI attached to the read. It is assumed that the
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
121 FASTQ files were processed with extract_umi.py before mapping and thus the UMI
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
122 is the last word of the read name. e.g:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
123
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
124 @HISEQ:87:00000000_AATT
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
125
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
126 where AATT is the UMI sequence.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
127
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
128 If you have used an alternative method which does not separate the
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
129 read id and UMI with a "_", such as bcl2fastq which uses ":", you can
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
130 specify the separator with the option "--umi-separator=<sep>",
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
131 replacing <sep> with e.g ":".
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
132
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
133 Alternatively, if your UMIs are encoded in a tag, you can specify this
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
134 by setting the option --extract-umi-method=tag and set the tag name
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
135 with the --umi-tag option. For example, if your UMIs are encoded in
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
136 the 'UM' tag, provide the following options:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
137 "--extract-umi-method=tag --umi-tag=UM"
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
138
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
139 The start position of a read is considered to be the start of its alignment
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
140 minus any soft clipped bases. A read aligned at position 500 with
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
141 cigar 2S98M will be assumed to start at position 498.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
142
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
143
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
144 Methods
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
145 -------
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
146
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
147 dedup can be run with multiple methods to identify groups of reads with
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
148 the same (or similar) UMI(s). All methods start by identifying the
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
149 reads with the same mapping position.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
150
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
151 The simplest method, "unique", groups reads with the exact same
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
152 UMI. The network-based methods, "cluster", "adjacency" and
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
153 "directional", build networks where nodes are UMIs and edges connect
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
154 UMIs with an edit distance <= threshold (usually 1). The groups of
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
155 reads are then defined from the network in a method-specific manner.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
156
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
157 "unique"
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
158 Reads group share the exact same UMI
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
159
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
160 "percentile"
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
161 Reads group share the exact same UMI. UMIs with counts < 1% of the
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
162 median counts for UMIs at the same position are ignored.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
163
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
164 "cluster"
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
165 Identify clusters of connected UMIs (based on hamming distance
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
166 threshold). Each network is a read group
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
167
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
168 "adjacency"
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
169 Cluster UMIs as above. For each cluster, select the node(UMI)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
170 with the highest counts. Visit all nodes one edge away. If all
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
171 nodes have been visited, stop. Otherwise, repeat with remaining
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
172 nodes until all nodes have been visited. Each step
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
173 defines a read group.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
174
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
175 "directional" (default)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
176 Identify clusters of connected UMIs (based on hamming distance
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
177 threshold) and umi A counts >= (2* umi B counts) - 1. Each
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
178 network is a read group.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
179
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
180 Options
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
181 -------
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
182
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
183 --extract-umi-method (choice)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
184 How are the UMIs encoded in the read?
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
185
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
186 Options are:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
187
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
188 - "read_id" (default)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
189 UMIs contained at the end of the read separated as
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
190 specified with --umi-separator option
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
191
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
192 - "tag"
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
193 UMIs contained in a tag, see --umi-tag option
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
194
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
195 --umi-separator (string)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
196 Separator between read id and UMI. See --extract-umi-method above
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
197
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
198 --umi-tag (string)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
199 Tag which contains UMI. See --extract-umi-method above
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
200
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
201 --edit-distance-threshold (int)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
202 For the adjacency and cluster methods the threshold for the
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
203 edit distance to connect two UMIs in the network can be
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
204 increased. The default value of 1 works best unless the UMI is
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
205 very long (>14bp)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
206
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
207 --paired
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
208 BAM is paired end - output both read pairs. This will also
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
209 force the use of the template length to determine reads with
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
210 the same mapping coordinates.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
211
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
212 --spliced-is-unique
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
213 Causes two reads that start in the same position on the same
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
214 strand and having the same UMI to be considered unique if one is
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
215 spliced and the other is not. (Uses the 'N' cigar operation to test
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
216 for splicing)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
217
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
218 --soft-clip-threshold (int)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
219 Mappers that soft clip will sometimes do so rather than mapping a
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
220 spliced read if there is only a small overhang over the exon
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
221 junction. By setting this option, you can treat reads with at least
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
222 this many bases soft-clipped at the 3' end as spliced.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
223
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
224 --multimapping-detection-method (string, choice)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
225 If the sam/bam contains tags to identify multimapping reads, you can
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
226 specify which one to use when selecting the best read at a given locus.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
227 Supported tags are "NH", "X0" and "XT". If not specified, the read
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
228 with the highest mapping quality will be selected
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
229
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
230 --read-length
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
231 Use the read length as a criterion when deduping, e.g. for sRNA-Seq
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
232
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
233 --whole-contig
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
234 Consider all alignments to a single contig together. This is useful if
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
235 you have aligned to a transcriptome multi-fasta
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
236
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
237 --subset (float, [0-1])
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
238 Only consider a fraction of the reads, chosen at random. This is useful
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
239 for doing saturation analyses.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
240
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
241 --chrom
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
242 Only consider a single chromosome. This is useful for debugging purposes
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
243
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
244 --per-contig (string)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
245 Deduplicate per contig (field 3 in BAM; RNAME).
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
246 All reads with the same contig will be
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
247 considered to have the same alignment position. This is useful
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
248 if your library prep generates PCR duplicates with non-identical
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
249 alignment positions such as CEL-Seq. In this case, you would
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
250 align to a reference transcriptome with one transcript per gene
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
251
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
252 --per-gene (string)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
253 Deduplicate per gene. As above except with this option you can
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
254 align to a reference transcriptome with more than one transcript
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
255 per gene. You need to also provide --gene-transcript-map option.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
256 This will also add a metacontig ('MC') tag to the reads if used
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
257 in conjunction with --output-bam
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
258
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
259 --gene-transcript-map (string)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
260 File mapping genes to transcripts (tab separated), e.g.:
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
261
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
262 gene1 transcript1
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
263 gene1 transcript2
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
264 gene2 transcript3
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
265
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
266 --gene-tag (string)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
267 Deduplicate per gene. As per --per-gene except here the gene
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
268 information is encoded in the bam read tag specified so you do
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
269 not need to supply --gene-transcript-map
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
270
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
271 --output-bam (string, filename)
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
272 Output a tagged bam file to stdout or -S <filename>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
273
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
274 -i, --in-sam/-o, --out-sam
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
275 By default, inputs are assumed to be in BAM format and outputs are written
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
276 in BAM format. Use these options to specify the use of SAM format for
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
277 inputs or outputs.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
278
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
279 -I (string, filename) input file name
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
280 The input file must be sorted and indexed.
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
281
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
282 -S (string, filename) output file name
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
283
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
284 -L (string, filename) log file name
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
285
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
286 Usage
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
287 -----
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
288 umi_tools dedup -I infile.bam -S deduped.bam
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
289
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
290 ]]></help>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
291 <expand macro="citations" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
292 </tool>