Mercurial > repos > iuc > umi_tools_dedup
annotate umi-tools_dedup.xml @ 0:a6477bafd522 draft
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
author | iuc |
---|---|
date | Wed, 10 Jan 2018 19:09:42 -0500 |
parents | |
children | 1692b1acebfd |
rev | line source |
---|---|
0
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
1 <tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@.0"> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
2 <description>Deduplicate reads based on their UMI</description> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
3 <macros> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
4 <import>macros.xml</import> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
5 </macros> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
6 <expand macro="requirements"> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
7 <requirement type="package" version="1.6">samtools</requirement> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
8 </expand> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
9 <command detect_errors="exit_code"><![CDATA[ |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
10 #if $input.is_of_type("sam"): |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
11 #set $input_file = $input |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
12 #else: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
13 ln -sf '${input}' 'input.bam' && |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' && |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
15 #set $input_file = 'input.bam' |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
16 #end if |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
17 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
18 umi_tools dedup |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
19 --random-seed 0 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
20 --extract-umi-method $extract_umi_method |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
21 #if str($extract_umi_method) != 'read_id': |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
22 --umi-separator '$umi_separator' --umi-tag '$umi_tag' |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
23 #end if |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
24 --method $method --edit-distance-threshold $edit_distance_threshold |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
25 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
26 $read_length $whole_contig --subset $subset $per_contig $per_gene |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
27 #if $gene_transcript_map: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
28 --gene-transcript-map '$gene_transcript_map' |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
29 #end if |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
30 #if len(str($gene_tag)) > 0: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
31 --gene-tag '$gene_tag' |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
32 #end if |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
33 #if $input.is_of_type("sam"): |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
34 --in-sam |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
35 #end if |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
36 -I '$input_file' -S deduped.bam && |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
37 samtools sort deduped.bam -@ \${GALAXY_SLOTS:-1} -o '$output' -O BAM |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
38 ]]></command> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
39 <inputs> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
40 <param name="input" type="data" format="sam,bam" label="Reads to deduplicate in SAM or BAM format" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
41 <param name="extract_umi_method" argument="--extract-umi-method" type="select"> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
42 <option value="read_id" selected="True">Read ID</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
43 <option value="tag">Tag</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
44 </param> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
45 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
46 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
47 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position"> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
48 <option value="unique">Reads group share the exact same UMI</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
49 <option value="percentile">Reads group share the exact same UMI. UMIs with counts less than 1% of the median counts for UMIs at the same position are ignored</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
50 <option value="cluster">Identify clusters based on hamming distance</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
51 <option value="adjacency">Identify clusters based on hamming distance and resolve networks by using the node counts</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
52 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
53 </param> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
54 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (>14bp)" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
55 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
56 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
57 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
58 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
59 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
60 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
61 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
62 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
63 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
64 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
65 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
66 </inputs> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
67 <outputs> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
68 <data format="bam" name="output" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
69 </outputs> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
70 <tests> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
71 <test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
72 <param name="input" value="group_in1.sam" ftype="sam" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
73 <param name="extract_umi_method" value="read_id" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
74 <param name="method" value="unique" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
75 <output name="output" file="dedup_out1.bam" ftype="bam" sort="True"/> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
76 </test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
77 <test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
78 <param name="input" value="group_in2.bam" ftype="bam" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
79 <param name="extract_umi_method" value="read_id" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
80 <param name="paired" value="True" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
81 <param name="method" value="unique" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
82 <output name="output" file="dedup_out2.bam" ftype="bam" sort="True" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
83 </test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
84 <test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
85 <param name="input" value="group_in3.bam" ftype="bam" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
86 <param name="extract_umi_method" value="read_id" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
87 <param name="method" value="unique" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
88 <output name="output" file="dedup_out3.bam" ftype="bam" sort="True" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
89 </test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
90 <test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
91 <param name="input" value="group_in4.bam" ftype="bam" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
92 <param name="extract_umi_method" value="tag" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
93 <param name="umi_tag" value="BX" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
94 <param name="method" value="unique" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
95 <output name="output" file="dedup_out4.bam" ftype="bam" sort="True" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
96 </test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
97 <test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
98 <param name="input" value="group_in5.bam" ftype="bam" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
99 <param name="extract_umi_method" value="read_id" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
100 <param name="umi_tag" value="BX" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
101 <param name="method" value="cluster" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
102 <output name="output" file="dedup_out5.bam" ftype="bam" sort="True" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
103 </test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
104 <test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
105 <param name="input" value="group_in6.bam" ftype="bam" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
106 <param name="extract_umi_method" value="read_id" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
107 <param name="umi_tag" value="BX" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
108 <param name="method" value="directional" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
109 <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
110 </test> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
111 </tests> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
112 <help><![CDATA[ |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
113 umi_tools dedup - Deduplicate reads based on their UMI |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
114 ====================================================== |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
115 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
116 Purpose |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
117 ------- |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
118 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
119 The purpose of this command is to deduplicate BAM files based on the first |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
120 mapping co-ordinate and the UMI attached to the read. It is assumed that the |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
121 FASTQ files were processed with extract_umi.py before mapping and thus the UMI |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
122 is the last word of the read name. e.g: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
123 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
124 @HISEQ:87:00000000_AATT |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
125 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
126 where AATT is the UMI sequence. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
127 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
128 If you have used an alternative method which does not separate the |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
129 read id and UMI with a "_", such as bcl2fastq which uses ":", you can |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
130 specify the separator with the option "--umi-separator=<sep>", |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
131 replacing <sep> with e.g ":". |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
132 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
133 Alternatively, if your UMIs are encoded in a tag, you can specify this |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
134 by setting the option --extract-umi-method=tag and set the tag name |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
135 with the --umi-tag option. For example, if your UMIs are encoded in |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
136 the 'UM' tag, provide the following options: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
137 "--extract-umi-method=tag --umi-tag=UM" |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
138 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
139 The start position of a read is considered to be the start of its alignment |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
140 minus any soft clipped bases. A read aligned at position 500 with |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
141 cigar 2S98M will be assumed to start at position 498. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
142 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
143 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
144 Methods |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
145 ------- |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
146 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
147 dedup can be run with multiple methods to identify groups of reads with |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
148 the same (or similar) UMI(s). All methods start by identifying the |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
149 reads with the same mapping position. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
150 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
151 The simplest method, "unique", groups reads with the exact same |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
152 UMI. The network-based methods, "cluster", "adjacency" and |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
153 "directional", build networks where nodes are UMIs and edges connect |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
154 UMIs with an edit distance <= threshold (usually 1). The groups of |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
155 reads are then defined from the network in a method-specific manner. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
156 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
157 "unique" |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
158 Reads group share the exact same UMI |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
159 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
160 "percentile" |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
161 Reads group share the exact same UMI. UMIs with counts < 1% of the |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
162 median counts for UMIs at the same position are ignored. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
163 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
164 "cluster" |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
165 Identify clusters of connected UMIs (based on hamming distance |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
166 threshold). Each network is a read group |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
167 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
168 "adjacency" |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
169 Cluster UMIs as above. For each cluster, select the node(UMI) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
170 with the highest counts. Visit all nodes one edge away. If all |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
171 nodes have been visited, stop. Otherwise, repeat with remaining |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
172 nodes until all nodes have been visited. Each step |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
173 defines a read group. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
174 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
175 "directional" (default) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
176 Identify clusters of connected UMIs (based on hamming distance |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
177 threshold) and umi A counts >= (2* umi B counts) - 1. Each |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
178 network is a read group. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
179 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
180 Options |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
181 ------- |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
182 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
183 --extract-umi-method (choice) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
184 How are the UMIs encoded in the read? |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
185 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
186 Options are: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
187 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
188 - "read_id" (default) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
189 UMIs contained at the end of the read separated as |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
190 specified with --umi-separator option |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
191 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
192 - "tag" |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
193 UMIs contained in a tag, see --umi-tag option |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
194 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
195 --umi-separator (string) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
196 Separator between read id and UMI. See --extract-umi-method above |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
197 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
198 --umi-tag (string) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
199 Tag which contains UMI. See --extract-umi-method above |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
200 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
201 --edit-distance-threshold (int) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
202 For the adjacency and cluster methods the threshold for the |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
203 edit distance to connect two UMIs in the network can be |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
204 increased. The default value of 1 works best unless the UMI is |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
205 very long (>14bp) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
206 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
207 --paired |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
208 BAM is paired end - output both read pairs. This will also |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
209 force the use of the template length to determine reads with |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
210 the same mapping coordinates. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
211 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
212 --spliced-is-unique |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
213 Causes two reads that start in the same position on the same |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
214 strand and have the same UMI to be considered unique if one is |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
215 spliced and the other is not. (Uses the 'N' cigar operation to test |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
216 for splicing) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
217 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
218 --soft-clip-threshold (int) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
219 Mappers that soft clip will sometimes do so rather than mapping a |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
220 spliced read if there is only a small overhang over the exon |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
221 junction. By setting this option, you can treat reads with at least |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
222 this many bases soft-clipped at the 3' end as spliced. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
223 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
224 --multimapping-detection-method (string, choice) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
225 If the sam/bam contains tags to identify multimapping reads, you can |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
226 specify one to use when selecting the best read at a given locus. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
227 Supported tags are "NH", "X0" and "XT". If not specified, the read |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
228 with the highest mapping quality will be selected |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
229 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
230 --read-length |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
231 Use the read length as a criterion when deduping, e.g. for sRNA-Seq |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
232 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
233 --whole-contig |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
234 Consider all alignments to a single contig together. This is useful if |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
235 you have aligned to a transcriptome multi-fasta |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
236 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
237 --subset (float, [0-1]) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
238 Only consider a fraction of the reads, chosen at random. This is useful |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
239 for doing saturation analyses. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
240 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
241 --chrom |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
242 Only consider a single chromosome. This is useful for debugging purposes |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
243 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
244 --per-contig (string) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
245 Deduplicate per contig (field 3 in BAM; RNAME). |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
246 All reads with the same contig will be |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
247 considered to have the same alignment position. This is useful |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
248 if your library prep generates PCR duplicates with non identical |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
249 alignment positions such as CEL-Seq. In this case, you would |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
250 align to a reference transcriptome with one transcript per gene |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
251 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
252 --per-gene (string) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
253 Deduplicate per gene. As above except with this option you can |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
254 align to a reference transcriptome with more than one transcript |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
255 per gene. You also need to provide the --gene-transcript-map option. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
256 This will also add a metacontig ('MC') tag to the reads if used |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
257 in conjunction with --output-bam |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
258 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
259 --gene-transcript-map (string) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
260 File mapping genes to transcripts (tab separated), e.g: |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
261 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
262 gene1 transcript1 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
263 gene1 transcript2 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
264 gene2 transcript3 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
265 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
266 --gene-tag (string) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
267 Deduplicate per gene. As per --per-gene except here the gene |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
268 information is encoded in the bam read tag specified so you do |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
269 not need to supply --gene-transcript-map |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
270 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
271 --output-bam (string, filename) |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
272 Output a tagged bam file to stdout or -S <filename> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
273 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
274 -i, --in-sam/-o, --out-sam |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
275 By default, inputs are assumed to be in BAM format and outputs are written |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
276 in BAM format. Use these options to specify the use of SAM format for |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
277 inputs or outputs. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
278 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
279 -I (string, filename) input file name |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
280 The input file must be sorted and indexed. |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
281 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
282 -S (string, filename) output file name |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
283 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
284 -L (string, filename) log file name |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
285 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
286 Usage |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
287 ----- |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
288 umi_tools dedup -I infile.bam -S deduped.bam |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
289 |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
290 ]]></help> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
291 <expand macro="citations" /> |
a6477bafd522
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
292 </tool> |