annotate umi-tools_dedup.xml @ 13:aa9a4233c641 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit b5b83fd1cf0a44edcd9a6cc7562a3add2f47bd3f"
author iuc
date Sat, 23 Oct 2021 20:37:31 +0000
parents 4098ab380097
children 04e09969d376
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
1 <tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
2 <description>Deduplicate reads using UMI and mapping coordinates</description>
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
3 <expand macro="bio_tools"/>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
4 <macros>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
5 <import>macros.xml</import>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
6 </macros>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
7 <expand macro="requirements">
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
8 <requirement type="package" version="1.12">samtools</requirement>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
9 </expand>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
11 @LINK_SAM_BAM_INPUT@
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
12
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
13 echo $input.ext &&
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
14 umi_tools dedup
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
15 #if $output_stats_bool
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
16 --output-stats=stats_outputs
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
17 #end if
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
18 @GROUPDEDUP_OPTIONS@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
19 @BARCODE_OPTIONS@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
20 @UMI_GROUPING_OPTIONS@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
21 @SAMBAM_OPTIONS@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
22 @FULLSC_OPTIONS@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
23 @ADVANCED_OPTIONS@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
24 -I '$input_file' -S deduped.bam
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
25 ## TODO: using samtools sort is a workaround for the following error, which appears when Galaxy
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
26 ## compares the generated file with the one in test-data
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
27 ## `Converting history BAM to SAM failed: 'samtools returned with error 1: stdout=None, stderr=[main_samview] fail to read the header from "/tmp/tmpd8o61jykdedup_out6.bam".\n'. Will compare BAM files`
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
28 ## problem seems to be the BAM file generated with pysam
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
29 ## the samtools sort workaround may be dropped in the future
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
30 --no-sort-output
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
31 @LOG@
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
32 && samtools sort --no-PG deduped.bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$output' -O BAM
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
33
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
34 ]]></command>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
35 <inputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
36 <param name="input" type="data" format="sam,bam" label="Reads to deduplicate in SAM or BAM format" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
37 <param name="output_stats_bool" type="boolean" checked="false" label="Output UMI related statistics files?"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
38 <expand macro="groupdedup_options_macro"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
39 <expand macro="barcode_options_macro"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
40 <expand macro="umi_grouping_options_macro"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
41 <expand macro="sambam_options_macro"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
42 <expand macro="fullsc_options_macro"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
43 <expand macro="advanced_options_macro"/>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
44 <expand macro="log_input_macro"/>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
45 </inputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
46 <outputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
47 <data format="bam" name="output" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
48 <collection name="output_stats" type="list" label="${tool.name} on ${on_string} stats">
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
49 <filter>output_stats_bool</filter>
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
50 <data name="edit_distance" format="tabular" from_work_dir="stats_outputs_edit_distance.tsv"/>
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
51 <data name="per_umi" format="tabular" from_work_dir="stats_outputs_per_umi.tsv"/>
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
52 <data name="per_umi_per_position" format="tabular" from_work_dir="stats_outputs_per_umi_per_position.tsv"/>
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
53 </collection>
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
54 <expand macro="log_output_macro"/>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
55 </outputs>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
56 <tests>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
57 <test expect_num_outputs="1">
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
58 <param name="input" value="group_in1.sam" ftype="sam" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
59 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
60 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
61 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
62 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
63 <param name="extract_umi_method" value="read_id" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
64 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
65 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
66 <param name="method" value="unique" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
67 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
68 <output name="output" file="dedup_out1.bam" ftype="bam" lines_diff="2"/><!-- lines_diff won't be needed in later versions, since umi_tools uses \-\-no-PG internally -->
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
69 </test>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
70 <test expect_num_outputs="1">
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
71 <param name="input" value="group_in2.sam" ftype="sam" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
72 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
73 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
74 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
75 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
76 <param name="extract_umi_method" value="read_id" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
77 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
78 <section name="sambam">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
79 <param name="paired" value="true" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
80 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
81 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
82 <param name="method" value="unique" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
83 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
84 <output name="output" file="dedup_out2.bam" ftype="bam" lines_diff="2" />
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
85 </test>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
86 <test expect_num_outputs="1">
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
87 <param name="input" value="group_in3.bam" ftype="bam" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
88 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
89 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
90 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
91 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
92 <param name="extract_umi_method" value="read_id" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
93 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
94 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
95 <param name="method" value="unique" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
96 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
97 <output name="output" file="dedup_out3.bam" ftype="bam" lines_diff="2" />
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
98 </test>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
99 <test expect_num_outputs="1">
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
100 <param name="input" value="group_in4.bam" ftype="bam" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
101 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
102 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
103 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
104 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
105 <param name="extract_umi_method" value="tag" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
106 <param name="umi_tag" value="BX" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
107 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
108 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
109 <param name="method" value="unique" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
110 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
111 <output name="output" file="dedup_out4.bam" ftype="bam" lines_diff="2"/>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
112 </test>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
113 <test expect_num_outputs="1">
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
114 <param name="input" value="group_in5.bam" ftype="bam" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
115 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
116 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
117 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
118 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
119 <param name="extract_umi_method" value="read_id" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
120 <param name="umi_tag" value="BX" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
121 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
122 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
123 <param name="method" value="cluster" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
124 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
125 <output name="output" file="dedup_out5.bam" ftype="bam" lines_diff="2"/>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
126 </test>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
127 <test expect_num_outputs="1">
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
128 <param name="input" value="group_in6.bam" ftype="bam" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
129 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
130 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
131 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
132 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
133 <param name="extract_umi_method" value="read_id" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
134 <param name="umi_tag" value="BX" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
135 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
136 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
137 <param name="method" value="directional" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
138 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
139 <output name="output" file="dedup_out6.bam" ftype="bam" lines_diff="2"/>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
140 </test>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
141 <test expect_num_outputs="5">
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
142 <param name="input" value="group_in6.bam" ftype="bam" />
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
143 <section name="advanced">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
144 <param name="random_seed" value="0" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
145 </section>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
146 <conditional name="bc">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
147 <param name="extract_umi_method" value="read_id" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
148 <param name="umi_tag" value="BX" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
149 </conditional>
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
150 <section name="umi">
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
151 <param name="method" value="directional" />
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
152 </section>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
153 <param name="output_stats_bool" value="true"/>
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
154 <output name="output" file="dedup_out6.bam" ftype="bam" lines_diff="2"/>
11
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
155 <output_collection name="output_stats">
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
156 <element name="edit_distance" file="stats_outputs_edit_distance.tsv" />
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
157 <element name="per_umi" file="stats_outputs_per_umi.tsv" />
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
158 <element name="per_umi_per_position" file="stats_outputs_per_umi_per_position.tsv" />
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
159 </output_collection>
7fa28eb10fed "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
160 </test>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
161 </tests>
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
162 <help><![CDATA[
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
163 umi_tools dedup - Deduplicate reads based on their UMI and mapping coordinates
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
164 ==============================================================================
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
165
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
166 Purpose
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
167 -------
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
168
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
169 The purpose of this command is to deduplicate BAM files based on the first
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
170 mapping co-ordinate and the UMI attached to the read.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
171
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
172 @BARCODE_HELP@
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
173
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
174 @UMI_GROUPING_HELP@
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
175
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
176 Selecting the representative read
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
177 ---------------------------------
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
178 For every group of duplicate reads, a single representative read is
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
179 retained. The following criteria are applied to select the read that
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
180 will be retained from a group of duplicated reads:
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
181
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
182 1. The read with the lowest number of mapping coordinates (see
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
183 ``--multimapping-detection-method`` option)
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
184
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
185 2. The read with the highest mapping quality. Note that this is not
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
186 the read sequencing quality and that if two reads have the same
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
187 mapping quality then one will be picked at random regardless of the
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
188 read quality.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
189
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
190 Otherwise a read is chosen at random.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
191
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
192 Optional statistics output
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
193 --------------------------
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
194
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
195 One can use the edit distance between UMIs at the same position as an
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
196 quality control for the deduplication process by comparing with
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
197 a null expectation of random sampling. For the random sampling, the
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
198 observed frequency of UMIs is used to more reasonably model the null
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
199 expectation.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
200
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
201 Use the option ``Output UMI related statistics files?`` to generate stats outfiles:
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
202
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
203 edit_distance
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
204 Reports the (binned) average edit distance between the UMIs at each
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
205 position. Positions with a single UMI are reported separately. The
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
206 edit distances are reported pre- and post-deduplication alongside
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
207 the null expectation from random sampling of UMIs from the UMIs
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
208 observed across all positions. Note that separate null
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
209 distributions are reported since the null depends on the observed
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
210 frequency of each UMI which is different pre- and
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
211 post-deduplication. The post-deduplication values should be closer to
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
212 their respective null than the pre-deduplication vs null comparison.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
213
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
214 In addition, this option will trigger reporting of further summary
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
215 statistics for the UMIs which may be informative for selecting the
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
216 optimal deduplication method or debugging.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
217
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
218 Each unique UMI sequence may be observed [0-many] times at multiple
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
219 positions in the BAM. The following files report the distribution for
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
220 the frequencies of each UMI.
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
221
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
222 per_umi_per_position
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
223 The `_stats_per_umi_per_position.tsv` file simply tabulates the
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
224 counts for unique combinations of UMI and position. E.g. if prior to
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
225 deduplication, we have two positions in the BAM (POSa, POSb), at
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
226 POSa we have observed 2*UMIa, 1*UMIb and at POSb: 1*UMIc, 3*UMId,
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
227 then the stats file is populated thus:
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
228
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
229 ====== =============
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
230 counts instances_pre
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
231 ------ -------------
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
232 1 2
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
233 2 1
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
234 3 1
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
235 ====== =============
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
236
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
237 If post deduplication, UMIb is grouped with UMIa such that POSa:
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
238 3*UMIa, then the `instances_post` column is populated thus:
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
239
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
240 ====== ============= ==============
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
241 counts instances_pre instances_post
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
242 ------ ------------- --------------
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
243 1 2 1
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
244 2 1 0
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
245 3 1 2
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
246 ====== ============= ==============
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
247
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
248 per_umi
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
249 The `_stats_per_umi.tsv` table provides UMI-level summary
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
250 statistics. Keeping in mind that each unique UMI sequence can be
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
251 observed [0-many] times across multiple positions in the BAM, the following statistics are reported for each UMI:
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
252
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
253 :times_observed: How many positions the UMI was observed at
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
254 :total_counts: The total number of times the UMI was observed across all positions
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
255 :median_counts: The median for the distribution of how often the UMI was observed at each position (excluding zeros)
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
256
12
4098ab380097 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
iuc
parents: 11
diff changeset
257 Hence, whenever times_observed=1, total_counts==median_counts.]]></help>
0
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
258 <expand macro="citations" />
a6477bafd522 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
259 </tool>