annotate fgbio_group_reads_by_umi.xml @ 0:900cd2865768 draft

"planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
author jjohnson
date Sun, 21 Feb 2021 23:40:34 +0000
parents
children 568816308859
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
1 <tool id="fgbio_group_reads_by_umi" name="fgbio GroupReadsByUmi" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
2 <description>Groups reads together that appear to have come from the same original molecule</description>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
3 <macros>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
4 <import>macros.xml</import>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
5 </macros>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
6 <expand macro="requirements" />
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
7 <version_command>fgbio --version</version_command>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
9 fgbio GroupReadsByUmi
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
10 --input '$input'
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
11 --strategy=$strategy
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
12 --output '$output'
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
13 ## optional settings: integer flags are compared against '' (unset) instead of being truth-tested, so an explicit 0 is still passed to fgbio
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
14 #if str($optional.edits) != ''
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
15 --edits $optional.edits
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
16 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
17 #if str($optional.min_umi_length) != ''
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
18 --min-umi-length $optional.min_umi_length
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
19 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
20 #if str($optional.min_map_q) != ''
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
21 --min-map-q $optional.min_map_q
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
22 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
23 #if $optional.raw_tag
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
24 --raw-tag=$optional.raw_tag
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
25 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
26 #if $optional.assign_tag
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
27 --assign-tag=$optional.assign_tag
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
28 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
29 #if $optional.include_non_pf_reads
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
30 --include-non-pf-reads=$optional.include_non_pf_reads
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
31 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
32 #if $output_counts
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
33 --family-size-histogram='$family_size_histogram'
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
34 #end if
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
35 ]]></command>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
36 <inputs>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
37 <param name="input" type="data" format="bam" label="Input BAM file containing the reads to group by UMI"/>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
38 <param argument="--strategy" type="select" label="UMI assignment strategy">
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
39 <option value="identity">identity</option>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
40 <option value="edit">edit</option>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
41 <option value="adjacency">adjacency</option>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
42 <option value="paired">paired</option>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
43 </param>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
44 <section name="optional" title="Optional Settings" expanded="false">
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
45 <param argument="--edits" type="integer" value="" optional="true" label="Allowable number of edits between UMIs"
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
46 help="Control the matching of non-identical UMIs. Default: 1"/>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
47 <param argument="--min-umi-length" type="integer" value="" optional="true" label="The minimum UMI length" >
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
48 <help>If not specified then all UMIs must have the same length,
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
49 otherwise discard reads with UMIs shorter than this length
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
50 and allow for differing UMI lengths.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
51 </help>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
52 </param>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
53 <param argument="--min-map-q" type="integer" value="" optional="true" label="Minimum mapping quality" help="Default: 30"/>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
54 <param argument="--raw-tag" type="text" value="" label="The tag containing the raw UMI" help="Default: RX">
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
55 <expand macro="sam_tag_validator"/>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
56 </param>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
57 <param argument="--assign-tag" type="text" value="" label="The output tag for UMI grouping" help="Default: MI">
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
58 <expand macro="sam_tag_validator"/>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
59 </param>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
60 <param argument="--include-non-pf-reads" type="select" value="true" optional="true" label="Include non-PF reads" help="Leave unselected to omit the flag and use the fgbio default">
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
61 <option value="true">Yes</option>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
62 <option value="false">No</option>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
63 </param>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
64 </section>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
65 <param name="output_counts" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output tag family size counts" help="Adds a tabular family size histogram output (passed to fgbio as --family-size-histogram)"/>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
66
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
67 </inputs>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
68 <outputs>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
69 <data name="family_size_histogram" format="tabular" >
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
70 <filter>output_counts == True</filter>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
71 </data>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
72 <data name="output" format="bam" />
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
73 </outputs>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
74 <help><![CDATA[
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
75 **fgbio GroupReadsByUmi**
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
76
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
77 Groups reads together that appear to have come from the same original molecule. Reads are grouped by template, and then templates are sorted by the 5’ mapping positions of the reads from the template, used from earliest mapping position to latest. Reads that have the same end positions are then sub-grouped by UMI sequence.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
78
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
79 Accepts reads in any order (including unsorted) and outputs reads sorted by:
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
80
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
81 - The lower genome coordinate of the two outer ends of the templates
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
82 - The sequencing library
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
83 - The assigned UMI tag
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
84 - Read Name
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
85
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
86 Reads are aggressively filtered out so that only high quality reads/mappings are taken forward. Single-end reads must have mapping quality >= min-map-q. Paired-end reads must have both reads mapped to the same chromosome with both reads having mapping quality >= min-map-q. (Note: the MQ tag is required on reads with mapped mates).
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
87
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
88 This is done with the expectation that the next step is building consensus reads, where it is undesirable to either:
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
89
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
90 - Assign reads together that are really from different source molecules
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
91 - Build two groups from reads that are really from the same molecule
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
92 - Errors in mapping reads could lead to both and therefore are minimized.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
93
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
94 Grouping of UMIs is performed by one of four strategies:
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
95
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
96 - identity: only reads with identical UMI sequences are grouped together. This strategy may be useful for evaluating data, but should generally be avoided as it will generate multiple UMI groups per original molecule in the presence of errors.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
97 - edit: reads are clustered into groups such that each read within a group has at least one other read in the group with <= edits differences and there are no inter-group pairings with <= edits differences. Effective when there are small numbers of reads per UMI, but breaks down at very high coverage of UMIs.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
98 - adjacency: a version of the directed adjacency method described in umi_tools that allows for errors between UMIs but only when there is a count gradient.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
99 - paired: similar to adjacency but for methods that produce template with a pair of UMIs such that a read with A-B is related to but not identical to a read with B-A. Expects the pair of UMIs to be stored in a single tag, separated by a hyphen (e.g. ACGT-CCGG). The molecular IDs produced have more structure than for single UMI strategies, and are of the form {base}/{AB|BA}. E.g. two UMI pairs would be mapped as follows AAAA-GGGG -> 1/AB, GGGG-AAAA -> 1/BA.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
100
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
101 edit, adjacency and paired make use of the --edits parameter to control the matching of non-identical UMIs.
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
102
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
103 By default, all UMIs must be the same length. If --min-umi-length=len is specified then reads that have a UMI shorter than len will be discarded, and when comparing UMIs of different lengths, the first len bases will be compared, where len is the length of the shortest UMI. The UMI length is the number of [ACGT] bases in the UMI (i.e. does not count dashes and other non-ACGT characters). This option is not implemented for reads with UMI pairs (i.e. using the paired assigner).
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
104
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
105 http://fulcrumgenomics.github.io/fgbio/tools/latest/GroupReadsByUmi.html
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
106 ]]></help>
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
107 <expand macro="citations" />
900cd2865768 "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
108 </tool>