annotate fgbio_call_molecular_consensus_reads.xml @ 0:0ad5327b80cc draft

"planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
author jjohnson
date Sun, 21 Feb 2021 23:40:53 +0000
parents
children b7795c2f0b81
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
1 <tool id="fgbio_call_molecular_consensus_reads" name="fgbio CallMolecularConsensusReads" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
2 <description>Calls consensus sequences from reads with the same unique molecular tag</description>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
3 <macros>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
4 <import>macros.xml</import>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
5 </macros>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
6 <expand macro="requirements" />
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
7 <version_command>fgbio --version</version_command>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
9 fgbio CallMolecularConsensusReads
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
10 --input '$input'
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
11 --min-reads=$min_reads
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
12 --output '$output'
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
13 ## optional settings
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
14 #if $filter_options.max_reads:
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
15 --max-reads=$filter_options.max_reads
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
16 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
17 #if $filter_options.error_rate_pre_umi
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
18 --error-rate-pre-umi=$filter_options.error_rate_pre_umi
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
19 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
20 #if $filter_options.error_rate_post_umi
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
21 --error-rate-post-umi=$filter_options.error_rate_post_umi
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
22 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
23 #if $filter_options.min_input_base_quality
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
24 --min-input-base-quality=$filter_options.min_input_base_quality
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
25 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
26
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
27 #if $bam_options.read_name_prefix
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
28 --read-name-prefix='$bam_options.read_name_prefix'
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
29 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
30 #if $bam_options.tag
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
31 --tag=$bam_options.tag
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
32 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
33 #if $bam_options.read_group_id
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
34 --read-group-id=$bam_options.read_group_id
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
35 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
36 #if $bam_options.output_per_base_tags
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
37 --output-per-base-tags=$bam_options.output_per_base_tags
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
38 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
39 #if $output_rejects
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
40 --rejects='$rejects'
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
41 #end if
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
42 ]]></command>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
43 <inputs>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
44 <param name="input" type="data" format="bam" label="Input BAM file with reads grouped by unique molecular tag"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
45 <param argument="--min-reads" type="integer" value="" min="1" label="Minimum number of reads to produce a consensus base" help="Default: 1"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
46
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
47 <section name="filter_options" title="Optional Filter Settings" expanded="false">
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
48 <param argument="--max-reads" type="integer" value="" min="1" optional="true" label="Maximum number of reads to use when building a consensus"
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
49 help="If more than this many reads are present in a tag family, the family is randomly downsampled to exactly max-reads reads."/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
50 <param argument="--error-rate-pre-umi" type="integer" value="" min="1" optional="true" label="Phred-scaled error rate for an error prior to the UMIs being integrated" help="Default: 45"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
51 <param argument="--error-rate-post-umi" type="integer" value="" min="1" optional="true" label="Phred-scaled error rate for an error post the UMIs being integrated" help="Default: 40"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
52 <param argument="--min-input-base-quality" type="integer" value="" min="1" optional="true" label="Ignore bases in raw reads that have Q below this value" help="Default: 10"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
53 </section>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
54
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
55 <section name="bam_options" title="BAM Settings" expanded="false">
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
56 <param argument="--read-name-prefix" type="text" value="" label="Prefix for all consensus read names"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
57 <param argument="--tag" type="text" value="" label="The SAM attribute with the unique molecule tag" help="Default: MI">
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
58 <expand macro="sam_tag_validator"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
59 </param>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
60 <param argument="--read-group-id" type="text" value="" optional="true" label="The new read group ID for all the consensus reads" help="Default: A"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
61 <param argument="--output-per-base-tags" type="select" value="" optional="true" label="Produce tags on consensus reads that contain per-base information">
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
62 <option value="true">Yes</option>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
63 <option value="false">No</option>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
64 </param>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
65 </section>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
66 <expand macro="sam_sort_order" />
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
67 <param name="output_rejects" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output reads not used in consensus calling (rejects) to a separate BAM file"/>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
68 </inputs>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
69 <outputs>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
70 <data name="rejects" format="bam" >
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
71 <filter>output_rejects == True</filter>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
72 </data>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
73 <data name="output" format="bam" />
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
74 </outputs>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
75 <help><![CDATA[
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
76 **fgbio CallMolecularConsensusReads**
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
77
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
78 Calls consensus sequences from reads with the same unique molecular tag.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
79
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
80 Reads with the same unique molecular tag are examined base-by-base to assess the likelihood of each base in the source molecule. The likelihood model is as follows:
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
81
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
82 1. First, the base qualities are adjusted. The base qualities are assumed to represent the probability of a sequencing error (i.e. the sequencer observed the wrong base present on the cluster/flowcell/well). The base quality scores are converted to probabilities incorporating a probability representing the chance of an error from the time the unique molecular tags were integrated to just prior to sequencing. The resulting probability is the error rate of all processes from right after integrating the molecular tag through to the end of sequencing.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
83 2. Next, a consensus sequence is called for all reads with the same unique molecular tag base-by-base. For a given base position in the reads, the likelihoods that an A, C, G, or T is the base for the underlying source molecule respectively are computed by multiplying the likelihood of each read observing the base position being considered. The probability of error (from 1.) is used when the observed base does not match the hypothesized base for the underlying source molecule, while one minus that probability is used otherwise. The computed likelihoods are normalized by dividing them by the sum of all four likelihoods to produce a posterior probability, namely the probability that the source molecule was an A, C, G, or T from just after integrating the molecular tag through to sequencing, given the observations. The base with the maximum posterior probability is used as the consensus call, and the posterior probability is used as its raw base quality.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
84 3. Finally, the consensus raw base quality is modified by incorporating the probability of an error prior to integrating the unique molecular tags. Therefore, the probability used for the final consensus base quality is the posterior probability of the source molecule having the consensus base given the observed reads with the same molecular tag, all the way from sample extraction and through sample and library preparation, through preparing the library for sequencing (e.g. amplification, target selection), and finally, through sequencing.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
85 This tool assumes that reads with the same tag are grouped together (consecutive in the file). Also, this tool calls each end of a pair independently, and does not jointly call bases that overlap within a pair. Insertion or deletion errors in the reads are not considered in the consensus model.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
86
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
87 Particular attention should be paid to setting the --min-reads parameter as this can have a dramatic effect on both results and runtime. For libraries with low duplication rates (e.g. 100-300X exome libraries) in which it is desirable to retain singleton reads while making consensus reads from sets of duplicates, --min-reads=1 is appropriate. For libraries with high duplication rates where it is desirable to only produce consensus reads supported by 2+ reads to allow error correction, --min-reads=2 or higher is appropriate. After generation, consensus reads can be further filtered using the FilterConsensusReads tool. As such it is always safe to run with --min-reads=1 and filter later, but filtering at this step can improve performance significantly.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
88
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
89 Consensus reads have a number of additional optional tags set in the resulting BAM file. The tags break down into those that are single-valued per read:
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
90
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
91 - consensus depth [cD] (int) : the maximum depth of raw reads at any point in the consensus read
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
92 - consensus min depth [cM] (int) : the minimum depth of raw reads at any point in the consensus read
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
93 - consensus error rate [cE] (float): the fraction of bases in raw reads disagreeing with the final consensus calls
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
94
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
95 And those that have a value per base:
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
96
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
97 - consensus depth [cd] (short[]): the count of bases contributing to the consensus read at each position
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
98 - consensus errors [ce] (short[]): the number of bases from raw reads disagreeing with the final consensus base
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
99
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
100 The per base depths and errors are both capped at 32,767. In all cases no-calls (Ns) and bases below the --min-input-base-quality are not counted in tag value calculations.
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
101
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
102
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
103
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
104 http://fulcrumgenomics.github.io/fgbio/tools/latest/CallMolecularConsensusReads.html
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
105 ]]></help>
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
106 <expand macro="citations" />
0ad5327b80cc "planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
jjohnson
parents:
diff changeset
107 </tool>