annotate tools/coverage_stats/coverage_stats.xml @ 4:ed501717f6cd draft default tip

"Update all the pico_galaxy tools on main Tool Shed"
author peterjc
date Fri, 16 Apr 2021 22:34:30 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
1 <tool id="coverage_stats" name="BAM coverage statistics" version="0.1.0">
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
2 <description>using samtools idxstats and depth</description>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
3 <requirements>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
4 <requirement type="package" version="1.4.1">samtools</requirement>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
5 </requirements>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
6 <version_command>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
7 python $__tool_directory__/coverage_stats.py --version
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
8 </version_command>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
9 <command detect_errors="aggressive">
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
10 python $__tool_directory__/coverage_stats.py
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
11 -b '$input_bam'
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
12 -i '${input_bam.metadata.bam_index}'
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
13 -o '$out_tabular'
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
14 -d '$max_depth'
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
15 </command>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
16 <inputs>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
17 <param name="input_bam" type="data" format="bam" label="Input BAM file" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
18 <param name="max_depth" type="integer" min="0" max="10000000" label="Max depth" value="8000" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
19 </inputs>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
20 <outputs>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
21 <data name="out_tabular" format="tabular" label="$input_bam.name (coverage stats)" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
22 </outputs>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
23 <tests>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
24 <test>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
25 <param name="input_bam" value="ex1.bam" ftype="bam" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
26 <param name="max_depth" value="123" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
27 <output name="out_tabular" file="ex1.coverage_stats.tabular" ftype="tabular" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
28 </test>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
29 <test>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
30 <param name="input_bam" value="ex1.bam" ftype="bam" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
31 <param name="max_depth" value="50" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
32 <output name="out_tabular" file="ex1.coverage_stats.md50.tabular" ftype="tabular" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
33 </test>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
34 <test>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
35 <param name="input_bam" value="coverage_test.bam" ftype="bam" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
36 <param name="max_depth" value="123" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
37 <output name="out_tabular" file="coverage_test.coverage_stats.tabular" ftype="tabular" />
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
38 </test>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
39 </tests>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
40 <help>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
41 **What it does**
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
42
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
43 This tool runs the commands ``samtools idxstats`` and ``samtools depth`` from the
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
44 SAMtools toolkit, and parses their output to produce a concise summary of the
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
45 coverage information for each reference sequence.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
46
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
47 Input is a sorted and indexed BAM file, the output is tabular. The first four
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
48 columns match the output from ``samtools idxstats``, the additional columns are
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
49 calculated from the ``samtools depth`` output. The final row with a star as the
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
50 reference identifier represents unmapped reads, and will have zeros in every
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
51 column except columns one and four.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
52
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
53 ====== =================================================================================
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
54 Column Description
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
55 ------ ---------------------------------------------------------------------------------
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
56 1 Reference sequence identifier
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
57 2 Reference sequence length
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
58 3 Number of mapped reads
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
59 4 Number of placed but unmapped reads (typically unmapped partners of mapped reads)
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
60 5 Minimum coverage (per base of reference)
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
61 6 Maximum coverage (per base of reference)
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
62 7 Mean coverage (given to 2 dp)
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
63 ====== =================================================================================
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
64
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
65 Example output from a *de novo* assembly:
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
66
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
67 ========== ====== ====== ====== ======= ======= ========
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
68 identifier length mapped placed min_cov max_cov mean_cov
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
69 ---------- ------ ------ ------ ------- ------- --------
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
70 contig_1 833604 436112 0 1 157 71.95
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
71 contig_2 14820 9954 0 1 152 91.27
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
72 contig_3 272099 142958 0 1 150 72.31
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
73 contig_4 135519 73288 0 1 149 75.23
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
74 contig_5 91245 46759 0 1 157 70.92
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
75 contig_6 175604 95744 0 1 146 75.99
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
76 contig_7 90586 48158 0 1 151 72.93
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
77 contig_9 234347 126458 0 1 159 75.40
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
78 contig_10 121515 60211 0 1 152 68.12
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
79 ... ... ... ... ... ... ...
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
80 contig_604 712 85 0 1 49 21.97
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
81 \* 0 0 950320 0 0 0.00
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
82 ========== ====== ====== ====== ======= ======= ========
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
83
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
84 In this example there were 604 contigs, each with one line in the output table,
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
85 plus the final row (labelled with an asterisk) representing 950320 unmapped reads.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
86 In this BAM file, the fourth column was otherwise zero.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
87
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
88 .. class:: warningmark
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
89
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
90 **Note**. If using this on a mapping BAM file, beware that the coverage counting is
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
91 done per base of the reference. This means if your reference has any extra bases
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
92 compared to the reads being mapped, those bases will be skipped by CIGAR D operators
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
93 and these "extra" bases can have an extremely low coverage, giving a potentially
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
94 misleading ``min_cov`` value. A sliding window coverage may be more appropriate.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
95
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
96 **Note**. Up until samtools 1.2, there was an internal hard limit of 8000 for the
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
97 pileup routine, meaning the reported coverage from ``samtools depth`` would show
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
98 maximum coverage depths *around* 8000. This is now a run time option.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
99
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
100
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
101 **Citation**
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
102
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
103 If you use this Galaxy tool in work leading to a scientific publication please
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
104 cite:
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
105
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
106 Heng Li et al (2009). The Sequence Alignment/Map format and SAMtools.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
107 Bioinformatics 25(16), 2078-9.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
108 https://doi.org/10.1093/bioinformatics/btp352
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
109
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
110 Peter J.A. Cock (2013), BAM coverage statistics using samtools idxstats and depth.
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
111 http://toolshed.g2.bx.psu.edu/view/peterjc/coverage_stats
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
112
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
113 This wrapper is available to install into other Galaxy Instances via the Galaxy
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
114 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/coverage_stats
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
115 </help>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
116 <citations>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
117 <citation type="doi">10.1093/bioinformatics/btp352</citation>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
118 </citations>
ed501717f6cd "Update all the pico_galaxy tools on main Tool Shed"
peterjc
parents:
diff changeset
119 </tool>