changeset 7:77114c36b8ab draft default tip

planemo upload
author marpiech
date Mon, 29 Aug 2016 07:28:10 -0400
parents 97fb58d6c0cb
children
files README.txt bam_to_bigwig.py bam_to_bigwig.xml sample.xml test-data/bam_to_bigwig_test.bam test-data/bam_to_bigwig_test.bigwig tool_dependencies.xml
diffstat 7 files changed, 236 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt	Mon Aug 29 07:28:10 2016 -0400
@@ -0,0 +1,48 @@
+Convert a BAM file into a BigWig coverage file. This can be used directly from
+Galaxy for display at UCSC. The advantage over standard Wiggle format is that
+the data is stored in a compressed format and can be retrieved by genome
+region. This allows you to view regions of arbitrarily large Wiggle file data
+at UCSC while avoiding the upload costs.
+
+History
+-------
+
+v0.2.0 add a sort step after genomeCoverageBed which is required in some
+instances otherwise bedGraphToBigWig will complain. This version also uses
+Galaxy's dependency mechanism, added some tests, and updated some formatting.
+By Lance Parsons.
+
+v0.1.1 passes the forgotten split argument and moves to using the new
+sub-command enabled bedtools. Thanks to David Leader.
+
+As of v0.1.0, the Galaxy tools uses a revised bam_to_bigwig.py script using
+genomeCoverageBed and bedGraphToBigWig - this approach allows gaps/skpis to
+be excluded from the coverage calculation, which is important for RNA-Seq.
+
+Until v0.0.2, this Galaxy tool used the bam_to_wiggle.py script from
+https://github.com/chapmanb/bcbb/blob/master/nextgen/scripts/bam_to_wiggle.py
+which internally used pysam (and thus samtools) and wigToBigWig from UCSC.
+
+Requirements
+------------
+
+If you are installing this tool manually, place the Python script in the
+same directory as the XML configuration file, or provide a soft link to it.
+Ensure the following command line tools are on the system path:
+
+pysam - Python interface to samtools (http://code.google.com/p/pysam/)
+genomeCoverageBed - part of BedTools (http://code.google.com/p/bedtools/)
+bedGraphToBigWig  - from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
+
+Credits
+-------
+
+Original script by Brad Chapman, revisions from Peter Cock including the
+switch to using genomeCoverageBed and bedGraphToBigWig based on the work
+of Lance Parsons.
+
+License
+------
+
+The code is freely available under the MIT license:
+http://www.opensource.org/licenses/mit-license.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_to_bigwig.py	Mon Aug 29 07:28:10 2016 -0400
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+"""Convert BAM files to BigWig file format in a specified region.
+
+Original version copyright Brad Chapman with revisions from Peter Cock
+and ideas from Lance Parsons
+
+Usage:
+    bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split]
+
+The --split argument is passed to bedtools genomecov
+
+The script requires:
+    pysam (http://code.google.com/p/pysam/)
+    bedtools genomecov (http://code.google.com/p/bedtools/)
+    bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
+"""
+import os
+import sys
+import subprocess
+import tempfile
+from optparse import OptionParser
+from contextlib import contextmanager, closing
+
+import pysam
+
+
+def main(bam_file, outfile=None, split=False):
+    config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"],
+                          "bedtools_genomeCoverageBed":
+                          ["bedtools", "genomecov"]}}
+    if outfile is None:
+        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
+    if os.path.abspath(bam_file) == os.path.abspath(outfile):
+        sys.stderr.write("Bad arguments, "
+                         "input and output files are the same.\n")
+        sys.exit(1)
+    if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
+        sys.stderr.write("Warning, output file already exists.\n")
+
+    sizes = get_sizes(bam_file, config)
+    print "Have %i references" % len(sizes)
+    if not sizes:
+        sys.stderr.write("Problem reading BAM header.\n")
+        sys.exit(1)
+
+    # Use a temp file to avoid any possiblity of not having write permission
+    temp_handle = tempfile.NamedTemporaryFile(delete=False)
+    temp_file = temp_handle.name
+    with closing(temp_handle):
+        print "Calculating coverage..."
+        convert_to_graph(bam_file, split, config, temp_handle)
+    try:
+        print("Converting %i MB graph file to bigwig..." %
+              (os.path.getsize(temp_file) // (1024 * 1024)))
+        # Can't pipe this as stdin due to converter design,
+        # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
+        convert_to_bigwig(temp_file, sizes, config, outfile)
+    finally:
+        if os.path.isfile(temp_file):
+            os.remove(temp_file)
+    print "Done"
+
+
+@contextmanager
+def indexed_bam(bam_file, config):
+    if not os.path.exists(bam_file + ".bai"):
+        pysam.index(bam_file)
+    sam_reader = pysam.Samfile(bam_file, "rb")
+    yield sam_reader
+    sam_reader.close()
+
+
+def get_sizes(bam_file, config):
+    with indexed_bam(bam_file, config) as work_bam:
+        sizes = zip(work_bam.references, work_bam.lengths)
+    return sizes
+
+
+def convert_to_graph(bam_file, split, config, out_handle):
+    cl = config["program"]["bedtools_genomeCoverageBed"] + \
+        ["-ibam", bam_file, "-bg"]
+    if split:
+        cl.append("-split")
+    new_env = os.environ.copy()
+    new_env['LC_COLLATE'] = 'C'
+    p1 = subprocess.Popen(cl, stdout=subprocess.PIPE)
+    p2 = subprocess.Popen(["sort", "-k1,1", "-k2,2n"],
+                          env=new_env,
+                          stdin=p1.stdout,
+                          stdout=out_handle)
+    p1.stdout.close()
+    p2.communicate()
+
+
+def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
+    # This will be fine under Galaxy, but could use temp folder?
+    size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
+    with open(size_file, "w") as out_handle:
+        for chrom, size in chr_sizes:
+            out_handle.write("%s\t%s\n" % (chrom, size))
+    try:
+        cl = config["program"]["ucsc_bedGraphToBigWig"] + \
+            [bedgraph_file, size_file, bw_file]
+        subprocess.check_call(cl)
+    finally:
+        os.remove(size_file)
+    return bw_file
+
+
+if __name__ == "__main__":
+    parser = OptionParser()
+    parser.add_option("-o", "--outfile", dest="outfile")
+    parser.add_option("-s", "--split", action="store_true", dest="split")
+    (options, args) = parser.parse_args()
+    if len(args) not in [1, 2]:
+        print "Incorrect arguments"
+        print __doc__
+        sys.exit()
+    kwargs = dict(
+        outfile=options.outfile,
+        split=options.split)
+    main(*args, **kwargs)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_to_bigwig.xml	Mon Aug 29 07:28:10 2016 -0400
@@ -0,0 +1,58 @@
+<tool id="bam_to_bigwig" name="BAM to BigWig" version="0.2.0">
+
+  <description>Calculates coverage from a BAM alignment file</description>
+
+  <requirements>
+    <requirement type="package" version="0.8.3">pysam</requirement>
+    <requirement type="package" version="2.24">bedtools</requirement>
+    <requirement type="package" version="312">ucsc_tools</requirement>
+  </requirements>
+
+  <command detect_errors="aggressive" interpreter="python"><![CDATA[
+    bam_to_bigwig.py $align --outfile=$out --split
+  ]]></command>
+
+  <inputs>
+    <param format="bam" name="align" type="data" label="BAM alignment file"/>
+  </inputs>
+
+  <outputs>
+    <data format="bigwig" name="out" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="align" value="bam_to_bigwig_test.bam"/>
+      <output name="out" file="bam_to_bigwig_test.bigwig"/>
+    </test>
+  </tests>
+
+<help><![CDATA[
+**What it does**
+
+Creates a coverage file in BigWig format, given a BAM alignment file.
+
+Gaps or skips (CIGAR D or N operators) are not counted towards the coverage
+calculation, which is important when mapping RNA Seq reads to genes with
+introns.
+
+**Input**
+
+A BAM alignment file. This needs to have the genome database build used in
+alignment annotated. If your file has '?' for the database build, click on the
+pencil icon to edit the alignment attributes, and specify the organism used to
+align against.
+
+**Output**
+
+BigWig files can be loaded directly from Galaxy into the UCSC browser. They can
+be loaded incrementally by UCSC, so a single file can be used to represent the
+entire genome without having to upload the entire thing as a custom track.
+
+]]></help>
+
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btp352</citation>
+    <citation type="doi">10.1093/bioinformatics/btq033</citation>
+  </citations>
+</tool>
--- a/sample.xml	Mon Aug 29 07:21:05 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-<tool id="sam_filter_1" name="SAM Filter">
-	<command interpreter="python">
-		
-	</command>
-	<inputs>
-		
-	</inputs>
-	<outputs>
-	
-	</outputs>
-</tool>
Binary file test-data/bam_to_bigwig_test.bam has changed
Binary file test-data/bam_to_bigwig_test.bigwig has changed
--- a/tool_dependencies.xml	Mon Aug 29 07:21:05 2016 -0400
+++ b/tool_dependencies.xml	Mon Aug 29 07:28:10 2016 -0400
@@ -1,6 +1,12 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="abyss" version="1.9.0">
-        <repository changeset_revision="ded58f1da91c" name="package_abyss_1_9_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="pysam" version="0.8.3">
+        <repository changeset_revision="08db58be052a" name="package_python_2_7_pysam_0_8_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bedtools" version="2.24">
+        <repository changeset_revision="39b86c1e267d" name="package_bedtools_2_24" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="ucsc_tools" version="312">
+        <repository changeset_revision="2d6bafd63401" name="package_ucsc_tools_312" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>