# HG changeset patch # User marpiech # Date 1472470090 14400 # Node ID 77114c36b8abf4ad0c595b878d0f0a591306752c # Parent 97fb58d6c0cb4fbccc048cfbf48855d358d03ab4 planemo upload diff -r 97fb58d6c0cb -r 77114c36b8ab README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.txt Mon Aug 29 07:28:10 2016 -0400 @@ -0,0 +1,48 @@ +Convert a BAM file into a BigWig coverage file. This can be used directly from +Galaxy for display at UCSC. The advantage over standard Wiggle format is that +the data is stored in a compressed format and can be retrieved by genome +region. This allows you to view regions of arbitrarily large Wiggle file data +at UCSC while avoiding the upload costs. + +History +------- + +v0.2.0 adds a sort step after genomeCoverageBed which is required in some +instances otherwise bedGraphToBigWig will complain. This version also uses +Galaxy's dependency mechanism, adds some tests, and updates some formatting. +By Lance Parsons. + +v0.1.1 passes the forgotten split argument and moves to using the new +sub-command enabled bedtools. Thanks to David Leader. + +As of v0.1.0, the Galaxy tool uses a revised bam_to_bigwig.py script using +genomeCoverageBed and bedGraphToBigWig - this approach allows gaps/skips to +be excluded from the coverage calculation, which is important for RNA-Seq. + +Until v0.0.2, this Galaxy tool used the bam_to_wiggle.py script from +https://github.com/chapmanb/bcbb/blob/master/nextgen/scripts/bam_to_wiggle.py +which internally used pysam (and thus samtools) and wigToBigWig from UCSC. + +Requirements +------------ + +If you are installing this tool manually, place the Python script in the +same directory as the XML configuration file, or provide a soft link to it. 
+Ensure the following command line tools are on the system path: + +pysam - Python interface to samtools (http://code.google.com/p/pysam/) +genomeCoverageBed - part of BedTools (http://code.google.com/p/bedtools/) +bedGraphToBigWig - from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) + +Credits +------- + +Original script by Brad Chapman, revisions from Peter Cock including the +switch to using genomeCoverageBed and bedGraphToBigWig based on the work +of Lance Parsons. + +License +------ + +The code is freely available under the MIT license: +http://www.opensource.org/licenses/mit-license.html diff -r 97fb58d6c0cb -r 77114c36b8ab bam_to_bigwig.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_bigwig.py Mon Aug 29 07:28:10 2016 -0400 @@ -0,0 +1,122 @@ +#!/usr/bin/env python +"""Convert BAM files to BigWig file format in a specified region. + +Original version copyright Brad Chapman with revisions from Peter Cock +and ideas from Lance Parsons + +Usage: + bam_to_bigwig.py [--outfile=] [--split] + +The --split argument is passed to bedtools genomecov + +The script requires: + pysam (http://code.google.com/p/pysam/) + bedtools genomecov (http://code.google.com/p/bedtools/) + bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) +""" +import os +import sys +import subprocess +import tempfile +from optparse import OptionParser +from contextlib import contextmanager, closing + +import pysam + + +def main(bam_file, outfile=None, split=False): + config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"], + "bedtools_genomeCoverageBed": + ["bedtools", "genomecov"]}} + if outfile is None: + outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] + if os.path.abspath(bam_file) == os.path.abspath(outfile): + sys.stderr.write("Bad arguments, " + "input and output files are the same.\n") + sys.exit(1) + if os.path.exists(outfile) and os.path.getsize(outfile) > 0: + sys.stderr.write("Warning, output file already exists.\n") + + sizes = 
get_sizes(bam_file, config) + print "Have %i references" % len(sizes) + if not sizes: + sys.stderr.write("Problem reading BAM header.\n") + sys.exit(1) + + # Use a temp file to avoid any possiblity of not having write permission + temp_handle = tempfile.NamedTemporaryFile(delete=False) + temp_file = temp_handle.name + with closing(temp_handle): + print "Calculating coverage..." + convert_to_graph(bam_file, split, config, temp_handle) + try: + print("Converting %i MB graph file to bigwig..." % + (os.path.getsize(temp_file) // (1024 * 1024))) + # Can't pipe this as stdin due to converter design, + # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html + convert_to_bigwig(temp_file, sizes, config, outfile) + finally: + if os.path.isfile(temp_file): + os.remove(temp_file) + print "Done" + + +@contextmanager +def indexed_bam(bam_file, config): + if not os.path.exists(bam_file + ".bai"): + pysam.index(bam_file) + sam_reader = pysam.Samfile(bam_file, "rb") + yield sam_reader + sam_reader.close() + + +def get_sizes(bam_file, config): + with indexed_bam(bam_file, config) as work_bam: + sizes = zip(work_bam.references, work_bam.lengths) + return sizes + + +def convert_to_graph(bam_file, split, config, out_handle): + cl = config["program"]["bedtools_genomeCoverageBed"] + \ + ["-ibam", bam_file, "-bg"] + if split: + cl.append("-split") + new_env = os.environ.copy() + new_env['LC_COLLATE'] = 'C' + p1 = subprocess.Popen(cl, stdout=subprocess.PIPE) + p2 = subprocess.Popen(["sort", "-k1,1", "-k2,2n"], + env=new_env, + stdin=p1.stdout, + stdout=out_handle) + p1.stdout.close() + p2.communicate() + + +def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): + # This will be fine under Galaxy, but could use temp folder? 
+ size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) + with open(size_file, "w") as out_handle: + for chrom, size in chr_sizes: + out_handle.write("%s\t%s\n" % (chrom, size)) + try: + cl = config["program"]["ucsc_bedGraphToBigWig"] + \ + [bedgraph_file, size_file, bw_file] + subprocess.check_call(cl) + finally: + os.remove(size_file) + return bw_file + + +if __name__ == "__main__": + parser = OptionParser() + parser.add_option("-o", "--outfile", dest="outfile") + parser.add_option("-s", "--split", action="store_true", dest="split") + (options, args) = parser.parse_args() + if len(args) not in [1, 2]: + print "Incorrect arguments" + print __doc__ + sys.exit() + kwargs = dict( + outfile=options.outfile, + split=options.split) + main(*args, **kwargs) diff -r 97fb58d6c0cb -r 77114c36b8ab bam_to_bigwig.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_bigwig.xml Mon Aug 29 07:28:10 2016 -0400 @@ -0,0 +1,58 @@ + + + Calculates coverage from a BAM alignment file + + + pysam + bedtools + ucsc_tools + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btp352 + 10.1093/bioinformatics/btq033 + + diff -r 97fb58d6c0cb -r 77114c36b8ab sample.xml --- a/sample.xml Mon Aug 29 07:21:05 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ - - - - - - - - - - - diff -r 97fb58d6c0cb -r 77114c36b8ab test-data/bam_to_bigwig_test.bam Binary file test-data/bam_to_bigwig_test.bam has changed diff -r 97fb58d6c0cb -r 77114c36b8ab test-data/bam_to_bigwig_test.bigwig Binary file test-data/bam_to_bigwig_test.bigwig has changed diff -r 97fb58d6c0cb -r 77114c36b8ab tool_dependencies.xml --- a/tool_dependencies.xml Mon Aug 29 07:21:05 2016 -0400 +++ b/tool_dependencies.xml Mon Aug 29 07:28:10 2016 -0400 @@ -1,6 +1,12 @@ - - + + + + + + + +