Mercurial > repos > marpiech > norwich_tools
diff bam_to_bigwig.py @ 7:77114c36b8ab draft default tip
planemo upload
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 07:28:10 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Convert BAM files to BigWig file format in a specified region.

Original version copyright Brad Chapman with revisions from Peter Cock
and ideas from Lance Parsons

Usage:
    bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split]

The --split argument is passed to bedtools genomecov

The script requires:
    pysam (http://code.google.com/p/pysam/)
    bedtools genomecov (http://code.google.com/p/bedtools/)
    bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
"""
import os
import sys
import subprocess
import tempfile
from optparse import OptionParser
from contextlib import contextmanager, closing

import pysam


def main(bam_file, outfile=None, split=False):
    """Convert ``bam_file`` into a BigWig coverage file.

    Arguments:
        bam_file: path to the input BAM file.
        outfile: path of the BigWig file to write; defaults to the BAM
            path with its extension replaced by ``.bigwig``.
        split: when true, ``-split`` is passed to ``bedtools genomecov``.

    Exits with status 1 if the input and output paths are the same file
    or if no reference sequences could be read from the BAM header.
    """
    config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"],
                          "bedtools_genomeCoverageBed":
                          ["bedtools", "genomecov"]}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    if os.path.abspath(bam_file) == os.path.abspath(outfile):
        sys.stderr.write("Bad arguments, "
                         "input and output files are the same.\n")
        sys.exit(1)
    if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
        sys.stderr.write("Warning, output file already exists.\n")

    sizes = get_sizes(bam_file, config)
    print("Have %i references" % len(sizes))
    if not sizes:
        sys.stderr.write("Problem reading BAM header.\n")
        sys.exit(1)

    # Use a temp file to avoid any possibility of not having write permission
    temp_handle = tempfile.NamedTemporaryFile(delete=False)
    temp_file = temp_handle.name
    # The try starts before the coverage step so that the temp file is
    # removed even when computing the coverage fails.
    try:
        with closing(temp_handle):
            print("Calculating coverage...")
            convert_to_graph(bam_file, split, config, temp_handle)
        print("Converting %i MB graph file to bigwig..."
              % (os.path.getsize(temp_file) // (1024 * 1024)))
        # Can't pipe this as stdin due to converter design,
        # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
        convert_to_bigwig(temp_file, sizes, config, outfile)
    finally:
        if os.path.isfile(temp_file):
            os.remove(temp_file)
    print("Done")


@contextmanager
def indexed_bam(bam_file, config):
    """Yield an open pysam reader for ``bam_file``, indexing it if needed.

    An index is built with ``pysam.index`` when ``<bam_file>.bai`` does
    not exist. The reader is closed when the ``with`` block exits, even
    if the block raises. ``config`` is accepted but not used here.
    """
    if not os.path.exists(bam_file + ".bai"):
        pysam.index(bam_file)
    # Prefer the current pysam class name when it is available and fall
    # back to the legacy ``Samfile`` name this script originally used.
    reader_class = getattr(pysam, "AlignmentFile", None) or pysam.Samfile
    sam_reader = reader_class(bam_file, "rb")
    try:
        yield sam_reader
    finally:
        sam_reader.close()


def get_sizes(bam_file, config):
    """Return a list of ``(reference name, length)`` pairs from the BAM."""
    with indexed_bam(bam_file, config) as work_bam:
        # Materialise the pairs: the caller takes len() of the result and
        # iterates over it later, which a lazy zip object cannot support.
        return list(zip(work_bam.references, work_bam.lengths))


def convert_to_graph(bam_file, split, config, out_handle):
    """Write sorted bedGraph coverage for ``bam_file`` to ``out_handle``.

    Runs ``bedtools genomecov -ibam <bam_file> -bg`` (plus ``-split`` when
    ``split`` is true) and pipes its output through ``sort -k1,1 -k2,2n``.

    Raises:
        subprocess.CalledProcessError: if either command exits non-zero.
    """
    cl = config["program"]["bedtools_genomeCoverageBed"] + \
        ["-ibam", bam_file, "-bg"]
    if split:
        cl.append("-split")
    sort_cl = ["sort", "-k1,1", "-k2,2n"]
    new_env = os.environ.copy()
    new_env['LC_COLLATE'] = 'C'
    # LC_ALL takes precedence over LC_COLLATE, so pin it as well; otherwise
    # an LC_ALL inherited from the caller would silently undo the C collation.
    new_env['LC_ALL'] = 'C'
    p1 = subprocess.Popen(cl, stdout=subprocess.PIPE)
    p2 = subprocess.Popen(sort_cl,
                          env=new_env,
                          stdin=p1.stdout,
                          stdout=out_handle)
    # Drop our copy of the pipe so p1 sees a broken pipe if sort exits early.
    p1.stdout.close()
    p2.communicate()
    p1.wait()
    for process, command in ((p1, cl), (p2, sort_cl)):
        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, command)


def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
    """Run bedGraphToBigWig on ``bedgraph_file`` and return ``bw_file``.

    ``chr_sizes`` is an iterable of ``(name, length)`` pairs; they are
    written to a temporary tab-separated sizes file next to ``bw_file``,
    which is removed again whether or not the conversion succeeds.

    Raises:
        subprocess.CalledProcessError: if bedGraphToBigWig exits non-zero.
    """
    # This will be fine under Galaxy, but could use temp folder?
    size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
    try:
        with open(size_file, "w") as out_handle:
            for chrom, size in chr_sizes:
                out_handle.write("%s\t%s\n" % (chrom, size))
        cl = config["program"]["ucsc_bedGraphToBigWig"] + \
            [bedgraph_file, size_file, bw_file]
        subprocess.check_call(cl)
    finally:
        # Guarded so that a failure to create the sizes file is not masked
        # by a second error from trying to remove it.
        if os.path.exists(size_file):
            os.remove(size_file)
    return bw_file


if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("-o", "--outfile", dest="outfile")
    parser.add_option("-s", "--split", action="store_true", dest="split",
                      default=False)
    (options, args) = parser.parse_args()
    # Exactly one positional argument (the BAM file) is supported. A second
    # one used to pass this check and then collide with the ``outfile``
    # keyword in the call to main(), raising TypeError.
    if len(args) != 1:
        sys.stderr.write("Incorrect arguments\n")
        sys.stderr.write(__doc__)
        sys.exit(1)
    main(args[0], outfile=options.outfile, split=options.split)