Mercurial > repos > brad-chapman > bam_to_bigwig
view bam_to_bigwig/bam_to_bigwig.py @ 2:e2edfa478eb4
Covert to use bedGraph intermediate with bedtools. Thanks to Peter Cock and Lance Parsons for the updates
author | Brad Chapman <chapmanb@50mail.com> |
---|---|
date | Wed, 05 Sep 2012 21:14:54 -0400 |
parents | |
children | 14e5258d1b39 |
line wrap: on
line source
#!/usr/bin/env python #Original version copyright Brad Chapman with revisions from Peter Cock #and ideas from Lance Parsons """Convert BAM files to BigWig file format in a specified region. Usage: bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>] The optional config file (not used by the Galaxy interface) is in YAML format and specifies the location of the binary tools. program: bedtools_genomeCoverageBed: genomeCoverageBed ucsc_bedGraphToBigWig: bedGraphToBigWig If not specified, these will be assumed to be present in the system path. The script requires: pysam (http://code.google.com/p/pysam/) genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/) bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) If a configuration file is used, then PyYAML is also required (http://pyyaml.org/) """ import os import sys import subprocess import tempfile from optparse import OptionParser from contextlib import contextmanager, closing import pysam def main(bam_file, config_file=None, outfile=None, split=False): if config_file: import yaml with open(config_file) as in_handle: config = yaml.load(in_handle) else: config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig", "bedtools_genomeCoverageBed" : "genomeCoverageBed"}} if outfile is None: outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] if os.path.abspath(bam_file) == os.path.abspath(outfile): sys.stderr.write("Bad arguments, input and output files are the same.\n") sys.exit(1) if os.path.exists(outfile) and os.path.getsize(outfile) > 0: sys.stderr.write("Warning, output file already exists.\n") sizes = get_sizes(bam_file, config) print "Have %i references" % len(sizes) if not sizes: sys.stderr.write("Problem reading BAM header.\n") sys.exit(1) #Use a temp file to avoid any possiblity of not having write permission temp_handle = tempfile.NamedTemporaryFile(delete=False) temp_file = temp_handle.name with closing(temp_handle): print "Calculating coverage..." convert_to_graph(bam_file, split, config, temp_handle) try: print "Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024)) #Can't pipe this as stdin due to converter design, #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html convert_to_bigwig(temp_file, sizes, config, outfile) finally: if os.path.isfile(temp_file): os.remove(temp_file) print "Done" @contextmanager def indexed_bam(bam_file, config): if not os.path.exists(bam_file + ".bai"): pysam.index(bam_file) sam_reader = pysam.Samfile(bam_file, "rb") yield sam_reader sam_reader.close() def get_sizes(bam_file, config): with indexed_bam(bam_file, config) as work_bam: sizes = zip(work_bam.references, work_bam.lengths) return sizes def convert_to_graph(bam_file, split, config, out_handle): cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"] if split: cl.append("-split") subprocess.check_call(cl, stdout=out_handle) def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): #This will be fine under Galaxy, but could use temp folder? size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) with open(size_file, "w") as out_handle: for chrom, size in chr_sizes: out_handle.write("%s\t%s\n" % (chrom, size)) try: cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file] subprocess.check_call(cl) finally: os.remove(size_file) return bw_file if __name__ == "__main__": parser = OptionParser() parser.add_option("-o", "--outfile", dest="outfile") parser.add_option("-s", "--split", action="store_true", dest="split") (options, args) = parser.parse_args() if len(args) not in [1, 2]: print "Incorrect arguments" print __doc__ sys.exit() kwargs = dict( outfile=options.outfile, split=options.split) main(*args, **kwargs)