Mercurial > repos > marpiech > norwich_tools
comparison bam_to_bigwig.py @ 7:77114c36b8ab draft default tip
planemo upload
| author | marpiech |
|---|---|
| date | Mon, 29 Aug 2016 07:28:10 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 6:97fb58d6c0cb | 7:77114c36b8ab |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """Convert BAM files to BigWig file format in a specified region. | |
| 3 | |
| 4 Original version copyright Brad Chapman with revisions from Peter Cock | |
| 5 and ideas from Lance Parsons | |
| 6 | |
| 7 Usage: | |
| 8 bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split] | |
| 9 | |
| 10 The --split argument is passed to bedtools genomecov | |
| 11 | |
| 12 The script requires: | |
| 13 pysam (http://code.google.com/p/pysam/) | |
| 14 bedtools genomecov (http://code.google.com/p/bedtools/) | |
| 15 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) | |
| 16 """ | |
| 17 import os | |
| 18 import sys | |
| 19 import subprocess | |
| 20 import tempfile | |
| 21 from optparse import OptionParser | |
| 22 from contextlib import contextmanager, closing | |
| 23 | |
| 24 import pysam | |
| 25 | |
| 26 | |
| 27 def main(bam_file, outfile=None, split=False): | |
| 28 config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"], | |
| 29 "bedtools_genomeCoverageBed": | |
| 30 ["bedtools", "genomecov"]}} | |
| 31 if outfile is None: | |
| 32 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] | |
| 33 if os.path.abspath(bam_file) == os.path.abspath(outfile): | |
| 34 sys.stderr.write("Bad arguments, " | |
| 35 "input and output files are the same.\n") | |
| 36 sys.exit(1) | |
| 37 if os.path.exists(outfile) and os.path.getsize(outfile) > 0: | |
| 38 sys.stderr.write("Warning, output file already exists.\n") | |
| 39 | |
| 40 sizes = get_sizes(bam_file, config) | |
| 41 print "Have %i references" % len(sizes) | |
| 42 if not sizes: | |
| 43 sys.stderr.write("Problem reading BAM header.\n") | |
| 44 sys.exit(1) | |
| 45 | |
| 46 # Use a temp file to avoid any possiblity of not having write permission | |
| 47 temp_handle = tempfile.NamedTemporaryFile(delete=False) | |
| 48 temp_file = temp_handle.name | |
| 49 with closing(temp_handle): | |
| 50 print "Calculating coverage..." | |
| 51 convert_to_graph(bam_file, split, config, temp_handle) | |
| 52 try: | |
| 53 print("Converting %i MB graph file to bigwig..." % | |
| 54 (os.path.getsize(temp_file) // (1024 * 1024))) | |
| 55 # Can't pipe this as stdin due to converter design, | |
| 56 # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html | |
| 57 convert_to_bigwig(temp_file, sizes, config, outfile) | |
| 58 finally: | |
| 59 if os.path.isfile(temp_file): | |
| 60 os.remove(temp_file) | |
| 61 print "Done" | |
| 62 | |
| 63 | |
| 64 @contextmanager | |
| 65 def indexed_bam(bam_file, config): | |
| 66 if not os.path.exists(bam_file + ".bai"): | |
| 67 pysam.index(bam_file) | |
| 68 sam_reader = pysam.Samfile(bam_file, "rb") | |
| 69 yield sam_reader | |
| 70 sam_reader.close() | |
| 71 | |
| 72 | |
| 73 def get_sizes(bam_file, config): | |
| 74 with indexed_bam(bam_file, config) as work_bam: | |
| 75 sizes = zip(work_bam.references, work_bam.lengths) | |
| 76 return sizes | |
| 77 | |
| 78 | |
| 79 def convert_to_graph(bam_file, split, config, out_handle): | |
| 80 cl = config["program"]["bedtools_genomeCoverageBed"] + \ | |
| 81 ["-ibam", bam_file, "-bg"] | |
| 82 if split: | |
| 83 cl.append("-split") | |
| 84 new_env = os.environ.copy() | |
| 85 new_env['LC_COLLATE'] = 'C' | |
| 86 p1 = subprocess.Popen(cl, stdout=subprocess.PIPE) | |
| 87 p2 = subprocess.Popen(["sort", "-k1,1", "-k2,2n"], | |
| 88 env=new_env, | |
| 89 stdin=p1.stdout, | |
| 90 stdout=out_handle) | |
| 91 p1.stdout.close() | |
| 92 p2.communicate() | |
| 93 | |
| 94 | |
| 95 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): | |
| 96 # This will be fine under Galaxy, but could use temp folder? | |
| 97 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) | |
| 98 with open(size_file, "w") as out_handle: | |
| 99 for chrom, size in chr_sizes: | |
| 100 out_handle.write("%s\t%s\n" % (chrom, size)) | |
| 101 try: | |
| 102 cl = config["program"]["ucsc_bedGraphToBigWig"] + \ | |
| 103 [bedgraph_file, size_file, bw_file] | |
| 104 subprocess.check_call(cl) | |
| 105 finally: | |
| 106 os.remove(size_file) | |
| 107 return bw_file | |
| 108 | |
| 109 | |
| 110 if __name__ == "__main__": | |
| 111 parser = OptionParser() | |
| 112 parser.add_option("-o", "--outfile", dest="outfile") | |
| 113 parser.add_option("-s", "--split", action="store_true", dest="split") | |
| 114 (options, args) = parser.parse_args() | |
| 115 if len(args) not in [1, 2]: | |
| 116 print "Incorrect arguments" | |
| 117 print __doc__ | |
| 118 sys.exit() | |
| 119 kwargs = dict( | |
| 120 outfile=options.outfile, | |
| 121 split=options.split) | |
| 122 main(*args, **kwargs) |
