7
|
1 #!/usr/bin/env python
|
|
2 """Convert BAM files to BigWig file format in a specified region.
|
|
3
|
|
4 Original version copyright Brad Chapman with revisions from Peter Cock
|
|
5 and ideas from Lance Parsons
|
|
6
|
|
7 Usage:
|
|
8 bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split]
|
|
9
|
|
10 The --split argument is passed to bedtools genomecov
|
|
11
|
|
12 The script requires:
|
|
13 pysam (http://code.google.com/p/pysam/)
|
|
14 bedtools genomecov (http://code.google.com/p/bedtools/)
|
|
15 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
|
|
16 """
|
|
17 import os
|
|
18 import sys
|
|
19 import subprocess
|
|
20 import tempfile
|
|
21 from optparse import OptionParser
|
|
22 from contextlib import contextmanager, closing
|
|
23
|
|
24 import pysam
|
|
25
|
|
26
|
|
27 def main(bam_file, outfile=None, split=False):
|
|
28 config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"],
|
|
29 "bedtools_genomeCoverageBed":
|
|
30 ["bedtools", "genomecov"]}}
|
|
31 if outfile is None:
|
|
32 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
|
|
33 if os.path.abspath(bam_file) == os.path.abspath(outfile):
|
|
34 sys.stderr.write("Bad arguments, "
|
|
35 "input and output files are the same.\n")
|
|
36 sys.exit(1)
|
|
37 if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
|
|
38 sys.stderr.write("Warning, output file already exists.\n")
|
|
39
|
|
40 sizes = get_sizes(bam_file, config)
|
|
41 print "Have %i references" % len(sizes)
|
|
42 if not sizes:
|
|
43 sys.stderr.write("Problem reading BAM header.\n")
|
|
44 sys.exit(1)
|
|
45
|
|
46 # Use a temp file to avoid any possiblity of not having write permission
|
|
47 temp_handle = tempfile.NamedTemporaryFile(delete=False)
|
|
48 temp_file = temp_handle.name
|
|
49 with closing(temp_handle):
|
|
50 print "Calculating coverage..."
|
|
51 convert_to_graph(bam_file, split, config, temp_handle)
|
|
52 try:
|
|
53 print("Converting %i MB graph file to bigwig..." %
|
|
54 (os.path.getsize(temp_file) // (1024 * 1024)))
|
|
55 # Can't pipe this as stdin due to converter design,
|
|
56 # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
|
|
57 convert_to_bigwig(temp_file, sizes, config, outfile)
|
|
58 finally:
|
|
59 if os.path.isfile(temp_file):
|
|
60 os.remove(temp_file)
|
|
61 print "Done"
|
|
62
|
|
63
|
|
64 @contextmanager
|
|
65 def indexed_bam(bam_file, config):
|
|
66 if not os.path.exists(bam_file + ".bai"):
|
|
67 pysam.index(bam_file)
|
|
68 sam_reader = pysam.Samfile(bam_file, "rb")
|
|
69 yield sam_reader
|
|
70 sam_reader.close()
|
|
71
|
|
72
|
|
73 def get_sizes(bam_file, config):
|
|
74 with indexed_bam(bam_file, config) as work_bam:
|
|
75 sizes = zip(work_bam.references, work_bam.lengths)
|
|
76 return sizes
|
|
77
|
|
78
|
|
79 def convert_to_graph(bam_file, split, config, out_handle):
|
|
80 cl = config["program"]["bedtools_genomeCoverageBed"] + \
|
|
81 ["-ibam", bam_file, "-bg"]
|
|
82 if split:
|
|
83 cl.append("-split")
|
|
84 new_env = os.environ.copy()
|
|
85 new_env['LC_COLLATE'] = 'C'
|
|
86 p1 = subprocess.Popen(cl, stdout=subprocess.PIPE)
|
|
87 p2 = subprocess.Popen(["sort", "-k1,1", "-k2,2n"],
|
|
88 env=new_env,
|
|
89 stdin=p1.stdout,
|
|
90 stdout=out_handle)
|
|
91 p1.stdout.close()
|
|
92 p2.communicate()
|
|
93
|
|
94
|
|
95 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
|
|
96 # This will be fine under Galaxy, but could use temp folder?
|
|
97 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
|
|
98 with open(size_file, "w") as out_handle:
|
|
99 for chrom, size in chr_sizes:
|
|
100 out_handle.write("%s\t%s\n" % (chrom, size))
|
|
101 try:
|
|
102 cl = config["program"]["ucsc_bedGraphToBigWig"] + \
|
|
103 [bedgraph_file, size_file, bw_file]
|
|
104 subprocess.check_call(cl)
|
|
105 finally:
|
|
106 os.remove(size_file)
|
|
107 return bw_file
|
|
108
|
|
109
|
|
110 if __name__ == "__main__":
|
|
111 parser = OptionParser()
|
|
112 parser.add_option("-o", "--outfile", dest="outfile")
|
|
113 parser.add_option("-s", "--split", action="store_true", dest="split")
|
|
114 (options, args) = parser.parse_args()
|
|
115 if len(args) not in [1, 2]:
|
|
116 print "Incorrect arguments"
|
|
117 print __doc__
|
|
118 sys.exit()
|
|
119 kwargs = dict(
|
|
120 outfile=options.outfile,
|
|
121 split=options.split)
|
|
122 main(*args, **kwargs)
|