comparison bam_to_bigwig.py @ 7:77114c36b8ab draft default tip

planemo upload
author marpiech
date Mon, 29 Aug 2016 07:28:10 -0400
parents
children
comparison
equal deleted inserted replaced
6:97fb58d6c0cb 7:77114c36b8ab
1 #!/usr/bin/env python
2 """Convert BAM files to BigWig file format in a specified region.
3
4 Original version copyright Brad Chapman with revisions from Peter Cock
5 and ideas from Lance Parsons
6
7 Usage:
8 bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split]
9
10 The --split argument is passed to bedtools genomecov
11
12 The script requires:
13 pysam (http://code.google.com/p/pysam/)
14 bedtools genomecov (http://code.google.com/p/bedtools/)
15 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
16 """
17 import os
18 import sys
19 import subprocess
20 import tempfile
21 from optparse import OptionParser
22 from contextlib import contextmanager, closing
23
24 import pysam
25
26
27 def main(bam_file, outfile=None, split=False):
28 config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"],
29 "bedtools_genomeCoverageBed":
30 ["bedtools", "genomecov"]}}
31 if outfile is None:
32 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
33 if os.path.abspath(bam_file) == os.path.abspath(outfile):
34 sys.stderr.write("Bad arguments, "
35 "input and output files are the same.\n")
36 sys.exit(1)
37 if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
38 sys.stderr.write("Warning, output file already exists.\n")
39
40 sizes = get_sizes(bam_file, config)
41 print "Have %i references" % len(sizes)
42 if not sizes:
43 sys.stderr.write("Problem reading BAM header.\n")
44 sys.exit(1)
45
46 # Use a temp file to avoid any possiblity of not having write permission
47 temp_handle = tempfile.NamedTemporaryFile(delete=False)
48 temp_file = temp_handle.name
49 with closing(temp_handle):
50 print "Calculating coverage..."
51 convert_to_graph(bam_file, split, config, temp_handle)
52 try:
53 print("Converting %i MB graph file to bigwig..." %
54 (os.path.getsize(temp_file) // (1024 * 1024)))
55 # Can't pipe this as stdin due to converter design,
56 # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
57 convert_to_bigwig(temp_file, sizes, config, outfile)
58 finally:
59 if os.path.isfile(temp_file):
60 os.remove(temp_file)
61 print "Done"
62
63
64 @contextmanager
65 def indexed_bam(bam_file, config):
66 if not os.path.exists(bam_file + ".bai"):
67 pysam.index(bam_file)
68 sam_reader = pysam.Samfile(bam_file, "rb")
69 yield sam_reader
70 sam_reader.close()
71
72
73 def get_sizes(bam_file, config):
74 with indexed_bam(bam_file, config) as work_bam:
75 sizes = zip(work_bam.references, work_bam.lengths)
76 return sizes
77
78
79 def convert_to_graph(bam_file, split, config, out_handle):
80 cl = config["program"]["bedtools_genomeCoverageBed"] + \
81 ["-ibam", bam_file, "-bg"]
82 if split:
83 cl.append("-split")
84 new_env = os.environ.copy()
85 new_env['LC_COLLATE'] = 'C'
86 p1 = subprocess.Popen(cl, stdout=subprocess.PIPE)
87 p2 = subprocess.Popen(["sort", "-k1,1", "-k2,2n"],
88 env=new_env,
89 stdin=p1.stdout,
90 stdout=out_handle)
91 p1.stdout.close()
92 p2.communicate()
93
94
95 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
96 # This will be fine under Galaxy, but could use temp folder?
97 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
98 with open(size_file, "w") as out_handle:
99 for chrom, size in chr_sizes:
100 out_handle.write("%s\t%s\n" % (chrom, size))
101 try:
102 cl = config["program"]["ucsc_bedGraphToBigWig"] + \
103 [bedgraph_file, size_file, bw_file]
104 subprocess.check_call(cl)
105 finally:
106 os.remove(size_file)
107 return bw_file
108
109
110 if __name__ == "__main__":
111 parser = OptionParser()
112 parser.add_option("-o", "--outfile", dest="outfile")
113 parser.add_option("-s", "--split", action="store_true", dest="split")
114 (options, args) = parser.parse_args()
115 if len(args) not in [1, 2]:
116 print "Incorrect arguments"
117 print __doc__
118 sys.exit()
119 kwargs = dict(
120 outfile=options.outfile,
121 split=options.split)
122 main(*args, **kwargs)