Mercurial > repos > marpiech > norwich_tools
comparison bam_to_bigwig.py @ 7:77114c36b8ab draft default tip
planemo upload
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 07:28:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:97fb58d6c0cb | 7:77114c36b8ab |
---|---|
1 #!/usr/bin/env python | |
2 """Convert BAM files to BigWig file format in a specified region. | |
3 | |
4 Original version copyright Brad Chapman with revisions from Peter Cock | |
5 and ideas from Lance Parsons | |
6 | |
7 Usage: | |
8 bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split] | |
9 | |
10 The --split argument is passed to bedtools genomecov | |
11 | |
12 The script requires: | |
13 pysam (http://code.google.com/p/pysam/) | |
14 bedtools genomecov (http://code.google.com/p/bedtools/) | |
15 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) | |
16 """ | |
17 import os | |
18 import sys | |
19 import subprocess | |
20 import tempfile | |
21 from optparse import OptionParser | |
22 from contextlib import contextmanager, closing | |
23 | |
24 import pysam | |
25 | |
26 | |
27 def main(bam_file, outfile=None, split=False): | |
28 config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"], | |
29 "bedtools_genomeCoverageBed": | |
30 ["bedtools", "genomecov"]}} | |
31 if outfile is None: | |
32 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] | |
33 if os.path.abspath(bam_file) == os.path.abspath(outfile): | |
34 sys.stderr.write("Bad arguments, " | |
35 "input and output files are the same.\n") | |
36 sys.exit(1) | |
37 if os.path.exists(outfile) and os.path.getsize(outfile) > 0: | |
38 sys.stderr.write("Warning, output file already exists.\n") | |
39 | |
40 sizes = get_sizes(bam_file, config) | |
41 print "Have %i references" % len(sizes) | |
42 if not sizes: | |
43 sys.stderr.write("Problem reading BAM header.\n") | |
44 sys.exit(1) | |
45 | |
46 # Use a temp file to avoid any possiblity of not having write permission | |
47 temp_handle = tempfile.NamedTemporaryFile(delete=False) | |
48 temp_file = temp_handle.name | |
49 with closing(temp_handle): | |
50 print "Calculating coverage..." | |
51 convert_to_graph(bam_file, split, config, temp_handle) | |
52 try: | |
53 print("Converting %i MB graph file to bigwig..." % | |
54 (os.path.getsize(temp_file) // (1024 * 1024))) | |
55 # Can't pipe this as stdin due to converter design, | |
56 # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html | |
57 convert_to_bigwig(temp_file, sizes, config, outfile) | |
58 finally: | |
59 if os.path.isfile(temp_file): | |
60 os.remove(temp_file) | |
61 print "Done" | |
62 | |
63 | |
64 @contextmanager | |
65 def indexed_bam(bam_file, config): | |
66 if not os.path.exists(bam_file + ".bai"): | |
67 pysam.index(bam_file) | |
68 sam_reader = pysam.Samfile(bam_file, "rb") | |
69 yield sam_reader | |
70 sam_reader.close() | |
71 | |
72 | |
73 def get_sizes(bam_file, config): | |
74 with indexed_bam(bam_file, config) as work_bam: | |
75 sizes = zip(work_bam.references, work_bam.lengths) | |
76 return sizes | |
77 | |
78 | |
79 def convert_to_graph(bam_file, split, config, out_handle): | |
80 cl = config["program"]["bedtools_genomeCoverageBed"] + \ | |
81 ["-ibam", bam_file, "-bg"] | |
82 if split: | |
83 cl.append("-split") | |
84 new_env = os.environ.copy() | |
85 new_env['LC_COLLATE'] = 'C' | |
86 p1 = subprocess.Popen(cl, stdout=subprocess.PIPE) | |
87 p2 = subprocess.Popen(["sort", "-k1,1", "-k2,2n"], | |
88 env=new_env, | |
89 stdin=p1.stdout, | |
90 stdout=out_handle) | |
91 p1.stdout.close() | |
92 p2.communicate() | |
93 | |
94 | |
95 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): | |
96 # This will be fine under Galaxy, but could use temp folder? | |
97 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) | |
98 with open(size_file, "w") as out_handle: | |
99 for chrom, size in chr_sizes: | |
100 out_handle.write("%s\t%s\n" % (chrom, size)) | |
101 try: | |
102 cl = config["program"]["ucsc_bedGraphToBigWig"] + \ | |
103 [bedgraph_file, size_file, bw_file] | |
104 subprocess.check_call(cl) | |
105 finally: | |
106 os.remove(size_file) | |
107 return bw_file | |
108 | |
109 | |
110 if __name__ == "__main__": | |
111 parser = OptionParser() | |
112 parser.add_option("-o", "--outfile", dest="outfile") | |
113 parser.add_option("-s", "--split", action="store_true", dest="split") | |
114 (options, args) = parser.parse_args() | |
115 if len(args) not in [1, 2]: | |
116 print "Incorrect arguments" | |
117 print __doc__ | |
118 sys.exit() | |
119 kwargs = dict( | |
120 outfile=options.outfile, | |
121 split=options.split) | |
122 main(*args, **kwargs) |