annotate bam_to_bigwig.py @ 7:77114c36b8ab draft default tip

planemo upload
author marpiech
date Mon, 29 Aug 2016 07:28:10 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
1 #!/usr/bin/env python
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
2 """Convert BAM files to BigWig file format in a specified region.
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
3
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
4 Original version copyright Brad Chapman with revisions from Peter Cock
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
5 and ideas from Lance Parsons
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
6
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
7 Usage:
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
8 bam_to_bigwig.py <BAM file> [--outfile=<output file name>] [--split]
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
9
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
10 The --split argument is passed to bedtools genomecov
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
11
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
12 The script requires:
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
13 pysam (http://code.google.com/p/pysam/)
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
14 bedtools genomecov (http://code.google.com/p/bedtools/)
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
15 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
16 """
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
17 import os
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
18 import sys
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
19 import subprocess
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
20 import tempfile
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
21 from optparse import OptionParser
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
22 from contextlib import contextmanager, closing
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
23
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
24 import pysam
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
25
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
26
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
def main(bam_file, outfile=None, split=False):
    """Convert a BAM file into a BigWig coverage file.

    Reference names and lengths are read from the BAM header, coverage is
    written to a temporary sorted bedGraph file by convert_to_graph(), and
    convert_to_bigwig() turns that file into the final BigWig.

    Args:
        bam_file: Path of the input BAM file.
        outfile: Path of the BigWig file to write. Defaults to the BAM path
            with its extension replaced by ".bigwig".
        split: When true, "-split" is passed on to bedtools genomecov.

    Exits with status 1 (via sys.exit) when the input and output paths are
    the same file, or when no references could be read from the BAM header.
    """
    config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"],
                          "bedtools_genomeCoverageBed":
                          ["bedtools", "genomecov"]}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    if os.path.abspath(bam_file) == os.path.abspath(outfile):
        sys.stderr.write("Bad arguments, "
                         "input and output files are the same.\n")
        sys.exit(1)
    if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
        sys.stderr.write("Warning, output file already exists.\n")

    # Materialise the result so len() and the emptiness test below work
    # even when an iterator is handed back.
    sizes = list(get_sizes(bam_file, config))
    print("Have %i references" % len(sizes))
    if not sizes:
        sys.stderr.write("Problem reading BAM header.\n")
        sys.exit(1)

    # Use a temp file to avoid any possibility of not having write permission
    temp_handle = tempfile.NamedTemporaryFile(delete=False)
    temp_file = temp_handle.name
    try:
        # Both conversion steps live inside the try so that the temporary
        # graph file is removed no matter which of them fails.
        with closing(temp_handle):
            print("Calculating coverage...")
            convert_to_graph(bam_file, split, config, temp_handle)
        print("Converting %i MB graph file to bigwig..." %
              (os.path.getsize(temp_file) // (1024 * 1024)))
        # Can't pipe this as stdin due to converter design,
        # https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
        convert_to_bigwig(temp_file, sizes, config, outfile)
    finally:
        if os.path.isfile(temp_file):
            os.remove(temp_file)
    print("Done")
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
62
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
63
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
@contextmanager
def indexed_bam(bam_file, config):
    """Context manager yielding an open pysam reader for ``bam_file``.

    When "<bam_file>.bai" does not exist, an index is first built with
    pysam.index(). ``config`` is accepted but not used by this function.

    The reader is closed when the ``with`` block is left, including when
    the block raises.
    """
    if not os.path.exists(bam_file + ".bai"):
        pysam.index(bam_file)
    sam_reader = pysam.Samfile(bam_file, "rb")
    try:
        yield sam_reader
    finally:
        # Without the finally, an exception in the caller's with-body would
        # skip the close and leak the open BAM handle.
        sam_reader.close()
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
71
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
72
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
def get_sizes(bam_file, config):
    """Return the BAM header references as a list of (name, length) pairs.

    Args:
        bam_file: Path of the BAM file to inspect.
        config: Passed through unchanged to indexed_bam().
    """
    with indexed_bam(bam_file, config) as work_bam:
        # Build a real list: the result is measured with len(), tested for
        # emptiness and iterated, none of which a lazy zip object (as
        # returned on Python 3) supports reliably.
        sizes = list(zip(work_bam.references, work_bam.lengths))
    return sizes
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
77
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
78
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
def convert_to_graph(bam_file, split, config, out_handle):
    """Write sorted bedGraph coverage for ``bam_file`` to ``out_handle``.

    Runs the configured coverage command with "-ibam <bam_file> -bg" (plus
    "-split" when requested) and pipes its output through
    ``sort -k1,1 -k2,2n`` into ``out_handle``.

    Args:
        bam_file: Path of the input BAM file.
        split: When true, "-split" is appended to the coverage command.
        config: Dict whose ["program"]["bedtools_genomeCoverageBed"] entry
            is the argument list that starts the coverage command.
        out_handle: Open file object used as the sort process's stdout; it
            is handed to subprocess, so it needs a real file descriptor.

    Raises:
        subprocess.CalledProcessError: If either the coverage command or
            sort exits with a non-zero status.
    """
    # List concatenation creates a new list, so appending "-split" below
    # does not modify the shared config entry.
    cl = config["program"]["bedtools_genomeCoverageBed"] + \
        ["-ibam", bam_file, "-bg"]
    if split:
        cl.append("-split")
    sort_cl = ["sort", "-k1,1", "-k2,2n"]
    # LC_COLLATE=C is set for the sort process only, so that names are
    # ordered bytewise regardless of the user's locale.
    new_env = os.environ.copy()
    new_env['LC_COLLATE'] = 'C'
    p1 = subprocess.Popen(cl, stdout=subprocess.PIPE)
    try:
        p2 = subprocess.Popen(sort_cl,
                              env=new_env,
                              stdin=p1.stdout,
                              stdout=out_handle)
    except OSError:
        # sort could not be started: do not leave the producer running.
        p1.stdout.close()
        p1.terminate()
        p1.wait()
        raise
    # Drop our copy of the pipe so the producer sees a closed pipe if sort
    # exits early.
    p1.stdout.close()
    p2.communicate()
    # Reap the producer and surface failures instead of silently handing an
    # empty or truncated graph to the next step.
    p1.wait()
    if p1.returncode != 0:
        raise subprocess.CalledProcessError(p1.returncode, cl)
    if p2.returncode != 0:
        raise subprocess.CalledProcessError(p2.returncode, sort_cl)
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
93
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
94
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
    """Convert a bedGraph file to BigWig with the configured converter.

    The chromosome sizes are written to a private temporary file as
    "<name>\\t<size>" lines, then the converter is invoked as
    ``<converter> <bedgraph_file> <sizes file> <bw_file>``.

    Args:
        bedgraph_file: Path of the input bedGraph file.
        chr_sizes: Iterable of (chromosome name, size) pairs.
        config: Dict whose ["program"]["ucsc_bedGraphToBigWig"] entry is
            the argument list that starts the converter.
        bw_file: Path of the BigWig file to create.

    Returns:
        ``bw_file``, unchanged.

    Raises:
        subprocess.CalledProcessError: If the converter exits non-zero.
    """
    # A uniquely named temp file is used rather than "<output>-sizes.txt":
    # that fixed name would overwrite and then delete any existing file of
    # the same name sitting next to the output.
    size_handle = tempfile.NamedTemporaryFile(mode="w", suffix="-sizes.txt",
                                              delete=False)
    size_file = size_handle.name
    try:
        with size_handle:
            for chrom, size in chr_sizes:
                size_handle.write("%s\t%s\n" % (chrom, size))
        cl = config["program"]["ucsc_bedGraphToBigWig"] + \
            [bedgraph_file, size_file, bw_file]
        subprocess.check_call(cl)
    finally:
        # Writing happens inside the try as well, so a failure part-way
        # through still removes the sizes file.
        if os.path.exists(size_file):
            os.remove(size_file)
    return bw_file
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
108
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
109
77114c36b8ab planemo upload
marpiech
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: one positional BAM path plus the optional
    # --outfile / --split switches described in the module docstring.
    parser = OptionParser()
    parser.add_option("-o", "--outfile", dest="outfile")
    parser.add_option("-s", "--split", action="store_true", dest="split",
                      default=False)
    (options, args) = parser.parse_args()
    # Exactly one positional argument is valid. A second one used to be let
    # through and then collided with the outfile keyword in main().
    if len(args) != 1:
        # Report usage problems on stderr with a failing exit status so
        # that calling tools can detect the error.
        sys.stderr.write("Incorrect arguments\n")
        sys.stderr.write("%s\n" % __doc__)
        sys.exit(1)
    main(args[0], outfile=options.outfile, split=options.split)