Mercurial > repos > fubar > bigwig_outlier_bed
comparison bigwig_outlier_bed.py @ 0:c71db540eb38 draft
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author | fubar |
---|---|
date | Mon, 01 Jul 2024 02:48:46 +0000 |
parents | |
children | eb17eb8a3658 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c71db540eb38 |
---|---|
1 """ | |
2 Ross Lazarus June 2024 for VGP | |
3 | |
4 Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. | |
5 Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. | |
6 0.99 and 0.01 work well in testing with a minimum span of 10 bp. | |
7 | |
8 Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately | |
9 Combining multiple references works but is silly because only display will rely on one reference so others will not be shown... | |
10 | |
11 Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
12 takes about 95 seconds for a 17MB test wiggle | |
13 | |
14 JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. | |
15 | |
16 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores | |
17 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). | |
18 | |
19 """ | |
20 | |
21 | |
22 import argparse | |
23 import numpy as np | |
24 import pybigtools | |
25 import sys | |
26 | |
27 | |
28 from pathlib import Path | |
29 | |
30 class findOut(): | |
31 | |
32 def __init__(self, args): | |
33 self.bwnames=args.bigwig | |
34 self.bwlabels=args.bigwiglabels | |
35 self.bedwin=args.minwin | |
36 self.qlo=args.qlo | |
37 self.qhi=args.qhi | |
38 self.bedouthilo=args.bedouthilo | |
39 self.bedouthi=args.bedouthi | |
40 self.bedoutlo=args.bedoutlo | |
41 self.tableout = args.tableout | |
42 self.bedwin = args.minwin | |
43 self.qhi = args.qhi | |
44 self.qlo = args.qlo | |
45 self.makeBed() | |
46 | |
47 def processVals(self, bw, isTop): | |
48 # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
49 if isTop: | |
50 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s | |
51 else: | |
52 bwex = np.r_[False, bw <= self.bwbot, False] | |
53 bwexd = np.diff(bwex) | |
54 bwexdnz = bwexd.nonzero()[0] | |
55 bwregions = np.reshape(bwexdnz, (-1,2)) | |
56 return bwregions | |
57 | |
58 def writeBed(self, bed, bedfname): | |
59 """ | |
60 potentially multiple | |
61 """ | |
62 bed.sort() | |
63 beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] | |
64 with open(bedfname, "w") as bedf: | |
65 bedf.write('\n'.join(beds)) | |
66 bedf.write('\n') | |
67 print('Wrote %d bed regions to %s' % (len(bed), bedfname)) | |
68 | |
69 def makeBed(self): | |
70 bedhi = [] | |
71 bedlo = [] | |
72 bwlabels = self.bwlabels | |
73 bwnames = self.bwnames | |
74 print('bwnames=', bwnames, "bwlabs=", bwlabels) | |
75 for i, bwname in enumerate(bwnames): | |
76 bwlabel = bwlabels[i].replace(" ",'') | |
77 p = Path('in.bw') | |
78 p.symlink_to( bwname ) # required by pybigtools (!) | |
79 bwf = pybigtools.open('in.bw') | |
80 chrlist = bwf.chroms() | |
81 chrs = list(chrlist.keys()) | |
82 chrs.sort() | |
83 restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] | |
84 for chr in chrs: | |
85 bw = bwf.values(chr) | |
86 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered | |
87 if self.qhi is not None: | |
88 self.bwtop = np.quantile(bw, self.qhi) | |
89 bwhi = self.processVals(bw, isTop=True) | |
90 for i, seg in enumerate(bwhi): | |
91 if seg[1] - seg[0] >= self.bedwin: | |
92 bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) | |
93 if self.qlo is not None: | |
94 self.bwbot = np.quantile(bw, self.qlo) | |
95 bwlo = self.processVals(bw, isTop=False) | |
96 for i, seg in enumerate(bwlo): | |
97 if seg[1] - seg[0] >= self.bedwin: | |
98 bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) | |
99 bwmean = np.mean(bw) | |
100 bwstd = np.std(bw) | |
101 bwmax = np.max(bw) | |
102 nrow = np.size(bw) | |
103 bwmin = np.min(bw) | |
104 restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) | |
105 print('\n'.join(restab), '\n') | |
106 if self.tableout: | |
107 with open(self.tableout) as t: | |
108 t.write('\n'.join(restab)) | |
109 t.write('\n') | |
110 if self.bedoutlo: | |
111 if self.qlo: | |
112 self.writeBed(bedlo, self.bedoutlo) | |
113 if self.bedouthi: | |
114 if self.qhi: | |
115 self.writeBed(bedhi, self.bedouthi) | |
116 if self.bedouthilo: | |
117 allbed = bedlo + bedhi | |
118 self.writeBed(allbed, self.bedouthilo) | |
119 return restab | |
120 | |
121 | |
122 if __name__ == "__main__": | |
123 parser = argparse.ArgumentParser() | |
124 a = parser.add_argument | |
125 a('-m', '--minwin',default=10, type=int) | |
126 a('-l', '--qlo',default=None, type=float) | |
127 a('-i', '--qhi',default=None, type=float) | |
128 a('-w', '--bigwig', nargs='+') | |
129 a('-n', '--bigwiglabels', nargs='+') | |
130 a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") | |
131 a('-u', '--bedouthi', default=None, help="optional high only bed") | |
132 a('-b', '--bedoutlo', default=None, help="optional low only bed") | |
133 a('-t', '--tableout', default=None) | |
134 args = parser.parse_args() | |
135 print('args=', args) | |
136 if not (args.bedouthilo or args.bedouthi or args.bedoutlo): | |
137 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") | |
138 sys.exit(2) | |
139 if not (args.qlo or args.qhi): | |
140 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") | |
141 sys.exit(2) | |
142 restab = findOut(args) | |
143 if args.tableout: | |
144 with open(args.tableout, 'w') as tout: | |
145 tout.write('\n'.join(restab)) | |
146 tout.write('\n') | |
147 | |
148 |