comparison bigwig_outlier_bed.py @ 0:c71db540eb38 draft

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author fubar
date Mon, 01 Jul 2024 02:48:46 +0000
parents
children eb17eb8a3658
comparison
equal deleted inserted replaced
-1:000000000000 0:c71db540eb38
1 """
2 Ross Lazarus June 2024 for VGP
3
4 Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions.
5 Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
6 0.99 and 0.01 work well in testing with a minimum span of 10 bp.
7
8 Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately
9 Combining multiple references works but is silly because only display will rely on one reference so others will not be shown...
10
11 Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
12 takes about 95 seconds for a 17MB test wiggle
13
14 JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option.
15
16 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
17 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
18
19 """
20
21
22 import argparse
23 import numpy as np
24 import pybigtools
25 import sys
26
27
28 from pathlib import Path
29
30 class findOut():
31
32 def __init__(self, args):
33 self.bwnames=args.bigwig
34 self.bwlabels=args.bigwiglabels
35 self.bedwin=args.minwin
36 self.qlo=args.qlo
37 self.qhi=args.qhi
38 self.bedouthilo=args.bedouthilo
39 self.bedouthi=args.bedouthi
40 self.bedoutlo=args.bedoutlo
41 self.tableout = args.tableout
42 self.bedwin = args.minwin
43 self.qhi = args.qhi
44 self.qlo = args.qlo
45 self.makeBed()
46
47 def processVals(self, bw, isTop):
48 # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
49 if isTop:
50 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
51 else:
52 bwex = np.r_[False, bw <= self.bwbot, False]
53 bwexd = np.diff(bwex)
54 bwexdnz = bwexd.nonzero()[0]
55 bwregions = np.reshape(bwexdnz, (-1,2))
56 return bwregions
57
58 def writeBed(self, bed, bedfname):
59 """
60 potentially multiple
61 """
62 bed.sort()
63 beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed]
64 with open(bedfname, "w") as bedf:
65 bedf.write('\n'.join(beds))
66 bedf.write('\n')
67 print('Wrote %d bed regions to %s' % (len(bed), bedfname))
68
69 def makeBed(self):
70 bedhi = []
71 bedlo = []
72 bwlabels = self.bwlabels
73 bwnames = self.bwnames
74 print('bwnames=', bwnames, "bwlabs=", bwlabels)
75 for i, bwname in enumerate(bwnames):
76 bwlabel = bwlabels[i].replace(" ",'')
77 p = Path('in.bw')
78 p.symlink_to( bwname ) # required by pybigtools (!)
79 bwf = pybigtools.open('in.bw')
80 chrlist = bwf.chroms()
81 chrs = list(chrlist.keys())
82 chrs.sort()
83 restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
84 for chr in chrs:
85 bw = bwf.values(chr)
86 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
87 if self.qhi is not None:
88 self.bwtop = np.quantile(bw, self.qhi)
89 bwhi = self.processVals(bw, isTop=True)
90 for i, seg in enumerate(bwhi):
91 if seg[1] - seg[0] >= self.bedwin:
92 bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1))
93 if self.qlo is not None:
94 self.bwbot = np.quantile(bw, self.qlo)
95 bwlo = self.processVals(bw, isTop=False)
96 for i, seg in enumerate(bwlo):
97 if seg[1] - seg[0] >= self.bedwin:
98 bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1))
99 bwmean = np.mean(bw)
100 bwstd = np.std(bw)
101 bwmax = np.max(bw)
102 nrow = np.size(bw)
103 bwmin = np.min(bw)
104 restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot))
105 print('\n'.join(restab), '\n')
106 if self.tableout:
107 with open(self.tableout) as t:
108 t.write('\n'.join(restab))
109 t.write('\n')
110 if self.bedoutlo:
111 if self.qlo:
112 self.writeBed(bedlo, self.bedoutlo)
113 if self.bedouthi:
114 if self.qhi:
115 self.writeBed(bedhi, self.bedouthi)
116 if self.bedouthilo:
117 allbed = bedlo + bedhi
118 self.writeBed(allbed, self.bedouthilo)
119 return restab
120
121
122 if __name__ == "__main__":
123 parser = argparse.ArgumentParser()
124 a = parser.add_argument
125 a('-m', '--minwin',default=10, type=int)
126 a('-l', '--qlo',default=None, type=float)
127 a('-i', '--qhi',default=None, type=float)
128 a('-w', '--bigwig', nargs='+')
129 a('-n', '--bigwiglabels', nargs='+')
130 a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
131 a('-u', '--bedouthi', default=None, help="optional high only bed")
132 a('-b', '--bedoutlo', default=None, help="optional low only bed")
133 a('-t', '--tableout', default=None)
134 args = parser.parse_args()
135 print('args=', args)
136 if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
137 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined")
138 sys.exit(2)
139 if not (args.qlo or args.qhi):
140 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo")
141 sys.exit(2)
142 restab = findOut(args)
143 if args.tableout:
144 with open(args.tableout, 'w') as tout:
145 tout.write('\n'.join(restab))
146 tout.write('\n')
147
148