comparison bigwigtobed.python.txt @ 0:c71db540eb38 draft

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author fubar
date Mon, 01 Jul 2024 02:48:46 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c71db540eb38
1 #raw
2 """
3 Ross Lazarus June 2024 for VGP
4 Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions.
5 Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
6 0.99 and 0.01 work well in testing with a minimum span of 10 bp.
7 Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately
8 Combining multiple references works but is silly because only display will rely on one reference so others will not be shown...
9 Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
10 takes about 95 seconds for a 17MB test wiggle
11 JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option.
12 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
13 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
14 """
15 import argparse
16 import numpy as np
17 import pybigtools
18 import sys
19 from pathlib import Path
20 class findOut():
21 def __init__(self, args):
22 self.bwnames=args.bigwig
23 self.bwlabels=args.bigwiglabels
24 self.bedwin=args.minwin
25 self.qlo=args.qlo
26 self.qhi=args.qhi
27 self.bedouthilo=args.bedouthilo
28 self.bedouthi=args.bedouthi
29 self.bedoutlo=args.bedoutlo
30 self.tableout = args.tableout
31 self.bedwin = args.minwin
32 self.qhi = args.qhi
33 self.qlo = args.qlo
34 self.makeBed()
35 def processVals(self, bw, isTop):
36 # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
37 if isTop:
38 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
39 else:
40 bwex = np.r_[False, bw <= self.bwbot, False]
41 bwexd = np.diff(bwex)
42 bwexdnz = bwexd.nonzero()[0]
43 bwregions = np.reshape(bwexdnz, (-1,2))
44 return bwregions
45 def writeBed(self, bed, bedfname):
46 """
47 potentially multiple
48 """
49 bed.sort()
50 beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed]
51 with open(bedfname, "w") as bedf:
52 bedf.write('\n'.join(beds))
53 bedf.write('\n')
54 print('Wrote %d bed regions to %s' % (len(bed), bedfname))
55 def makeBed(self):
56 bedhi = []
57 bedlo = []
58 bwlabels = self.bwlabels
59 bwnames = self.bwnames
60 print('bwnames=', bwnames, "bwlabs=", bwlabels)
61 for i, bwname in enumerate(bwnames):
62 bwlabel = bwlabels[i].replace(" ",'')
63 p = Path('in.bw')
64 p.symlink_to( bwname ) # required by pybigtools (!)
65 bwf = pybigtools.open('in.bw')
66 chrlist = bwf.chroms()
67 chrs = list(chrlist.keys())
68 chrs.sort()
69 restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
70 for chr in chrs:
71 bw = bwf.values(chr)
72 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
73 if self.qhi is not None:
74 self.bwtop = np.quantile(bw, self.qhi)
75 bwhi = self.processVals(bw, isTop=True)
76 for i, seg in enumerate(bwhi):
77 if seg[1] - seg[0] >= self.bedwin:
78 bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1))
79 if self.qlo is not None:
80 self.bwbot = np.quantile(bw, self.qlo)
81 bwlo = self.processVals(bw, isTop=False)
82 for i, seg in enumerate(bwlo):
83 if seg[1] - seg[0] >= self.bedwin:
84 bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1))
85 bwmean = np.mean(bw)
86 bwstd = np.std(bw)
87 bwmax = np.max(bw)
88 nrow = np.size(bw)
89 bwmin = np.min(bw)
90 restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot))
91 print('\n'.join(restab), '\n')
92 if self.tableout:
93 with open(self.tableout) as t:
94 t.write('\n'.join(restab))
95 t.write('\n')
96 if self.bedoutlo:
97 if self.qlo:
98 self.writeBed(bedlo, self.bedoutlo)
99 if self.bedouthi:
100 if self.qhi:
101 self.writeBed(bedhi, self.bedouthi)
102 if self.bedouthilo:
103 allbed = bedlo + bedhi
104 self.writeBed(allbed, self.bedouthilo)
105 return restab
106 if __name__ == "__main__":
107 parser = argparse.ArgumentParser()
108 a = parser.add_argument
109 a('-m', '--minwin',default=10, type=int)
110 a('-l', '--qlo',default=None, type=float)
111 a('-i', '--qhi',default=None, type=float)
112 a('-w', '--bigwig', nargs='+')
113 a('-n', '--bigwiglabels', nargs='+')
114 a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
115 a('-u', '--bedouthi', default=None, help="optional high only bed")
116 a('-b', '--bedoutlo', default=None, help="optional low only bed")
117 a('-t', '--tableout', default=None)
118 args = parser.parse_args()
119 print('args=', args)
120 if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
121 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined")
122 sys.exit(2)
123 if not (args.qlo or args.qhi):
124 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo")
125 sys.exit(2)
126 restab = findOut(args)
127 if args.tableout:
128 with open(args.tableout, 'w') as tout:
129 tout.write('\n'.join(restab))
130 tout.write('\n')
131 #end raw