Mercurial > repos > fubar > bigwig_outlier_bed
comparison bigwigtobed.python.txt @ 0:c71db540eb38 draft
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author | fubar |
---|---|
date | Mon, 01 Jul 2024 02:48:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c71db540eb38 |
---|---|
1 #raw | |
2 """ | |
3 Ross Lazarus June 2024 for VGP | |
4 Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. | |
5 Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. | |
6 0.99 and 0.01 work well in testing with a minimum span of 10 bp. | |
7 Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately | |
8 Combining multiple references works but is silly because only display will rely on one reference so others will not be shown... | |
9 Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
10 takes about 95 seconds for a 17MB test wiggle | |
11 JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. | |
12 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores | |
13 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). | |
14 """ | |
15 import argparse | |
16 import numpy as np | |
17 import pybigtools | |
18 import sys | |
19 from pathlib import Path | |
20 class findOut(): | |
21 def __init__(self, args): | |
22 self.bwnames=args.bigwig | |
23 self.bwlabels=args.bigwiglabels | |
24 self.bedwin=args.minwin | |
25 self.qlo=args.qlo | |
26 self.qhi=args.qhi | |
27 self.bedouthilo=args.bedouthilo | |
28 self.bedouthi=args.bedouthi | |
29 self.bedoutlo=args.bedoutlo | |
30 self.tableout = args.tableout | |
31 self.bedwin = args.minwin | |
32 self.qhi = args.qhi | |
33 self.qlo = args.qlo | |
34 self.makeBed() | |
35 def processVals(self, bw, isTop): | |
36 # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
37 if isTop: | |
38 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s | |
39 else: | |
40 bwex = np.r_[False, bw <= self.bwbot, False] | |
41 bwexd = np.diff(bwex) | |
42 bwexdnz = bwexd.nonzero()[0] | |
43 bwregions = np.reshape(bwexdnz, (-1,2)) | |
44 return bwregions | |
45 def writeBed(self, bed, bedfname): | |
46 """ | |
47 potentially multiple | |
48 """ | |
49 bed.sort() | |
50 beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] | |
51 with open(bedfname, "w") as bedf: | |
52 bedf.write('\n'.join(beds)) | |
53 bedf.write('\n') | |
54 print('Wrote %d bed regions to %s' % (len(bed), bedfname)) | |
55 def makeBed(self): | |
56 bedhi = [] | |
57 bedlo = [] | |
58 bwlabels = self.bwlabels | |
59 bwnames = self.bwnames | |
60 print('bwnames=', bwnames, "bwlabs=", bwlabels) | |
61 for i, bwname in enumerate(bwnames): | |
62 bwlabel = bwlabels[i].replace(" ",'') | |
63 p = Path('in.bw') | |
64 p.symlink_to( bwname ) # required by pybigtools (!) | |
65 bwf = pybigtools.open('in.bw') | |
66 chrlist = bwf.chroms() | |
67 chrs = list(chrlist.keys()) | |
68 chrs.sort() | |
69 restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] | |
70 for chr in chrs: | |
71 bw = bwf.values(chr) | |
72 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered | |
73 if self.qhi is not None: | |
74 self.bwtop = np.quantile(bw, self.qhi) | |
75 bwhi = self.processVals(bw, isTop=True) | |
76 for i, seg in enumerate(bwhi): | |
77 if seg[1] - seg[0] >= self.bedwin: | |
78 bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) | |
79 if self.qlo is not None: | |
80 self.bwbot = np.quantile(bw, self.qlo) | |
81 bwlo = self.processVals(bw, isTop=False) | |
82 for i, seg in enumerate(bwlo): | |
83 if seg[1] - seg[0] >= self.bedwin: | |
84 bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) | |
85 bwmean = np.mean(bw) | |
86 bwstd = np.std(bw) | |
87 bwmax = np.max(bw) | |
88 nrow = np.size(bw) | |
89 bwmin = np.min(bw) | |
90 restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) | |
91 print('\n'.join(restab), '\n') | |
92 if self.tableout: | |
93 with open(self.tableout) as t: | |
94 t.write('\n'.join(restab)) | |
95 t.write('\n') | |
96 if self.bedoutlo: | |
97 if self.qlo: | |
98 self.writeBed(bedlo, self.bedoutlo) | |
99 if self.bedouthi: | |
100 if self.qhi: | |
101 self.writeBed(bedhi, self.bedouthi) | |
102 if self.bedouthilo: | |
103 allbed = bedlo + bedhi | |
104 self.writeBed(allbed, self.bedouthilo) | |
105 return restab | |
106 if __name__ == "__main__": | |
107 parser = argparse.ArgumentParser() | |
108 a = parser.add_argument | |
109 a('-m', '--minwin',default=10, type=int) | |
110 a('-l', '--qlo',default=None, type=float) | |
111 a('-i', '--qhi',default=None, type=float) | |
112 a('-w', '--bigwig', nargs='+') | |
113 a('-n', '--bigwiglabels', nargs='+') | |
114 a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") | |
115 a('-u', '--bedouthi', default=None, help="optional high only bed") | |
116 a('-b', '--bedoutlo', default=None, help="optional low only bed") | |
117 a('-t', '--tableout', default=None) | |
118 args = parser.parse_args() | |
119 print('args=', args) | |
120 if not (args.bedouthilo or args.bedouthi or args.bedoutlo): | |
121 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") | |
122 sys.exit(2) | |
123 if not (args.qlo or args.qhi): | |
124 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") | |
125 sys.exit(2) | |
126 restab = findOut(args) | |
127 if args.tableout: | |
128 with open(args.tableout, 'w') as tout: | |
129 tout.write('\n'.join(restab)) | |
130 tout.write('\n') | |
131 #end raw |