comparison bigwig_outlier_bed.py @ 2:61946b8bd43b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit 3cce4c76a60b9353298fdcf759e893b8fcdfaa77
author iuc
date Thu, 25 Jul 2024 14:38:34 +0000
parents 8377a6abb4da
children 00b3da7776a0
comparison
equal deleted inserted replaced
1:8377a6abb4da 2:61946b8bd43b
11 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores 11 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
12 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). 12 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
13 """ 13 """
14 14
15 import argparse 15 import argparse
16 import copy
17 import os 16 import os
18 import sys 17 import sys
19 from pathlib import Path 18 from pathlib import Path
20 19
21 import numpy as np 20 import numpy as np
22 import pybigtools 21 import pybigtools
22
23
24 class asciihist:
25
26 def __init__(
27 self,
28 data,
29 bins=10,
30 minmax=None,
31 str_tag="",
32 scale_output=80,
33 generate_only=True,
34 ):
35 """
36 https://gist.github.com/bgbg/608d9ef4fd75032731651257fe67fc81
37 Create an ASCII histogram from an interable of numbers.
38 Author: Boris Gorelik boris@gorelik.net. based on http://econpy.googlecode.com/svn/trunk/pytrix/pytrix.py
39 License: MIT
40 """
41 self.data = data
42 self.minmax = minmax
43 self.str_tag = str_tag
44 self.bins = bins
45 self.generate_only = generate_only
46 self.scale_output = scale_output
47 self.itarray = np.asanyarray(self.data)
48 if self.minmax == "auto":
49 self.minmax = np.percentile(data, [5, 95])
50 if self.minmax[0] == self.minmax[1]:
51 # for very ugly distributions
52 self.minmax = None
53 if self.minmax is not None:
54 # discard values that are outside minmax range
55 mn = self.minmax[0]
56 mx = self.minmax[1]
57 self.itarray = self.itarray[self.itarray >= mn]
58 self.itarray = self.itarray[self.itarray <= mx]
59
60 def draw(self):
61 values, counts = np.unique(self.data, return_counts=True)
62 if len(values) <= 20:
63 self.bins = len(values)
64 ret = []
65 if self.itarray.size:
66 total = len(self.itarray)
67 counts, cutoffs = np.histogram(self.itarray, bins=self.bins)
68 cutoffs = cutoffs[1:]
69 if self.str_tag:
70 self.str_tag = "%s " % self.str_tag
71 else:
72 self.str_tag = ""
73 if self.scale_output is not None:
74 scaled_counts = counts.astype(float) / counts.sum() * self.scale_output
75 else:
76 scaled_counts = counts
77 footerbar = "{:s}{:s} |{:s} |".format(
78 self.str_tag,
79 "-" * 12,
80 "-" * 12,
81 )
82 if self.minmax is not None:
83 ret.append(
84 "Trimmed to range (%s - %s)"
85 % (str(self.minmax[0]), str(self.minmax[1]))
86 )
87 for cutoff, original_count, scaled_count in zip(
88 cutoffs, counts, scaled_counts
89 ):
90 ret.append(
91 "{:s}{:>12.2f} |{:>12,d} | {:s}".format(
92 self.str_tag, cutoff, original_count, "*" * int(scaled_count)
93 )
94 )
95 ret.append(footerbar)
96 ret.append("{:s}{:>12s} |{:>12,d} |".format(self.str_tag, "N=", total))
97 ret.append(footerbar)
98 ret.append("")
99 else:
100 ret = []
101 if not self.generate_only:
102 for line in ret:
103 print(line)
104 ret = "\n".join(ret)
105 return ret
23 106
24 107
25 class findOut: 108 class findOut:
26 109
27 def __init__(self, args): 110 def __init__(self, args):
32 self.bedouthi = args.bedouthi 115 self.bedouthi = args.bedouthi
33 self.bedoutlo = args.bedoutlo 116 self.bedoutlo = args.bedoutlo
34 self.bedouthilo = args.bedouthilo 117 self.bedouthilo = args.bedouthilo
35 self.tableoutfile = args.tableoutfile 118 self.tableoutfile = args.tableoutfile
36 self.bedwin = args.minwin 119 self.bedwin = args.minwin
37 self.qhi = args.qhi
38 self.qlo = None 120 self.qlo = None
39 try: 121 self.qhi = None
40 f = float(args.qlo) 122 if args.outbeds != "outtab":
41 self.qlo = f 123 self.qhi = args.qhi
42 except Exception as e: 124 if args.qlo:
43 s = str(e) 125 try:
44 print(s, ' qlo=', args.qlo) 126 f = float(args.qlo)
127 self.qlo = f
128 except Exception:
129 print("qlo not provided")
45 nbw = len(args.bigwig) 130 nbw = len(args.bigwig)
46 nlab = len(args.bigwiglabels) 131 nlab = len(args.bigwiglabels)
47 if nlab < nbw: 132 if nlab < nbw:
48 self.bwlabels += ["Nolabel"] * (nbw - nlab) 133 self.bwlabels += ["Nolabel"] * (nbw - nlab)
49 self.makeBed() 134 self.makeBed()
104 return row 189 return row
105 190
106 def makeBed(self): 191 def makeBed(self):
107 bedhi = [] 192 bedhi = []
108 bedlo = [] 193 bedlo = []
194 restab = []
109 bwlabels = self.bwlabels 195 bwlabels = self.bwlabels
110 bwnames = self.bwnames 196 bwnames = self.bwnames
111 if self.tableoutfile: 197 bwnames.sort()
112 restab = ["bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] 198 reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"
113 for i, bwname in enumerate(bwnames): 199 for i, bwname in enumerate(bwnames):
114 bwlabel = bwlabels[i].replace(" ", "") 200 bwlabel = bwlabels[i].replace(" ", "")
115 fakepath = "in%d.bw" % i 201 fakepath = "in%d.bw" % i
116 if os.path.isfile(fakepath): 202 if os.path.isfile(fakepath):
117 os.remove(fakepath) 203 os.remove(fakepath)
118 p = Path(fakepath) 204 p = Path(fakepath)
119 p.symlink_to(bwname) # required by pybigtools (!) 205 p.symlink_to(bwname) # required by pybigtools (!)
120 bwf = pybigtools.open(fakepath) 206 bwf = pybigtools.open(fakepath)
121 chrlist = bwf.chroms() 207 chrlist = bwf.chroms()
122 chrs = list(chrlist.keys()) 208 chrs = list(chrlist.keys())
123 chrs.sort()
124 for chr in chrs: 209 for chr in chrs:
210 first_few = None
125 bw = bwf.values(chr) 211 bw = bwf.values(chr)
212 values, counts = np.unique(bw, return_counts=True)
213 nvalues = len(values)
214 if nvalues <= 20:
215 histo = "\n".join(
216 [
217 "%s: %f occurs %d times" % (chr, values[x], counts[x])
218 for x in range(len(values))
219 ]
220 )
221 else:
222 last10 = range(nvalues - 10, nvalues)
223 first_few = ["%.2f\t%d" % (values[x], counts[x]) for x in range(10)]
224 first_few += ["%.2f\t%d" % (values[x], counts[x]) for x in last10]
225 first_few.insert(0, "First/Last 10 value counts\nValue\tCount")
226 ha = asciihist(data=bw, bins=20, str_tag="%s_%s" % (bwlabel, chr))
227 histo = ha.draw()
228 histo = (
229 "\n".join(first_few)
230 + "\nHistogram of %s bigwig values\n" % bwlabel
231 + histo
232 )
126 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered 233 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
127 if self.qhi is not None: 234 if self.qhi is not None:
128 self.bwtop = np.quantile(bw, self.qhi) 235 self.bwtop = np.quantile(bw, self.qhi)
129 bwhi = self.processVals(bw, isTop=True) 236 bwhi = self.processVals(bw, isTop=True)
130 for j, seg in enumerate(bwhi): 237 for j, seg in enumerate(bwhi):
131 if seg[1] - seg[0] >= self.bedwin: 238 seglen = seg[1] - seg[0]
132 score = np.sum(bw[seg[0]:seg[1]]) 239 if seglen >= self.bedwin:
240 score = np.sum(bw[seg[0]:seg[1]]) / float(seglen)
133 bedhi.append( 241 bedhi.append(
134 ( 242 (
135 chr, 243 chr,
136 seg[0], 244 seg[0],
137 seg[1], 245 seg[1],
142 if self.qlo is not None: 250 if self.qlo is not None:
143 self.bwbot = np.quantile(bw, self.qlo) 251 self.bwbot = np.quantile(bw, self.qlo)
144 bwlo = self.processVals(bw, isTop=False) 252 bwlo = self.processVals(bw, isTop=False)
145 for j, seg in enumerate(bwlo): 253 for j, seg in enumerate(bwlo):
146 if seg[1] - seg[0] >= self.bedwin: 254 if seg[1] - seg[0] >= self.bedwin:
147 score = -1 * np.sum(bw[seg[0]:seg[1]]) 255 score = -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen)
148 bedlo.append( 256 bedlo.append(
149 ( 257 (
150 chr, 258 chr,
151 seg[0], 259 seg[0],
152 seg[1], 260 seg[1],
154 score, 262 score,
155 ) 263 )
156 ) 264 )
157 if self.tableoutfile: 265 if self.tableoutfile:
158 row = self.makeTableRow(bw, bwlabel, chr) 266 row = self.makeTableRow(bw, bwlabel, chr)
159 restab.append(copy.copy(row)) 267 resheadl = reshead.split("\t")
268 rowl = row.split()
269 desc = ["%s\t%s" % (resheadl[x], rowl[x]) for x in range(len(rowl))]
270 desc.insert(0, "Descriptive measures")
271 descn = "\n".join(desc)
272 restab.append(descn)
273 restab.append(histo)
274 if os.path.isfile(fakepath):
275 os.remove(fakepath)
160 if self.tableoutfile: 276 if self.tableoutfile:
161 stable = "\n".join(restab) 277 stable = "\n".join(restab)
162 with open(self.tableoutfile, "w") as t: 278 with open(self.tableoutfile, "w") as t:
163 t.write(stable) 279 t.write(stable)
164 t.write("\n") 280 t.write("\n")
173 some = True 289 some = True
174 if self.outbeds in ["outall", "outhilo"]: 290 if self.outbeds in ["outall", "outhilo"]:
175 allbed = bedlo + bedhi 291 allbed = bedlo + bedhi
176 self.writeBed(allbed, self.bedouthilo) 292 self.writeBed(allbed, self.bedouthilo)
177 some = True 293 some = True
178 if not some: 294 if not ((self.outbeds == "outtab") or some):
179 sys.stderr.write( 295 sys.stderr.write(
180 "Invalid configuration - no output could be created. Was qlo missing and only low output requested for example?" 296 "Invalid configuration - no output could be created. Was qlo missing and only low output requested for example?"
181 ) 297 )
182 sys.exit(2) 298 sys.exit(2)
183 299