Mercurial > repos > iuc > bigwig_outlier_bed
comparison bigwig_outlier_bed.py @ 2:61946b8bd43b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit 3cce4c76a60b9353298fdcf759e893b8fcdfaa77
| author | iuc |
|---|---|
| date | Thu, 25 Jul 2024 14:38:34 +0000 |
| parents | 8377a6abb4da |
| children | 00b3da7776a0 |
comparison
equal
deleted
inserted
replaced
| 1:8377a6abb4da | 2:61946b8bd43b |
|---|---|
| 11 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores | 11 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores |
| 12 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). | 12 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). |
| 13 """ | 13 """ |
| 14 | 14 |
| 15 import argparse | 15 import argparse |
| 16 import copy | |
| 17 import os | 16 import os |
| 18 import sys | 17 import sys |
| 19 from pathlib import Path | 18 from pathlib import Path |
| 20 | 19 |
| 21 import numpy as np | 20 import numpy as np |
| 22 import pybigtools | 21 import pybigtools |
| 22 | |
| 23 | |
| 24 class asciihist: | |
| 25 | |
| 26 def __init__( | |
| 27 self, | |
| 28 data, | |
| 29 bins=10, | |
| 30 minmax=None, | |
| 31 str_tag="", | |
| 32 scale_output=80, | |
| 33 generate_only=True, | |
| 34 ): | |
| 35 """ | |
| 36 https://gist.github.com/bgbg/608d9ef4fd75032731651257fe67fc81 | |
| 37 Create an ASCII histogram from an interable of numbers. | |
| 38 Author: Boris Gorelik boris@gorelik.net. based on http://econpy.googlecode.com/svn/trunk/pytrix/pytrix.py | |
| 39 License: MIT | |
| 40 """ | |
| 41 self.data = data | |
| 42 self.minmax = minmax | |
| 43 self.str_tag = str_tag | |
| 44 self.bins = bins | |
| 45 self.generate_only = generate_only | |
| 46 self.scale_output = scale_output | |
| 47 self.itarray = np.asanyarray(self.data) | |
| 48 if self.minmax == "auto": | |
| 49 self.minmax = np.percentile(data, [5, 95]) | |
| 50 if self.minmax[0] == self.minmax[1]: | |
| 51 # for very ugly distributions | |
| 52 self.minmax = None | |
| 53 if self.minmax is not None: | |
| 54 # discard values that are outside minmax range | |
| 55 mn = self.minmax[0] | |
| 56 mx = self.minmax[1] | |
| 57 self.itarray = self.itarray[self.itarray >= mn] | |
| 58 self.itarray = self.itarray[self.itarray <= mx] | |
| 59 | |
| 60 def draw(self): | |
| 61 values, counts = np.unique(self.data, return_counts=True) | |
| 62 if len(values) <= 20: | |
| 63 self.bins = len(values) | |
| 64 ret = [] | |
| 65 if self.itarray.size: | |
| 66 total = len(self.itarray) | |
| 67 counts, cutoffs = np.histogram(self.itarray, bins=self.bins) | |
| 68 cutoffs = cutoffs[1:] | |
| 69 if self.str_tag: | |
| 70 self.str_tag = "%s " % self.str_tag | |
| 71 else: | |
| 72 self.str_tag = "" | |
| 73 if self.scale_output is not None: | |
| 74 scaled_counts = counts.astype(float) / counts.sum() * self.scale_output | |
| 75 else: | |
| 76 scaled_counts = counts | |
| 77 footerbar = "{:s}{:s} |{:s} |".format( | |
| 78 self.str_tag, | |
| 79 "-" * 12, | |
| 80 "-" * 12, | |
| 81 ) | |
| 82 if self.minmax is not None: | |
| 83 ret.append( | |
| 84 "Trimmed to range (%s - %s)" | |
| 85 % (str(self.minmax[0]), str(self.minmax[1])) | |
| 86 ) | |
| 87 for cutoff, original_count, scaled_count in zip( | |
| 88 cutoffs, counts, scaled_counts | |
| 89 ): | |
| 90 ret.append( | |
| 91 "{:s}{:>12.2f} |{:>12,d} | {:s}".format( | |
| 92 self.str_tag, cutoff, original_count, "*" * int(scaled_count) | |
| 93 ) | |
| 94 ) | |
| 95 ret.append(footerbar) | |
| 96 ret.append("{:s}{:>12s} |{:>12,d} |".format(self.str_tag, "N=", total)) | |
| 97 ret.append(footerbar) | |
| 98 ret.append("") | |
| 99 else: | |
| 100 ret = [] | |
| 101 if not self.generate_only: | |
| 102 for line in ret: | |
| 103 print(line) | |
| 104 ret = "\n".join(ret) | |
| 105 return ret | |
| 23 | 106 |
| 24 | 107 |
| 25 class findOut: | 108 class findOut: |
| 26 | 109 |
| 27 def __init__(self, args): | 110 def __init__(self, args): |
| 32 self.bedouthi = args.bedouthi | 115 self.bedouthi = args.bedouthi |
| 33 self.bedoutlo = args.bedoutlo | 116 self.bedoutlo = args.bedoutlo |
| 34 self.bedouthilo = args.bedouthilo | 117 self.bedouthilo = args.bedouthilo |
| 35 self.tableoutfile = args.tableoutfile | 118 self.tableoutfile = args.tableoutfile |
| 36 self.bedwin = args.minwin | 119 self.bedwin = args.minwin |
| 37 self.qhi = args.qhi | |
| 38 self.qlo = None | 120 self.qlo = None |
| 39 try: | 121 self.qhi = None |
| 40 f = float(args.qlo) | 122 if args.outbeds != "outtab": |
| 41 self.qlo = f | 123 self.qhi = args.qhi |
| 42 except Exception as e: | 124 if args.qlo: |
| 43 s = str(e) | 125 try: |
| 44 print(s, ' qlo=', args.qlo) | 126 f = float(args.qlo) |
| 127 self.qlo = f | |
| 128 except Exception: | |
| 129 print("qlo not provided") | |
| 45 nbw = len(args.bigwig) | 130 nbw = len(args.bigwig) |
| 46 nlab = len(args.bigwiglabels) | 131 nlab = len(args.bigwiglabels) |
| 47 if nlab < nbw: | 132 if nlab < nbw: |
| 48 self.bwlabels += ["Nolabel"] * (nbw - nlab) | 133 self.bwlabels += ["Nolabel"] * (nbw - nlab) |
| 49 self.makeBed() | 134 self.makeBed() |
| 104 return row | 189 return row |
| 105 | 190 |
| 106 def makeBed(self): | 191 def makeBed(self): |
| 107 bedhi = [] | 192 bedhi = [] |
| 108 bedlo = [] | 193 bedlo = [] |
| 194 restab = [] | |
| 109 bwlabels = self.bwlabels | 195 bwlabels = self.bwlabels |
| 110 bwnames = self.bwnames | 196 bwnames = self.bwnames |
| 111 if self.tableoutfile: | 197 bwnames.sort() |
| 112 restab = ["bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] | 198 reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot" |
| 113 for i, bwname in enumerate(bwnames): | 199 for i, bwname in enumerate(bwnames): |
| 114 bwlabel = bwlabels[i].replace(" ", "") | 200 bwlabel = bwlabels[i].replace(" ", "") |
| 115 fakepath = "in%d.bw" % i | 201 fakepath = "in%d.bw" % i |
| 116 if os.path.isfile(fakepath): | 202 if os.path.isfile(fakepath): |
| 117 os.remove(fakepath) | 203 os.remove(fakepath) |
| 118 p = Path(fakepath) | 204 p = Path(fakepath) |
| 119 p.symlink_to(bwname) # required by pybigtools (!) | 205 p.symlink_to(bwname) # required by pybigtools (!) |
| 120 bwf = pybigtools.open(fakepath) | 206 bwf = pybigtools.open(fakepath) |
| 121 chrlist = bwf.chroms() | 207 chrlist = bwf.chroms() |
| 122 chrs = list(chrlist.keys()) | 208 chrs = list(chrlist.keys()) |
| 123 chrs.sort() | |
| 124 for chr in chrs: | 209 for chr in chrs: |
| 210 first_few = None | |
| 125 bw = bwf.values(chr) | 211 bw = bwf.values(chr) |
| 212 values, counts = np.unique(bw, return_counts=True) | |
| 213 nvalues = len(values) | |
| 214 if nvalues <= 20: | |
| 215 histo = "\n".join( | |
| 216 [ | |
| 217 "%s: %f occurs %d times" % (chr, values[x], counts[x]) | |
| 218 for x in range(len(values)) | |
| 219 ] | |
| 220 ) | |
| 221 else: | |
| 222 last10 = range(nvalues - 10, nvalues) | |
| 223 first_few = ["%.2f\t%d" % (values[x], counts[x]) for x in range(10)] | |
| 224 first_few += ["%.2f\t%d" % (values[x], counts[x]) for x in last10] | |
| 225 first_few.insert(0, "First/Last 10 value counts\nValue\tCount") | |
| 226 ha = asciihist(data=bw, bins=20, str_tag="%s_%s" % (bwlabel, chr)) | |
| 227 histo = ha.draw() | |
| 228 histo = ( | |
| 229 "\n".join(first_few) | |
| 230 + "\nHistogram of %s bigwig values\n" % bwlabel | |
| 231 + histo | |
| 232 ) | |
| 126 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered | 233 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered |
| 127 if self.qhi is not None: | 234 if self.qhi is not None: |
| 128 self.bwtop = np.quantile(bw, self.qhi) | 235 self.bwtop = np.quantile(bw, self.qhi) |
| 129 bwhi = self.processVals(bw, isTop=True) | 236 bwhi = self.processVals(bw, isTop=True) |
| 130 for j, seg in enumerate(bwhi): | 237 for j, seg in enumerate(bwhi): |
| 131 if seg[1] - seg[0] >= self.bedwin: | 238 seglen = seg[1] - seg[0] |
| 132 score = np.sum(bw[seg[0]:seg[1]]) | 239 if seglen >= self.bedwin: |
| 240 score = np.sum(bw[seg[0]:seg[1]]) / float(seglen) | |
| 133 bedhi.append( | 241 bedhi.append( |
| 134 ( | 242 ( |
| 135 chr, | 243 chr, |
| 136 seg[0], | 244 seg[0], |
| 137 seg[1], | 245 seg[1], |
| 142 if self.qlo is not None: | 250 if self.qlo is not None: |
| 143 self.bwbot = np.quantile(bw, self.qlo) | 251 self.bwbot = np.quantile(bw, self.qlo) |
| 144 bwlo = self.processVals(bw, isTop=False) | 252 bwlo = self.processVals(bw, isTop=False) |
| 145 for j, seg in enumerate(bwlo): | 253 for j, seg in enumerate(bwlo): |
| 146 if seg[1] - seg[0] >= self.bedwin: | 254 if seg[1] - seg[0] >= self.bedwin: |
| 147 score = -1 * np.sum(bw[seg[0]:seg[1]]) | 255 score = -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen) |
| 148 bedlo.append( | 256 bedlo.append( |
| 149 ( | 257 ( |
| 150 chr, | 258 chr, |
| 151 seg[0], | 259 seg[0], |
| 152 seg[1], | 260 seg[1], |
| 154 score, | 262 score, |
| 155 ) | 263 ) |
| 156 ) | 264 ) |
| 157 if self.tableoutfile: | 265 if self.tableoutfile: |
| 158 row = self.makeTableRow(bw, bwlabel, chr) | 266 row = self.makeTableRow(bw, bwlabel, chr) |
| 159 restab.append(copy.copy(row)) | 267 resheadl = reshead.split("\t") |
| 268 rowl = row.split() | |
| 269 desc = ["%s\t%s" % (resheadl[x], rowl[x]) for x in range(len(rowl))] | |
| 270 desc.insert(0, "Descriptive measures") | |
| 271 descn = "\n".join(desc) | |
| 272 restab.append(descn) | |
| 273 restab.append(histo) | |
| 274 if os.path.isfile(fakepath): | |
| 275 os.remove(fakepath) | |
| 160 if self.tableoutfile: | 276 if self.tableoutfile: |
| 161 stable = "\n".join(restab) | 277 stable = "\n".join(restab) |
| 162 with open(self.tableoutfile, "w") as t: | 278 with open(self.tableoutfile, "w") as t: |
| 163 t.write(stable) | 279 t.write(stable) |
| 164 t.write("\n") | 280 t.write("\n") |
| 173 some = True | 289 some = True |
| 174 if self.outbeds in ["outall", "outhilo"]: | 290 if self.outbeds in ["outall", "outhilo"]: |
| 175 allbed = bedlo + bedhi | 291 allbed = bedlo + bedhi |
| 176 self.writeBed(allbed, self.bedouthilo) | 292 self.writeBed(allbed, self.bedouthilo) |
| 177 some = True | 293 some = True |
| 178 if not some: | 294 if not ((self.outbeds == "outtab") or some): |
| 179 sys.stderr.write( | 295 sys.stderr.write( |
| 180 "Invalid configuration - no output could be created. Was qlo missing and only low output requested for example?" | 296 "Invalid configuration - no output could be created. Was qlo missing and only low output requested for example?" |
| 181 ) | 297 ) |
| 182 sys.exit(2) | 298 sys.exit(2) |
| 183 | 299 |
