bigwig_outlier_bed: bigwig_outlier

comparison bigwig_outlier_bed.py @ 2:61946b8bd43b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit 3cce4c76a60b9353298fdcf759e893b8fcdfaa77

author	iuc
date	Thu, 25 Jul 2024 14:38:34 +0000
parents	8377a6abb4da
children	00b3da7776a0

comparison

equal deleted inserted replaced

-:8377a6abb4da
+:61946b8bd43b
 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
 """
 import argparse
-import copy
 import os
 import sys
 from pathlib import Path
 import numpy as np
 import pybigtools
+class asciihist:
+def __init__(
+self,
+data,
+bins=10,
+minmax=None,
+str_tag="",
+scale_output=80,
+generate_only=True,
+):
+"""
+https://gist.github.com/bgbg/608d9ef4fd75032731651257fe67fc81
+Create an ASCII histogram from an interable of numbers.
+Author: Boris Gorelik boris@gorelik.net. based on  http://econpy.googlecode.com/svn/trunk/pytrix/pytrix.py
+License: MIT
+"""
+self.data = data
+self.minmax = minmax
+self.str_tag = str_tag
+self.bins = bins
+self.generate_only = generate_only
+self.scale_output = scale_output
+self.itarray = np.asanyarray(self.data)
+if self.minmax == "auto":
+self.minmax = np.percentile(data, [5, 95])
+if self.minmax[0] == self.minmax[1]:
+# for very ugly distributions
+self.minmax = None
+if self.minmax is not None:
+# discard values that are outside minmax range
+mn = self.minmax[0]
+mx = self.minmax[1]
+self.itarray = self.itarray[self.itarray >= mn]
+self.itarray = self.itarray[self.itarray <= mx]
+def draw(self):
+values, counts = np.unique(self.data, return_counts=True)
+if len(values) <= 20:
+self.bins = len(values)
+ret = []
+if self.itarray.size:
+total = len(self.itarray)
+counts, cutoffs = np.histogram(self.itarray, bins=self.bins)
+cutoffs = cutoffs[1:]
+if self.str_tag:
+self.str_tag = "%s " % self.str_tag
+else:
+self.str_tag = ""
+if self.scale_output is not None:
+scaled_counts = counts.astype(float) / counts.sum() * self.scale_output
+else:
+scaled_counts = counts
+footerbar = "{:s}{:s} |{:s} |".format(
+self.str_tag,
+"-" * 12,
+"-" * 12,
+)
+if self.minmax is not None:
+ret.append(
+"Trimmed to range (%s - %s)"
+% (str(self.minmax[0]), str(self.minmax[1]))
+)
+for cutoff, original_count, scaled_count in zip(
+cutoffs, counts, scaled_counts
+):
+ret.append(
+"{:s}{:>12.2f} |{:>12,d} | {:s}".format(
+self.str_tag, cutoff, original_count, "*" * int(scaled_count)
+)
+)
+ret.append(footerbar)
+ret.append("{:s}{:>12s} |{:>12,d} |".format(self.str_tag, "N=", total))
+ret.append(footerbar)
+ret.append("")
+else:
+ret = []
+if not self.generate_only:
+for line in ret:
+print(line)
+ret = "\n".join(ret)
+return ret
 class findOut:
 def __init__(self, args):
 self.bedouthi = args.bedouthi
 self.bedoutlo = args.bedoutlo
 self.bedouthilo = args.bedouthilo
 self.tableoutfile = args.tableoutfile
 self.bedwin = args.minwin
-self.qhi = args.qhi
 self.qlo = None
-try:
+self.qhi = None
-f = float(args.qlo)
+if args.outbeds != "outtab":
-self.qlo = f
+self.qhi = args.qhi
-except Exception as e:
+if args.qlo:
-s = str(e)
+try:
-print(s, ' qlo=', args.qlo)
+f = float(args.qlo)
+self.qlo = f
+except Exception:
+print("qlo not provided")
 nbw = len(args.bigwig)
 nlab = len(args.bigwiglabels)
 if nlab < nbw:
 self.bwlabels += ["Nolabel"] * (nbw - nlab)
 self.makeBed()
 return row
 def makeBed(self):
 bedhi = []
 bedlo = []
+restab = []
 bwlabels = self.bwlabels
 bwnames = self.bwnames
-if self.tableoutfile:
+bwnames.sort()
-restab = ["bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
+reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"
 for i, bwname in enumerate(bwnames):
 bwlabel = bwlabels[i].replace(" ", "")
 fakepath = "in%d.bw" % i
 if os.path.isfile(fakepath):
 os.remove(fakepath)
 p = Path(fakepath)
 p.symlink_to(bwname)  # required by pybigtools (!)
 bwf = pybigtools.open(fakepath)
 chrlist = bwf.chroms()
 chrs = list(chrlist.keys())
-chrs.sort()
 for chr in chrs:
+first_few = None
 bw = bwf.values(chr)
+values, counts = np.unique(bw, return_counts=True)
+nvalues = len(values)
+if nvalues <= 20:
+histo = "\n".join(
+[
+"%s: %f occurs %d times" % (chr, values[x], counts[x])
+for x in range(len(values))
+]
+)
+else:
+last10 = range(nvalues - 10, nvalues)
+first_few = ["%.2f\t%d" % (values[x], counts[x]) for x in range(10)]
+first_few += ["%.2f\t%d" % (values[x], counts[x]) for x in last10]
+first_few.insert(0, "First/Last 10 value counts\nValue\tCount")
+ha = asciihist(data=bw, bins=20, str_tag="%s_%s" % (bwlabel, chr))
+histo = ha.draw()
+histo = (
+"\n".join(first_few)
++ "\nHistogram of %s bigwig values\n" % bwlabel
++ histo
+)
 bw = bw[~np.isnan(bw)]  # some have NaN if parts of a contig not covered
 if self.qhi is not None:
 self.bwtop = np.quantile(bw, self.qhi)
 bwhi = self.processVals(bw, isTop=True)
 for j, seg in enumerate(bwhi):
-if seg[1] - seg[0] >= self.bedwin:
+seglen = seg[1] - seg[0]
-score = np.sum(bw[seg[0]:seg[1]])
+if seglen >= self.bedwin:
+score = np.sum(bw[seg[0]:seg[1]]) / float(seglen)
 bedhi.append(
 (
 chr,
 seg[0],
 seg[1],
 if self.qlo is not None:
 self.bwbot = np.quantile(bw, self.qlo)
 bwlo = self.processVals(bw, isTop=False)
 for j, seg in enumerate(bwlo):
 if seg[1] - seg[0] >= self.bedwin:
-score = -1 * np.sum(bw[seg[0]:seg[1]])
+score = -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen)
 bedlo.append(
 (
 chr,
 seg[0],
 seg[1],
 score,
 )
 )
 if self.tableoutfile:
 row = self.makeTableRow(bw, bwlabel, chr)
-restab.append(copy.copy(row))
+resheadl = reshead.split("\t")
+rowl = row.split()
+desc = ["%s\t%s" % (resheadl[x], rowl[x]) for x in range(len(rowl))]
+desc.insert(0, "Descriptive measures")
+descn = "\n".join(desc)
+restab.append(descn)
+restab.append(histo)
+if os.path.isfile(fakepath):
+os.remove(fakepath)
 if self.tableoutfile:
 stable = "\n".join(restab)
 with open(self.tableoutfile, "w") as t:
 t.write(stable)
 t.write("\n")
 some = True
 if self.outbeds in ["outall", "outhilo"]:
 allbed = bedlo + bedhi
 self.writeBed(allbed, self.bedouthilo)
 some = True
-if not some:
+if not ((self.outbeds == "outtab") or some):
 sys.stderr.write(
 "Invalid configuration - no output could be created. Was qlo missing and only low output requested for example?"
 )
 sys.exit(2)

Mercurial > repos > iuc > bigwig_outlier_bed

comparison bigwig_outlier_bed.py @ 2:61946b8bd43b draft