### view tools/mytools/metaintv.py @ 1:cdcb0ce84a1b

author xuebing Fri, 09 Mar 2012 19:45:15 -0500 9071e359b9a3
line wrap: on
line source
```
'''
get binned score of intervals,allow extension
'''

import os,sys,numpy

from resize import *

from bx.bbi.bigwig_file import BigWigFile

def binning(x,n):
    '''
    average x into n consecutive, non-overlapping bins

    x: 1-D sequence of numbers (list or numpy array)
    n: number of bins
    returns: float numpy array of length n; all zeros if x is empty

    bin k covers x[k*len(x)//n : (k+1)*len(x)//n]. When n > len(x) some
    of these slices would be empty, so each bin is widened to hold at
    least one element (neighbouring bins then repeat a value).
    '''
    x = numpy.asarray(x,dtype=float)
    y = numpy.zeros(n,dtype=float)
    size = len(x)
    if size == 0:
        return y
    for k in range(n):
        # integer arithmetic keeps the bin edges exact (no float rounding)
        i = k*size//n
        # the upper edge is exclusive; never let a bin be empty, since
        # mean() of an empty slice is nan
        j = max(i+1,(k+1)*size//n)
        y[k] = x[i:j].mean()
    return y

def getBinnedScore(bwfile,intvfile,nbin):
    '''
    get binned average and standard error of bigWig scores over intervals

    bwfile:   path of the bigWig file
    intvfile: path of a tab-delimited interval file with columns
              chrom,start,end[,name,score,strand]; a missing strand
              column is treated as plus strand
    nbin:     number of bins each interval is divided into
    returns:  (avg,err), two float arrays of length nbin: the mean of the
              binned scores over all scored intervals and the standard
              error of that mean. Both are all-nan when no interval
              could be scored.

    Intervals for which the bigWig returns no array (None) are skipped.
    Scores on the minus strand are reversed, and nan scores count as 0.
    '''
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so open it in binary mode; the with
    # blocks close both files even if an interval fails to parse
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            for line in fin:
                #chrom,start,end,name,score,strand
                flds = line.strip().split('\t')
                # skip blank or truncated lines
                if len(flds) < 3:
                    continue
                #get the score at base resolution as an array
                scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                # identity test: '== None' on an array compares elementwise
                if scores is None:
                    continue
                N = N + 1
                # reverse if on minus strand
                if len(flds) > 5 and flds[5] == '-':
                    scores = scores[::-1]
                # no score = 0
                scores = numpy.nan_to_num(scores)
                # bin the data
                binned = binning(scores,nbin)
                avg = avg + binned
                sqr = sqr + binned**2
    if N == 0:
        # nothing was scored: mean and error are undefined
        undefined = numpy.zeros(nbin) + numpy.nan
        return undefined,undefined.copy()
    # compute avg and std
    avg = avg / N
    # rounding can push E[x^2]-E[x]^2 slightly below zero; clamp so the
    # square root does not turn into nan
    var = numpy.maximum(sqr/N-avg**2,0)
    err = (var**0.5)/(N**0.5)
    return avg,err

def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    '''
    get binned average and error over three consecutive regions of each
    interval and join them into one profile

    bwfile:   path of the bigWig file
    intvfile: path of the interval file
    nbins: n1,n2,n3  number of bins for the left, center and right region
    exts: l1,l2,l3,l4  offsets used to build the regions:
          left   = start-l1 .. start+l2
          center = start+l2 .. end-l3
          right  = end-l3   .. end+l4
    returns: (avg,err), each the concatenation of the three per-region
             arrays, so of length n1+n2+n3

    Each region is written by resize() to the scratch file
    intvfile+'.tmp' (in 'stranded' mode), which is removed before
    returning.
    '''
    tmpfile = intvfile+'.tmp'
    # (new start, new end, number of bins) for left, center, right
    regions = [
        ('start-'+str(exts[0]),'start+'+str(exts[1]),nbins[0]),
        ('start+'+str(exts[1]),'end-'+str(exts[2]),nbins[1]),
        ('end-'+str(exts[2]),'end+'+str(exts[3]),nbins[2]),
    ]
    avgs = []
    errs = []
    try:
        for newstart,newend,nbin in regions:
            # make the region, then compute its binned average
            resize(intvfile,tmpfile,newstart,newend,'stranded')
            avg,err = getBinnedScore(bwfile,tmpfile,nbin)
            avgs.append(avg)
            errs.append(err)
    finally:
        # do not leave the scratch file behind, even on failure
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    return numpy.concatenate(avgs),numpy.concatenate(errs)

# command line: prog bwfile intvfile nbin outfile outplot
# writes the binned average and error to outfile (one line each) and
# plots them to the pdf outplot using R
print(sys.argv)
prog,bwfile,intvfile,nbin,outfile,outplot = sys.argv
avg, err = getBinnedScore(bwfile,intvfile,int(nbin))
# line 1: avg, line 2: err, values separated by spaces
with open(outfile,'w') as out:
    numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')
    numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')

# plot
with open("tmp.r","w") as rscript:
    rscript.write("options(warn=-1)\n")
    # load the two rows written above; as.matrix makes x[1,] and x[2,]
    # plain numeric vectors
    rscript.write("x <- as.matrix(read.table('"+outfile+"'))\n")
    rscript.write("pdf('"+outplot+"')\n")
    rscript.write("avg <- x[1,]\n")
    rscript.write("err <- x[2,]\n")
    rscript.write("print(x)\n")
    rscript.write("ylim=c(min(avg-err),max(avg+err))\n")
    rscript.write("xticks <- seq(ncol(x))\n")
    rscript.write("plot(xticks,avg,xlab='',ylab='average coverage',type='l',lwd=0,ylim=ylim)\n")
    # shaded band of avg +/- err, with the average line drawn on top
    rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n")
    rscript.write("lines(xticks,avg,type='l',lwd=1)\n")
    rscript.write("dev.off()\n")
try:
    os.system("R --vanilla < tmp.r")
finally:
    # remove the generated script without relying on a shell 'rm'
    os.remove("tmp.r")

```