'''
Get binned scores of intervals from a bigWig file, with optional extension
of the regions around each interval's start and end.
'''
|  | 4 | 
|  | 5 import os,sys,numpy | 
|  | 6 | 
|  | 7 from resize import * | 
|  | 8 | 
|  | 9 from bx.bbi.bigwig_file import BigWigFile | 
|  | 10 | 
def binning(x,n):
    '''
    Average x into n consecutive bins.

    x: 1-D sequence of numbers; converted to a float numpy array.
    n: number of bins; coerced with int() so a float count (e.g. 3.0) works.

    Returns a float numpy array of length n holding the mean of each bin.
    An empty x gives an all-zero result.
    '''
    n = int(n)
    x = numpy.asarray(x, dtype=float)
    y = numpy.zeros(n, dtype=float)
    size = len(x)
    if size == 0:
        return y
    for k in range(n):
        # Integer arithmetic keeps the bin edges exact: no float rounding and
        # no element shared between neighbouring bins.
        i = (k * size) // n
        # When there are fewer elements than bins a bin would be empty;
        # widen it to one element so mean() is always defined.
        j = max(((k + 1) * size) // n, i + 1)
        y[k] = x[i:j].mean()
    return y
|  | 23 | 
def getBinnedScore(bwfile,intvfile,nbin):
    '''
    Get the binned average and its standard error over a set of intervals.

    bwfile:   path of the bigWig file holding the scores.
    intvfile: path of a tab-separated interval file with columns
              chrom,start,end,name,score,strand. Only chrom, start, end and
              (when present) strand are used.
    nbin:     number of bins each interval is summarised into.

    Returns (avg, err): two arrays of length nbin. avg is the mean of the
    binned profiles over all scored intervals; err is the standard deviation
    of those profiles divided by sqrt(number of scored intervals). When no
    interval could be scored both arrays are filled with NaN.
    '''
    nbin = int(nbin)
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so open it in binary mode; nested 'with'
    # blocks guarantee both handles are closed even if a line fails to parse.
    with open(bwfile, 'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            for line in fin:
                # skip blank lines and comment lines instead of crashing
                if not line.strip() or line.startswith('#'):
                    continue
                flds = line.strip().split('\t')
                # get the score at base resolution as an array
                scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                # 'is None': '== None' on a numpy array compares elementwise
                if scores is None:
                    print('not found:\t' + line)
                    continue
                N = N + 1
                # reverse if on minus strand; a missing strand column is
                # treated as plus strand
                if len(flds) > 5 and flds[5] == '-':
                    scores = scores[::-1]
                # positions without a score (NaN) count as 0
                scores = numpy.nan_to_num(scores)
                binned = binning(scores,nbin)
                avg = avg + binned
                sqr = sqr + binned**2
    if N == 0:
        # nothing scored: return NaN explicitly rather than dividing by zero
        return numpy.zeros(nbin) + numpy.nan, numpy.zeros(nbin) + numpy.nan
    avg = avg / N
    # E[x^2] - E[x]^2 can dip just below zero through rounding; clamp it so
    # the square root does not turn a zero variance into NaN.
    var = numpy.maximum(sqr / N - avg**2, 0.0)
    err = (var**0.5) / (N**0.5)
    return avg,err
|  | 57 | 
def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    '''
    Get binned average and error over three consecutive regions per interval.

    bwfile:   path of the bigWig file.
    intvfile: path of the interval file.
    nbins:    n1,n2,n3 - number of bins for the left, center and right region.
    exts:     l1,l2,l3,l4 - offsets defining the regions:
                left   = [start-l1, start+l2]
                center = [start+l2, end-l3]
                right  = [end-l3,   end+l4]

    Each region is written to the scratch file intvfile+'.tmp' by resize()
    (called in 'stranded' mode; presumably start/end are then taken relative
    to the strand - confirm against resize) and scored with getBinnedScore().

    Returns (avg, err): the per-region results concatenated left to right,
    each of length n1+n2+n3.
    '''
    tmpfile = intvfile + '.tmp'
    regions = (
        ('start-' + str(exts[0]), 'start+' + str(exts[1]), nbins[0]),  # left
        ('start+' + str(exts[1]), 'end-' + str(exts[2]), nbins[1]),    # center
        ('end-' + str(exts[2]), 'end+' + str(exts[3]), nbins[2]),      # right
    )
    avgs = []
    errs = []
    try:
        for left, right, nbin in regions:
            resize(intvfile, tmpfile, left, right, 'stranded')
            region_avg, region_err = getBinnedScore(bwfile, tmpfile, nbin)
            avgs.append(region_avg)
            errs.append(region_err)
    finally:
        # do not leave the scratch file next to the user's input
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    return numpy.concatenate(avgs), numpy.concatenate(errs)
|  | 81 | 
def _parse_int_list(text):
    # Turn a comma-separated string such as '10,20,10' into a list of ints.
    # Bin counts and base offsets must be integers; going through float()
    # keeps inputs like '10.0' working.
    return [int(float(v)) for v in text.split(',') if v.strip()]


def main(argv):
    '''
    Command-line entry point.

    argv: full argument vector (script name first), followed by
          bwfile intvfile exts nbins outfile outplot
          where exts is 'l1,l2,l3,l4' and nbins is 'n1,n2,n3'.

    Writes the averages and errors as two space-separated lines to outfile,
    then runs R to draw the profile with its error band into the PDF outplot.
    '''
    print(argv)
    # argv[0] is the script itself, so the six arguments are argv[1:7]
    if len(argv) != 7:
        sys.exit('usage: ' + argv[0] + ' bwfile intvfile exts nbins outfile outplot')
    bwfile,intvfile,exts,nbins,outfile,outplot = argv[1:7]
    nbins = _parse_int_list(nbins)
    exts = _parse_int_list(exts)
    if len(nbins) != 3 or len(exts) != 4:
        sys.exit('nbins must be n1,n2,n3 and exts must be l1,l2,l3,l4')

    avg, err = getExtendedBinScore(bwfile,intvfile,nbins,exts)
    with open(outfile,'w') as out:
        numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
        out.write('\n')
        numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
        out.write('\n')

    # plot
    rlines = [
        "options(warn=-1)\n",
        "x <- read.table('"+outfile+"')\n",
        "pdf('"+outplot+"')\n",
        "avg <- x[1,]\n",
        "err <- x[2,]\n",
        "print(x)\n",
        "ylim=c(min(avg-err),max(avg+err))\n",
        "xticks <- seq(ncol(x))\n",
        "plot(xticks,avg,ylab='average coverage',type='l',lwd=0,ylim=ylim)\n",
        "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n",
        "lines(xticks,avg,type='l',lwd=1)\n",
        "dev.off()\n",
    ]
    try:
        with open("tmp.r","w") as rscript:
            rscript.writelines(rlines)
        os.system("R --vanilla < tmp.r")
    finally:
        # remove the generated R script even if writing or running it failed
        if os.path.exists("tmp.r"):
            os.remove("tmp.r")


if __name__ == '__main__':
    main(sys.argv)
|  | 109 |