diff binnedAverage.py @ 14:76e1b1b21cce default tip

Deleted selected files
author xuebing
date Tue, 13 Mar 2012 19:05:10 -0400
parents 292186c14b08
children
line wrap: on
line diff
--- a/binnedAverage.py	Sat Mar 10 08:17:36 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-'''
-get binned score of intervals,allow extension
-'''
-
-import os,sys,numpy,random,string
-
-from resize import *
-
-from bx.bbi.bigwig_file import BigWigFile
-
-def binning(x,n):
-    # make n bin of x
-    y = numpy.zeros(n,dtype=float)
-    if len(x) == 0:
-        return y
-    step = float(len(x))/n
-    for k in range(n):
-        i = int(step*k)
-        j = int(step*(k+1)) + 1
-        y[k] = x[i:j].mean()
-        #print i,j,k,y[k]
-    return y
-
-def getBinnedScore(bwfile,intvfile,outfile,outplot,nbin):
-    '''
-    get binned average and std
-    '''
-    fbw = open(bwfile)
-    bw = BigWigFile(file=fbw)
-    fin = open(intvfile)
-    out = open(outfile,'w')
-    zeros = '\t'.join(['0']*nbin)
-    for line in fin:
-        #chrom,start,end,name,score,strand
-        line = line.strip()
-        flds = line.split('\t')
-        #get the score at base resolution as an array
-        scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
-        if scores == None:
-            print 'not found:\t',line
-            out.write(line+'\t'+zeros+'\n')
-            continue
-        # reverse if on minus strand
-        if flds[5] == '-':
-            scores = scores[::-1]
-        # no score = 0    
-        scores = numpy.nan_to_num(scores)
-        # bin the data
-        binned = binning(scores,nbin)
-        out.write(line+'\t'+'\t'.join(map(str,binned))+'\n')
-    fin.close()
-    out.close()
-    # plot
-    if nbin > 1:
-        tmp = "".join(random.sample(string.letters+string.digits, 8))
-        rscript = open(tmp,"w")
-        rscript.write("options(warn=-1)\n")
-        rscript.write("x <- read.table('"+outfile+"',sep='\t')\n")
-        rscript.write("x <- x[,(ncol(x)+1-"+str(nbin)+"):ncol(x)]\n")
-        rscript.write("pdf('"+outplot+"')\n")
-        rscript.write("avg <- apply(x,2,mean)\n")
-        rscript.write("err <- apply(x,2,sd)/sqrt(nrow(x))\n")
-        rscript.write("print(avg)\n")
-        rscript.write("print(err)\n")
-        rscript.write("ylim=c(min(avg-err),max(avg+err))\n")
-        rscript.write("xticks <- seq(ncol(x))\n")
-        rscript.write("plot(xticks,avg,xlab='',ylab='average',type='l',lwd=0,ylim=ylim)\n")   
-        rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n")
-        rscript.write("lines(xticks,avg,type='l',lwd=1)\n")   
-        rscript.write("dev.off()\n")
-        rscript.close()
-        os.system("R --vanilla < "+tmp)
-        os.system("rm "+tmp)
-
-print sys.argv
-prog,bwfile,intvfile,nbin,outfile,outplot = sys.argv
-getBinnedScore(bwfile,intvfile,outfile,outplot,int(nbin))