annotate binnedAverage.py @ 13:292186c14b08

Uploaded
author xuebing
date Sat, 10 Mar 2012 08:17:36 -0500
parents b7f1d9f8f3bc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
1 '''
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
2 get binned score of intervals,allow extension
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
3 '''
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
4
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
5 import os,sys,numpy,random,string
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
6
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
7 from resize import *
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
8
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
9 from bx.bbi.bigwig_file import BigWigFile
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
10
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def binning(x,n):
    '''
    Average a 1-D sequence of values into n consecutive bins.

    x: 1-D array (or sequence) of numbers.
    n: number of bins (integer).

    Returns a float numpy array of length n holding the mean of each bin.
    An empty x yields n zeros.

    Bin k covers x[len(x)*k//n : len(x)*(k+1)//n], so the bins tile x
    without overlapping.  When len(x) < n some bins would be empty; each
    bin is widened to at least one element so its mean is always defined
    (neighbouring bins then repeat the same element).
    '''
    y = numpy.zeros(n,dtype=float)
    x = numpy.asarray(x,dtype=float)
    length = len(x)
    if length == 0:
        return y
    for k in range(n):
        # integer arithmetic avoids float rounding at the bin edges
        i = (length*k)//n
        # never let a bin be empty: mean() of an empty slice is nan
        j = max(i+1,(length*(k+1))//n)
        y[k] = x[i:j].mean()
    return y
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
23
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def getBinnedScore(bwfile,intvfile,outfile,outplot,nbin):
    '''
    Write binned bigWig scores for each interval and plot their average.

    bwfile:   path to the bigWig file holding the scores.
    intvfile: path to a tab-delimited interval file whose first three
              columns are chrom, start, end; a sixth column, if present,
              is read as the strand.
    outfile:  path of the output table: each input line followed by nbin
              tab-separated bin averages.
    outplot:  path of the PDF written by R (only produced when nbin > 1).
    nbin:     number of bins per interval (integer).

    For every interval the per-base scores are fetched, reversed when the
    strand column is '-', missing values (nan) are set to 0, and the
    result is averaged into nbin bins with binning().  Intervals for which
    the bigWig reader returns None are reported on stdout and written with
    all-zero bins.  Blank lines in intvfile are skipped.

    When nbin > 1 an R script is generated and run to plot the per-bin
    mean across intervals with a +/- standard-error band.
    '''
    # local imports: keeps the block self-contained without touching the
    # file-level import line
    import subprocess,tempfile

    zeros = '\t'.join(['0']*nbin)
    # bigWig is a binary format, so open in 'rb'; 'with' closes every
    # handle even if an interval line fails to parse
    with open(bwfile,'rb') as fbw, open(intvfile) as fin, open(outfile,'w') as out:
        bw = BigWigFile(file=fbw)
        for line in fin:
            #chrom,start,end,name,score,strand
            line = line.strip()
            if not line:
                continue
            flds = line.split('\t')
            #get the score at base resolution as an array
            scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
            # identity test: '== None' on a numpy array compares elementwise
            if scores is None:
                print('not found:\t'+line)
                out.write(line+'\t'+zeros+'\n')
                continue
            # reverse if on minus strand (intervals without a strand
            # column are treated as plus strand)
            if len(flds) > 5 and flds[5] == '-':
                scores = scores[::-1]
            # no score = 0
            scores = numpy.nan_to_num(scores)
            # bin the data
            binned = binning(scores,nbin)
            out.write(line+'\t'+'\t'.join(map(str,binned))+'\n')
    # plot
    if nbin > 1:
        rcode = [
            "options(warn=-1)\n",
            "x <- read.table('"+outfile+"',sep='\t')\n",
            # keep only the trailing nbin columns (the bin averages)
            "x <- x[,(ncol(x)+1-"+str(nbin)+"):ncol(x)]\n",
            "pdf('"+outplot+"')\n",
            "avg <- apply(x,2,mean)\n",
            "err <- apply(x,2,sd)/sqrt(nrow(x))\n",
            "print(avg)\n",
            "print(err)\n",
            "ylim=c(min(avg-err),max(avg+err))\n",
            "xticks <- seq(ncol(x))\n",
            "plot(xticks,avg,xlab='',ylab='average',type='l',lwd=0,ylim=ylim)\n",
            "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n",
            "lines(xticks,avg,type='l',lwd=1)\n",
            "dev.off()\n",
        ]
        # tempfile gives a collision-free name outside the working directory
        rscript = tempfile.NamedTemporaryFile(mode='w',suffix='.R',delete=False)
        try:
            rscript.write(''.join(rcode))
            rscript.close()
            # feed the script on stdin, as 'R --vanilla < script' did,
            # but without going through a shell
            with open(rscript.name) as rin:
                subprocess.call(['R','--vanilla'],stdin=rin)
        finally:
            rscript.close()
            os.remove(rscript.name)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
74
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
# Command-line entry point:
#   binnedAverage.py <bigwig> <intervals> <nbin> <outfile> <outplot>
# Guarded so that importing this module does not run the analysis.
if __name__ == '__main__':
    # echo the arguments (same output as before)
    print(sys.argv)
    if len(sys.argv) != 6:
        sys.exit('usage: '+sys.argv[0]+' <bigwig> <intervals> <nbin> <outfile> <outplot>')
    bwfile,intvfile,nbin,outfile,outplot = sys.argv[1:]
    getBinnedScore(bwfile,intvfile,outfile,outplot,int(nbin))