annotate metaintv2.py @ 23:4e646baac551

Uploaded
author xuebing
date Sat, 31 Mar 2012 11:53:40 -0400
parents 16ba480adf96
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
20
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
1 '''
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
2 get binned score of intervals,allow extension
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
3 '''
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
4
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
5 import os,sys,numpy
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
6
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
7 from resize import *
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
8
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
9 from bx.bbi.bigwig_file import BigWigFile
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
10
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
def binning(x,n):
    '''
    Average x into n consecutive, non-overlapping bins.

    x: 1-D sequence of numeric values (list or numpy array)
    n: number of bins; coerced to int because the command-line parser
       may deliver it as a float

    Returns a float array of length n holding the mean of each bin.
    An empty x gives all zeros.  When x has fewer values than n, some
    neighbouring bins reuse the same value so that no bin is empty.
    '''
    n = int(n)
    x = numpy.asarray(x,dtype=float)
    y = numpy.zeros(n,dtype=float)
    size = len(x)
    if size == 0:
        return y
    for k in range(n):
        # integer arithmetic keeps the bin edges exact; bin k covers
        # x[i:j] and bin k+1 starts exactly at j, so bins never overlap
        i = (size*k)//n
        j = (size*(k+1))//n
        # with fewer values than bins the slice can be empty; widen it to
        # one element so mean() never sees an empty array (which gives nan)
        y[k] = x[i:max(j,i+1)].mean()
    return y
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
23
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
def getBinnedScore(bwfile,intvfile,nbin):
    '''
    Average the binned bigWig signal over a set of intervals.

    bwfile:   path to a bigWig file
    intvfile: path to a tab-delimited interval file with columns
              chrom,start,end,name,score,strand; a line with fewer than
              six columns is treated as plus strand, blank lines are skipped
    nbin:     number of bins each interval is summarised into

    Returns (avg,err): two arrays of length nbin with the per-bin mean
    across intervals and the standard error of that mean.  Intervals for
    which the bigWig returns nothing are reported on stdout and left out.
    If no interval is left, both arrays are nan.
    '''
    nbin = int(nbin)
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so open it in binary mode; the with
    # blocks close both files even if parsing fails half way
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            for line in fin:
                if not line.strip():
                    continue
                #chrom,start,end,name,score,strand
                flds = line.strip().split('\t')
                #get the score at base resolution as an array
                scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                # 'is None', not '== None': comparing an array with ==
                # is element-wise and cannot be used as a condition
                if scores is None:
                    sys.stdout.write('not found:\t'+line+'\n')
                    continue
                N = N + 1
                # reverse if on minus strand
                if len(flds) > 5 and flds[5] == '-':
                    scores = scores[::-1]
                # no score = 0
                scores = numpy.nan_to_num(scores)
                # bin the data
                binned = binning(scores,nbin)
                avg = avg + binned
                sqr = sqr + binned**2
    # compute avg and standard error of the mean
    avg = avg / N
    # rounding can push the variance a hair below zero; clamp it so the
    # square root does not turn a zero spread into nan
    var = numpy.maximum(sqr/N-avg**2,0)
    err = (var**0.5)/(N**0.5)
    return avg,err
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
57
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    '''
    Binned average signal over the left flank, centre and right flank of
    every interval, joined into one profile.

    bwfile:   path to a bigWig file
    intvfile: path to the interval file
    nbins:    n1,n2,n3 - number of bins for left flank, centre, right flank
    exts:     l1,l2,l3,l4 - the three regions are
              [start-l1,start+l2], [start+l2,end-l3], [end-l3,end+l4]

    Returns (avg,err), each the concatenation of the three per-region
    arrays returned by getBinnedScore.

    The resized intervals go through a scratch file intvfile+'.tmp',
    which is removed before returning.
    NOTE(review): resize() comes from the resize module and is not
    visible here; it is assumed to write the resized intervals to its
    second argument - confirm against resize.py.
    '''
    tmpfile = intvfile+'.tmp'
    # (new start, new end) expressions handed to resize(), in output order
    regions = [
        ('start-'+str(exts[0]),'start+'+str(exts[1])),   # left extension
        ('start+'+str(exts[1]),'end-'+str(exts[2])),     # center region
        ('end-'+str(exts[2]),'end+'+str(exts[3])),       # right region
    ]
    avgs = []
    errs = []
    try:
        for k,(newstart,newend) in enumerate(regions):
            resize(intvfile,tmpfile,newstart,newend,'stranded')
            # compute binned average
            avg,err = getBinnedScore(bwfile,tmpfile,nbins[k])
            avgs.append(avg)
            errs.append(err)
    finally:
        # do not leave the scratch file next to the input, even on error
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    return numpy.concatenate(avgs),numpy.concatenate(errs)
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
81
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
print(sys.argv)
# sys.argv[0] is the script path, so the six real arguments start at index 1
if len(sys.argv) != 7:
    sys.exit('usage: '+sys.argv[0]+' bwfile intvfile exts nbins outfile outplot')
bwfile,intvfile,exts,nbins,outfile,outplot = sys.argv[1:]
# bin counts are used as array sizes and loop bounds, so parse them as ints
nbins = [int(v) for v in nbins.split(',')]
# NOTE(review): exts is still parsed as floats, so resize() keeps receiving
# expressions such as 'start-1000.0' - confirm resize.py expects that
exts = numpy.fromstring(exts,sep=',')
avg, err = getExtendedBinScore(bwfile,intvfile,nbins,exts)

# one line of averages, one line of standard errors
with open(outfile,'w') as out:
    numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')
    numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')

# plot
# NOTE(review): outfile and outplot are pasted into the R source inside
# single quotes; a path containing a quote would break the script
rlines = [
    "options(warn=-1)\n",
    "x <- read.table('"+outfile+"')\n",
    "pdf('"+outplot+"')\n",
    "avg <- x[1,]\n",
    "err <- x[2,]\n",
    "print(x)\n",
    "ylim=c(min(avg-err),max(avg+err))\n",
    "xticks <- seq(ncol(x))\n",
    "plot(xticks,avg,ylab='average coverage',type='l',lwd=0,ylim=ylim)\n",
    "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n",
    "lines(xticks,avg,type='l',lwd=1)\n",
    "dev.off()\n",
]
try:
    with open("tmp.r","w") as rscript:
        rscript.writelines(rlines)
    os.system("R --vanilla < tmp.r")
finally:
    # remove the scratch script without shelling out to rm
    if os.path.exists("tmp.r"):
        os.remove("tmp.r")
16ba480adf96 Uploaded
xuebing
parents:
diff changeset
109