annotate align2database.py @ 13:292186c14b08

Uploaded
author xuebing
date Sat, 10 Mar 2012 08:17:36 -0500
parents b7f1d9f8f3bc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
1 '''
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
2 align mulitple bed to one bed
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
3 python align_multiple.py hmChIP_mm9_peak_bed/mm9-GSE19999_PolII_P25_all.cod.bed hmChIP_mm9_peak_bed/ test.txt test.pdf 100 5000
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
4 '''
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
5
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
6 import os,sys,random
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
7 def main():
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
8 queryfile = sys.argv[1]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
9 inpath = sys.argv[2]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
10 outputdata = sys.argv[3]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
11 outputerr = sys.argv[4]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
12 barplotpdf = sys.argv[5]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
13 min_feat = sys.argv[6] # min features overlap
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
14 windowsize = sys.argv[7]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
15 anchor = sys.argv[8]
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
16 span = sys.argv[9] # loess smooth parameter
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
17
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
18 inpath = inpath.rstrip('/')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
19 #os.system('rm '+inpath+'/*tmp*')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
20
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
21 infiles = os.listdir(inpath)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
22
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
23 #print len(infiles),' files\n'
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
24 i = 0
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
25 for infile in infiles:
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
26 if 'tmp' in infile:
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
27 #os.system('rm '+inpath+'/'+infile)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
28 continue
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
29 i = i +1
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
30 print i,infile
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
31 output = infile.split('/')[-1]+'-to-'+queryfile.split('/')[-1]#'.summary'
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
32 if anchor == 'database':
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
33 command = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -b '+inpath+'/'+infile+' -a '+queryfile+' -o '+output+' --summary-only -q -w '+windowsize
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
34 else:
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
35 command = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -a '+inpath+'/'+infile+' -b '+queryfile+' -o '+output+' --summary-only -q -w '+windowsize
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
36 #print command+'\n'
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
37 os.system(command)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
38 print 'start visualization...'
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
39 # visualize
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
40 rscriptfile = 'f-'+str(random.random())+'.r'
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
41 r = open(rscriptfile,'w')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
42 r.write("files <- dir('.','summary',full.name=T)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
43 #r.write("print(files)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
44 r.write("x <- read.table(files[1])\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
45 r.write("err <- read.table(gsub('summary','standarderror',files[1]))\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
46 r.write("for (filename in files[2:length(files)]){\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
47 r.write(" x <- rbind(x,read.table(filename))\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
48 r.write(" err <- rbind(err,read.table(gsub('summary','standarderror',filename)))\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
49 r.write("}\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
50 r.write("x <- x[x[,2] > "+min_feat+",]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
51 r.write("err <- err[err[,2] > "+min_feat+",]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
52 r.write("name <- as.character(x[,1])\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
53 r.write("nfeat <- x[,2]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
54 r.write("x <- x[,3:ncol(x)]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
55 r.write("err <- err[,3:ncol(err)]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
56 r.write("for (i in 1:nrow(x)) {\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
57 r.write(" name[i] <- strsplit(name[i],'-to-')[[1]][1]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
58 r.write("}\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
59 # remove rows that have no variation, which cause problem in heatmap. This is the case when align to itself
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
60 r.write("toremove <- seq(nrow(x))\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
61 r.write("for (i in 1:nrow(x)){\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
62 r.write(" toremove[i] <- all(x[i,] == x[i,1])\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
63 r.write("}\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
64 r.write("x <- x[!toremove,]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
65 r.write("err <- err[!toremove,]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
66 r.write("name <- name[!toremove]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
67 r.write("nfeat <- nfeat[!toremove]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
68 r.write("write.table(cbind(name,nfeat,x),file='"+outputdata+"',sep ='\\t',quote=F,row.names=F,col.names=F)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
69 r.write("write.table(cbind(name,nfeat,err),file='"+outputerr+"',sep ='\\t',quote=F,row.names=F,col.names=F)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
70
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
71 r.write("pdf('"+barplotpdf+"')\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
72 r.write("heatmap(t(scale(t(as.matrix(x,nc=ncol(x))))),Colv=NA,labRow=name,labCol=NA,margins=c(1,8),cexRow=0.02+1/log(nrow(x)),col=heat.colors(1000))\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
73
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
74 if windowsize != '0' :
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
75 r.write("xticks <- seq(-"+windowsize+","+windowsize+",length.out=100)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
76 r.write("xlab <- 'relative position (bp)'\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
77 else:
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
78 r.write("xticks <- seq(100)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
79 r.write("xlab <- 'relative position (bin)'\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
80
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
81 #r.write("plot(xticks,colSums(t(scale(t(as.matrix(x,nc=ncol(x)))))),xlab='relative position (bp)',type='l',lwd=2,main='total signal')\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
82 r.write("for (i in 1:nrow(x)) {\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
83 r.write(" avg <- x[i,]/nfeat[i]\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
84 #r.write(" maxv <- max(avg)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
85 #r.write(" minv <- min(avg)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
86 #r.write(" medv <- median(avg)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
87 #r.write(" if (maxv < "+fold+"*medv | minv*"+fold+">medv){next}\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
88 #smooth
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
89 if float(span) >= 0.1:
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
90 r.write(" avg = loess(as.numeric(avg)~xticks,span="+span+")$fitted\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
91 r.write(" err[i,] = loess(as.numeric(err[i,])~xticks,span="+span+")$fitted\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
92 r.write(" par(cex=1.5)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
93 r.write(" plot(xticks,avg,ylab='average coverage',main=paste(name[i],'\n n=',nfeat[i],sep=''),xlab=xlab,type='l',lwd=1,ylim=c(min(avg-err[i,]),max(avg+err[i,])))\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
94 r.write(" polygon(c(xticks,rev(xticks)),c(avg+err[i,],rev(avg-err[i,])),col='slateblue1',border=NA)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
95 r.write(" lines(xticks,avg,type='l',lwd=1)\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
96 r.write("}\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
97 r.write("dev.off()\n")
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
98 r.close()
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
99 os.system("R --vanilla < "+rscriptfile)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
100 os.system('rm '+rscriptfile)
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
101 os.system('rm *.summary')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
102 os.system('rm *.standarderror')
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
103
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
104 main()