Mercurial > repos > alermine > nebula
view [APliBio]Nebula tools suite/Nebula/MakeTSSdist/makeTSSdist.R @ 4:0b8b39c2ce01 draft default tip
Uploaded
author | alermine |
---|---|
date | Wed, 14 Nov 2012 06:04:04 -0500 |
parents | 2ec3ba0e9e70 |
children |
line wrap: on
line source
#usage $0 STEP RIGHT chipPeaks outputFile.png output.txt [controlPeaks] [1 for pdf] args <- commandArgs() print (args) myStep <- type.convert(args[4]) maxValue <- type.convert(args[5]) dataTable <-read.table(file=paste(args[6],".genes.ClosestPeakDist", sep=""), header=TRUE); chip.genes.ClosestPeakDist<-data.frame(dataTable) ifReg <- 0 if (length(unique(chip.genes.ClosestPeakDist$Reg))>1) { ifReg <- 1 } ifControl <- 0 ifPDF <- 0 if (length(args)>=10) { ifPDF=args[10] } if (length(args)==9 & args[9]==1) { ifPDF=1 } library(Hmisc) if (length(args)>=9 & args[9]!=1 & args[9]!=0) { dataTable <-read.table(file=paste(args[9],".genes.ClosestPeakDist", sep=""), header=TRUE); control.genes.ClosestPeakDist<-data.frame(dataTable) ifControl <- 1 } if (ifReg & ifControl) { if (ifPDF==1) { pdf(file = args[7], width = 19, height = 8, pointsize = 20, bg = "white") } else { png(filename = args[7], width = 1440 , height = 680, units = "px", pointsize = 20, bg = "white", res = NA) plot(1:10) } op <- par(mfrow = c(2,3)) } else { if (ifPDF==1) { pdf(file = args[7], width = 10, height = 13, pointsize = 20, bg = "white") } else { png(filename = args[7], width = 680, height = 880, units = "px", pointsize = 20, bg = "white", res = NA) plot(1:10) } # plot(1:10) op <- par(mfrow = c(2,1)) } myColor <- 1 myColor[1] <- colors()[131] myColor[2] <- "darkolivegreen3" myColor[3] <- "azure4" myColor[4] <- "royalblue3" myColor[5] <- colors()[17] myColorControl <- 1 myColorControl[1] <- colors()[24] myColorControl[2] <- colors()[278] myColorControl[3] <- colors()[305] myColorControl[4] <- colors()[219] myColorControl[5] <- colors()[343] #for cumulative: dist_real_f <- chip.genes.ClosestPeakDist if (ifControl) { dist_control_f <- control.genes.ClosestPeakDist } step <- myStep lim <- maxValue x <- 0 count <- 1 countL <-1 n.types <- 1 myLevels <- 0 countTotalCont <- 0 countTotal <-0 countLCont <- 0 cumTotalCont <- 0 if (ifReg) { n.types <- length(levels(chip.genes.ClosestPeakDist$Reg)) myLevels <- levels(chip.genes.ClosestPeakDist$Reg) cum = matrix( 0, nrow=lim/step +1, ncol=n.types, byrow = TRUE) for (i in c(1:n.types)) { t <- which ((dist_real_f$Reg==myLevels[i])) countL[i] <- length(t) } count <-1 for (i in seq(length=lim/step +1, from=0, by=step)) { for (t in c(1:n.types)) { tt <- which ((dist_real_f$Reg==myLevels[t])&(dist_real_f$Dist<=i)&(dist_real_f$Dist>=-i)) cum[count,t] <- length(tt) } x[count] <- i count <- count + 1 } ymax <- max(cum[,1]/countL[1], na.rm=TRUE) for (i in c(2:n.types)) { ymax <- max(ymax,max(cum[,i]/countL[i], na.rm=TRUE)) } myLocCol <- myColor[2] par(mar=c(5.1, 7.1, 4.1, 2.1)) plot (x,cum[,1]/countL[1] ,col = myColor[2],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" , ylim=c(0,ymax)) for (i in c(2:n.types)) { colorr <- i+1 myLocCol <- c(myLocCol,myColor[colorr]) lines (x,cum[,i]/countL[i] ,col = myColor[colorr],type="l", lwd = 2) print (myColor[colorr]) } gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=lim/gradi+1, from=0, by=gradi) axisxlab <- paste(axisx/1000,"", sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) ymax <- max(cum[,i]/countL[i], na.rm=TRUE) minor.tick(nx=5,tick.ratio=0.5) legend(lim*0.45, ymax*0.45, myLevels, cex=1, lwd = 2, bty = "n", col = myLocCol, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") if (ifControl) { count <-1 n.types <- length(levels(control.genes.ClosestPeakDist$Reg)) myLevels <- levels(control.genes.ClosestPeakDist$Reg) cumCont = matrix( 0, nrow=lim/step +1, ncol=n.types, byrow = TRUE) for (i in c(1:n.types)) { t <- which ((dist_control_f$Reg==myLevels[i])) countLCont[i] <- length(t) } for (i in seq(length=lim/step +1, from=0, by=step)) { for (t in c(1:n.types)) { tt <- which ((dist_control_f$Reg==myLevels[t])&(dist_control_f$Dist<=i)&(dist_control_f$Dist>=-i)) cumCont[count,t] <- length(tt) } x[count] <- i count <- count + 1 } ymax <- max(cumCont[,1]/countLCont[1], na.rm=TRUE) for (i in c(2:n.types)) { ymax <- max(ymax,max(cumCont[,i]/countLCont[i], na.rm=TRUE)) } myLocColCntr <- myColorControl[2] plot (x,cumCont[,1]/countLCont[1] ,col = myLocColCntr[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" , ylim=c(0,ymax)) for (i in c(2:n.types)) { colorr <- i+1 myLocColCntr <- c(myLocColCntr,myColorControl[colorr]) lines (x,cumCont[,i]/countLCont[i] ,col = myColorControl[colorr],type="l", lwd = 2) } if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=lim/gradi+1, from=0, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) minor.tick(nx=5,tick.ratio=0.5) legend(lim*0.45, ymax*0.45, myLevels, cex=1 , lwd = 2, bty = "n", col = myLocColCntr, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") #real_vs_control_cumulative: count <-1 countTotal <- length(dist_real_f$Reg) cumTotal <- 0 for (i in seq(length=lim/step +1, from=0, by=step)) { t <- which ((dist_real_f$Dist<=i)&(dist_real_f$Dist>=-i)) cumTotal[count] <- length(t) x[count] <- i count <- count + 1 } plot (x,cumTotal/countTotal ,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" ) gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=lim/gradi+1, from=0, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) ymax <- max(cumTotal/countTotal, na.rm=TRUE) minor.tick(nx=5,tick.ratio=0.5) countTotalCont <- length(dist_control_f$Reg) cumTotalCont <- 0 count <- 1 for (i in seq(length=lim/step +1, from=0, by=step)) { t <- which ((dist_control_f$Dist<=i)&(dist_control_f$Dist>=-i)) cumTotalCont[count] <- length(t) x[count] <- i count <- count + 1 } lines (x,cumTotalCont/countTotalCont ,col = colors()[328],type="l", lwd = 2) legend(lim*0.45, ymax*0.45, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") } } else { countTotal <- length(dist_real_f$Reg) cumTotal <- 0 count <-1 gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } for (i in seq(length=lim/step +1, from=0, by=step)) { t <- which ((dist_real_f$Dist<=i)&(dist_real_f$Dist>=-i)) cumTotal[count] <- length(t) x[count] <- i count <- count + 1 } par(mar=c(5.1, 7.1, 4.1, 2.1)) plot (x,cumTotal/countTotal ,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(0, lim),xaxt="n" ) axisx <- seq(length=lim/gradi+1, from=0, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (cumulative)") ymax <- max(cumTotal/countTotal, na.rm=TRUE) if (ifControl) { countTotalCont <- length(dist_control_f$Reg) cumTotalCont <- 0 count <- 1 for (i in seq(length=lim/step +1, from=0, by=step)) { t <- which ((dist_control_f$Dist<=i)&(dist_control_f$Dist>=-i)) cumTotalCont[count] <- length(t) x[count] <- i count <- count + 1 } lines (x,cumTotalCont/countTotalCont ,col = colors()[328],type="l", lwd = 2) legend(lim*0.45, ymax*0.45, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) } else { legend(lim*0.45, ymax*0.45, c("ChIP"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1]), lty = c(1), pt.bg= c(myColor[1]) , merge = TRUE) } } sink(args[8], append=FALSE, split=FALSE) if (ifReg) { if (ifControl) { cat (paste("Dist_TSS","% genes w/ a peak in ChIP","% genes w/ a peak in control",sep='\t')) cat("\t") for (i in c(1:n.types)) { cat(paste("% ", myLevels[i]," genes w/ a peak in ChIP", sep="")) cat("\t") } for (i in c(1:n.types)) { cat(paste("% ", myLevels[i]," genes w/ a peak in Control", sep="")) cat("\t") } cat("\n") for (i in c(1:length(x))) { cat(paste(x[i],cumTotal[i]/countTotal,cumTotalCont[i]/countTotalCont,sep="\t")) cat("\t") for (t in c(1:n.types)) { cat(paste(cum[i,t]/countL[t],"\t", sep="")) } for (t in c(1:n.types)) { cat(paste(cumCont[i,t]/countLCont[t],"\t", sep="")) } cat("\n") } }else { cat (paste("Dist_TSS","\t",sep='')) for (i in c(1:n.types)) { cat(paste("% ", myLevels[i]," genes w/ a peak in ChIP", "\t", sep="")) } cat("\n") for (i in c(1:length(x))) { cat(paste(x[i],"\t",sep="")) for (t in c(1:n.types)) { cat(paste(cum[i,t]/countL[t],"\t", sep="")) } cat("\n") } } } else { if (ifControl) { cat (paste("Dist_TSS","% genes w/ a peak in ChIP","% genes w/ a peak in control",sep='\t')) cat("\n") for (i in c(1:length(x))) { cat(paste(x[i],cumTotal[i]/countTotal,cumTotalCont[i]/countTotalCont,sep="\t")) cat("\n") } }else { cat (paste("Dist_TSS","% genes w/ a peak in ChIP",sep='\t')) cat("\n") for (i in c(1:length(x))) { cat(paste(x[i],cumTotal[i]/countTotal,sep="\t")) cat("\n") } } } #stop sinking: sink() #around TSS: lim <- maxValue step <- myStep my_breaks <- seq(length=lim/step*2 +1, from=-lim, by=step) chip.genes <- read.table(file=paste(args[6],".genes", sep=""), header=TRUE) ; dist_real_f <- chip.genes if (ifControl) { control.genes <- read.table(file=paste(args[9],".genes", sep=""), header=TRUE) ; dist_control_f<-data.frame(control.genes) } if (ifReg) { #n.types <- length(levels(chip.genes.ClosestPeakDist$Reg)) #myLevels <- levels(dist_real_f$Reg) t<- which (dist_real_f$Reg==myLevels[1]) values_real <-dist_real_f$Dist[t] hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) ymax <- max(hTSSreal$density, na.rm=TRUE) for (i in c(2:n.types)) { t<- which (dist_real_f$Reg==myLevels[i]) values_real <-dist_real_f$Dist[t] hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) ymax <- max(ymax,max(hTSSreal$density, na.rm=TRUE)) } t<- which (dist_real_f$Reg==myLevels[1]) values_real <-dist_real_f$Dist[t] hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) plot (hTSSreal$mids,hTSSreal$density,col = myLocCol[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),ylim = c(0, ymax), xaxt="n" ) title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") for (i in c(2:n.types)) { t<- which (dist_real_f$Reg==myLevels[i]) values_real <-dist_real_f$Dist[t] hTSSreal = hist(values_real,plot=FALSE,breaks = c(min(values_real),my_breaks,max(values_real)) ) lines (hTSSreal$mids,hTSSreal$density,col = myLocCol[i],type="l", lwd = 2) } legend(lim*0.1, ymax*0.9, myLevels, cex=1 , lwd = 2, bty = "n", col = myLocCol, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) #minor.tick(nx=10,tick.ratio=0.5) if (ifControl) { t<- which (dist_control_f$Reg==myLevels[1]) values_control <-dist_control_f$Dist[t] hTSScontrol= hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) ymax <- max(hTSScontrol$density, na.rm=TRUE) for (i in c(2:n.types)) { t<- which (dist_control_f$Reg==myLevels[i]) values_control <-dist_control_f$Dist[t] hTSScontrol = hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) ymax <- max(ymax,max(hTSScontrol$density, na.rm=TRUE)) } t<- which (dist_control_f$Reg==myLevels[1]) values_control <-dist_control_f$Dist[t] hTSScontrol= hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) plot (hTSScontrol$mids,hTSScontrol$density,col = myLocColCntr[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),ylim = c(0, ymax),xaxt="n" ) title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") for (i in c(2:n.types)) { t<- which (dist_control_f$Reg==myLevels[i]) values_control <-dist_control_f$Dist[t] hTSScontrol = hist(values_control,plot=FALSE,breaks = c(min(values_control),my_breaks,max(values_control)) ) lines (hTSScontrol$mids,hTSScontrol$density,col = myLocColCntr[i],type="l", lwd = 2) } gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) legend(lim*0.1, ymax*0.9, myLevels, cex=1 , lwd = 2, bty = "n", col = myLocColCntr, lty = c(1), pt.bg= c(myLocCol) , merge = TRUE) # minor.tick(nx=10,tick.ratio=0.5) #control vs real values_real <-dist_real_f$Dist hTSSreal = hist(values_real, plot=FALSE, breaks = c(min(values_real),my_breaks,max(values_real)) ) plot (hTSSreal$mids,hTSSreal$density,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),xaxt="n") title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") ymax <- max(hTSSreal$density, na.rm=TRUE) values_control <-dist_control_f$Dist hTSScontrol = hist(values_control, plot=FALSE, breaks = c(min(values_control),my_breaks,max(values_control)) ) lines (hTSScontrol$mids,hTSScontrol$density,col = colors()[328],type="l", lwd = 2) legend(lim*0.2, ymax*0.9, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) # minor.tick(nx=10,tick.ratio=0.5) } } else { values_real <-dist_real_f$Dist hTSSreal = hist(values_real, plot=FALSE, breaks = c(min(values_real),my_breaks,max(values_real)) ) plot (hTSSreal$mids,hTSSreal$density,col = myColor[1],type="l", main="",xlab="",ylab="", lwd = 2, xlim = c(-lim, lim),xaxt="n") title( main="",xlab="Distance from TSS (Kb)",ylab="Proportion of genes with a peak\nat a given distance (density)") ymax <- max(hTSSreal$density, na.rm=TRUE) if (ifControl) { values_control <-dist_control_f$Dist hTSScontrol = hist(values_control, plot=FALSE, breaks = c(min(values_control),my_breaks,max(values_control)) ) lines (hTSScontrol$mids,hTSScontrol$density,col = colors()[328],type="l", lwd = 2) legend(lim*0.2, ymax*0.9, c("ChIP","Control"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1], colors()[328]), lty = c(1), pt.bg= c(myColor[1], colors()[328]) , merge = TRUE) } else { legend(lim*0.2, ymax*0.9, c("ChIP"), cex=1 , lwd = 2, bty = "n", col = c(myColor[1]), lty = c(1), pt.bg= c(myColor[1]) , merge = TRUE) } gradi <- 1000 if (lim>10000) { gradi <- 10000 } if (lim<3000) { gradi <- 500 } axisx <- seq(length=2*lim/gradi+1, from=-lim, by=gradi) axisxlab <- paste(axisx/1000, sep = "") axis(1, at=axisx,labels=axisxlab , las=1, cex.axis=1) # minor.tick(nx=10,tick.ratio=0.5) } dev.off() q(); cat (paste("peak height","# peaks in ChIP","# peaks in Control","#control/chip","\n",sep='\t')) for (xval in c(minValue:maxValue)) { for (i in (1:length(chipHist$mids))) { if (xval==chipHist$mids[i]) { ychip <- chipHist$counts[i] } } for (i in (1:length(controlHist$mids))) { if (xval==controlHist$mids[i]) { ycontrol <- controlHist$counts[i] } } cat (paste(xval,ychip,ycontrol,ycontrol/ychip,"\n",sep='\t')) }