Mercurial > repos > alermine > nebula
comparison [APliBio]Nebula tools suite/Nebula/AnnotateGenes/geneDist.R @ 0:2ec3ba0e9e70 draft
Uploaded
author | alermine |
---|---|
date | Thu, 25 Oct 2012 08:18:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2ec3ba0e9e70 |
---|---|
1 args <- commandArgs() | |
2 input <- args[4] | |
3 pngFile <- args[5] | |
4 dataTable <-read.table(file=input, header=TRUE); | |
5 chip.data<-data.frame(dataTable) | |
6 ifReg <- 0 | |
7 if (length(unique(chip.data$Reg))>1) { | |
8 ifReg <- 1 | |
9 } | |
10 | |
11 ifPDF <- 0 | |
12 bootstrap <- 1 | |
13 | |
14 if (length(args)>=8) { | |
15 ifPDF=args[8] | |
16 bootstrap=args[9] | |
17 } | |
18 if (length(args)==7 & args[7]==1) { | |
19 ifPDF=1 | |
20 } | |
21 | |
22 ifControl <- 0 | |
23 if (length(args)>=7 & args[7]!=1 & args[7]!=0) { | |
24 dataTable <-read.table(file=args[7], header=TRUE); | |
25 control.data<-data.frame(dataTable) | |
26 ifControl <- 1 | |
27 | |
28 | |
29 } | |
30 | |
31 error.bar <- function(x, y, upper, lower=upper, length=0.1,...){ | |
32 if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper)) | |
33 stop("vectors must be same length") | |
34 arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...) | |
35 } | |
36 | |
37 | |
38 | |
39 logFile <- args[6] | |
40 sink(logFile, append=FALSE, split=FALSE) | |
41 | |
42 if (ifReg & ifControl) { | |
43 | |
44 if (ifPDF==1) { | |
45 pdf(file = pngFile, width = 14, height = 13, pointsize = 20, bg = "white") | |
46 } else { | |
47 png(filename = pngFile, width = 1140, height = 840, units = "px", pointsize = 20, bg = "white", res = NA) | |
48 plot(1:10) | |
49 } | |
50 op <- par(mfrow = c(3,1)) | |
51 } else { | |
52 if (ifReg | ifControl) { | |
53 | |
54 if (ifPDF==1) { | |
55 pdf(file = pngFile, width = 20, height = 17, pointsize = 20, bg = "white") | |
56 } else { | |
57 png(filename = pngFile, width = 1580, height = 1230, units = "px", pointsize = 20, bg = "white", res = NA) | |
58 plot(1:10) | |
59 } | |
60 op <- par(mfrow = c(2,1)) | |
61 } else { | |
62 if (ifPDF==1) { | |
63 pdf(file = pngFile, width = 22, height = 8, pointsize = 20, bg = "white") | |
64 } else { | |
65 png(filename = pngFile, width = 1580, height = 530, units = "px", pointsize = 20, bg = "white", res = NA) | |
66 plot(1:10) | |
67 } | |
68 op <- par(mfrow = c(1,1)) | |
69 } | |
70 } | |
71 myColor <- 1 | |
72 myColor[1] <- colors()[131] | |
73 myColor[2] <- colors()[59] | |
74 myColor[3] <- colors()[76] | |
75 myColor[4] <- colors()[88] | |
76 myColor[5] <- colors()[17] | |
77 myColor[6] <- colors()[565] | |
78 myColor[7] <- colors()[454] | |
79 myColor[8] <- colors()[401] | |
80 myColor[9] <- colors()[99] | |
81 myColorControl <- 1 | |
82 myColorControl[1] <- colors()[24] | |
83 myColorControl[2] <- colors()[278] | |
84 myColorControl[3] <- colors()[305] | |
85 myColorControl[4] <- colors()[219] | |
86 myColorControl[5] <- colors()[343] | |
87 myColorControl[6] <- colors()[245] | |
88 myLevels <- 0 | |
89 nn <- colnames(chip.data) | |
90 cc <- c(1:41) | |
91 colnames(chip.data) <- cc | |
92 countTotal <- length(unique(chip.data$"1")) | |
93 tt <- which(chip.data$"16">0) | |
94 countTotalEnh <- length(unique(chip.data$"1"[tt])) | |
95 tt <- which(chip.data$"10">0) | |
96 countTotalProm <- length(unique(chip.data$"1"[tt])) | |
97 tt <- which(chip.data$"12">0) | |
98 countTotalImDown <- length(unique(chip.data$"1"[tt])) | |
99 tt <- which(chip.data$"18">0) | |
100 countTotalIntra <- length(unique(chip.data$"1"[tt])) | |
101 tt <- which(chip.data$"20">0) | |
102 countTotal5kbD <- length(unique(chip.data$"1"[tt])) | |
103 tt <- which(chip.data$"28">0) | |
104 countTotal1IntronI <- length(unique(chip.data$"1"[tt])) | |
105 tt <- which(chip.data$"32">0) | |
106 countTotalExonsI <- length(unique(chip.data$"1"[tt])) | |
107 tt <- which(chip.data$"36">0) | |
108 countTotalIntronsI <- length(unique(chip.data$"1"[tt])) | |
109 tt <- which(chip.data$"40">0) | |
110 countTotalJonctionsI <- length(unique(chip.data$"1"[tt])) | |
111 | |
112 names <- c("Enh.","Prom.","Imm.Down.","Intrag.","GeneDown.","F.Intron","Exons","2,3,etc.Introns","E.I.Junctions") | |
113 yChIPTotal <- c (countTotalEnh/countTotal,countTotalProm/countTotal, countTotalImDown/countTotal,countTotalIntra/countTotal,countTotal5kbD/countTotal,countTotal1IntronI/countTotal,countTotalExonsI/countTotal,countTotalIntronsI/countTotal,countTotalJonctionsI/countTotal) | |
114 if(!ifReg&!ifControl) { | |
115 par(mar=c(5.1, 7.1, 4.1, 2.1)) | |
116 barplot(yChIPTotal,xlab="",beside=TRUE, col=c(myColor), names.arg=c(names),ylab="Proportion of genes with a peak") | |
117 cat ("Proportion of genes with a peak in a given genomic region:\n") | |
118 cat (paste(c(names),sep='\t')) | |
119 cat("\n") | |
120 cat (paste(c(yChIPTotal),sep='\t')) | |
121 cat("\n\n") | |
122 } | |
123 if (ifControl) { | |
124 if (bootstrap>1) { | |
125 yControlTotalMatrix <- NULL | |
126 } | |
127 for (fileNumber in 1:bootstrap) { | |
128 | |
129 if (fileNumber>=2) { | |
130 dataTable <-read.table(file=paste(args[7],fileNumber,sep=""), header=TRUE); | |
131 control.data<-data.frame(dataTable) | |
132 } | |
133 | |
134 colnames(control.data) <- cc | |
135 countTotalCntr <- length(unique(control.data$"1")) | |
136 tt <- which(control.data$"16">0) | |
137 countTotalEnhCntr <- length(unique(control.data$"1"[tt])) | |
138 tt <- which(control.data$"10">0) | |
139 countTotalPromCntr <- length(unique(control.data$"1"[tt])) | |
140 tt <- which(control.data$"12">0) | |
141 countTotalImDownCntr <- length(unique(control.data$"1"[tt])) | |
142 tt <- which(control.data$"18">0) | |
143 countTotalIntraCntr <- length(unique(control.data$"1"[tt])) | |
144 tt <- which(control.data$"20">0) | |
145 countTotal5kbDCntr <- length(unique(control.data$"1"[tt])) | |
146 tt <- which(control.data$"28">0) | |
147 countTotal1IntronICntr <- length(unique(control.data$"1"[tt])) | |
148 tt <- which(control.data$"32">0) | |
149 countTotalExonsICntr <- length(unique(control.data$"1"[tt])) | |
150 tt <- which(control.data$"36">0) | |
151 countTotalIntronsICntr <- length(unique(control.data$"1"[tt])) | |
152 tt <- which(control.data$"40">0) | |
153 countTotalJonctionsICntr <- length(unique(control.data$"1"[tt])) | |
154 yControlTotal <- c (countTotalEnhCntr/countTotalCntr,countTotalPromCntr/countTotalCntr, countTotalImDownCntr/countTotalCntr,countTotalIntraCntr/countTotalCntr,countTotal5kbDCntr/countTotalCntr,countTotal1IntronICntr/countTotalCntr,countTotalExonsICntr/countTotalCntr,countTotalIntronsICntr/countTotalCntr,countTotalJonctionsICntr/countTotalCntr) | |
155 if (bootstrap>1) { | |
156 yControlTotalMatrix <- rbind(yControlTotalMatrix,yControlTotal) | |
157 } | |
158 } | |
159 if (bootstrap>1) { | |
160 yControlTotal <- colMeans(yControlTotalMatrix) | |
161 Nrows <- nrow(yControlTotalMatrix) | |
162 colVars <- Nrows/(Nrows-1) * (colMeans(yControlTotalMatrix*yControlTotalMatrix)-colMeans(yControlTotalMatrix)^2) | |
163 } | |
164 if (!ifReg) { | |
165 cum = matrix( 0, nrow=2, ncol=length(names), byrow = TRUE) | |
166 for (i in c(1:length(names))) { | |
167 cum[1,i] <- yChIPTotal[i] | |
168 cum[2,i] <- yControlTotal[i] | |
169 } | |
170 if (bootstrap>1) { | |
171 wiskers <- matrix(c(colVars-colVars,sqrt(colVars)),2,length(names),byrow=TRUE)*1.96 | |
172 } | |
173 par(mar=c(5.1, 7.1, 4.1, 2.1)) | |
174 barx <- barplot(cum,xlab="",beside=TRUE, col=c(myColor[6],myColor[5]), legend = c("ChIP","Control"), names.arg=c(names),ylab="Proportion of genes with a peak") | |
175 cat ("Proportion of genes with a peak from the ChIP dataset in a given genomic region:\n") | |
176 cat (paste(c(names),sep='\t')) | |
177 cat("\n") | |
178 cat (paste(c(yChIPTotal),sep='\t')) | |
179 cat("\n\n") | |
180 cat ("Proportion of genes with a peak from the Control dataset in a given genomic region:\n") | |
181 cat (paste(c(names),sep='\t')) | |
182 cat("\n") | |
183 cat (paste(c(yControlTotal),sep='\t')) | |
184 cat("\n\n") | |
185 if (bootstrap>1) { | |
186 error.bar(barx,cum,wiskers) | |
187 cat ("Standard deviation for the Control dataset in a given genomic region:\n") | |
188 cat (paste(c(names),sep='\t')) | |
189 cat("\n") | |
190 cat (paste(c(sqrt(colVars)),sep='\t')) | |
191 cat("\n\n") | |
192 } | |
193 enrich <- NULL | |
194 for (i in c(1:length(names))) { | |
195 enrich[i] <- yChIPTotal[i]/yControlTotal[i]; | |
196 } | |
197 barplot(enrich-1,xlab="",beside=TRUE, col=c(myColor), names.arg=c(names),ylab="Enrichment in comparison\nwith the control dataset", yaxt="n") | |
198 minX <- min(enrich-1) | |
199 maxX <- max(enrich-1) | |
200 x = seq(length=11, from=round(minX*10)/10, by=round((maxX-minX)*10)/100) | |
201 axis(2, at=x,labels=x+1, las=2) | |
202 | |
203 cat ("Enrichment of genomic regions, ChIP peaks vs Control Peaks:\n") | |
204 cat (paste(c(names),sep='\t')) | |
205 cat("\n") | |
206 cat (paste(c(yChIPTotal/yControlTotal),sep='\t')) | |
207 cat("\n\n") | |
208 if (bootstrap>1) { | |
209 z <- (yChIPTotal-yControlTotal)/sqrt(colVars) | |
210 pvalues <- 2*pnorm(-abs(z)) | |
211 cat ("Two-side P-values for each genomic category:\n") | |
212 cat (paste(c(names),sep='\t')) | |
213 cat("\n") | |
214 cat (paste(c(yChIPTotal/yControlTotal),sep='\t')) | |
215 cat("\n\n") | |
216 } | |
217 } | |
218 } | |
219 if (ifReg) { | |
220 n.types <- length(levels(chip.data$"6")) | |
221 myLevels <- levels(chip.data$"6") | |
222 nlev <- length(names) | |
223 | |
224 if (ifControl) { | |
225 cum = matrix( 0, nrow=length(myLevels)+1, ncol=nlev, byrow = TRUE) | |
226 cumEnrichTotal = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE) | |
227 cumEnrichControl = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE) | |
228 }else { | |
229 cum = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE) | |
230 cumEnrichTotal = matrix( 0, nrow=length(myLevels), ncol=nlev, byrow = TRUE) | |
231 } | |
232 colReg <-NULL | |
233 for (r in c(1:length(myLevels))) { | |
234 tt <- which(chip.data$"6"==myLevels[r]) | |
235 subset.data <- (chip.data[tt,]) | |
236 countTotalSubset <- length(unique(subset.data$"1")) | |
237 tt <- which(subset.data$"16">0) | |
238 countTotalEnhSubset <- length(unique(subset.data$"1"[tt])) | |
239 tt <- which(subset.data$"10">0) | |
240 countTotalPromSubset <- length(unique(subset.data$"1"[tt])) | |
241 tt <- which(subset.data$"12">0) | |
242 countTotalImDownSubset <- length(unique(subset.data$"1"[tt])) | |
243 tt <- which(subset.data$"18">0) | |
244 countTotalIntraSubset <- length(unique(subset.data$"1"[tt])) | |
245 tt <- which(subset.data$"20">0) | |
246 countTotal5kbDSubset <- length(unique(subset.data$"1"[tt])) | |
247 tt <- which(subset.data$"28">0) | |
248 countTotal1IntronISubset <- length(unique(subset.data$"1"[tt])) | |
249 tt <- which(subset.data$"32">0) | |
250 countTotalExonsISubset <- length(unique(subset.data$"1"[tt])) | |
251 tt <- which(subset.data$"36">0) | |
252 countTotalIntronsISubset <- length(unique(subset.data$"1"[tt])) | |
253 tt <- which(subset.data$"40">0) | |
254 countTotalJonctionsISubset <- length(unique(subset.data$"1"[tt])) | |
255 ySubsetTotal <- c (countTotalEnhSubset/countTotalSubset,countTotalPromSubset/countTotalSubset, countTotalImDownSubset/countTotalSubset,countTotalIntraSubset/countTotalSubset,countTotal5kbDSubset/countTotalSubset,countTotal1IntronISubset/countTotalSubset,countTotalExonsISubset/countTotalSubset,countTotalIntronsISubset/countTotalSubset,countTotalJonctionsISubset/countTotalSubset) | |
256 for (i in c(1:nlev)) { | |
257 cum[r,i] <- ySubsetTotal[i] | |
258 cumEnrichTotal[r,i] <- ySubsetTotal[i]/yChIPTotal[i] | |
259 if (ifControl) { | |
260 cumEnrichControl[r,i] <- ySubsetTotal[i]/yControlTotal[i] | |
261 } | |
262 } | |
263 colReg[r]<-myColor[r+3] | |
264 } | |
265 if (ifControl) { | |
266 for (i in c(1:nlev)) { | |
267 cum[4,i] <- yControlTotal[i] | |
268 } | |
269 } | |
270 | |
271 cat ("Proportion of genes with a peak from the ChIP dataset in a given genomic region:\n") | |
272 for (r in c(1:length(myLevels))) { | |
273 cat (paste(myLevels[r],":\n",sep="")) | |
274 cat (paste(c(names),sep='\t')) | |
275 cat("\n") | |
276 cat (paste(c(cum[r,] ),sep='\t')) | |
277 cat("\n") | |
278 } | |
279 cat("\n") | |
280 par(mar=c(5.1, 7.1, 4.1, 2.1)) | |
281 if (ifControl) { | |
282 barx <- barplot(cum,xlab="",beside=TRUE, col=c(colReg, colors()[334]), legend = c(myLevels,"Control"), names.arg=c(names),ylab="Proportion of genes with a peak") | |
283 cat ("Proportion of genes with a peak from the Control dataset in a given genomic region:\n") | |
284 cat (paste(c(names),sep='\t')) | |
285 cat("\n") | |
286 cat (paste(c(yControlTotal),sep='\t')) | |
287 cat("\n\n") | |
288 if (bootstrap>1) { | |
289 wiskers <- cum-cum | |
290 wiskers[nrow(wiskers),] <- sqrt(colVars)*1.96 | |
291 error.bar(barx,cum,wiskers) | |
292 cat ("Standard deviation for the Control dataset in a given genomic region:\n") | |
293 cat (paste(c(names),sep='\t')) | |
294 cat("\n") | |
295 cat (paste(c(sqrt(colVars)),sep='\t')) | |
296 cat("\n\n") | |
297 } | |
298 } else { | |
299 barplot(cum,xlab="",beside=TRUE, col=c(colReg), legend = c(myLevels), names.arg=c(names),ylab="Proportion of genes with a peak") | |
300 } | |
301 barplot(cumEnrichTotal-1,xlab="",beside=TRUE, col=c(colReg), names.arg=c(names),ylab="Enrichment in comparison\nwith the total gene set", yaxt="n") | |
302 minX <- min(cumEnrichTotal-1) | |
303 maxX <- max(cumEnrichTotal-1) | |
304 x = seq(length=11, from=round(minX*10)/10, by=round((maxX-minX)*10)/100) | |
305 axis(2, at=x,labels=x+1, las=2) | |
306 | |
307 cat ("Enrichment of genomic regions, Transcriptional categories vs All Genes:\n") | |
308 for (r in c(1:length(myLevels))) { | |
309 cat (paste(myLevels[r],":\n",sep="")) | |
310 cat (paste(c(names),sep='\t')) | |
311 cat("\n") | |
312 cat (paste(c(cumEnrichTotal[r,]),sep='\t')) | |
313 cat("\n") | |
314 } | |
315 cat("\n") | |
316 | |
317 if (ifControl) { | |
318 barplot(cumEnrichControl-1,xlab="",beside=TRUE, col=c(colReg), names.arg=c(names),ylab="Enrichment in comparison\nwith control", yaxt="n") | |
319 minX <- min(cumEnrichControl-1) | |
320 maxX <- max(cumEnrichControl-1) | |
321 x = seq(length=11, from=round(minX*10)/10, by=round((maxX-minX)*10)/100) | |
322 axis(2, at=x,labels=x+1, las=2) | |
323 cat ("Enrichment of genomic regions, ChIP peaks vs Control Peaks:\n") | |
324 for (r in c(1:length(myLevels))) { | |
325 cat (paste(myLevels[r],":\n",sep="")) | |
326 cat (paste(c(names),sep='\t')) | |
327 cat("\n") | |
328 cat (paste(c(cumEnrichControl[r,]),sep='\t')) | |
329 cat("\n") | |
330 } | |
331 cat("\n") | |
332 if (bootstrap>1) { | |
333 cat ("Two-side P-values for each genomic category:\n") | |
334 for (r in c(1:length(myLevels))) { | |
335 z <- (cum[r,]-yControlTotal)/sqrt(colVars) | |
336 pvalues <- 2*pnorm(-abs(z)) | |
337 cat (paste(myLevels[r],":\n",sep="")) | |
338 cat (paste(c(names),sep='\t')) | |
339 cat("\n") | |
340 cat (paste(c(pvalues),sep='\t')) | |
341 cat("\n") | |
342 } | |
343 } | |
344 } | |
345 } | |
346 sink() #stop sinking :) | |
347 dev.off() | |
348 |