Mercurial > repos > kmace > mtls_analysis
comparison mtls_analyze/heatmap.R @ 4:b465306d00ba draft default tip
Uploaded
author | kmace |
---|---|
date | Mon, 23 Jul 2012 13:00:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:a0306edbf2f8 | 4:b465306d00ba |
---|---|
1 GeneratePeakMatrix <- function(experiments, scores) { | |
2 # Generates a score matrix for all mtls. | |
3 # | |
4 # Args: | |
5 # experiments: A list of underscore deliminated experiments for each mtl. | |
6 # There should never be a completely empty item in this list. | |
7 # for eg. experiments[1] = expA_expD_expB. | |
8 # scores: A list of underscore deliminated scores for each mtl, the length | |
9 # of these scores should be identicle to the length of the | |
10 # experiments. eg. scores[1] = 55_33_245. | |
11 # | |
12 # Returns: | |
13 # The peak score matrix for all mtls. | |
14 experiments <- sapply(experiments, function(x) strsplit(x, split = "_")) | |
15 scores <- sapply(scores, function(x) strsplit(x, split = "_")) | |
16 unique.experiments <- unique(unlist(experiments)) | |
17 | |
18 peaks=matrix(0,nr=length(experiments),nc=length(unique.experiments)) | |
19 colnames(peaks) <- unique.experiments | |
20 | |
21 for(i in 1:length(experiments)){ | |
22 for(j in 1:length(experiments[[i]])){ | |
23 peaks[i,experiments[[i]][j]]=as.numeric(scores[[i]][j]) | |
24 } | |
25 } | |
26 return(peaks) | |
27 ################################################################## | |
28 } | |
29 GetChipData <- function(file.name, | |
30 proximity = "distal", | |
31 include.targetless = TRUE, | |
32 column.order = NA) { | |
33 # Reads in, filters, and organizes mtls data | |
34 # | |
35 # Args: | |
36 # file.name: The path to the mtls file (including the file name). | |
37 # proximity: Either "distal" or "proximal", defines the gene target distance | |
38 # from the mtl. Default is "distal". | |
39 # include.targetless: If TRUE, includes mtls with no targets (applied after | |
40 # the proximity filter); if not, mtls with no target | |
41 # will be exclided. Default is TRUE. | |
42 # column.order: An optional vector of column names in the order in which | |
43 # they will be presented. If this is left to default (NA) the | |
44 # presented order of the chip columns will be the order in | |
45 # which they are seen. | |
46 # | |
47 # Returns: | |
48 # An organized list of mtls data with the following elements: | |
49 # $peaks - a matrix of peak p-values | |
50 # $targets - a list of underscore deliminated gene targets for each mtl | |
51 if(param$debug) { | |
52 print("In GetChipData") | |
53 } | |
54 # Set Constants for the mtls file type: | |
55 MTLNUM <- "mtl.id" | |
56 CHROMOSOME <- "chr" | |
57 EXPERIMENTS <- "expt" | |
58 EXPERIMENTS.SORTED <- "expt.alphanum.sorted" | |
59 START <- "start" | |
60 END <- "end" | |
61 SUMMIT <- "summit" | |
62 SCORES <- "score" | |
63 SCORE.MEAN <- "score.mean" | |
64 SPANS <- "span.tfs" | |
65 SPAN.TOTAL <- "span.l" | |
66 PEAK.IDS <- "peak.ids" | |
67 TARGETS <- "trgt" | |
68 # PROXIMAL.TARGETS <- "trgt.prox" | |
69 # DISTAL.TARGETS <- "trgt.dist" | |
70 # PROXIMAL.TARGETS.TSS.DISTANCE <- "dtss.prox" | |
71 # DISTAL.TARGETS.TSS.DISTANCE <- "dtss.dist" | |
72 | |
73 | |
74 #Get chip data in from files: | |
75 #TARGETS <- switch(proximity, | |
76 # distal = DISTAL.TARGETS, | |
77 # proximal = PROXIMAL.TARGETS, | |
78 # stop(" Bad proximity argument supplied.")) # Default | |
79 cat("param$rna.files = ", param$rna.files, "\n") | |
80 if(!is.na(param$rna.files) && param$rna.files != "none") { | |
81 keep.columns <- c(EXPERIMENTS, SCORES, TARGETS) | |
82 } else { | |
83 keep.columns <- c(EXPERIMENTS, SCORES) | |
84 } | |
85 file <- read.delim(file.name, header=T, as.is=T)[, keep.columns] | |
86 if(!is.na(param$rna.files) && param$rna.files != "none") { | |
87 if (!include.targetless) { | |
88 ix.has.trgt <- which(as.character(file[, TARGETS])!="") | |
89 file <- file[ix.has.trgt, ] | |
90 } | |
91 } | |
92 chip = list() | |
93 | |
94 chip$peaks <- GeneratePeakMatrix(file[, EXPERIMENTS], file[, SCORES]) | |
95 if(!is.na(column.order)) { | |
96 #If you specify an order, or a subset of experiments to include, this is where | |
97 #that gets done. | |
98 order <- unlist(strsplit(column.order, split="::")) | |
99 chip$peaks <- chip$peaks[,order] | |
100 } | |
101 if(!is.na(param$rna.files) && param$rna.files != "none") { | |
102 chip$targets <- file[ , TARGETS] | |
103 } | |
104 | |
105 return (chip) | |
106 } | |
107 GetRNADataFromOneFile <- function(file.name, fpkm = "avg") { | |
108 # Reads in rna expression data from cufflinks | |
109 # | |
110 # Args: | |
111 # file.name: The path to the cufflinks file (including the file name). | |
112 # fpkm: What fpkm is chosen, options are hi, low and avg | |
113 # Returns: | |
114 # An organized vector of rnaseq data with the following elements: | |
115 # names(data) - the genes from the file. eg names(data[1]) = JUND | |
116 # data - the fpkm score for that gene data[1] = 31 | |
117 if(param$debug) { | |
118 cat("In GetRNADataFromOneFile for ",file.name) | |
119 } | |
120 | |
121 # Set Constants for the cufflinks file type: | |
122 GENE <- "gene_id" | |
123 BUNDLE <- "bundle_id" | |
124 CHROMOSOME <- "chr" | |
125 LEFT <- "left" | |
126 RIGHT <- "right" | |
127 FPKM_AVG <- "FPKM" | |
128 FPKM_LOW <- "FPKM_conf_lo" | |
129 FPKM_HIGH <- "FPKM_conf_hi" | |
130 STATUS <- "status" | |
131 | |
132 | |
133 #Get chip data in from files: | |
134 FPKM <- switch(fpkm, | |
135 hi = FPKM_HIGH, | |
136 avg = FPKM_AVG, | |
137 low = FPKM_LOW, | |
138 stop(" Bad fpkm quality argument supplied.")) # Default | |
139 | |
140 keep.columns <- c(GENE, FPKM) | |
141 | |
142 file <- read.delim(file.name, header=T, as.is=T)[, keep.columns] | |
143 | |
144 rna = vector(mode = "numeric", length = nrow(file)) | |
145 | |
146 rna <- file[, FPKM] | |
147 names(rna) <- file[ , GENE] | |
148 return (rna) | |
149 } | |
150 | |
151 GetRNAData <- function(file.names, file.lables = file.names, fpkm = "avg") { | |
152 # Reads in rna expression data from cufflinks | |
153 # | |
154 # Args: | |
155 # file.names: The list of paths to the cufflinks files (including the file name). | |
156 # fpkm: What fpkm is chosen, options are hi, low and avg | |
157 # Returns: | |
158 # A matrix of rnaseq data with the following description: | |
159 # each col corresponds to an rnaseq run | |
160 # eacg row corresponds to a gene | |
161 if(param$debug) { | |
162 print("In GetRNAData") | |
163 } | |
164 files <- list() | |
165 for(i in 1:length(file.names)) { | |
166 files[[i]] <-GetRNADataFromOneFile(file.names[i]) | |
167 } | |
168 genes <- unique(names(unlist(files))) | |
169 scores <- matrix(0,nrow=length(genes),ncol=length(file.names)) | |
170 rownames(scores) <- genes | |
171 colnames(scores) <- file.names | |
172 | |
173 for(j in 1:length(file.names)) { | |
174 scores[names(files[[j]]),j] <- files[[j]] | |
175 } | |
176 print("# of cols for scores is ") | |
177 print(ncol(scores)) | |
178 print ("file lables are ") | |
179 print (file.lables) | |
180 colnames(scores) <- file.lables | |
181 return(scores) | |
182 #scores[genes,file.names] <- files[[]] | |
183 } | |
184 | |
185 NormalizeRNA <- function(scores) { | |
186 | |
187 #add psudocount | |
188 scores <- scores+1 | |
189 | |
190 numerator <- scores[,1:(ncol(scores)/2)] | |
191 denominator <- scores[,(ncol(scores)/2 + 1):ncol(scores)] | |
192 | |
193 #new.scores <- scores[,-which(colnames(scores) == norm.exp)] | |
194 #norm.score <- scores[,which(colnames(scores) == norm.exp)] | |
195 #return(log2(new.scores/norm.score)) | |
196 | |
197 return(log2(numerator/denominator)) | |
198 | |
199 } | |
200 | |
201 PrepareRNAforHeatmap <- function(new.scores.foldchange){ | |
202 new.scores.split <- matrix(0, nr=nrow(new.scores.foldchange), nc=2*ncol(new.scores.foldchange)) | |
203 is.even <- function(x){ x %% 2 == 0 } | |
204 corresponding.col <- function(x){ceiling(x/2)} | |
205 new.col.names <- vector(length = ncol(new.scores.split)) | |
206 rownames(new.scores.split) <- rownames(new.scores.foldchange) | |
207 for(i in 1:ncol(new.scores.split)) { | |
208 if(is.even(i)) {sign <- "down"} else {sign <- "up"} | |
209 new.col.names[i] <- paste(colnames(new.scores.foldchange)[corresponding.col(i)], | |
210 sign, sep =".") | |
211 new.scores.split[,i] <- new.scores.foldchange[,corresponding.col(i)] | |
212 if(is.even(i)) { | |
213 new.scores.split[which(new.scores.split[,i]>0),i] <- 0 | |
214 new.scores.split[,i] <- -1*new.scores.split[,i] | |
215 } else { | |
216 new.scores.split[which(new.scores.split[,i]<0),i] <- 0 | |
217 } | |
218 colnames(new.scores.split) <- new.col.names | |
219 } | |
220 return(new.scores.split)} | |
221 | |
222 #ConvertExpressionToPval = function(expression, threshold = 1) | |
223 #{ | |
224 # print("In ConvertExpressionToPval") | |
225 # #Generate Normal Stats | |
226 # # expression.mean = apply(expression, 2, mean) | |
227 # # expression.sd = apply(expression, 2, sd) | |
228 # | |
229 # ix.above.threshold = which(expression > threshold) | |
230 # expression.mean = apply(expression, 2, function(i) mean(i[which(i > threshold)])) | |
231 # expression.sd = apply(expression, 2, function(i) sd(i[which(i > threshold)])) | |
232 # | |
233 | |
234 # # CDF from zero to point | |
235 # # expression.pval = matrix(,nr=nrow(expression),nc=ncol(expression),dimnames=dimnames(expression)) | |
236 # # ix.low = which(expression < expression.mean) | |
237 # # ix.upper = which(expression >= expression.mean) | |
238 # # expression.pval[ix.low] = (-10 * pnorm(expression[ix.low], mean = expression.mean, sd = expression.sd, log=T) *log10(exp(1))) | |
239 # | |
240 # expression.pval = (-10 * log10(exp(1)) * pnorm(expression, | |
241 # mean = expression.mean, | |
242 # sd = expression.sd, | |
243 # log=T, | |
244 # lower.tail=F | |
245 # ) | |
246 # ) | |
247 # #squash those under threshold to zero | |
248 # expression.pval[-ix.above.threshold] = 0 | |
249 | |
250 # # correct for upper tail (ie from point to inf) | |
251 # #Still need to do | |
252 # return (expression.pval) | |
253 #} | |
254 MapExpressiontoChip2 = function(chip.targets, expression) | |
255 { | |
256 mymatrix = matrix(0, nr=length(chip.targets), nc=ncol(expression)) | |
257 rownames(mymatrix) <- chip.targets | |
258 colnames(mymatrix) <- colnames(expression) | |
259 | |
260 | |
261 j <- 1 #expression counter | |
262 i <- 1 #mymatrix counter | |
263 expression.genes <- rownames(expression) | |
264 previous.expression.gene <- expression.genes[j] | |
265 | |
266 # Progress bar: | |
267 total <- length(chip.targets) | |
268 # create progress bar | |
269 pb <- txtProgressBar(min = 0, max = total, style = 3) | |
270 | |
271 for ( i in 1:length(chip.targets)) { | |
272 current.target <- chip.targets[i] | |
273 setTxtProgressBar(pb, i) | |
274 while (current.target > previous.expression.gene && j<=length(expression.genes)) { | |
275 j <- j + 1 | |
276 previous.expression.gene <- expression.genes[j] | |
277 } | |
278 if (current.target == previous.expression.gene){ | |
279 mymatrix[i,] <- expression[j,] | |
280 } | |
281 } | |
282 | |
283 close(pb) | |
284 | |
285 print("Leaving MapRNAtoChip") | |
286 return(mymatrix) | |
287 } | |
288 | |
289 | |
290 MapExpressiontoChip = function(chip.targets, expression) | |
291 { | |
292 print("In MapRNAtoChip") | |
293 # targets = unlist(strsplit(chip$targets[i], "_")) | |
294 # exp = matrix(rna$all.pval[targets, ]) | |
295 # return (apply(exp, 2, mean)) | |
296 | |
297 #rna col 1 = th0 overexpressed | |
298 #rna col 2 = th17 overexpressed | |
299 #rna col 3 = true zscores | |
300 | |
301 | |
302 | |
303 | |
304 | |
305 | |
306 #################################################################### | |
307 chip.targets.notnull.unique <- unique(chip.targets[which(chip.targets!="")]) | |
308 gene.intersect <- intersect(rownames(expression), chip.targets.notnull.unique) | |
309 chip.targets.notnull.unique.with.data <- chip.targets.notnull.unique[which(chip.targets.notnull.unique %in% gene.intersect)] | |
310 expression.useful.data <- expression[which(rownames(expression) %in% gene.intersect),] | |
311 chip.targets.notnull.unique.with.data.sorted <- chip.targets.notnull.unique.with.data[order(chip.targets.notnull.unique.with.data)] | |
312 expression.useful.data.sorted <- expression.useful.data[order(rownames(expression.useful.data)),] | |
313 #################################################################### | |
314 | |
315 mymatrix = matrix(0, nr=length(chip.targets), nc=ncol(expression)) | |
316 rownames(mymatrix) <- chip.targets | |
317 colnames(mymatrix) <- colnames(expression) | |
318 head(chip.targets.notnull.unique.with.data.sorted) | |
319 head(rownames(expression.useful.data.sorted)) | |
320 if(!identical(chip.targets.notnull.unique.with.data.sorted, rownames(expression.useful.data.sorted))){ | |
321 stop("We have a serious problem, chip is not alligned to expression") | |
322 } | |
323 for(i in 1:length(chip.targets.notnull.unique.with.data.sorted)) | |
324 { | |
325 mtls.ix <- which(chip.targets == chip.targets.notnull.unique.with.data.sorted[i]) | |
326 for(j in 1:length(mtls.ix)){ | |
327 mymatrix[mtls.ix[j],] <- expression.useful.data.sorted[i,] | |
328 } | |
329 } | |
330 print("Leaving MapRNAtoChip") | |
331 return(mymatrix) | |
332 | |
333 #return( sapply(chip$targets, function(x) apply(rna$all.pval[unlist(strsplit(chip$targets[3], "_")), ], 2, mean)) ) | |
334 } | |
335 | |
336 | |
337 | |
338 | |
339 | |
340 GenerateKMOrder <- function(data, km) { | |
341 # generates the order of clusters from high to low mean values | |
342 # | |
343 # Args: | |
344 # data: A matrix of scores that have already been clustered. | |
345 # km: The K-means object generated from running kmeans on data. Another | |
346 # method could be used so long as it supplies a (km)$cluser list. Must | |
347 # have the same length as the number of rows in data | |
348 # | |
349 # Returns: | |
350 # cluster.order: The order in which the clusters should be displayed. | |
351 | |
352 km.cluster = km$cluster | |
353 clusters = unique(km.cluster) | |
354 clusters.avg = numeric() | |
355 for(i in clusters) { | |
356 clusters.avg = c(clusters.avg, mean(data[which(km.cluster == i), ])) | |
357 } | |
358 if(param$debug) { | |
359 print ("straight clusters") | |
360 print (clusters) | |
361 print ("straigth average") | |
362 print (clusters.avg) | |
363 print ("ordered clusters") | |
364 print (clusters[order(clusters.avg)]) | |
365 print("ordered average") | |
366 print (clusters.avg[order(clusters.avg)]) | |
367 } | |
368 return(clusters[rev(order(clusters.avg))]) | |
369 } | |
370 | |
371 OrderMTL <- function(data, km, cluster.order) { | |
372 # Orders a matrix of data according to a clustering algorithm | |
373 # | |
374 # Args: | |
375 # data: A matrix of scores that have already been clustered. | |
376 # km: The K-means object generated from running kmeans on data. Another | |
377 # method could be used so long as it supplies a (km)$cluser list. Must | |
378 # have the same length as the number of rows in data | |
379 # cluster.order: The order in which the clusters should be displayed. | |
380 # for eg. km.order = c(2, 3, 1) would result in cluster 2 | |
381 # being on top, then cluster 3 and lastly cluster 1. | |
382 # | |
383 # Returns: | |
384 # a list that contains 3 objects: | |
385 # list$data: the ordered version of the data. | |
386 # list$color.vector: a list of colors that should be assigned to each row. | |
387 # list$start.row: the starting row of each cluster in data. | |
388 number.clusters <- length(cluster.order) | |
389 cluster.colors <- sample(rainbow(number.clusters)) | |
390 | |
391 # Set up return objects | |
392 sorted.data <- matrix(,nr=nrow(data), nc=ncol(data)) | |
393 colnames(sorted.data) <- colnames(data) | |
394 cluster.color.vector = vector(length=length(km$cluster)) | |
395 cluster.start.row = numeric(number.clusters) | |
396 cluster.start.row[1]=1 | |
397 | |
398 for (i in 1:number.clusters) | |
399 { | |
400 current.cluster = cluster.order[i] | |
401 ix = which(km$cluster == current.cluster) | |
402 current.cluster.range <- cluster.start.row[i]:(cluster.start.row[i]+length(ix)-1) | |
403 sorted.data[current.cluster.range, ] = data[ix, ] | |
404 cluster.color.vector[current.cluster.range] = cluster.colors[i] | |
405 cluster.start.row[i+1] = (cluster.start.row[i]+length(ix)) | |
406 } | |
407 ret.list = list() | |
408 ret.list$data = sorted.data | |
409 ret.list$color.vector = cluster.color.vector | |
410 ret.list$cluster.index = cluster.start.row | |
411 return(ret.list) | |
412 } | |
413 | |
414 | |
415 CreateHeatMap <- function(data, | |
416 km, | |
417 cluster.order, | |
418 document.name, | |
419 document.type = "png", | |
420 number.colors = 30) { | |
421 # Generates a heatmap image based for a matrix based on a clustering | |
422 # algorithm. | |
423 # | |
424 # Args: | |
425 # data: A matrix of scores that have already been clustered, the column | |
426 # names of this matrix will become the column titels of the heatmap. | |
427 # km: The K-means object generated from running kmeans on data. Another | |
428 # method could be used so long as it supplies a (km)$cluser list. | |
429 # cluster.order: The order in which the clusters should be displayed. | |
430 # for eg. km.order = c(2, 3, 1) would result in cluster 2 | |
431 # being on top, then cluster 3 and lastly cluster 1. | |
432 # document.name: A name for the produced file. there is no need to | |
433 # supply the .png/.pdf in your argument. | |
434 # document.type: The type of file you want to produce. current options are | |
435 # png and pdf. Default is pdf. | |
436 # | |
437 # Returns: | |
438 # Nothing to the script that calls it, however it creates an image at the | |
439 # path specified. | |
440 if(param$debug) { | |
441 print("In CreateHeatMap") | |
442 } | |
443 | |
444 data.ordered <- OrderMTL(data, km, cluster.order) | |
445 | |
446 | |
447 #Load Lib | |
448 library(gplots) | |
449 | |
450 #Set Color gradient | |
451 color.ramp = colorRampPalette(c("black", | |
452 "darkblue", | |
453 "blue", | |
454 "yellow", | |
455 "orange", | |
456 "red"))(number.colors) #7 | |
457 | |
458 if(document.type == "png") { | |
459 png(paste(document.name,".png", sep = ""), | |
460 #width=15360,#1920, | |
461 #height=204080,#2560, | |
462 #res=500, | |
463 antialias="gray") | |
464 } | |
465 else if(document.type == "pdf") { | |
466 pdf(paste(document.name,".pdf", sep = "")) | |
467 } | |
468 else if(document.type == "tiff") { | |
469 tiff(paste(document.name,".tiff", sep = ""), | |
470 res=800, | |
471 pointsize=2, | |
472 width=1920, | |
473 height=1920) | |
474 } | |
475 else { | |
476 bitmap(paste(document.name,".bmp", sep = ""), | |
477 height = 5, | |
478 width = 5, | |
479 res = 500) | |
480 } | |
481 #op <- par(mar = rep(0, 4)) | |
482 | |
483 heatmap.2( | |
484 data.ordered$data, #data.ordered$data[,sort(colnames(data.ordered$data))], | |
485 rowsep = data.ordered$cluster.index[-1], | |
486 sepwidth = c(0.5, ncol(data)/100), | |
487 dendrogram = "none", | |
488 Rowv = F, | |
489 Colv = F, | |
490 trace = "none", | |
491 labRow = F, #sapply(seq(1:length(data.ordered$cluster.index)), toString), | |
492 labCol = colnames(data.ordered$data), #sort(colnames(data.ordered$data)), | |
493 RowSideColors = data.ordered$color.vector, | |
494 keysize=0.6, | |
495 key=F, | |
496 col = color.ramp, | |
497 cexCol = 0.8, | |
498 cexRow = 0.8) | |
499 | |
500 dev.off() | |
501 | |
502 } | |
503 | |
504 CreateIndividualHeatMap <- function(data, | |
505 km, | |
506 cluster.order, | |
507 color.ramp = colorRampPalette(c("black", | |
508 "darkblue", | |
509 "blue", | |
510 "yellow", | |
511 "orange", | |
512 "red"))(30)) { | |
513 # Generates a heatmap image based for a matrix based on a clustering | |
514 # algorithm. | |
515 # | |
516 # Args: | |
517 # data: A matrix of scores that have already been clustered, the column | |
518 # names of this matrix will become the column titels of the heatmap. | |
519 # km: The K-means object generated from running kmeans on data. Another | |
520 # method could be used so long as it supplies a (km)$cluser list. | |
521 # cluster.order: The order in which the clusters should be displayed. | |
522 # for eg. km.order = c(2, 3, 1) would result in cluster 2 | |
523 # being on top, then cluster 3 and lastly cluster 1. | |
524 # document.name: A name for the produced file. there is no need to | |
525 # supply the .png/.pdf in your argument. | |
526 # document.type: The type of file you want to produce. current options are | |
527 # png and pdf. Default is pdf. | |
528 # | |
529 # Returns: | |
530 # Nothing to the script that calls it, however it creates an image at the | |
531 # path specified. | |
532 if(param$debug) { | |
533 print("In CreateHeatMap") | |
534 } | |
535 | |
536 data.ordered <- OrderMTL(data, km, cluster.order) | |
537 | |
538 | |
539 #Load Lib | |
540 library(gplots) | |
541 | |
542 | |
543 | |
544 | |
545 heatmap.3( | |
546 data.ordered$data, #data.ordered$data[,sort(colnames(data.ordered$data))], | |
547 rowsep = data.ordered$cluster.index[-1], | |
548 sepwidth = c(0.5, nrow(data.ordered$data)/100), | |
549 dendrogram = "none", | |
550 Rowv = F, | |
551 Colv = F, | |
552 trace = "none", | |
553 labRow = F, #sapply(seq(1:length(data.ordered$cluster.index)), toString), | |
554 labCol = colnames(data.ordered$data), #sort(colnames(data.ordered$data)), | |
555 #sourcRowSideColors = data.ordered$color.vector, | |
556 keysize=0.6, | |
557 key=F, | |
558 col = color.ramp, | |
559 cexCol = 0.8, | |
560 cexRow = 0.8) | |
561 | |
562 | |
563 } | |
564 | |
565 ReadCommadLineParameters <- function(argument.names, command.line.arguments, optional = F) { | |
566 # Reads the parameters from the command line arguments. | |
567 # | |
568 # Args: | |
569 # argument.names: A list of expected argument names. | |
570 # command.line.arguments: The list of recieved command line arguments | |
571 # optional: Are the areguments optional, or are they required, default is required | |
572 # | |
573 # Returns: | |
574 # The arguments for argument.names. As strings In that order. | |
575 | |
576 if(length(grep("--version",command.line.arguments))) { | |
577 cat("version",script.version,"\n") | |
578 q() | |
579 } | |
580 # Split command line arguments | |
581 args <- sapply(strsplit(command.line.arguments, " "),function(i) i) | |
582 vals <- character(length(argument.names)) | |
583 # split cmd.line to key and value pairs | |
584 for(i in 1:length(argument.names)) { | |
585 ix <- grep(argument.names[i], args) | |
586 if(length(ix)>1) { | |
587 stop("arg ", | |
588 argument.names[i], | |
589 " used more than once. Bailing out...\n", | |
590 PrintParamError()) | |
591 } | |
592 else if (length(ix)==0 && !optional) { | |
593 stop("could not find ", | |
594 argument.names[i], | |
595 ". Bailing out...\n", | |
596 PrintParamError()) | |
597 } | |
598 else if (length(ix)==0 && optional) { | |
599 vals[i] <- NA | |
600 } | |
601 else { | |
602 vals[i] <- args[ix+1] | |
603 } | |
604 } | |
605 return(vals) | |
606 } | |
607 | |
608 PrintParamError <- function(){ | |
609 # Prints the usage of the function, shows users what arguments they can use | |
610 # | |
611 # Args: | |
612 # param: A list that contains all the paramaters. | |
613 # | |
614 # Returns: | |
615 # A modified version of the param list, with the default values loaded. | |
616 cat(" | |
617 DESCRIPTIION: | |
618 heatmap.R takes a ... | |
619 | |
620 INPUT: | |
621 1.--mtls_file: path to mtls file.\n | |
622 2.--cluster_file: the destination path for the output cluster file.\n | |
623 3.--chip_experiment_order: The order of desired chip experiments (optional).\n | |
624 4.--heatmap_file: path for output heatmap image (no extension).\n | |
625 5.--heatmap_type: choice of image format, currently support png, pdf, tiff and bmp (optional)\n | |
626 6.--expression_file: list of expression files to be included in analysis (optional).\n | |
627 7.--expression_name: lables for the expression files (optional).\n | |
628 8.--normalization_file: a list of files to be used for normalization, | |
629 they can be the same file, however the number of expression nominated | |
630 normalization files must match the number of expression files (optional)\n | |
631 9.--n_clusters: number of clusters\n | |
632 10.--filter_percentage: percentage of mtls that will be analysed. Eg: if | |
633 we make filter_percentage 30, we will take the union of the top mtls in | |
634 mean, non-zero mean and variance (optional).\n | |
635 | |
636 EXAMPLE RUN: | |
637 Rscript heatmap.R | |
638 --mtls_file path/to/mtls.xls | |
639 --cluster_file path/to/output/cluster | |
640 --chip_experiment_order tf1::tf2::tf5::tf3 | |
641 --heatmap_file path/to/output/heatmap | |
642 --heatmap_type png | |
643 --expression_file path/to/exp1::path/to/exp2 | |
644 --expression_name myexp1::myexp2 | |
645 --normalization_file path/to/exp3::path/to/exp3 | |
646 --n_clusters 13 | |
647 --filter_percentage 100 | |
648 --include_targetless yes | |
649 --number_bins 30 | |
650 | |
651 ") | |
652 } | |
653 | |
654 #LoadDefaultParams <- function(param) { | |
655 # # Loads default paramaters for the heatmap application | |
656 # # | |
657 # # Args: | |
658 # # param: A list that contains all the previous paramaters. | |
659 # # | |
660 # # Returns: | |
661 # # A modified version of the param list, with the default values loaded. | |
662 | |
663 #script.version=0.1 | |
664 #param$debug = F | |
665 | |
666 ## RNA data: | |
667 #param$rna.files = "" | |
668 #param$rna.normalization = "none" | |
669 | |
670 ## Filter: | |
671 #param$filter.percentage <- 100 | |
672 | |
673 ## Clustering: | |
674 #param$clustering.number.of.clusters <- 13 | |
675 | |
676 ## Heatmap: | |
677 #param$heatmap.document.name <- "heatmap" | |
678 #param$heatmap.document.type <- "png" | |
679 ##Cluster Groups: | |
680 #param$cluster.groups.document.name <- "clusters" | |
681 | |
682 #return(param) | |
683 | |
684 #} | |
685 | |
686 | |
687 | |
688 LoadParams <- function(cmd.args, args.nms, n.start.numeric, optional = F) { | |
689 # Loads user defined paramaters for the heatmap application | |
690 # | |
691 # Args: | |
692 # cmd.args: The command line arguments given. | |
693 # arg.nms: A list of possible command arguments. | |
694 # n.start.numeric: the first argument that should be numeric (alway put | |
695 # these last). | |
696 # optional: This specifies if the params in cmd.args are optional or | |
697 # required. | |
698 # Returns: | |
699 # A list of values assigned to each argument. | |
700 | |
701 | |
702 vals <- ReadCommadLineParameters(argument.names = args.nms, | |
703 command.line.arguments = cmd.args, | |
704 optional = optional) | |
705 | |
706 #check if numeric params are indeed numeric | |
707 if(!optional) { | |
708 for(i in n.start.numeric:length(vals)){ | |
709 if(is.na(as.numeric(vals[i]))){ | |
710 stop("arg ",args.nms[i]," is not numeric. Bailing out...\n",print.error()) | |
711 } | |
712 } | |
713 } | |
714 return (vals) | |
715 | |
716 } | |
717 | |
718 #ValidateParams <- function(params) { | |
719 # return(T) | |
720 #} | |
721 | |
722 | |
723 ########## | |
724 | |
725 LoadDebugParams <- function(param) { | |
726 | |
727 cmd.args <- c( | |
728 "--mtls_file data/test/mtls.xls", | |
729 "--cluster_file data/test/cluster", | |
730 "--heatmap_file data/test/heatmap", | |
731 "--heatmap_type bmp", | |
732 "--n_clusters 13", | |
733 "--filter_percentage 100", | |
734 "--expression_file /home/kieran/code/scorchR-heatmap/data/expression/rna1.tabular::/home/kieran/code/scorchR-heatmap/data/expression/rna2.tabular", | |
735 "--expression_name batf.ko.0::batf.ko.17", | |
736 "--normalization_file mean", | |
737 "--chip_experiment_order ac::bc::cc::dc::ec", | |
738 "--include_targetless yes", | |
739 "--number_bins 20" | |
740 ) | |
741 | |
742 args.nms <- c( "--mtls_file", #1 | |
743 "--cluster_file", #2 | |
744 "--chip_experiment_order", #3 | |
745 "--heatmap_file", #4 | |
746 "--heatmap_type", #5 | |
747 "--expression_file", #6 | |
748 "--expression_name", #7 | |
749 "--normalization_file", #8 | |
750 "--include_targetless", #9 | |
751 "--n_clusters", #10 | |
752 "--filter_percentage", #11 | |
753 "--number_bins") #12 | |
754 | |
755 | |
756 # vals has the same order as args.nms | |
757 vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 10, optional = F) | |
758 | |
759 | |
760 # ChIP data: | |
761 param$annotated.macs.file <- vals[1] | |
762 param$chip.order <- vals[3] | |
763 # RNA data: | |
764 param$rna.files = vals[6] | |
765 param$rna.names = vals[7] | |
766 param$rna.normalization.file = vals[8] | |
767 param$include.targetless = vals[9] | |
768 | |
769 # Filter: | |
770 param$filter.percentage <- as.numeric(vals[11]) | |
771 | |
772 # Clustering: | |
773 param$number.bins <- as.numeric(vals[12]) | |
774 param$clustering.number.of.clusters <- as.numeric(vals[10]) | |
775 | |
776 # Heatmap: | |
777 param$heatmap.document.name <- vals[4] | |
778 param$heatmap.document.type <- vals[5] | |
779 #Cluster Groups: | |
780 param$cluster.groups.document.name <- vals[2] | |
781 | |
782 return(param) | |
783 | |
784 } | |
785 | |
786 | |
787 | |
788 LoadOptionalParams <- function(param) { | |
789 | |
790 cmd.args <- commandArgs(trailingOnly = T) | |
791 | |
792 args.nms <- c( "--chip_experiment_order", #1 | |
793 "--expression_file", #2 | |
794 "--expression_name", #3 | |
795 "--normalization_file", #4 | |
796 "--heatmap_type", #5 | |
797 "--include_targetless", #6 | |
798 "--filter_percentage", #7 | |
799 "--number_bins") #8 | |
800 | |
801 | |
802 # vals has the same order as args.nms | |
803 vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 7, optional = T) | |
804 | |
805 # ChIP data: | |
806 param$chip.order <- if(!is.na(vals[1])){vals[1]}else{NA} | |
807 | |
808 # RNA data: | |
809 param$rna.files <- if(!is.na(vals[2])){vals[2]}else{"none"} | |
810 param$rna.names <- if(!is.na(vals[3])){vals[3]}else{"none"} | |
811 param$rna.normalization.file <- if(!is.na(vals[4])){vals[4]}else{"no"} | |
812 param$include.targetless <- if(!is.na(vals[6])){vals[6]}else{"yes"} | |
813 # Filter: | |
814 param$filter.percentage <- if(!is.na(vals[7])){as.numeric(vals[7])}else{100} | |
815 param$number.bins <- if(!is.na(vals[8])){as.numeric(vals[8])}else{30} | |
816 # Heatmap file (output) | |
817 param$heatmap.document.type <- if(!is.na(vals[5])){vals[5]}else{"none"} | |
818 | |
819 return(param) | |
820 } | |
821 | |
822 | |
823 LoadReqiredParams <- function(param){ | |
824 | |
825 cmd.args <- commandArgs(trailingOnly = T) | |
826 | |
827 args.nms <- c( "--mtls_file", #1 | |
828 "--cluster_file", #2 | |
829 "--heatmap_file", #3 | |
830 "--n_clusters") #4 | |
831 | |
832 | |
833 # vals has the same order as args.nms | |
834 vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 4, optional = F) | |
835 | |
836 # ChIP data: | |
837 param$annotated.macs.file <- vals[1] | |
838 | |
839 # Clustering | |
840 param$clustering.number.of.clusters <- as.numeric(vals[4]) | |
841 | |
842 # Cluster file (output): | |
843 param$cluster.groups.document.name <- vals[2] | |
844 | |
845 | |
846 # Heatmap file (output): | |
847 param$heatmap.document.name <- vals[3] | |
848 | |
849 return(param) | |
850 | |
851 } | |
852 | |
853 ########### | |
854 # here we output the fasta file of the targets of each kmeans cluster | |
855 CreateClusterGroups <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){ | |
856 f.nm = paste(f.nm, ".fasta", sep="") | |
857 clusters = km.order | |
858 for(i in 1:length(clusters)){ | |
859 v=trgts[which(k.ix==clusters[i])] | |
860 v.split = unlist(sapply(v,strsplit, "_")) | |
861 if(i == 1){ | |
862 cat(sep="",file=f.nm,">cluster_",i,"\n")#clusters[i],"\n") | |
863 } else { | |
864 cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE) | |
865 } | |
866 cat(sep="\n",file=f.nm,v.split,append=TRUE) | |
867 } | |
868 } | |
869 | |
870 | |
871 PrintClusters <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){ | |
872 f.nm = paste(f.nm, ".tsv", sep="") | |
873 cat(sep="\t",file=f.nm,"row_number/target","cluster","\n") | |
874 trgts[which(trgts=="")] <- "no_target" | |
875 for(i in 1:length(trgts)){ | |
876 cat(sep="",file=f.nm,trgts[i],"\t","cluster_",k.ix[i],"\n",append=TRUE) | |
877 } | |
878 # if(i == 1){ | |
879 # cat(sep="",file=f.nm,"cluster_",i,"\n")#clusters[i],"\n") | |
880 # } else { | |
881 # cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE) | |
882 # } | |
883 #cat(sep="\n",file=f.nm,v.split,append=TRUE) | |
884 } | |
885 GetTopRowsFromMatrix = function(mtrx, percentage = 10) | |
886 { | |
887 if (param$debug) { | |
888 print("In GetTopRowsFromMatrix") | |
889 } | |
890 #Store the stats for the mtrx | |
891 stats = list() | |
892 stats$mean=apply(mtrx,1,mean) | |
893 stats$sd=apply(mtrx,1,sd) | |
894 stats$nonzero.mean=apply(mtrx, 1, function(x) mean(x[which(x != 0)])) | |
895 | |
896 | |
897 #Store the indexes for the mtrx | |
898 index = list() | |
899 index$mean = sort(stats$mean, decreasing=T, index.return=T)$ix | |
900 index$sd = sort(stats$sd, decreasing=T, index.return=T)$ix | |
901 index$nonzero.mean = sort(stats$nonzero.mean, decreasing=T, index.return=T)$ix | |
902 | |
903 #Calculate how many data points we want to take | |
904 cutoff = floor(length(mtrx[ ,1])*(percentage/100)) | |
905 | |
906 #Extract Indexes and return to caller | |
907 index$union = union(index$mean[1:cutoff], union(index$sd[1:cutoff], index$nonzero.mean[1:cutoff])) | |
908 | |
909 return(index) | |
910 } | |
911 | |
912 #GetUniq = function(targets.cluster) | |
913 #{ | |
914 # #problem with this function is that it agrigates the list handed to it. after this point you cant maintain order | |
915 # | |
916 # return(unique(unlist(lapply(targets.cluster, function(i) strsplit(i, "_"))))) | |
917 # #return(unlist(lapply(targets.cluster, function(i) strsplit(i, "_")))) | |
918 # | |
919 #} | |
920 | |
921 bindata = function(d,qunts=seq(.4,.9,.1)) | |
922 { | |
923 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d)) | |
924 for(j in 1:ncol(d)) | |
925 { | |
926 bins=quantile(d[,j],qunts) | |
927 for(i in 1:length(bins)) | |
928 { | |
929 tmp[which(d[,j]>bins[i]),j] = i | |
930 } | |
931 } | |
932 return(tmp) | |
933 } | |
934 | |
935 bindata.non.zero = function(d,qunts=seq(0,0.9,0.1)) | |
936 { | |
937 tmp=matrix(0,nr=nrow(d),nc=ncol(d)) | |
938 for(j in 1:ncol(d)) | |
939 { | |
940 ix.non.zero=which(d[,j]!=0) | |
941 bins=quantile(d[ix.non.zero,j],qunts) | |
942 for(i in 1:length(bins)) | |
943 { | |
944 tmp[which(d[,j]>bins[i]),j] = i | |
945 } | |
946 } | |
947 | |
948 return(tmp) | |
949 } | |
950 bindata.matrix = function(d,qunts=seq(0,0.9,0.1)) | |
951 { | |
952 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d)) | |
953 #ix.non.zero=which(d!=0) | |
954 bins=quantile(d[],qunts) | |
955 for(i in 1:length(bins)) | |
956 { | |
957 tmp[which(d>bins[i])] = i | |
958 } | |
959 return(tmp) | |
960 } | |
961 bindata.non.zero.matrix = function(d,qunts=seq(0,0.9,0.1)) | |
962 { | |
963 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d)) | |
964 ix.non.zero=which(d!=0) | |
965 bins=quantile(d[ix.non.zero],qunts) | |
966 for(i in 1:length(bins)) | |
967 { | |
968 tmp[which(d>bins[i])] = i | |
969 } | |
970 return(tmp) | |
971 } | |
972 heatmap.3 <- function (x, Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE, | |
973 distfun = dist, hclustfun = hclust, dendrogram = c("both", | |
974 "row", "column", "none"), symm = FALSE, scale = c("none", | |
975 "row", "column"), na.rm = TRUE, revC = identical(Colv, | |
976 "Rowv"), add.expr, breaks, symbreaks = min(x < 0, na.rm = TRUE) || | |
977 scale != "none", col = "heat.colors", colsep, rowsep, | |
978 sepcolor = "white", sepwidth = c(0.05, 0.05), cellnote, notecex = 1, | |
979 notecol = "cyan", na.color = par("bg"), trace = c("column", | |
980 "row", "both", "none"), tracecol = "cyan", hline = median(breaks), | |
981 vline = median(breaks), linecol = tracecol, margins = c(5, | |
982 5), ColSideColors, RowSideColors, cexRow = 0.2 + 1/log10(nr), | |
983 cexCol = 0.2 + 1/log10(nc), labRow = NULL, labCol = NULL, | |
984 key = TRUE, keysize = 1.5, density.info = c("histogram", | |
985 "density", "none"), denscol = tracecol, symkey = min(x < | |
986 0, na.rm = TRUE) || symbreaks, densadj = 0.25, main = NULL, | |
987 xlab = NULL, ylab = NULL, lmat = NULL, lhei = NULL, lwid = NULL, | |
988 ...) | |
989 { | |
990 scale01 <- function(x, low = min(x), high = max(x)) { | |
991 x <- (x - low)/(high - low) | |
992 x | |
993 } | |
994 retval <- list() | |
995 scale <- if (symm && missing(scale)) | |
996 "none" | |
997 else match.arg(scale) | |
998 dendrogram <- match.arg(dendrogram) | |
999 trace <- match.arg(trace) | |
1000 density.info <- match.arg(density.info) | |
1001 if (length(col) == 1 && is.character(col)) | |
1002 col <- get(col, mode = "function") | |
1003 if (!missing(breaks) && (scale != "none")) | |
1004 warning("Using scale=\"row\" or scale=\"column\" when breaks are", | |
1005 "specified can produce unpredictable results.", "Please consider using only one or the other.") | |
1006 if (is.null(Rowv) || is.na(Rowv)) | |
1007 Rowv <- FALSE | |
1008 if (is.null(Colv) || is.na(Colv)) | |
1009 Colv <- FALSE | |
1010 else if (Colv == "Rowv" && !isTRUE(Rowv)) | |
1011 Colv <- FALSE | |
1012 if (length(di <- dim(x)) != 2 || !is.numeric(x)) | |
1013 stop("`x' must be a numeric matrix") | |
1014 nr <- di[1] | |
1015 nc <- di[2] | |
1016 if (nr <= 1 || nc <= 1) | |
1017 stop("`x' must have at least 2 rows and 2 columns") | |
1018 if (!is.numeric(margins) || length(margins) != 2) | |
1019 stop("`margins' must be a numeric vector of length 2") | |
1020 if (missing(cellnote)) | |
1021 cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x)) | |
1022 if (!inherits(Rowv, "dendrogram")) { | |
1023 if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in% | |
1024 c("both", "row"))) { | |
1025 if (is.logical(Colv) && (Colv)) | |
1026 dendrogram <- "column" | |
1027 else dedrogram <- "none" | |
1028 warning("Discrepancy: Rowv is FALSE, while dendrogram is `", | |
1029 dendrogram, "'. Omitting row dendogram.") | |
1030 } | |
1031 } | |
1032 if (!inherits(Colv, "dendrogram")) { | |
1033 if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in% | |
1034 c("both", "column"))) { | |
1035 if (is.logical(Rowv) && (Rowv)) | |
1036 dendrogram <- "row" | |
1037 else dendrogram <- "none" | |
1038 warning("Discrepancy: Colv is FALSE, while dendrogram is `", | |
1039 dendrogram, "'. Omitting column dendogram.") | |
1040 } | |
1041 } | |
1042 if (inherits(Rowv, "dendrogram")) { | |
1043 ddr <- Rowv | |
1044 rowInd <- order.dendrogram(ddr) | |
1045 } | |
1046 else if (is.integer(Rowv)) { | |
1047 hcr <- hclustfun(distfun(x)) | |
1048 ddr <- as.dendrogram(hcr) | |
1049 ddr <- reorder(ddr, Rowv) | |
1050 rowInd <- order.dendrogram(ddr) | |
1051 if (nr != length(rowInd)) | |
1052 stop("row dendrogram ordering gave index of wrong length") | |
1053 } | |
1054 else if (isTRUE(Rowv)) { | |
1055 Rowv <- rowMeans(x, na.rm = na.rm) | |
1056 hcr <- hclustfun(distfun(x)) | |
1057 ddr <- as.dendrogram(hcr) | |
1058 ddr <- reorder(ddr, Rowv) | |
1059 rowInd <- order.dendrogram(ddr) | |
1060 if (nr != length(rowInd)) | |
1061 stop("row dendrogram ordering gave index of wrong length") | |
1062 } | |
1063 else { | |
1064 rowInd <- nr:1 | |
1065 } | |
1066 if (inherits(Colv, "dendrogram")) { | |
1067 ddc <- Colv | |
1068 colInd <- order.dendrogram(ddc) | |
1069 } | |
1070 else if (identical(Colv, "Rowv")) { | |
1071 if (nr != nc) | |
1072 stop("Colv = \"Rowv\" but nrow(x) != ncol(x)") | |
1073 if (exists("ddr")) { | |
1074 ddc <- ddr | |
1075 colInd <- order.dendrogram(ddc) | |
1076 } | |
1077 else colInd <- rowInd | |
1078 } | |
1079 else if (is.integer(Colv)) { | |
1080 hcc <- hclustfun(distfun(if (symm) | |
1081 x | |
1082 else t(x))) | |
1083 ddc <- as.dendrogram(hcc) | |
1084 ddc <- reorder(ddc, Colv) | |
1085 colInd <- order.dendrogram(ddc) | |
1086 if (nc != length(colInd)) | |
1087 stop("column dendrogram ordering gave index of wrong length") | |
1088 } | |
1089 else if (isTRUE(Colv)) { | |
1090 Colv <- colMeans(x, na.rm = na.rm) | |
1091 hcc <- hclustfun(distfun(if (symm) | |
1092 x | |
1093 else t(x))) | |
1094 ddc <- as.dendrogram(hcc) | |
1095 ddc <- reorder(ddc, Colv) | |
1096 colInd <- order.dendrogram(ddc) | |
1097 if (nc != length(colInd)) | |
1098 stop("column dendrogram ordering gave index of wrong length") | |
1099 } | |
1100 else { | |
1101 colInd <- 1:nc | |
1102 } | |
1103 retval$rowInd <- rowInd | |
1104 retval$colInd <- colInd | |
1105 retval$call <- match.call() | |
1106 x <- x[rowInd, colInd] | |
1107 x.unscaled <- x | |
1108 cellnote <- cellnote[rowInd, colInd] | |
1109 if (is.null(labRow)) | |
1110 labRow <- if (is.null(rownames(x))) | |
1111 (1:nr)[rowInd] | |
1112 else rownames(x) | |
1113 else labRow <- labRow[rowInd] | |
1114 if (is.null(labCol)) | |
1115 labCol <- if (is.null(colnames(x))) | |
1116 (1:nc)[colInd] | |
1117 else colnames(x) | |
1118 else labCol <- labCol[colInd] | |
1119 if (scale == "row") { | |
1120 retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm) | |
1121 x <- sweep(x, 1, rm) | |
1122 retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm) | |
1123 x <- sweep(x, 1, sx, "/") | |
1124 } | |
1125 else if (scale == "column") { | |
1126 retval$colMeans <- rm <- colMeans(x, na.rm = na.rm) | |
1127 x <- sweep(x, 2, rm) | |
1128 retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm) | |
1129 x <- sweep(x, 2, sx, "/") | |
1130 } | |
1131 if (missing(breaks) || is.null(breaks) || length(breaks) < | |
1132 1) { | |
1133 if (missing(col) || is.function(col)) | |
1134 breaks <- 16 | |
1135 else breaks <- length(col) + 1 | |
1136 } | |
1137 if (length(breaks) == 1) { | |
1138 if (!symbreaks) | |
1139 breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm), | |
1140 length = breaks) | |
1141 else { | |
1142 extreme <- max(abs(x), na.rm = TRUE) | |
1143 breaks <- seq(-extreme, extreme, length = breaks) | |
1144 } | |
1145 } | |
1146 nbr <- length(breaks) | |
1147 ncol <- length(breaks) - 1 | |
1148 if (class(col) == "function") | |
1149 col <- col(ncol) | |
1150 min.breaks <- min(breaks) | |
1151 max.breaks <- max(breaks) | |
1152 x[x < min.breaks] <- min.breaks | |
1153 x[x > max.breaks] <- max.breaks | |
1154 # if (missing(lhei) || is.null(lhei)) | |
1155 # lhei <- c(keysize, 4) | |
1156 # if (missing(lwid) || is.null(lwid)) | |
1157 # lwid <- c(keysize, 4) | |
1158 # if (missing(lmat) || is.null(lmat)) { | |
1159 # lmat <- rbind(4:3, 2:1) | |
1160 # if (!missing(ColSideColors)) { | |
1161 # if (!is.character(ColSideColors) || length(ColSideColors) != | |
1162 # nc) | |
1163 # stop("'ColSideColors' must be a character vector of length ncol(x)") | |
1164 # lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + | |
1165 # 1) | |
1166 # lhei <- c(lhei[1], 0.2, lhei[2]) | |
1167 # } | |
1168 # if (!missing(RowSideColors)) { | |
1169 # if (!is.character(RowSideColors) || length(RowSideColors) != | |
1170 # nr) | |
1171 # stop("'RowSideColors' must be a character vector of length nrow(x)") | |
1172 # lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - | |
1173 # 1), 1), lmat[, 2] + 1) | |
1174 # lwid <- c(lwid[1], 0.2, lwid[2]) | |
1175 # } | |
1176 # lmat[is.na(lmat)] <- 0 | |
1177 # } | |
1178 # if (length(lhei) != nrow(lmat)) | |
1179 # stop("lhei must have length = nrow(lmat) = ", nrow(lmat)) | |
1180 # if (length(lwid) != ncol(lmat)) | |
1181 # stop("lwid must have length = ncol(lmat) =", ncol(lmat)) | |
1182 # op <- par(no.readonly = TRUE) | |
1183 # on.exit(par(op)) | |
1184 # layout(lmat, widths = lwid, heights = lhei, respect = FALSE) | |
1185 if (!missing(RowSideColors)) { | |
1186 par(mar = c(margins[1], 0, 0, 0.5)) | |
1187 image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE) | |
1188 } | |
1189 if (!missing(ColSideColors)) { | |
1190 par(mar = c(0.5, 0, 0, margins[2])) | |
1191 image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE) | |
1192 } | |
1193 par(mar = c(margins[1], 0, 0, margins[2])) | |
1194 x <- t(x) | |
1195 cellnote <- t(cellnote) | |
1196 if (revC) { | |
1197 iy <- nr:1 | |
1198 if (exists("ddr")) | |
1199 ddr <- rev(ddr) | |
1200 x <- x[, iy] | |
1201 cellnote <- cellnote[, iy] | |
1202 } | |
1203 else iy <- 1:nr | |
1204 image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + | |
1205 c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, | |
1206 breaks = breaks, ...) | |
1207 retval$carpet <- x | |
1208 if (exists("ddr")) | |
1209 retval$rowDendrogram <- ddr | |
1210 if (exists("ddc")) | |
1211 retval$colDendrogram <- ddc | |
1212 retval$breaks <- breaks | |
1213 retval$col <- col | |
1214 if (!invalid(na.color) & any(is.na(x))) { | |
1215 mmat <- ifelse(is.na(x), 1, NA) | |
1216 image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "", | |
1217 col = na.color, add = TRUE) | |
1218 } | |
1219 axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0, | |
1220 cex.axis = cexCol) | |
1221 if (!is.null(xlab)) | |
1222 mtext(xlab, side = 1, line = margins[1] - 1.25) | |
1223 axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0, | |
1224 cex.axis = cexRow) | |
1225 if (!is.null(ylab)) | |
1226 mtext(ylab, side = 4, line = margins[2] - 1.25) | |
1227 if (!missing(add.expr)) | |
1228 eval(substitute(add.expr)) | |
1229 if (!missing(colsep)) | |
1230 for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, | |
1231 length(csep)), xright = csep + 0.5 + sepwidth[1], | |
1232 ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, | |
1233 col = sepcolor, border = sepcolor) | |
1234 if (!missing(rowsep)) | |
1235 for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + | |
1236 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + | |
1237 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, | |
1238 col = sepcolor, border = sepcolor) | |
1239 min.scale <- min(breaks) | |
1240 max.scale <- max(breaks) | |
1241 x.scaled <- scale01(t(x), min.scale, max.scale) | |
1242 if (trace %in% c("both", "column")) { | |
1243 retval$vline <- vline | |
1244 vline.vals <- scale01(vline, min.scale, max.scale) | |
1245 for (i in colInd) { | |
1246 if (!is.null(vline)) { | |
1247 abline(v = i - 0.5 + vline.vals, col = linecol, | |
1248 lty = 2) | |
1249 } | |
1250 xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5 | |
1251 xv <- c(xv[1], xv) | |
1252 yv <- 1:length(xv) - 0.5 | |
1253 lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s") | |
1254 } | |
1255 } | |
1256 if (trace %in% c("both", "row")) { | |
1257 retval$hline <- hline | |
1258 hline.vals <- scale01(hline, min.scale, max.scale) | |
1259 for (i in rowInd) { | |
1260 if (!is.null(hline)) { | |
1261 abline(h = i + hline, col = linecol, lty = 2) | |
1262 } | |
1263 yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5 | |
1264 yv <- rev(c(yv[1], yv)) | |
1265 xv <- length(yv):1 - 0.5 | |
1266 lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s") | |
1267 } | |
1268 } | |
1269 if (!missing(cellnote)) | |
1270 text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote), | |
1271 col = notecol, cex = notecex) | |
1272 par(mar = c(margins[1], 0, 0, 0)) | |
1273 # if (dendrogram %in% c("both", "row")) { | |
1274 # plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none") | |
1275 # } | |
1276 # else plot.new() | |
1277 par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2])) | |
1278 # if (dendrogram %in% c("both", "column")) { | |
1279 # plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none") | |
1280 # } | |
1281 # else plot.new() | |
1282 if (!is.null(main)) | |
1283 title(main, cex.main = 1.5 * op[["cex.main"]]) | |
1284 # if (key) { | |
1285 # par(mar = c(5, 4, 2, 1), cex = 0.75) | |
1286 # tmpbreaks <- breaks | |
1287 # if (symkey) { | |
1288 # max.raw <- max(abs(c(x, breaks)), na.rm = TRUE) | |
1289 # min.raw <- -max.raw | |
1290 # tmpbreaks[1] <- -max(abs(x), na.rm = TRUE) | |
1291 # tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE) | |
1292 # } | |
1293 # else { | |
1294 # min.raw <- min(x, na.rm = TRUE) | |
1295 # max.raw <- max(x, na.rm = TRUE) | |
1296 # } | |
1297 # z <- seq(min.raw, max.raw, length = length(col)) | |
1298 # image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks, | |
1299 # xaxt = "n", yaxt = "n") | |
1300 # par(usr = c(0, 1, 0, 1)) | |
1301 # lv <- pretty(breaks) | |
1302 # xv <- scale01(as.numeric(lv), min.raw, max.raw) | |
1303 # axis(1, at = xv, labels = lv) | |
1304 # if (scale == "row") | |
1305 # mtext(side = 1, "Row Z-Score", line = 2) | |
1306 # else if (scale == "column") | |
1307 # mtext(side = 1, "Column Z-Score", line = 2) | |
1308 # else mtext(side = 1, "Value", line = 2) | |
1309 # if (density.info == "density") { | |
1310 # dens <- density(x, adjust = densadj, na.rm = TRUE) | |
1311 # omit <- dens$x < min(breaks) | dens$x > max(breaks) | |
1312 # dens$x <- dens$x[-omit] | |
1313 # dens$y <- dens$y[-omit] | |
1314 # dens$x <- scale01(dens$x, min.raw, max.raw) | |
1315 # lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol, | |
1316 # lwd = 1) | |
1317 # axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y)) | |
1318 # title("Color Key\nand Density Plot") | |
1319 # par(cex = 0.5) | |
1320 # mtext(side = 2, "Density", line = 2) | |
1321 # } | |
1322 # else if (density.info == "histogram") { | |
1323 # h <- hist(x, plot = FALSE, breaks = breaks) | |
1324 # hx <- scale01(breaks, min.raw, max.raw) | |
1325 # hy <- c(h$counts, h$counts[length(h$counts)]) | |
1326 # lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s", | |
1327 # col = denscol) | |
1328 # axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy)) | |
1329 # title("Color Key\nand Histogram") | |
1330 # par(cex = 0.5) | |
1331 # mtext(side = 2, "Count", line = 2) | |
1332 # } | |
1333 # else title("Color Key") | |
1334 # } | |
1335 # else plot.new() | |
1336 retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)], | |
1337 high = retval$breaks[-1], color = retval$col) | |
1338 invisible(retval) | |
1339 } | |
1340 | |
1341 #rm(list=ls()) | |
1342 param <- list() | |
1343 param$debug <- FALSE #T | |
1344 | |
1345 print("Finished loading functions") | |
1346 | |
1347 | |
1348 if(param$debug) { | |
1349 param <- LoadDebugParams(param) | |
1350 } else { | |
1351 param <- LoadReqiredParams(param) | |
1352 param <- LoadOptionalParams(param) | |
1353 } | |
1354 print (param) | |
1355 Rprof() #Remove me | |
1356 # Load Chip data, alread in pval format | |
1357 # Column.order need to be a vecor of strings, that correspond to the order (and | |
1358 # inclustion) of chip experiments. | |
1359 chip <- GetChipData(param$annotated.macs.file, column.order = param$chip.order) | |
1360 | |
1361 #Get the top percentages on different criteria | |
1362 #ix.top <- GetTopRowsFromMatrix(chip$peaks, percentage = param$filter.percentage) | |
1363 #chip$peaks <- chip$peaks[ix.top$union, ] | |
1364 #chip$targets <- chip$targets[ix.top$union] | |
1365 #Bin data: | |
1366 chip$peaks <- bindata.non.zero.matrix(chip$peaks, qunts = seq(0, 0.9, length=(param$number.bins+1))) | |
1367 | |
1368 file.names <- unlist(strsplit(param$rna.files, split="::")) | |
1369 print(file.names) | |
1370 file.lables <- unlist(strsplit(param$rna.names, split="::")) | |
1371 print(file.lables) | |
1372 | |
1373 library("RColorBrewer") | |
1374 color.2 = rev(colorRampPalette(c("darkblue", "steelblue", "lightgrey"))(param$number.bins)) | |
1375 if(!is.na(file.names) && file.names != "none") { | |
1376 #rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg") | |
1377 #do differential expression if the user specifies such: | |
1378 if(param$rna.normalization != "no") { | |
1379 if(param$rna.normalization != "mean"){ | |
1380 norm.file.names <- unlist(strsplit(param$rna.normalization.file, split="::")) | |
1381 all.file.names <- c(file.names, norm.file.names) | |
1382 all.file.lables <- c(file.lables, norm.file.names) | |
1383 print (all.file.names) | |
1384 print (all.file.lables) | |
1385 rna.scores <- GetRNAData(all.file.names, file.lables = all.file.lables, fpkm = "avg") | |
1386 rna.scores <- NormalizeRNA(scores = rna.scores) | |
1387 rna.scores.sign <- rna.scores/abs(rna.scores) | |
1388 rna.scores.sign[which(is.nan(rna.scores.sign))] <- 0 | |
1389 rna.scores <- bindata.non.zero.matrix(abs(rna.scores), qunts = seq(0, 0.9, length=(param$number.bins+1))) | |
1390 rna.scores <- rna.scores.sign * rna.scores | |
1391 color.2 = colorRampPalette(c("darkblue", "steelblue", "lightgrey", "pink", "darkred"))(param$number.bins) | |
1392 } else { | |
1393 print("we are normalizing by average") | |
1394 print("file lables are:") | |
1395 print(file.lables) | |
1396 rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg") | |
1397 rna.scores <- t(apply(rna.scores,1, function(x) { if(mean(x)!=0){ return(x/mean(x)) } else { return(x) } })) | |
1398 rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = seq(0, 0.9, length=(param$number.bins+1))) | |
1399 } | |
1400 } else { | |
1401 rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg") | |
1402 rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = seq(0, 0.9, length=(param$number.bins+1))) | |
1403 } | |
1404 | |
1405 # chip$peaks <- chip$peaks[order(chip$targets), ] | |
1406 # rna.scores <- rna.scores[order(rownames(rna.scores)), ] | |
1407 # chip$targets <- chip$targets[order(chip$targets)] | |
1408 | |
1409 rna.scores.mapped <- MapExpressiontoChip(chip.targets = chip$targets, expression=rna.scores) | |
1410 all.data <- cbind(chip$peaks, rna.scores.mapped) | |
1411 if(param$include.targetless!="yes"){ | |
1412 all.data <- all.data[-which(chip$targets==""),] | |
1413 chip$peaks <- chip$peaks[-which(chip$targets==""),] | |
1414 rna.scores.mapped <- rna.scores.mapped[-which(chip$targets==""),] | |
1415 chip$targets <- chip$targets[-which(chip$targets=="")] | |
1416 } | |
1417 } else { | |
1418 all.data <- chip$peaks | |
1419 } | |
1420 | |
1421 print("Clustering") | |
1422 set.seed(1234) | |
1423 km <- kmeans(all.data, param$clustering.number.of.clusters, iter.max = 50) | |
1424 print("Ordering") | |
1425 km.order <- GenerateKMOrder(all.data, km) | |
1426 | |
1427 ##AM edits## | |
1428 km.new.order <- numeric(length(km$cluster)) | |
1429 for(i in 1:length(km.order)){ | |
1430 cur.clst <- km.order[i] | |
1431 ix <- which(km$cluster==cur.clst) | |
1432 km.new.order[ix] <- i | |
1433 } | |
1434 km.order <- 1:length(km.order) | |
1435 km$cluster <- km.new.order | |
1436 ## Done AM ## | |
1437 print("Creating image") | |
1438 #bmp(param$heatmap.document.name, 640, 480)#1280, 960) | |
1439 bmp(paste(param$heatmap.document.name,".bmp", sep = ""), 5120, 3840)#2560, 1920) | |
1440 #pdf(paste(param$heatmap.document.name,".pdf", sep = "")) | |
1441 color.1 <- c("#000000", colorRampPalette(c("blue", "yellow","orange","red"))(param$number.bins)) | |
1442 if(!is.na(file.names) && file.names != "none") { | |
1443 PrintClusters(trgts=chip$targets, | |
1444 k.ix=km$cluster, | |
1445 f.nm = param$cluster.groups.document.name, | |
1446 km.order = NA) | |
1447 | |
1448 #if(param$rna.normalization != "no") { | |
1449 # data.split <- cbind(chip$peaks, PrepareRNAforHeatmap(rna.scores.mapped)) | |
1450 #} else { | |
1451 # data.split <- cbind(chip$peaks, rna.scores.mapped) | |
1452 #} | |
1453 expression.width.multiplier <- 2 | |
1454 if(ncol(rna.scores.mapped)==1) { | |
1455 rna.scores.mapped <- cbind(rna.scores.mapped, rna.scores.mapped) | |
1456 expression.width.multiplier <- 1 | |
1457 } | |
1458 | |
1459 layout(matrix(c(1,2,2,5, | |
1460 1,3,4,6, | |
1461 1,3,4,7, | |
1462 1,3,4,8, | |
1463 1,3,4,9), | |
1464 5,4, | |
1465 byrow=T), | |
1466 widths=c(1,2*ncol(chip$peaks),expression.width.multiplier*ncol(rna.scores.mapped),1), | |
1467 heights=c(1,10,2,10,2)) | |
1468 #1 | |
1469 plot.new() | |
1470 #2 | |
1471 plot.new() | |
1472 #3 | |
1473 print("Creating peak image") | |
1474 CreateIndividualHeatMap(chip$peaks, | |
1475 km, | |
1476 km.order, color.ramp=color.1) | |
1477 #4 | |
1478 print("Creating rna image") | |
1479 CreateIndividualHeatMap(rna.scores.mapped, | |
1480 km, | |
1481 km.order, color.ramp=color.2) | |
1482 #5 | |
1483 plot.new() | |
1484 #6 | |
1485 image(t(matrix(1:param$number.bins,nc=1)), col=color.1) | |
1486 #7 | |
1487 plot.new() | |
1488 #8 | |
1489 image(t(matrix(1:param$number.bins,nc=1)), col=color.2) | |
1490 #9 | |
1491 plot.new() | |
1492 } else { | |
1493 PrintClusters(trgts=1:nrow(chip$peaks), | |
1494 k.ix=km$cluster, | |
1495 f.nm = param$cluster.groups.document.name, | |
1496 km.order = NA) | |
1497 | |
1498 | |
1499 layout(matrix(c(1,2,2, | |
1500 1,3,4, | |
1501 1,3,5),3,3,byrow=T), | |
1502 widths=c(1,3*ncol(chip$peaks),1), | |
1503 heights=c(1,8,1)) | |
1504 #1 | |
1505 plot.new() | |
1506 #2 | |
1507 plot.new() | |
1508 #3 | |
1509 CreateIndividualHeatMap(chip$peaks, | |
1510 km, | |
1511 km.order, color.ramp=color.1) | |
1512 #4 | |
1513 image(t(matrix(1:param$number.bins,nc=1)), col=color.1) | |
1514 #5 | |
1515 plot.new() | |
1516 | |
1517 } | |
1518 | |
1519 dev.off() | |
1520 Rprof(NULL) | |
1521 profile <- summaryRprof() | |
1522 print(str(profile)) | |
1523 print(profile) | |
1524 #save.image("test/testData.RData") |