comparison mtls_analyze/heatmap.R @ 4:b465306d00ba draft default tip

Uploaded
author kmace
date Mon, 23 Jul 2012 13:00:15 -0400
parents
children
comparison
equal deleted inserted replaced
3:a0306edbf2f8 4:b465306d00ba
1 GeneratePeakMatrix <- function(experiments, scores) {
2 # Generates a score matrix for all mtls.
3 #
4 # Args:
5 # experiments: A list of underscore deliminated experiments for each mtl.
6 # There should never be a completely empty item in this list.
7 # for eg. experiments[1] = expA_expD_expB.
8 # scores: A list of underscore deliminated scores for each mtl, the length
9 # of these scores should be identicle to the length of the
10 # experiments. eg. scores[1] = 55_33_245.
11 #
12 # Returns:
13 # The peak score matrix for all mtls.
14 experiments <- sapply(experiments, function(x) strsplit(x, split = "_"))
15 scores <- sapply(scores, function(x) strsplit(x, split = "_"))
16 unique.experiments <- unique(unlist(experiments))
17
18 peaks=matrix(0,nr=length(experiments),nc=length(unique.experiments))
19 colnames(peaks) <- unique.experiments
20
21 for(i in 1:length(experiments)){
22 for(j in 1:length(experiments[[i]])){
23 peaks[i,experiments[[i]][j]]=as.numeric(scores[[i]][j])
24 }
25 }
26 return(peaks)
27 ##################################################################
28 }
29 GetChipData <- function(file.name,
30 proximity = "distal",
31 include.targetless = TRUE,
32 column.order = NA) {
33 # Reads in, filters, and organizes mtls data
34 #
35 # Args:
36 # file.name: The path to the mtls file (including the file name).
37 # proximity: Either "distal" or "proximal", defines the gene target distance
38 # from the mtl. Default is "distal".
39 # include.targetless: If TRUE, includes mtls with no targets (applied after
40 # the proximity filter); if not, mtls with no target
41 # will be exclided. Default is TRUE.
42 # column.order: An optional vector of column names in the order in which
43 # they will be presented. If this is left to default (NA) the
44 # presented order of the chip columns will be the order in
45 # which they are seen.
46 #
47 # Returns:
48 # An organized list of mtls data with the following elements:
49 # $peaks - a matrix of peak p-values
50 # $targets - a list of underscore deliminated gene targets for each mtl
51 if(param$debug) {
52 print("In GetChipData")
53 }
54 # Set Constants for the mtls file type:
55 MTLNUM <- "mtl.id"
56 CHROMOSOME <- "chr"
57 EXPERIMENTS <- "expt"
58 EXPERIMENTS.SORTED <- "expt.alphanum.sorted"
59 START <- "start"
60 END <- "end"
61 SUMMIT <- "summit"
62 SCORES <- "score"
63 SCORE.MEAN <- "score.mean"
64 SPANS <- "span.tfs"
65 SPAN.TOTAL <- "span.l"
66 PEAK.IDS <- "peak.ids"
67 TARGETS <- "trgt"
68 # PROXIMAL.TARGETS <- "trgt.prox"
69 # DISTAL.TARGETS <- "trgt.dist"
70 # PROXIMAL.TARGETS.TSS.DISTANCE <- "dtss.prox"
71 # DISTAL.TARGETS.TSS.DISTANCE <- "dtss.dist"
72
73
74 #Get chip data in from files:
75 #TARGETS <- switch(proximity,
76 # distal = DISTAL.TARGETS,
77 # proximal = PROXIMAL.TARGETS,
78 # stop(" Bad proximity argument supplied.")) # Default
79 cat("param$rna.files = ", param$rna.files, "\n")
80 if(!is.na(param$rna.files) && param$rna.files != "none") {
81 keep.columns <- c(EXPERIMENTS, SCORES, TARGETS)
82 } else {
83 keep.columns <- c(EXPERIMENTS, SCORES)
84 }
85 file <- read.delim(file.name, header=T, as.is=T)[, keep.columns]
86 if(!is.na(param$rna.files) && param$rna.files != "none") {
87 if (!include.targetless) {
88 ix.has.trgt <- which(as.character(file[, TARGETS])!="")
89 file <- file[ix.has.trgt, ]
90 }
91 }
92 chip = list()
93
94 chip$peaks <- GeneratePeakMatrix(file[, EXPERIMENTS], file[, SCORES])
95 if(!is.na(column.order)) {
96 #If you specify an order, or a subset of experiments to include, this is where
97 #that gets done.
98 order <- unlist(strsplit(column.order, split="::"))
99 chip$peaks <- chip$peaks[,order]
100 }
101 if(!is.na(param$rna.files) && param$rna.files != "none") {
102 chip$targets <- file[ , TARGETS]
103 }
104
105 return (chip)
106 }
107 GetRNADataFromOneFile <- function(file.name, fpkm = "avg") {
108 # Reads in rna expression data from cufflinks
109 #
110 # Args:
111 # file.name: The path to the cufflinks file (including the file name).
112 # fpkm: What fpkm is chosen, options are hi, low and avg
113 # Returns:
114 # An organized vector of rnaseq data with the following elements:
115 # names(data) - the genes from the file. eg names(data[1]) = JUND
116 # data - the fpkm score for that gene data[1] = 31
117 if(param$debug) {
118 cat("In GetRNADataFromOneFile for ",file.name)
119 }
120
121 # Set Constants for the cufflinks file type:
122 GENE <- "gene_id"
123 BUNDLE <- "bundle_id"
124 CHROMOSOME <- "chr"
125 LEFT <- "left"
126 RIGHT <- "right"
127 FPKM_AVG <- "FPKM"
128 FPKM_LOW <- "FPKM_conf_lo"
129 FPKM_HIGH <- "FPKM_conf_hi"
130 STATUS <- "status"
131
132
133 #Get chip data in from files:
134 FPKM <- switch(fpkm,
135 hi = FPKM_HIGH,
136 avg = FPKM_AVG,
137 low = FPKM_LOW,
138 stop(" Bad fpkm quality argument supplied.")) # Default
139
140 keep.columns <- c(GENE, FPKM)
141
142 file <- read.delim(file.name, header=T, as.is=T)[, keep.columns]
143
144 rna = vector(mode = "numeric", length = nrow(file))
145
146 rna <- file[, FPKM]
147 names(rna) <- file[ , GENE]
148 return (rna)
149 }
150
151 GetRNAData <- function(file.names, file.lables = file.names, fpkm = "avg") {
152 # Reads in rna expression data from cufflinks
153 #
154 # Args:
155 # file.names: The list of paths to the cufflinks files (including the file name).
156 # fpkm: What fpkm is chosen, options are hi, low and avg
157 # Returns:
158 # A matrix of rnaseq data with the following description:
159 # each col corresponds to an rnaseq run
160 # eacg row corresponds to a gene
161 if(param$debug) {
162 print("In GetRNAData")
163 }
164 files <- list()
165 for(i in 1:length(file.names)) {
166 files[[i]] <-GetRNADataFromOneFile(file.names[i])
167 }
168 genes <- unique(names(unlist(files)))
169 scores <- matrix(0,nrow=length(genes),ncol=length(file.names))
170 rownames(scores) <- genes
171 colnames(scores) <- file.names
172
173 for(j in 1:length(file.names)) {
174 scores[names(files[[j]]),j] <- files[[j]]
175 }
176 print("# of cols for scores is ")
177 print(ncol(scores))
178 print ("file lables are ")
179 print (file.lables)
180 colnames(scores) <- file.lables
181 return(scores)
182 #scores[genes,file.names] <- files[[]]
183 }
184
185 NormalizeRNA <- function(scores) {
186
187 #add psudocount
188 scores <- scores+1
189
190 numerator <- scores[,1:(ncol(scores)/2)]
191 denominator <- scores[,(ncol(scores)/2 + 1):ncol(scores)]
192
193 #new.scores <- scores[,-which(colnames(scores) == norm.exp)]
194 #norm.score <- scores[,which(colnames(scores) == norm.exp)]
195 #return(log2(new.scores/norm.score))
196
197 return(log2(numerator/denominator))
198
199 }
200
201 PrepareRNAforHeatmap <- function(new.scores.foldchange){
202 new.scores.split <- matrix(0, nr=nrow(new.scores.foldchange), nc=2*ncol(new.scores.foldchange))
203 is.even <- function(x){ x %% 2 == 0 }
204 corresponding.col <- function(x){ceiling(x/2)}
205 new.col.names <- vector(length = ncol(new.scores.split))
206 rownames(new.scores.split) <- rownames(new.scores.foldchange)
207 for(i in 1:ncol(new.scores.split)) {
208 if(is.even(i)) {sign <- "down"} else {sign <- "up"}
209 new.col.names[i] <- paste(colnames(new.scores.foldchange)[corresponding.col(i)],
210 sign, sep =".")
211 new.scores.split[,i] <- new.scores.foldchange[,corresponding.col(i)]
212 if(is.even(i)) {
213 new.scores.split[which(new.scores.split[,i]>0),i] <- 0
214 new.scores.split[,i] <- -1*new.scores.split[,i]
215 } else {
216 new.scores.split[which(new.scores.split[,i]<0),i] <- 0
217 }
218 colnames(new.scores.split) <- new.col.names
219 }
220 return(new.scores.split)}
221
222 #ConvertExpressionToPval = function(expression, threshold = 1)
223 #{
224 # print("In ConvertExpressionToPval")
225 # #Generate Normal Stats
226 # # expression.mean = apply(expression, 2, mean)
227 # # expression.sd = apply(expression, 2, sd)
228 #
229 # ix.above.threshold = which(expression > threshold)
230 # expression.mean = apply(expression, 2, function(i) mean(i[which(i > threshold)]))
231 # expression.sd = apply(expression, 2, function(i) sd(i[which(i > threshold)]))
232 #
233
234 # # CDF from zero to point
235 # # expression.pval = matrix(,nr=nrow(expression),nc=ncol(expression),dimnames=dimnames(expression))
236 # # ix.low = which(expression < expression.mean)
237 # # ix.upper = which(expression >= expression.mean)
238 # # expression.pval[ix.low] = (-10 * pnorm(expression[ix.low], mean = expression.mean, sd = expression.sd, log=T) *log10(exp(1)))
239 #
240 # expression.pval = (-10 * log10(exp(1)) * pnorm(expression,
241 # mean = expression.mean,
242 # sd = expression.sd,
243 # log=T,
244 # lower.tail=F
245 # )
246 # )
247 # #squash those under threshold to zero
248 # expression.pval[-ix.above.threshold] = 0
249
250 # # correct for upper tail (ie from point to inf)
251 # #Still need to do
252 # return (expression.pval)
253 #}
254 MapExpressiontoChip2 = function(chip.targets, expression)
255 {
256 mymatrix = matrix(0, nr=length(chip.targets), nc=ncol(expression))
257 rownames(mymatrix) <- chip.targets
258 colnames(mymatrix) <- colnames(expression)
259
260
261 j <- 1 #expression counter
262 i <- 1 #mymatrix counter
263 expression.genes <- rownames(expression)
264 previous.expression.gene <- expression.genes[j]
265
266 # Progress bar:
267 total <- length(chip.targets)
268 # create progress bar
269 pb <- txtProgressBar(min = 0, max = total, style = 3)
270
271 for ( i in 1:length(chip.targets)) {
272 current.target <- chip.targets[i]
273 setTxtProgressBar(pb, i)
274 while (current.target > previous.expression.gene && j<=length(expression.genes)) {
275 j <- j + 1
276 previous.expression.gene <- expression.genes[j]
277 }
278 if (current.target == previous.expression.gene){
279 mymatrix[i,] <- expression[j,]
280 }
281 }
282
283 close(pb)
284
285 print("Leaving MapRNAtoChip")
286 return(mymatrix)
287 }
288
289
290 MapExpressiontoChip = function(chip.targets, expression)
291 {
292 print("In MapRNAtoChip")
293 # targets = unlist(strsplit(chip$targets[i], "_"))
294 # exp = matrix(rna$all.pval[targets, ])
295 # return (apply(exp, 2, mean))
296
297 #rna col 1 = th0 overexpressed
298 #rna col 2 = th17 overexpressed
299 #rna col 3 = true zscores
300
301
302
303
304
305
306 ####################################################################
307 chip.targets.notnull.unique <- unique(chip.targets[which(chip.targets!="")])
308 gene.intersect <- intersect(rownames(expression), chip.targets.notnull.unique)
309 chip.targets.notnull.unique.with.data <- chip.targets.notnull.unique[which(chip.targets.notnull.unique %in% gene.intersect)]
310 expression.useful.data <- expression[which(rownames(expression) %in% gene.intersect),]
311 chip.targets.notnull.unique.with.data.sorted <- chip.targets.notnull.unique.with.data[order(chip.targets.notnull.unique.with.data)]
312 expression.useful.data.sorted <- expression.useful.data[order(rownames(expression.useful.data)),]
313 ####################################################################
314
315 mymatrix = matrix(0, nr=length(chip.targets), nc=ncol(expression))
316 rownames(mymatrix) <- chip.targets
317 colnames(mymatrix) <- colnames(expression)
318 head(chip.targets.notnull.unique.with.data.sorted)
319 head(rownames(expression.useful.data.sorted))
320 if(!identical(chip.targets.notnull.unique.with.data.sorted, rownames(expression.useful.data.sorted))){
321 stop("We have a serious problem, chip is not alligned to expression")
322 }
323 for(i in 1:length(chip.targets.notnull.unique.with.data.sorted))
324 {
325 mtls.ix <- which(chip.targets == chip.targets.notnull.unique.with.data.sorted[i])
326 for(j in 1:length(mtls.ix)){
327 mymatrix[mtls.ix[j],] <- expression.useful.data.sorted[i,]
328 }
329 }
330 print("Leaving MapRNAtoChip")
331 return(mymatrix)
332
333 #return( sapply(chip$targets, function(x) apply(rna$all.pval[unlist(strsplit(chip$targets[3], "_")), ], 2, mean)) )
334 }
335
336
337
338
339
340 GenerateKMOrder <- function(data, km) {
341 # generates the order of clusters from high to low mean values
342 #
343 # Args:
344 # data: A matrix of scores that have already been clustered.
345 # km: The K-means object generated from running kmeans on data. Another
346 # method could be used so long as it supplies a (km)$cluser list. Must
347 # have the same length as the number of rows in data
348 #
349 # Returns:
350 # cluster.order: The order in which the clusters should be displayed.
351
352 km.cluster = km$cluster
353 clusters = unique(km.cluster)
354 clusters.avg = numeric()
355 for(i in clusters) {
356 clusters.avg = c(clusters.avg, mean(data[which(km.cluster == i), ]))
357 }
358 if(param$debug) {
359 print ("straight clusters")
360 print (clusters)
361 print ("straigth average")
362 print (clusters.avg)
363 print ("ordered clusters")
364 print (clusters[order(clusters.avg)])
365 print("ordered average")
366 print (clusters.avg[order(clusters.avg)])
367 }
368 return(clusters[rev(order(clusters.avg))])
369 }
370
371 OrderMTL <- function(data, km, cluster.order) {
372 # Orders a matrix of data according to a clustering algorithm
373 #
374 # Args:
375 # data: A matrix of scores that have already been clustered.
376 # km: The K-means object generated from running kmeans on data. Another
377 # method could be used so long as it supplies a (km)$cluser list. Must
378 # have the same length as the number of rows in data
379 # cluster.order: The order in which the clusters should be displayed.
380 # for eg. km.order = c(2, 3, 1) would result in cluster 2
381 # being on top, then cluster 3 and lastly cluster 1.
382 #
383 # Returns:
384 # a list that contains 3 objects:
385 # list$data: the ordered version of the data.
386 # list$color.vector: a list of colors that should be assigned to each row.
387 # list$start.row: the starting row of each cluster in data.
388 number.clusters <- length(cluster.order)
389 cluster.colors <- sample(rainbow(number.clusters))
390
391 # Set up return objects
392 sorted.data <- matrix(,nr=nrow(data), nc=ncol(data))
393 colnames(sorted.data) <- colnames(data)
394 cluster.color.vector = vector(length=length(km$cluster))
395 cluster.start.row = numeric(number.clusters)
396 cluster.start.row[1]=1
397
398 for (i in 1:number.clusters)
399 {
400 current.cluster = cluster.order[i]
401 ix = which(km$cluster == current.cluster)
402 current.cluster.range <- cluster.start.row[i]:(cluster.start.row[i]+length(ix)-1)
403 sorted.data[current.cluster.range, ] = data[ix, ]
404 cluster.color.vector[current.cluster.range] = cluster.colors[i]
405 cluster.start.row[i+1] = (cluster.start.row[i]+length(ix))
406 }
407 ret.list = list()
408 ret.list$data = sorted.data
409 ret.list$color.vector = cluster.color.vector
410 ret.list$cluster.index = cluster.start.row
411 return(ret.list)
412 }
413
414
415 CreateHeatMap <- function(data,
416 km,
417 cluster.order,
418 document.name,
419 document.type = "png",
420 number.colors = 30) {
421 # Generates a heatmap image based for a matrix based on a clustering
422 # algorithm.
423 #
424 # Args:
425 # data: A matrix of scores that have already been clustered, the column
426 # names of this matrix will become the column titels of the heatmap.
427 # km: The K-means object generated from running kmeans on data. Another
428 # method could be used so long as it supplies a (km)$cluser list.
429 # cluster.order: The order in which the clusters should be displayed.
430 # for eg. km.order = c(2, 3, 1) would result in cluster 2
431 # being on top, then cluster 3 and lastly cluster 1.
432 # document.name: A name for the produced file. there is no need to
433 # supply the .png/.pdf in your argument.
434 # document.type: The type of file you want to produce. current options are
435 # png and pdf. Default is pdf.
436 #
437 # Returns:
438 # Nothing to the script that calls it, however it creates an image at the
439 # path specified.
440 if(param$debug) {
441 print("In CreateHeatMap")
442 }
443
444 data.ordered <- OrderMTL(data, km, cluster.order)
445
446
447 #Load Lib
448 library(gplots)
449
450 #Set Color gradient
451 color.ramp = colorRampPalette(c("black",
452 "darkblue",
453 "blue",
454 "yellow",
455 "orange",
456 "red"))(number.colors) #7
457
458 if(document.type == "png") {
459 png(paste(document.name,".png", sep = ""),
460 #width=15360,#1920,
461 #height=204080,#2560,
462 #res=500,
463 antialias="gray")
464 }
465 else if(document.type == "pdf") {
466 pdf(paste(document.name,".pdf", sep = ""))
467 }
468 else if(document.type == "tiff") {
469 tiff(paste(document.name,".tiff", sep = ""),
470 res=800,
471 pointsize=2,
472 width=1920,
473 height=1920)
474 }
475 else {
476 bitmap(paste(document.name,".bmp", sep = ""),
477 height = 5,
478 width = 5,
479 res = 500)
480 }
481 #op <- par(mar = rep(0, 4))
482
483 heatmap.2(
484 data.ordered$data, #data.ordered$data[,sort(colnames(data.ordered$data))],
485 rowsep = data.ordered$cluster.index[-1],
486 sepwidth = c(0.5, ncol(data)/100),
487 dendrogram = "none",
488 Rowv = F,
489 Colv = F,
490 trace = "none",
491 labRow = F, #sapply(seq(1:length(data.ordered$cluster.index)), toString),
492 labCol = colnames(data.ordered$data), #sort(colnames(data.ordered$data)),
493 RowSideColors = data.ordered$color.vector,
494 keysize=0.6,
495 key=F,
496 col = color.ramp,
497 cexCol = 0.8,
498 cexRow = 0.8)
499
500 dev.off()
501
502 }
503
504 CreateIndividualHeatMap <- function(data,
505 km,
506 cluster.order,
507 color.ramp = colorRampPalette(c("black",
508 "darkblue",
509 "blue",
510 "yellow",
511 "orange",
512 "red"))(30)) {
513 # Generates a heatmap image based for a matrix based on a clustering
514 # algorithm.
515 #
516 # Args:
517 # data: A matrix of scores that have already been clustered, the column
518 # names of this matrix will become the column titels of the heatmap.
519 # km: The K-means object generated from running kmeans on data. Another
520 # method could be used so long as it supplies a (km)$cluser list.
521 # cluster.order: The order in which the clusters should be displayed.
522 # for eg. km.order = c(2, 3, 1) would result in cluster 2
523 # being on top, then cluster 3 and lastly cluster 1.
524 # document.name: A name for the produced file. there is no need to
525 # supply the .png/.pdf in your argument.
526 # document.type: The type of file you want to produce. current options are
527 # png and pdf. Default is pdf.
528 #
529 # Returns:
530 # Nothing to the script that calls it, however it creates an image at the
531 # path specified.
532 if(param$debug) {
533 print("In CreateHeatMap")
534 }
535
536 data.ordered <- OrderMTL(data, km, cluster.order)
537
538
539 #Load Lib
540 library(gplots)
541
542
543
544
545 heatmap.3(
546 data.ordered$data, #data.ordered$data[,sort(colnames(data.ordered$data))],
547 rowsep = data.ordered$cluster.index[-1],
548 sepwidth = c(0.5, nrow(data.ordered$data)/100),
549 dendrogram = "none",
550 Rowv = F,
551 Colv = F,
552 trace = "none",
553 labRow = F, #sapply(seq(1:length(data.ordered$cluster.index)), toString),
554 labCol = colnames(data.ordered$data), #sort(colnames(data.ordered$data)),
555 #sourcRowSideColors = data.ordered$color.vector,
556 keysize=0.6,
557 key=F,
558 col = color.ramp,
559 cexCol = 0.8,
560 cexRow = 0.8)
561
562
563 }
564
565 ReadCommadLineParameters <- function(argument.names, command.line.arguments, optional = F) {
566 # Reads the parameters from the command line arguments.
567 #
568 # Args:
569 # argument.names: A list of expected argument names.
570 # command.line.arguments: The list of recieved command line arguments
571 # optional: Are the areguments optional, or are they required, default is required
572 #
573 # Returns:
574 # The arguments for argument.names. As strings In that order.
575
576 if(length(grep("--version",command.line.arguments))) {
577 cat("version",script.version,"\n")
578 q()
579 }
580 # Split command line arguments
581 args <- sapply(strsplit(command.line.arguments, " "),function(i) i)
582 vals <- character(length(argument.names))
583 # split cmd.line to key and value pairs
584 for(i in 1:length(argument.names)) {
585 ix <- grep(argument.names[i], args)
586 if(length(ix)>1) {
587 stop("arg ",
588 argument.names[i],
589 " used more than once. Bailing out...\n",
590 PrintParamError())
591 }
592 else if (length(ix)==0 && !optional) {
593 stop("could not find ",
594 argument.names[i],
595 ". Bailing out...\n",
596 PrintParamError())
597 }
598 else if (length(ix)==0 && optional) {
599 vals[i] <- NA
600 }
601 else {
602 vals[i] <- args[ix+1]
603 }
604 }
605 return(vals)
606 }
607
608 PrintParamError <- function(){
609 # Prints the usage of the function, shows users what arguments they can use
610 #
611 # Args:
612 # param: A list that contains all the paramaters.
613 #
614 # Returns:
615 # A modified version of the param list, with the default values loaded.
616 cat("
617 DESCRIPTIION:
618 heatmap.R takes a ...
619
620 INPUT:
621 1.--mtls_file: path to mtls file.\n
622 2.--cluster_file: the destination path for the output cluster file.\n
623 3.--chip_experiment_order: The order of desired chip experiments (optional).\n
624 4.--heatmap_file: path for output heatmap image (no extension).\n
625 5.--heatmap_type: choice of image format, currently support png, pdf, tiff and bmp (optional)\n
626 6.--expression_file: list of expression files to be included in analysis (optional).\n
627 7.--expression_name: lables for the expression files (optional).\n
628 8.--normalization_file: a list of files to be used for normalization,
629 they can be the same file, however the number of expression nominated
630 normalization files must match the number of expression files (optional)\n
631 9.--n_clusters: number of clusters\n
632 10.--filter_percentage: percentage of mtls that will be analysed. Eg: if
633 we make filter_percentage 30, we will take the union of the top mtls in
634 mean, non-zero mean and variance (optional).\n
635
636 EXAMPLE RUN:
637 Rscript heatmap.R
638 --mtls_file path/to/mtls.xls
639 --cluster_file path/to/output/cluster
640 --chip_experiment_order tf1::tf2::tf5::tf3
641 --heatmap_file path/to/output/heatmap
642 --heatmap_type png
643 --expression_file path/to/exp1::path/to/exp2
644 --expression_name myexp1::myexp2
645 --normalization_file path/to/exp3::path/to/exp3
646 --n_clusters 13
647 --filter_percentage 100
648 --include_targetless yes
649 --number_bins 30
650
651 ")
652 }
653
654 #LoadDefaultParams <- function(param) {
655 # # Loads default paramaters for the heatmap application
656 # #
657 # # Args:
658 # # param: A list that contains all the previous paramaters.
659 # #
660 # # Returns:
661 # # A modified version of the param list, with the default values loaded.
662
663 #script.version=0.1
664 #param$debug = F
665
666 ## RNA data:
667 #param$rna.files = ""
668 #param$rna.normalization = "none"
669
670 ## Filter:
671 #param$filter.percentage <- 100
672
673 ## Clustering:
674 #param$clustering.number.of.clusters <- 13
675
676 ## Heatmap:
677 #param$heatmap.document.name <- "heatmap"
678 #param$heatmap.document.type <- "png"
679 ##Cluster Groups:
680 #param$cluster.groups.document.name <- "clusters"
681
682 #return(param)
683
684 #}
685
686
687
688 LoadParams <- function(cmd.args, args.nms, n.start.numeric, optional = F) {
689 # Loads user defined paramaters for the heatmap application
690 #
691 # Args:
692 # cmd.args: The command line arguments given.
693 # arg.nms: A list of possible command arguments.
694 # n.start.numeric: the first argument that should be numeric (alway put
695 # these last).
696 # optional: This specifies if the params in cmd.args are optional or
697 # required.
698 # Returns:
699 # A list of values assigned to each argument.
700
701
702 vals <- ReadCommadLineParameters(argument.names = args.nms,
703 command.line.arguments = cmd.args,
704 optional = optional)
705
706 #check if numeric params are indeed numeric
707 if(!optional) {
708 for(i in n.start.numeric:length(vals)){
709 if(is.na(as.numeric(vals[i]))){
710 stop("arg ",args.nms[i]," is not numeric. Bailing out...\n",print.error())
711 }
712 }
713 }
714 return (vals)
715
716 }
717
718 #ValidateParams <- function(params) {
719 # return(T)
720 #}
721
722
723 ##########
724
725 LoadDebugParams <- function(param) {
726
727 cmd.args <- c(
728 "--mtls_file data/test/mtls.xls",
729 "--cluster_file data/test/cluster",
730 "--heatmap_file data/test/heatmap",
731 "--heatmap_type bmp",
732 "--n_clusters 13",
733 "--filter_percentage 100",
734 "--expression_file /home/kieran/code/scorchR-heatmap/data/expression/rna1.tabular::/home/kieran/code/scorchR-heatmap/data/expression/rna2.tabular",
735 "--expression_name batf.ko.0::batf.ko.17",
736 "--normalization_file mean",
737 "--chip_experiment_order ac::bc::cc::dc::ec",
738 "--include_targetless yes",
739 "--number_bins 20"
740 )
741
742 args.nms <- c( "--mtls_file", #1
743 "--cluster_file", #2
744 "--chip_experiment_order", #3
745 "--heatmap_file", #4
746 "--heatmap_type", #5
747 "--expression_file", #6
748 "--expression_name", #7
749 "--normalization_file", #8
750 "--include_targetless", #9
751 "--n_clusters", #10
752 "--filter_percentage", #11
753 "--number_bins") #12
754
755
756 # vals has the same order as args.nms
757 vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 10, optional = F)
758
759
760 # ChIP data:
761 param$annotated.macs.file <- vals[1]
762 param$chip.order <- vals[3]
763 # RNA data:
764 param$rna.files = vals[6]
765 param$rna.names = vals[7]
766 param$rna.normalization.file = vals[8]
767 param$include.targetless = vals[9]
768
769 # Filter:
770 param$filter.percentage <- as.numeric(vals[11])
771
772 # Clustering:
773 param$number.bins <- as.numeric(vals[12])
774 param$clustering.number.of.clusters <- as.numeric(vals[10])
775
776 # Heatmap:
777 param$heatmap.document.name <- vals[4]
778 param$heatmap.document.type <- vals[5]
779 #Cluster Groups:
780 param$cluster.groups.document.name <- vals[2]
781
782 return(param)
783
784 }
785
786
787
788 LoadOptionalParams <- function(param) {
789
790 cmd.args <- commandArgs(trailingOnly = T)
791
792 args.nms <- c( "--chip_experiment_order", #1
793 "--expression_file", #2
794 "--expression_name", #3
795 "--normalization_file", #4
796 "--heatmap_type", #5
797 "--include_targetless", #6
798 "--filter_percentage", #7
799 "--number_bins") #8
800
801
802 # vals has the same order as args.nms
803 vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 7, optional = T)
804
805 # ChIP data:
806 param$chip.order <- if(!is.na(vals[1])){vals[1]}else{NA}
807
808 # RNA data:
809 param$rna.files <- if(!is.na(vals[2])){vals[2]}else{"none"}
810 param$rna.names <- if(!is.na(vals[3])){vals[3]}else{"none"}
811 param$rna.normalization.file <- if(!is.na(vals[4])){vals[4]}else{"no"}
812 param$include.targetless <- if(!is.na(vals[6])){vals[6]}else{"yes"}
813 # Filter:
814 param$filter.percentage <- if(!is.na(vals[7])){as.numeric(vals[7])}else{100}
815 param$number.bins <- if(!is.na(vals[8])){as.numeric(vals[8])}else{30}
816 # Heatmap file (output)
817 param$heatmap.document.type <- if(!is.na(vals[5])){vals[5]}else{"none"}
818
819 return(param)
820 }
821
822
823 LoadReqiredParams <- function(param){
824
825 cmd.args <- commandArgs(trailingOnly = T)
826
827 args.nms <- c( "--mtls_file", #1
828 "--cluster_file", #2
829 "--heatmap_file", #3
830 "--n_clusters") #4
831
832
833 # vals has the same order as args.nms
834 vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 4, optional = F)
835
836 # ChIP data:
837 param$annotated.macs.file <- vals[1]
838
839 # Clustering
840 param$clustering.number.of.clusters <- as.numeric(vals[4])
841
842 # Cluster file (output):
843 param$cluster.groups.document.name <- vals[2]
844
845
846 # Heatmap file (output):
847 param$heatmap.document.name <- vals[3]
848
849 return(param)
850
851 }
852
853 ###########
854 # here we output the fasta file of the targets of each kmeans cluster
855 CreateClusterGroups <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){
856 f.nm = paste(f.nm, ".fasta", sep="")
857 clusters = km.order
858 for(i in 1:length(clusters)){
859 v=trgts[which(k.ix==clusters[i])]
860 v.split = unlist(sapply(v,strsplit, "_"))
861 if(i == 1){
862 cat(sep="",file=f.nm,">cluster_",i,"\n")#clusters[i],"\n")
863 } else {
864 cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE)
865 }
866 cat(sep="\n",file=f.nm,v.split,append=TRUE)
867 }
868 }
869
870
871 PrintClusters <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){
872 f.nm = paste(f.nm, ".tsv", sep="")
873 cat(sep="\t",file=f.nm,"row_number/target","cluster","\n")
874 trgts[which(trgts=="")] <- "no_target"
875 for(i in 1:length(trgts)){
876 cat(sep="",file=f.nm,trgts[i],"\t","cluster_",k.ix[i],"\n",append=TRUE)
877 }
878 # if(i == 1){
879 # cat(sep="",file=f.nm,"cluster_",i,"\n")#clusters[i],"\n")
880 # } else {
881 # cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE)
882 # }
883 #cat(sep="\n",file=f.nm,v.split,append=TRUE)
884 }
885 GetTopRowsFromMatrix = function(mtrx, percentage = 10)
886 {
887 if (param$debug) {
888 print("In GetTopRowsFromMatrix")
889 }
890 #Store the stats for the mtrx
891 stats = list()
892 stats$mean=apply(mtrx,1,mean)
893 stats$sd=apply(mtrx,1,sd)
894 stats$nonzero.mean=apply(mtrx, 1, function(x) mean(x[which(x != 0)]))
895
896
897 #Store the indexes for the mtrx
898 index = list()
899 index$mean = sort(stats$mean, decreasing=T, index.return=T)$ix
900 index$sd = sort(stats$sd, decreasing=T, index.return=T)$ix
901 index$nonzero.mean = sort(stats$nonzero.mean, decreasing=T, index.return=T)$ix
902
903 #Calculate how many data points we want to take
904 cutoff = floor(length(mtrx[ ,1])*(percentage/100))
905
906 #Extract Indexes and return to caller
907 index$union = union(index$mean[1:cutoff], union(index$sd[1:cutoff], index$nonzero.mean[1:cutoff]))
908
909 return(index)
910 }
911
912 #GetUniq = function(targets.cluster)
913 #{
914 # #problem with this function is that it agrigates the list handed to it. after this point you cant maintain order
915 #
916 # return(unique(unlist(lapply(targets.cluster, function(i) strsplit(i, "_")))))
917 # #return(unlist(lapply(targets.cluster, function(i) strsplit(i, "_"))))
918 #
919 #}
920
921 bindata = function(d,qunts=seq(.4,.9,.1))
922 {
923 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
924 for(j in 1:ncol(d))
925 {
926 bins=quantile(d[,j],qunts)
927 for(i in 1:length(bins))
928 {
929 tmp[which(d[,j]>bins[i]),j] = i
930 }
931 }
932 return(tmp)
933 }
934
935 bindata.non.zero = function(d,qunts=seq(0,0.9,0.1))
936 {
937 tmp=matrix(0,nr=nrow(d),nc=ncol(d))
938 for(j in 1:ncol(d))
939 {
940 ix.non.zero=which(d[,j]!=0)
941 bins=quantile(d[ix.non.zero,j],qunts)
942 for(i in 1:length(bins))
943 {
944 tmp[which(d[,j]>bins[i]),j] = i
945 }
946 }
947
948 return(tmp)
949 }
950 bindata.matrix = function(d,qunts=seq(0,0.9,0.1))
951 {
952 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
953 #ix.non.zero=which(d!=0)
954 bins=quantile(d[],qunts)
955 for(i in 1:length(bins))
956 {
957 tmp[which(d>bins[i])] = i
958 }
959 return(tmp)
960 }
961 bindata.non.zero.matrix = function(d,qunts=seq(0,0.9,0.1))
962 {
963 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
964 ix.non.zero=which(d!=0)
965 bins=quantile(d[ix.non.zero],qunts)
966 for(i in 1:length(bins))
967 {
968 tmp[which(d>bins[i])] = i
969 }
970 return(tmp)
971 }
972 heatmap.3 <- function (x, Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
973 distfun = dist, hclustfun = hclust, dendrogram = c("both",
974 "row", "column", "none"), symm = FALSE, scale = c("none",
975 "row", "column"), na.rm = TRUE, revC = identical(Colv,
976 "Rowv"), add.expr, breaks, symbreaks = min(x < 0, na.rm = TRUE) ||
977 scale != "none", col = "heat.colors", colsep, rowsep,
978 sepcolor = "white", sepwidth = c(0.05, 0.05), cellnote, notecex = 1,
979 notecol = "cyan", na.color = par("bg"), trace = c("column",
980 "row", "both", "none"), tracecol = "cyan", hline = median(breaks),
981 vline = median(breaks), linecol = tracecol, margins = c(5,
982 5), ColSideColors, RowSideColors, cexRow = 0.2 + 1/log10(nr),
983 cexCol = 0.2 + 1/log10(nc), labRow = NULL, labCol = NULL,
984 key = TRUE, keysize = 1.5, density.info = c("histogram",
985 "density", "none"), denscol = tracecol, symkey = min(x <
986 0, na.rm = TRUE) || symbreaks, densadj = 0.25, main = NULL,
987 xlab = NULL, ylab = NULL, lmat = NULL, lhei = NULL, lwid = NULL,
988 ...)
989 {
990 scale01 <- function(x, low = min(x), high = max(x)) {
991 x <- (x - low)/(high - low)
992 x
993 }
994 retval <- list()
995 scale <- if (symm && missing(scale))
996 "none"
997 else match.arg(scale)
998 dendrogram <- match.arg(dendrogram)
999 trace <- match.arg(trace)
1000 density.info <- match.arg(density.info)
1001 if (length(col) == 1 && is.character(col))
1002 col <- get(col, mode = "function")
1003 if (!missing(breaks) && (scale != "none"))
1004 warning("Using scale=\"row\" or scale=\"column\" when breaks are",
1005 "specified can produce unpredictable results.", "Please consider using only one or the other.")
1006 if (is.null(Rowv) || is.na(Rowv))
1007 Rowv <- FALSE
1008 if (is.null(Colv) || is.na(Colv))
1009 Colv <- FALSE
1010 else if (Colv == "Rowv" && !isTRUE(Rowv))
1011 Colv <- FALSE
1012 if (length(di <- dim(x)) != 2 || !is.numeric(x))
1013 stop("`x' must be a numeric matrix")
1014 nr <- di[1]
1015 nc <- di[2]
1016 if (nr <= 1 || nc <= 1)
1017 stop("`x' must have at least 2 rows and 2 columns")
1018 if (!is.numeric(margins) || length(margins) != 2)
1019 stop("`margins' must be a numeric vector of length 2")
1020 if (missing(cellnote))
1021 cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
1022 if (!inherits(Rowv, "dendrogram")) {
1023 if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
1024 c("both", "row"))) {
1025 if (is.logical(Colv) && (Colv))
1026 dendrogram <- "column"
1027 else dedrogram <- "none"
1028 warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
1029 dendrogram, "'. Omitting row dendogram.")
1030 }
1031 }
1032 if (!inherits(Colv, "dendrogram")) {
1033 if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
1034 c("both", "column"))) {
1035 if (is.logical(Rowv) && (Rowv))
1036 dendrogram <- "row"
1037 else dendrogram <- "none"
1038 warning("Discrepancy: Colv is FALSE, while dendrogram is `",
1039 dendrogram, "'. Omitting column dendogram.")
1040 }
1041 }
1042 if (inherits(Rowv, "dendrogram")) {
1043 ddr <- Rowv
1044 rowInd <- order.dendrogram(ddr)
1045 }
1046 else if (is.integer(Rowv)) {
1047 hcr <- hclustfun(distfun(x))
1048 ddr <- as.dendrogram(hcr)
1049 ddr <- reorder(ddr, Rowv)
1050 rowInd <- order.dendrogram(ddr)
1051 if (nr != length(rowInd))
1052 stop("row dendrogram ordering gave index of wrong length")
1053 }
1054 else if (isTRUE(Rowv)) {
1055 Rowv <- rowMeans(x, na.rm = na.rm)
1056 hcr <- hclustfun(distfun(x))
1057 ddr <- as.dendrogram(hcr)
1058 ddr <- reorder(ddr, Rowv)
1059 rowInd <- order.dendrogram(ddr)
1060 if (nr != length(rowInd))
1061 stop("row dendrogram ordering gave index of wrong length")
1062 }
1063 else {
1064 rowInd <- nr:1
1065 }
1066 if (inherits(Colv, "dendrogram")) {
1067 ddc <- Colv
1068 colInd <- order.dendrogram(ddc)
1069 }
1070 else if (identical(Colv, "Rowv")) {
1071 if (nr != nc)
1072 stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
1073 if (exists("ddr")) {
1074 ddc <- ddr
1075 colInd <- order.dendrogram(ddc)
1076 }
1077 else colInd <- rowInd
1078 }
1079 else if (is.integer(Colv)) {
1080 hcc <- hclustfun(distfun(if (symm)
1081 x
1082 else t(x)))
1083 ddc <- as.dendrogram(hcc)
1084 ddc <- reorder(ddc, Colv)
1085 colInd <- order.dendrogram(ddc)
1086 if (nc != length(colInd))
1087 stop("column dendrogram ordering gave index of wrong length")
1088 }
1089 else if (isTRUE(Colv)) {
1090 Colv <- colMeans(x, na.rm = na.rm)
1091 hcc <- hclustfun(distfun(if (symm)
1092 x
1093 else t(x)))
1094 ddc <- as.dendrogram(hcc)
1095 ddc <- reorder(ddc, Colv)
1096 colInd <- order.dendrogram(ddc)
1097 if (nc != length(colInd))
1098 stop("column dendrogram ordering gave index of wrong length")
1099 }
1100 else {
1101 colInd <- 1:nc
1102 }
1103 retval$rowInd <- rowInd
1104 retval$colInd <- colInd
1105 retval$call <- match.call()
1106 x <- x[rowInd, colInd]
1107 x.unscaled <- x
1108 cellnote <- cellnote[rowInd, colInd]
1109 if (is.null(labRow))
1110 labRow <- if (is.null(rownames(x)))
1111 (1:nr)[rowInd]
1112 else rownames(x)
1113 else labRow <- labRow[rowInd]
1114 if (is.null(labCol))
1115 labCol <- if (is.null(colnames(x)))
1116 (1:nc)[colInd]
1117 else colnames(x)
1118 else labCol <- labCol[colInd]
1119 if (scale == "row") {
1120 retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
1121 x <- sweep(x, 1, rm)
1122 retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
1123 x <- sweep(x, 1, sx, "/")
1124 }
1125 else if (scale == "column") {
1126 retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
1127 x <- sweep(x, 2, rm)
1128 retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
1129 x <- sweep(x, 2, sx, "/")
1130 }
1131 if (missing(breaks) || is.null(breaks) || length(breaks) <
1132 1) {
1133 if (missing(col) || is.function(col))
1134 breaks <- 16
1135 else breaks <- length(col) + 1
1136 }
1137 if (length(breaks) == 1) {
1138 if (!symbreaks)
1139 breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
1140 length = breaks)
1141 else {
1142 extreme <- max(abs(x), na.rm = TRUE)
1143 breaks <- seq(-extreme, extreme, length = breaks)
1144 }
1145 }
1146 nbr <- length(breaks)
1147 ncol <- length(breaks) - 1
1148 if (class(col) == "function")
1149 col <- col(ncol)
1150 min.breaks <- min(breaks)
1151 max.breaks <- max(breaks)
1152 x[x < min.breaks] <- min.breaks
1153 x[x > max.breaks] <- max.breaks
1154 # if (missing(lhei) || is.null(lhei))
1155 # lhei <- c(keysize, 4)
1156 # if (missing(lwid) || is.null(lwid))
1157 # lwid <- c(keysize, 4)
1158 # if (missing(lmat) || is.null(lmat)) {
1159 # lmat <- rbind(4:3, 2:1)
1160 # if (!missing(ColSideColors)) {
1161 # if (!is.character(ColSideColors) || length(ColSideColors) !=
1162 # nc)
1163 # stop("'ColSideColors' must be a character vector of length ncol(x)")
1164 # lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] +
1165 # 1)
1166 # lhei <- c(lhei[1], 0.2, lhei[2])
1167 # }
1168 # if (!missing(RowSideColors)) {
1169 # if (!is.character(RowSideColors) || length(RowSideColors) !=
1170 # nr)
1171 # stop("'RowSideColors' must be a character vector of length nrow(x)")
1172 # lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) -
1173 # 1), 1), lmat[, 2] + 1)
1174 # lwid <- c(lwid[1], 0.2, lwid[2])
1175 # }
1176 # lmat[is.na(lmat)] <- 0
1177 # }
1178 # if (length(lhei) != nrow(lmat))
1179 # stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
1180 # if (length(lwid) != ncol(lmat))
1181 # stop("lwid must have length = ncol(lmat) =", ncol(lmat))
1182 # op <- par(no.readonly = TRUE)
1183 # on.exit(par(op))
1184 # layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
1185 if (!missing(RowSideColors)) {
1186 par(mar = c(margins[1], 0, 0, 0.5))
1187 image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
1188 }
1189 if (!missing(ColSideColors)) {
1190 par(mar = c(0.5, 0, 0, margins[2]))
1191 image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
1192 }
1193 par(mar = c(margins[1], 0, 0, margins[2]))
1194 x <- t(x)
1195 cellnote <- t(cellnote)
1196 if (revC) {
1197 iy <- nr:1
1198 if (exists("ddr"))
1199 ddr <- rev(ddr)
1200 x <- x[, iy]
1201 cellnote <- cellnote[, iy]
1202 }
1203 else iy <- 1:nr
1204 image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 +
1205 c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col,
1206 breaks = breaks, ...)
1207 retval$carpet <- x
1208 if (exists("ddr"))
1209 retval$rowDendrogram <- ddr
1210 if (exists("ddc"))
1211 retval$colDendrogram <- ddc
1212 retval$breaks <- breaks
1213 retval$col <- col
1214 if (!invalid(na.color) & any(is.na(x))) {
1215 mmat <- ifelse(is.na(x), 1, NA)
1216 image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
1217 col = na.color, add = TRUE)
1218 }
1219 axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
1220 cex.axis = cexCol)
1221 if (!is.null(xlab))
1222 mtext(xlab, side = 1, line = margins[1] - 1.25)
1223 axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
1224 cex.axis = cexRow)
1225 if (!is.null(ylab))
1226 mtext(ylab, side = 4, line = margins[2] - 1.25)
1227 if (!missing(add.expr))
1228 eval(substitute(add.expr))
1229 if (!missing(colsep))
1230 for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0,
1231 length(csep)), xright = csep + 0.5 + sepwidth[1],
1232 ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1,
1233 col = sepcolor, border = sepcolor)
1234 if (!missing(rowsep))
1235 for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) +
1236 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) +
1237 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1,
1238 col = sepcolor, border = sepcolor)
1239 min.scale <- min(breaks)
1240 max.scale <- max(breaks)
1241 x.scaled <- scale01(t(x), min.scale, max.scale)
1242 if (trace %in% c("both", "column")) {
1243 retval$vline <- vline
1244 vline.vals <- scale01(vline, min.scale, max.scale)
1245 for (i in colInd) {
1246 if (!is.null(vline)) {
1247 abline(v = i - 0.5 + vline.vals, col = linecol,
1248 lty = 2)
1249 }
1250 xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
1251 xv <- c(xv[1], xv)
1252 yv <- 1:length(xv) - 0.5
1253 lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
1254 }
1255 }
1256 if (trace %in% c("both", "row")) {
1257 retval$hline <- hline
1258 hline.vals <- scale01(hline, min.scale, max.scale)
1259 for (i in rowInd) {
1260 if (!is.null(hline)) {
1261 abline(h = i + hline, col = linecol, lty = 2)
1262 }
1263 yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
1264 yv <- rev(c(yv[1], yv))
1265 xv <- length(yv):1 - 0.5
1266 lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
1267 }
1268 }
1269 if (!missing(cellnote))
1270 text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
1271 col = notecol, cex = notecex)
1272 par(mar = c(margins[1], 0, 0, 0))
1273 # if (dendrogram %in% c("both", "row")) {
1274 # plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
1275 # }
1276 # else plot.new()
1277 par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
1278 # if (dendrogram %in% c("both", "column")) {
1279 # plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
1280 # }
1281 # else plot.new()
1282 if (!is.null(main))
1283 title(main, cex.main = 1.5 * op[["cex.main"]])
1284 # if (key) {
1285 # par(mar = c(5, 4, 2, 1), cex = 0.75)
1286 # tmpbreaks <- breaks
1287 # if (symkey) {
1288 # max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
1289 # min.raw <- -max.raw
1290 # tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
1291 # tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
1292 # }
1293 # else {
1294 # min.raw <- min(x, na.rm = TRUE)
1295 # max.raw <- max(x, na.rm = TRUE)
1296 # }
1297 # z <- seq(min.raw, max.raw, length = length(col))
1298 # image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
1299 # xaxt = "n", yaxt = "n")
1300 # par(usr = c(0, 1, 0, 1))
1301 # lv <- pretty(breaks)
1302 # xv <- scale01(as.numeric(lv), min.raw, max.raw)
1303 # axis(1, at = xv, labels = lv)
1304 # if (scale == "row")
1305 # mtext(side = 1, "Row Z-Score", line = 2)
1306 # else if (scale == "column")
1307 # mtext(side = 1, "Column Z-Score", line = 2)
1308 # else mtext(side = 1, "Value", line = 2)
1309 # if (density.info == "density") {
1310 # dens <- density(x, adjust = densadj, na.rm = TRUE)
1311 # omit <- dens$x < min(breaks) | dens$x > max(breaks)
1312 # dens$x <- dens$x[-omit]
1313 # dens$y <- dens$y[-omit]
1314 # dens$x <- scale01(dens$x, min.raw, max.raw)
1315 # lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
1316 # lwd = 1)
1317 # axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
1318 # title("Color Key\nand Density Plot")
1319 # par(cex = 0.5)
1320 # mtext(side = 2, "Density", line = 2)
1321 # }
1322 # else if (density.info == "histogram") {
1323 # h <- hist(x, plot = FALSE, breaks = breaks)
1324 # hx <- scale01(breaks, min.raw, max.raw)
1325 # hy <- c(h$counts, h$counts[length(h$counts)])
1326 # lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
1327 # col = denscol)
1328 # axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
1329 # title("Color Key\nand Histogram")
1330 # par(cex = 0.5)
1331 # mtext(side = 2, "Count", line = 2)
1332 # }
1333 # else title("Color Key")
1334 # }
1335 # else plot.new()
1336 retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
1337 high = retval$breaks[-1], color = retval$col)
1338 invisible(retval)
1339 }
1340
1341 #rm(list=ls())
1342 param <- list()
1343 param$debug <- FALSE #T
1344
1345 print("Finished loading functions")
1346
1347
1348 if(param$debug) {
1349 param <- LoadDebugParams(param)
1350 } else {
1351 param <- LoadReqiredParams(param)
1352 param <- LoadOptionalParams(param)
1353 }
1354 print (param)
1355 Rprof() #Remove me
1356 # Load Chip data, alread in pval format
1357 # Column.order need to be a vecor of strings, that correspond to the order (and
1358 # inclustion) of chip experiments.
1359 chip <- GetChipData(param$annotated.macs.file, column.order = param$chip.order)
1360
1361 #Get the top percentages on different criteria
1362 #ix.top <- GetTopRowsFromMatrix(chip$peaks, percentage = param$filter.percentage)
1363 #chip$peaks <- chip$peaks[ix.top$union, ]
1364 #chip$targets <- chip$targets[ix.top$union]
1365 #Bin data:
1366 chip$peaks <- bindata.non.zero.matrix(chip$peaks, qunts = seq(0, 0.9, length=(param$number.bins+1)))
1367
1368 file.names <- unlist(strsplit(param$rna.files, split="::"))
1369 print(file.names)
1370 file.lables <- unlist(strsplit(param$rna.names, split="::"))
1371 print(file.lables)
1372
1373 library("RColorBrewer")
1374 color.2 = rev(colorRampPalette(c("darkblue", "steelblue", "lightgrey"))(param$number.bins))
1375 if(!is.na(file.names) && file.names != "none") {
1376 #rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
1377 #do differential expression if the user specifies such:
1378 if(param$rna.normalization != "no") {
1379 if(param$rna.normalization != "mean"){
1380 norm.file.names <- unlist(strsplit(param$rna.normalization.file, split="::"))
1381 all.file.names <- c(file.names, norm.file.names)
1382 all.file.lables <- c(file.lables, norm.file.names)
1383 print (all.file.names)
1384 print (all.file.lables)
1385 rna.scores <- GetRNAData(all.file.names, file.lables = all.file.lables, fpkm = "avg")
1386 rna.scores <- NormalizeRNA(scores = rna.scores)
1387 rna.scores.sign <- rna.scores/abs(rna.scores)
1388 rna.scores.sign[which(is.nan(rna.scores.sign))] <- 0
1389 rna.scores <- bindata.non.zero.matrix(abs(rna.scores), qunts = seq(0, 0.9, length=(param$number.bins+1)))
1390 rna.scores <- rna.scores.sign * rna.scores
1391 color.2 = colorRampPalette(c("darkblue", "steelblue", "lightgrey", "pink", "darkred"))(param$number.bins)
1392 } else {
1393 print("we are normalizing by average")
1394 print("file lables are:")
1395 print(file.lables)
1396 rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
1397 rna.scores <- t(apply(rna.scores,1, function(x) { if(mean(x)!=0){ return(x/mean(x)) } else { return(x) } }))
1398 rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = seq(0, 0.9, length=(param$number.bins+1)))
1399 }
1400 } else {
1401 rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
1402 rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = seq(0, 0.9, length=(param$number.bins+1)))
1403 }
1404
1405 # chip$peaks <- chip$peaks[order(chip$targets), ]
1406 # rna.scores <- rna.scores[order(rownames(rna.scores)), ]
1407 # chip$targets <- chip$targets[order(chip$targets)]
1408
1409 rna.scores.mapped <- MapExpressiontoChip(chip.targets = chip$targets, expression=rna.scores)
1410 all.data <- cbind(chip$peaks, rna.scores.mapped)
1411 if(param$include.targetless!="yes"){
1412 all.data <- all.data[-which(chip$targets==""),]
1413 chip$peaks <- chip$peaks[-which(chip$targets==""),]
1414 rna.scores.mapped <- rna.scores.mapped[-which(chip$targets==""),]
1415 chip$targets <- chip$targets[-which(chip$targets=="")]
1416 }
1417 } else {
1418 all.data <- chip$peaks
1419 }
1420
1421 print("Clustering")
1422 set.seed(1234)
1423 km <- kmeans(all.data, param$clustering.number.of.clusters, iter.max = 50)
1424 print("Ordering")
1425 km.order <- GenerateKMOrder(all.data, km)
1426
1427 ##AM edits##
1428 km.new.order <- numeric(length(km$cluster))
1429 for(i in 1:length(km.order)){
1430 cur.clst <- km.order[i]
1431 ix <- which(km$cluster==cur.clst)
1432 km.new.order[ix] <- i
1433 }
1434 km.order <- 1:length(km.order)
1435 km$cluster <- km.new.order
1436 ## Done AM ##
1437 print("Creating image")
1438 #bmp(param$heatmap.document.name, 640, 480)#1280, 960)
1439 bmp(paste(param$heatmap.document.name,".bmp", sep = ""), 5120, 3840)#2560, 1920)
1440 #pdf(paste(param$heatmap.document.name,".pdf", sep = ""))
1441 color.1 <- c("#000000", colorRampPalette(c("blue", "yellow","orange","red"))(param$number.bins))
1442 if(!is.na(file.names) && file.names != "none") {
1443 PrintClusters(trgts=chip$targets,
1444 k.ix=km$cluster,
1445 f.nm = param$cluster.groups.document.name,
1446 km.order = NA)
1447
1448 #if(param$rna.normalization != "no") {
1449 # data.split <- cbind(chip$peaks, PrepareRNAforHeatmap(rna.scores.mapped))
1450 #} else {
1451 # data.split <- cbind(chip$peaks, rna.scores.mapped)
1452 #}
1453 expression.width.multiplier <- 2
1454 if(ncol(rna.scores.mapped)==1) {
1455 rna.scores.mapped <- cbind(rna.scores.mapped, rna.scores.mapped)
1456 expression.width.multiplier <- 1
1457 }
1458
1459 layout(matrix(c(1,2,2,5,
1460 1,3,4,6,
1461 1,3,4,7,
1462 1,3,4,8,
1463 1,3,4,9),
1464 5,4,
1465 byrow=T),
1466 widths=c(1,2*ncol(chip$peaks),expression.width.multiplier*ncol(rna.scores.mapped),1),
1467 heights=c(1,10,2,10,2))
1468 #1
1469 plot.new()
1470 #2
1471 plot.new()
1472 #3
1473 print("Creating peak image")
1474 CreateIndividualHeatMap(chip$peaks,
1475 km,
1476 km.order, color.ramp=color.1)
1477 #4
1478 print("Creating rna image")
1479 CreateIndividualHeatMap(rna.scores.mapped,
1480 km,
1481 km.order, color.ramp=color.2)
1482 #5
1483 plot.new()
1484 #6
1485 image(t(matrix(1:param$number.bins,nc=1)), col=color.1)
1486 #7
1487 plot.new()
1488 #8
1489 image(t(matrix(1:param$number.bins,nc=1)), col=color.2)
1490 #9
1491 plot.new()
1492 } else {
1493 PrintClusters(trgts=1:nrow(chip$peaks),
1494 k.ix=km$cluster,
1495 f.nm = param$cluster.groups.document.name,
1496 km.order = NA)
1497
1498
1499 layout(matrix(c(1,2,2,
1500 1,3,4,
1501 1,3,5),3,3,byrow=T),
1502 widths=c(1,3*ncol(chip$peaks),1),
1503 heights=c(1,8,1))
1504 #1
1505 plot.new()
1506 #2
1507 plot.new()
1508 #3
1509 CreateIndividualHeatMap(chip$peaks,
1510 km,
1511 km.order, color.ramp=color.1)
1512 #4
1513 image(t(matrix(1:param$number.bins,nc=1)), col=color.1)
1514 #5
1515 plot.new()
1516
1517 }
1518
1519 dev.off()
1520 Rprof(NULL)
1521 profile <- summaryRprof()
1522 print(str(profile))
1523 print(profile)
1524 #save.image("test/testData.RData")