annotate heatmap.r @ 45:b84a193361be draft

Uploaded
author guerler
date Thu, 15 May 2014 15:15:21 -0400
parents eafc7121f553
children 5a478e171752
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37
eafc7121f553 Uploaded
guerler
parents:
diff changeset
1 # limits
eafc7121f553 Uploaded
guerler
parents:
diff changeset
2 min_limit = 20
eafc7121f553 Uploaded
guerler
parents:
diff changeset
3
eafc7121f553 Uploaded
guerler
parents:
diff changeset
4 # load sparse matrix package
eafc7121f553 Uploaded
guerler
parents:
diff changeset
5 suppressPackageStartupMessages(library('Matrix'))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
6
eafc7121f553 Uploaded
guerler
parents:
diff changeset
7 # access a numeric column
eafc7121f553 Uploaded
guerler
parents:
diff changeset
8 get_numeric <- function(table, column_key) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
9 column <- as.numeric(column_key)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
10 column_data <- sapply( table[column], as.numeric )
eafc7121f553 Uploaded
guerler
parents:
diff changeset
11 return (c(column_data))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
12 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
13
eafc7121f553 Uploaded
guerler
parents:
diff changeset
14 # access a label column
eafc7121f553 Uploaded
guerler
parents:
diff changeset
15 get_label <- function(table, column_key) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
16 column <- as.numeric(column_key)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
17 return (c(table[column]))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
18 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
19
eafc7121f553 Uploaded
guerler
parents:
diff changeset
20 # inflate three columns into matrix
eafc7121f553 Uploaded
guerler
parents:
diff changeset
21 matrify <- function (data) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
22 if (ncol(data) != 3)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
23 stop('Data frame must have three column format')
eafc7121f553 Uploaded
guerler
parents:
diff changeset
24 plt <- data[, 1]
eafc7121f553 Uploaded
guerler
parents:
diff changeset
25 spc <- data[, 2]
eafc7121f553 Uploaded
guerler
parents:
diff changeset
26 abu <- data[, 3]
eafc7121f553 Uploaded
guerler
parents:
diff changeset
27 plt.codes <- levels(factor(plt))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
28 spc.codes <- levels(factor(spc))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
29 taxa <- Matrix(0, nrow=length(plt.codes), ncol=length(spc.codes), sparse=TRUE)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
30 row <- match(plt, plt.codes)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
31 col <- match(spc, spc.codes)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
32 for (i in 1:length(abu)) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
33 taxa[row[i], col[i]] <- abu[i]
eafc7121f553 Uploaded
guerler
parents:
diff changeset
34 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
35 colnames(taxa) <- spc.codes
eafc7121f553 Uploaded
guerler
parents:
diff changeset
36 rownames(taxa) <- plt.codes
eafc7121f553 Uploaded
guerler
parents:
diff changeset
37 taxa
eafc7121f553 Uploaded
guerler
parents:
diff changeset
38 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
39
eafc7121f553 Uploaded
guerler
parents:
diff changeset
40 # flatten data.frame into three column format
eafc7121f553 Uploaded
guerler
parents:
diff changeset
41 flatten <- function(my_matrix) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
42 summ <-summary(my_matrix)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
43 summ <- data.frame(i=rownames(my_matrix)[summ$i], j=colnames(my_matrix)[summ$j], x=summ$x)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
44 summ
eafc7121f553 Uploaded
guerler
parents:
diff changeset
45 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
46
eafc7121f553 Uploaded
guerler
parents:
diff changeset
47 # wrapper
eafc7121f553 Uploaded
guerler
parents:
diff changeset
48 wrapper <- function(table, columns, options) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
49
eafc7121f553 Uploaded
guerler
parents:
diff changeset
50 # initialize output list
eafc7121f553 Uploaded
guerler
parents:
diff changeset
51 l <- list()
eafc7121f553 Uploaded
guerler
parents:
diff changeset
52
eafc7121f553 Uploaded
guerler
parents:
diff changeset
53 # get number of columns
eafc7121f553 Uploaded
guerler
parents:
diff changeset
54 n = length(columns)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
55
eafc7121f553 Uploaded
guerler
parents:
diff changeset
56 # consistency check
eafc7121f553 Uploaded
guerler
parents:
diff changeset
57 if (n %% 3 != 0) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
58 print ('heatmap::wrapper() - Data not consistent (n mod 3 != 0)')
eafc7121f553 Uploaded
guerler
parents:
diff changeset
59 return (l)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
60 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
61
eafc7121f553 Uploaded
guerler
parents:
diff changeset
62 # create index sequence
eafc7121f553 Uploaded
guerler
parents:
diff changeset
63 index = seq(1, n, by=3)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
64
eafc7121f553 Uploaded
guerler
parents:
diff changeset
65 # get keys
eafc7121f553 Uploaded
guerler
parents:
diff changeset
66 keys = names(columns)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
67
eafc7121f553 Uploaded
guerler
parents:
diff changeset
68 # loop through blocks
eafc7121f553 Uploaded
guerler
parents:
diff changeset
69 for (i in index) {
eafc7121f553 Uploaded
guerler
parents:
diff changeset
70 # create columns
eafc7121f553 Uploaded
guerler
parents:
diff changeset
71 ci <- get_label(table, columns[keys[i]])
eafc7121f553 Uploaded
guerler
parents:
diff changeset
72 cj <- get_label(table, columns[keys[i+1]])
eafc7121f553 Uploaded
guerler
parents:
diff changeset
73 cx <- get_numeric(table, columns[keys[i+2]])
eafc7121f553 Uploaded
guerler
parents:
diff changeset
74
eafc7121f553 Uploaded
guerler
parents:
diff changeset
75 # create a frame from columns
eafc7121f553 Uploaded
guerler
parents:
diff changeset
76 my_frame <- data.frame(ci=ci, cj=cj, cx=cx)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
77
eafc7121f553 Uploaded
guerler
parents:
diff changeset
78 # create matrix out of the frame
eafc7121f553 Uploaded
guerler
parents:
diff changeset
79 my_matrix <- matrify(my_frame)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
80
eafc7121f553 Uploaded
guerler
parents:
diff changeset
81 # create/cluster matrix
eafc7121f553 Uploaded
guerler
parents:
diff changeset
82 row_order <- hclust(dist(my_matrix))$order
eafc7121f553 Uploaded
guerler
parents:
diff changeset
83 col_order <- hclust(dist(t(my_matrix)))$order
eafc7121f553 Uploaded
guerler
parents:
diff changeset
84
eafc7121f553 Uploaded
guerler
parents:
diff changeset
85 # reorder matrix
eafc7121f553 Uploaded
guerler
parents:
diff changeset
86 my_matrix <- my_matrix[row_order, col_order]
eafc7121f553 Uploaded
guerler
parents:
diff changeset
87
eafc7121f553 Uploaded
guerler
parents:
diff changeset
88 # get min size
eafc7121f553 Uploaded
guerler
parents:
diff changeset
89 min_limit = max(as.integer(options$limit), min_limit)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
90
eafc7121f553 Uploaded
guerler
parents:
diff changeset
91 # get max size
eafc7121f553 Uploaded
guerler
parents:
diff changeset
92 max_row = min(length(row_order), min_limit)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
93 max_col = min(length(col_order), min_limit)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
94
eafc7121f553 Uploaded
guerler
parents:
diff changeset
95 # transform back to three columns
eafc7121f553 Uploaded
guerler
parents:
diff changeset
96 my_flatmatrix = flatten(my_matrix[1:max_row, 1:max_col])
eafc7121f553 Uploaded
guerler
parents:
diff changeset
97
eafc7121f553 Uploaded
guerler
parents:
diff changeset
98 # append to result list
eafc7121f553 Uploaded
guerler
parents:
diff changeset
99 l <- append(l, list(my_flatmatrix$i))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
100 l <- append(l, list(my_flatmatrix$j))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
101 l <- append(l, list(my_flatmatrix$x))
eafc7121f553 Uploaded
guerler
parents:
diff changeset
102 }
eafc7121f553 Uploaded
guerler
parents:
diff changeset
103
eafc7121f553 Uploaded
guerler
parents:
diff changeset
104 # return
eafc7121f553 Uploaded
guerler
parents:
diff changeset
105 return (l)
eafc7121f553 Uploaded
guerler
parents:
diff changeset
106 }