annotate histogram.r @ 27:9a77b557ca2a draft

Uploaded
author guerler
date Tue, 29 Apr 2014 15:28:12 -0400
parents 0e3df2630d9b
children 524184c2f524
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
1 # utilities
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
2 boundary <- function(x, increment) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
3 return (floor(x / increment) * increment)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
4 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
5
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
6 roundup <- function(x) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
7 return (sign(x) * 10^ceiling(log10(abs(x))))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
8 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
9
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
10 # wrapper
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
11 wrapper <- function(table, columns, options) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
12
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
13 # initialize output list
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
14 l <- list()
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
15
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
16 # loop through all columns
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
17 m <- list()
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
18 for (key in names(columns)) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
19 # load column data
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
20 column <- as.numeric(columns[key])
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
21 column_data <- sapply( table[column], as.numeric )
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
22
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
23 # collect vectors in list
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
24 m <- append(m, list(column_data))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
25 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
26
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
27 # get min/max boundaries
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
28 min_value <- min(unlist(m))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
29 max_value <- max(unlist(m))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
30
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
31 # identify increment
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
32 increment <- roundup((max_value - min_value) / 10)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
33
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
34 # fix min value
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
35 min_value <- boundary(min_value, increment)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
36
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
37 # fix max value
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
38 max_value <- min_value + increment * 10
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
39
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
40 # check if single bin is enough
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
41 if (min_value == max_value) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
42 l <- append(l, max_value)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
43 for (key in seq(m)) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
44 l <- append(l, 1.0)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
45 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
46 return (l)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
47 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
48
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
49 # fix range and bins
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
50 bin_seq = seq(min_value, max_value, by=increment)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
51
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
52 # add as first column
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
53 l <- append(l, list(bin_seq[2: length(bin_seq)]))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
54
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
55 # loop through all columns
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
56 for (key in seq(m)) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
57 # load column data
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
58 column_data <- m[[key]]
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
59
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
60 # create hist data
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
61 hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
62
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
63 # normalize densities
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
64 count_sum <- sum(hist_data$counts)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
65 if (count_sum > 0) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
66 hist_data$counts = hist_data$counts / count_sum
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
67 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
68
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
69 # collect vectors in list
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
70 l <- append(l, list(hist_data$counts))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
71 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
72
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
73
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
74 # return
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
75 return (l)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
76 }