comparison histogram.r @ 23:0e3df2630d9b draft

Uploaded
author guerler
date Fri, 25 Apr 2014 20:08:55 -0400
parents
children 524184c2f524
comparison
equal deleted inserted replaced
22:f1c9378592be 23:0e3df2630d9b
1 # utilities
2 boundary <- function(x, increment) {
3 return (floor(x / increment) * increment)
4 }
5
6 roundup <- function(x) {
7 return (sign(x) * 10^ceiling(log10(abs(x))))
8 }
9
10 # wrapper
11 wrapper <- function(table, columns, options) {
12
13 # initialize output list
14 l <- list()
15
16 # loop through all columns
17 m <- list()
18 for (key in names(columns)) {
19 # load column data
20 column <- as.numeric(columns[key])
21 column_data <- sapply( table[column], as.numeric )
22
23 # collect vectors in list
24 m <- append(m, list(column_data))
25 }
26
27 # get min/max boundaries
28 min_value <- min(unlist(m))
29 max_value <- max(unlist(m))
30
31 # identify increment
32 increment <- roundup((max_value - min_value) / 10)
33
34 # fix min value
35 min_value <- boundary(min_value, increment)
36
37 # fix max value
38 max_value <- min_value + increment * 10
39
40 # check if single bin is enough
41 if (min_value == max_value) {
42 l <- append(l, max_value)
43 for (key in seq(m)) {
44 l <- append(l, 1.0)
45 }
46 return (l)
47 }
48
49 # fix range and bins
50 bin_seq = seq(min_value, max_value, by=increment)
51
52 # add as first column
53 l <- append(l, list(bin_seq[2: length(bin_seq)]))
54
55 # loop through all columns
56 for (key in seq(m)) {
57 # load column data
58 column_data <- m[[key]]
59
60 # create hist data
61 hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE)
62
63 # normalize densities
64 count_sum <- sum(hist_data$counts)
65 if (count_sum > 0) {
66 hist_data$counts = hist_data$counts / count_sum
67 }
68
69 # collect vectors in list
70 l <- append(l, list(hist_data$counts))
71 }
72
73
74 # return
75 return (l)
76 }