# binsize: lower limit for the number of bins requested via options$binsize
min_binsize <- 10
# lower boundary
#
# Snaps x down onto the grid of multiples of increment: the result is the
# largest multiple of increment that does not exceed x. Works elementwise
# on vectors.
lowerboundary <- function(x, increment) {
  steps <- floor(x / increment)
  steps * increment
}
# upper boundary
#
# Snaps x up onto the grid of multiples of increment: the result is the
# smallest multiple of increment that is not below x. Works elementwise
# on vectors.
upperboundary <- function(x, increment) {
  steps <- ceiling(x / increment)
  steps * increment
}
# round up to a power of ten
#
# Replaces the magnitude of x by the smallest power of ten that is not
# below it and keeps the sign of x (250 -> 1000, 0.3 -> 1, -0.03 -> -0.1).
# An exact power of ten is returned unchanged, and 0 stays 0.
roundup <- function(x) {
  exponent <- ceiling(log10(abs(x)))
  sign(x) * 10^exponent
}
# wrapper
#
# Builds one shared set of equally wide histogram bins for the selected
# columns of `table` and returns the per-column share of values in each bin.
#
# Arguments:
#   table    object indexed as table[column]; each selected column is
#            coerced with as.numeric().
#   columns  named collection; every value is coerced with as.numeric() and
#            used as the index of one column in `table`.
#   options  list; options$binsize is the requested number of bins. Values
#            below min_binsize (or missing / NA) are replaced by min_binsize.
#
# Returns an unnamed list:
#   [[1]]    upper edge of every bin,
#   [[k+1]]  for the k-th selected column, the bin counts divided by the
#            number of counted values (raw counts if nothing was counted).
# If all finite values are identical the result is list(value, 1, 1, ...),
# i.e. a single bin holding everything for each column.
#
# Non-finite values (NA, NaN, Inf) are ignored when the bin range is
# determined; hist() leaves them out of the counts as well. An error is
# raised when the selected columns contain no finite value at all.
wrapper <- function(table, columns, options) {

  # requested bin count, bounded below by min_binsize; na.rm lets an NA
  # binsize fall back to min_binsize instead of poisoning every later step
  binsize <- max(as.integer(options$binsize), min_binsize, na.rm = TRUE)

  # load every selected column as a plain numeric vector
  m <- lapply(names(columns), function(key) {
    column <- as.numeric(columns[key])
    unlist(lapply(table[column], as.numeric), use.names = FALSE)
  })

  # overall value range across all columns, based on finite values only
  values <- unlist(m)
  values <- values[is.finite(values)]
  if (length(values) == 0) {
    stop("no finite values found in the selected columns", call. = FALSE)
  }
  min_value <- min(values)
  max_value <- max(values)
  value_range <- max_value - min_value

  # Single-bin case. This must be decided before the increment is derived:
  # a zero range yields a zero increment, and aligning the boundaries would
  # then divide by zero and turn every later value into NaN.
  if (value_range == 0) {
    return(c(list(max_value), rep(list(1.0), length(m))))
  }

  # bin width: range per requested bin, rounded up to a power of ten
  increment <- roundup(value_range / binsize)

  # align both ends of the range to multiples of the bin width
  min_value <- lowerboundary(min_value, increment)
  max_value <- upperboundary(max_value, increment)

  # number of bins that actually fit, and the matching upper end
  binsize <- round((max_value - min_value) / increment)
  max_value <- min_value + binsize * increment

  # bin edges shared by all columns
  bin_seq <- seq(min_value, max_value, by = increment)

  # per-column bin counts, normalised so that they sum to 1
  densities <- lapply(m, function(column_data) {
    counts <- hist(column_data, breaks = bin_seq, plot = FALSE)$counts
    count_sum <- sum(counts)
    if (count_sum > 0) {
      counts <- counts / count_sum
    }
    counts
  })

  # upper bin edges first, then one vector per column
  c(list(bin_seq[-1]), densities)
}