# HG changeset patch # User guerler # Date 1400181321 14400 # Node ID b84a193361be00525e0c79000b4ecc6c42fa337d # Parent 9713035b1a4740b6fd4d01777f4f4e27b7400af0 Uploaded diff -r 9713035b1a47 -r b84a193361be histogram.r --- a/histogram.r Fri May 09 01:01:39 2014 -0400 +++ b/histogram.r Thu May 15 15:15:21 2014 -0400 @@ -1,27 +1,6 @@ -# binsize -min_binsize = 10 - -# lower boundary -lowerboundary <- function(x, increment) { - return (floor(x / increment) * increment) -} - -# upper boundary -upperboundary <- function(x, increment) { - return (ceiling(x / increment) * increment) -} - -# round to decimals -roundup <- function(x) { - return (sign(x) * 10^ceiling(log10(abs(x)))) -} - # wrapper wrapper <- function(table, columns, options) { - # get binsize - binsize = max(as.integer(options$binsize), min_binsize) - # initialize output list l <- list() @@ -36,43 +15,12 @@ m <- append(m, list(column_data)) } - # get min/max boundaries - min_value <- min(unlist(m)) - max_value <- max(unlist(m)) - - # identify range - diff <- max_value - min_value - - # identify increment - increment <- roundup(diff / binsize) - - # fix min value - min_value <- lowerboundary(min_value, increment) - max_value <- upperboundary(max_value, increment) - - # update range - diff <- max_value - min_value - - # fix bin size - binsize = round(diff / increment) - - # fix max value - max_value <- min_value + binsize * increment - - # check if single bin is enough - if (min_value == max_value) { - l <- append(l, max_value) - for (key in seq(m)) { - l <- append(l, 1.0) - } - return (l) - } - - # fix range and bins - bin_seq = seq(min_value, max_value, by=increment) + # identify optimal breaks + hist_data <- hist(unlist(m), plot=FALSE) + breaks <- hist_data$breaks; # add as first column - l <- append(l, list(bin_seq[2: length(bin_seq)])) + l <- append(l, list(breaks[2: length(breaks)])) # loop through all columns for (key in seq(m)) { @@ -80,14 +28,14 @@ column_data <- m[[key]] # create hist data - hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE) + hist_data <- hist(column_data, breaks=breaks, plot=FALSE) # normalize densities count_sum <- sum(hist_data$counts) if (count_sum > 0) { hist_data$counts = hist_data$counts / count_sum } - + # collect vectors in list l <- append(l, list(hist_data$counts)) }