# HG changeset patch # User guerler # Date 1397770524 14400 # Node ID 2e2d92b2ae38046051e5408da673da8345e9160c # Parent 6a11aeb8bd39e80e6c257a6505118b1fdb7e6225 Uploaded diff -r 6a11aeb8bd39 -r 2e2d92b2ae38 histogram.r --- a/histogram.r Thu Apr 17 13:22:34 2014 -0400 +++ b/histogram.r Thu Apr 17 17:35:24 2014 -0400 @@ -4,31 +4,44 @@ l <- list() # loop through all columns + m <- list() for (key in names(columns)) { # load column data column <- as.numeric(columns[key]) column_data <- sapply( table[column], as.numeric ) + # collect vectors in list + m <- append(m, list(column_data)) + } + + # get min/max boundaries + max_value <- max(unlist(m)) + min_value <- min(unlist(m)) + + # fix range and bins + bin_seq = seq(min_value, max_value, by=10) + + # add as first column + l <- append(l, list(bin_seq[2: length(bin_seq)])) + + # loop through all columns + for (key in seq(m)) { + # load column data + column_data <- m[[key]] + # create hist data - hist_data <- hist(column_data, plot=FALSE) + hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE) # normalize densities count_sum <- sum(hist_data$counts) if (count_sum > 0) { - hist_data$counts=hist_data$counts/count_sum + hist_data$counts = hist_data$counts / count_sum } # collect vectors in list - l <- append(l, list(hist_data$breaks[2: length(hist_data$breaks)])) l <- append(l, list(hist_data$counts)) } - # make sure length is fine - n <- max(sapply(l, length)) - ll <- lapply(l, function(X) { - c(as.character(X), rep('2147483647', times = n - length(X))) - }) - l <- do.call(cbind, ll) # return return (l)