Mercurial > repos > devteam > dwt_var_perfeature
diff execute_dwt_var_perFeature.R @ 3:6c29c7e347e8 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/dwt_var_perfeature commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
author | devteam |
---|---|
date | Mon, 06 Jul 2020 20:34:27 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/execute_dwt_var_perFeature.R Mon Jul 06 20:34:27 2020 -0400 @@ -0,0 +1,147 @@ +##################################################################### +## plot multiscale wavelet variance +## create null bands by permuting the original data series +## generate plots and table of wavelet variance including p-values +####################################################################### +options(echo = FALSE) +library("wavethresh"); +library("waveslim"); +library("bitops"); + +## to determine if data is properly formatted 2^N observations +is_power2 <- function(x) { + x && !(bitops::bitAnd(x, x - 1)); +} + +## dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function +dwt_var_permut_get_max <- function(data, names, alpha, filter = 1, family = "DaubExPhase", bc = "symmetric", method = "kendall", wf = "haar", boundary = "reflection") { + title <- NULL; + final_pvalue <- NULL; + j <- NULL; + scale <- NULL; + out <- NULL; + + print(class(data)); + print(names); + print(alpha); + + par(mar = c(5, 4, 4, 3), oma = c(4, 4, 3, 2), xaxt = "s", cex = 1, las = 1); + + title <- c("Wavelet", "Variance", "Pvalue", "Test"); + print(title); + + for (i in seq_len(length(names))) { + temp <- NULL; + results <- NULL; + wave1_dwt <- NULL; + + ## if data fails formatting check, do something + print(is.numeric(as.matrix(data)[, i])); + if (!is.numeric(as.matrix(data)[, i])) { + stop("data must be a numeric vector"); + } + print(length(as.matrix(data)[, i])); + print(is_power2(length(as.matrix(data)[, i]))); + if (!is_power2(length(as.matrix(data)[, i]))) { + stop("data length must be a power of two"); + } + j <- wavethresh::wd(as.matrix(data)[, i], filter.number = filter, family = family, bc = bc)$nlevels; + print(j); + temp <- vector(length = j); + wave1_dwt <- waveslim::dwt(as.matrix(data)[, i], wf = wf, j, boundary = boundary); + + temp <- waveslim::wave.variance(wave1_dwt)[- (j + 1), 1]; + print(temp); + + ##permutations code : + feature1 <- NULL; + null <- NULL; + var_lower <- NULL; + limit_lower <- NULL; + var_upper <- NULL; + limit_upper <- NULL; + med <- NULL; + + limit_lower <- alpha / 2 * 1000; + print(limit_lower); + limit_upper <- (1 - alpha / 2) * 1000; + print(limit_upper); + + feature1 <- as.matrix(data)[, i]; + for (k in 1:1000) { + nk_1 <- NULL; + null_levels <- NULL; + var <- NULL; + null_wave1 <- NULL; + + nk_1 <- sample(feature1, length(feature1), replace = FALSE); + null_levels <- wavethresh::wd(nk_1, filter.number = filter, family = family, bc = bc)$nlevels; + var <- vector(length = length(null_levels)); + null_wave1 <- waveslim::dwt(nk_1, wf = wf, j, boundary = boundary); + var <- waveslim::wave.variance(null_wave1)[- (null_levels + 1), 1]; + null <- rbind(null, var); + } + null <- apply(null, 2, sort, na.last = TRUE); + var_lower <- null[limit_lower, ]; + var_upper <- null[limit_upper, ]; + med <- (apply(null, 2, median, na.rm = TRUE)); + + ## plot + results <- cbind(temp, var_lower, var_upper); + print(results); + matplot(results, type = "b", pch = "*", lty = 1, col = c(1, 2, 2), xaxt = "n", xlab = "Wavelet Scale", ylab = "Wavelet variance"); + mtext(names[i], side = 3, line = 0.5, cex = 1); + axis(1, at = 1:j, labels = c(2 ^ (0:(j - 1))), las = 3, cex.axis = 1); + + ## get pvalues by comparison to null distribution + for (m in seq_len(length(temp))) { + print(paste("scale", m, sep = " ")); + print(paste("var", temp[m], sep = " ")); + print(paste("med", med[m], sep = " ")); + pv <- NULL; + tail <- NULL; + scale <- NULL; + scale <- 2 ^ (m - 1); + if (temp[m] >= med[m]) { + ## R tail test + print("R"); + tail <- "R"; + pv <- (length(which(null[, m] >= temp[m]))) / (length(na.exclude(null[, m]))); + } else { + if (temp[m] < med[m]) { + ## L tail test + print("L"); + tail <- "L"; + pv <- (length(which(null[, m] <= temp[m]))) / (length(na.exclude(null[, m]))); + } + } + print(pv); + out <- rbind(out, c(paste("Scale", scale, sep = "_"), format(temp[m], digits = 3), pv, tail)); + } + final_pvalue <- rbind(final_pvalue, out); + } + colnames(final_pvalue) <- title; + return(final_pvalue); +} + +## execute +## read in data +args <- commandArgs(trailingOnly = TRUE) + +data_test <- NULL; +final <- NULL; +sub <- NULL; +sub_names <- NULL; +data_test <- read.delim(args[1], header = FALSE); +pdf(file = args[5], width = 11, height = 8) +for (f in strsplit(args[2], ",")) { + f <- as.integer(f) + if (f > ncol(data_test)) + stop(paste("column", f, "doesn't exist")); + sub <- data_test[, f]; + sub_names <- colnames(data_test)[f]; + final <- rbind(final, dwt_var_permut_get_max(sub, sub_names, as.double(args[3]))); +} + +dev.off(); +write.table(final, file = args[4], sep = "\t", quote = FALSE, row.names = FALSE);