Mercurial > repos > ethevenot > univariate
diff univariate_script.R @ 0:ef64d3752050 draft
planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit ca0e312e1c986c45310f37effe031f60009fbcab
author | ethevenot |
---|---|
date | Wed, 27 Jul 2016 11:44:34 -0400 |
parents | |
children | 09799fc16bc6 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/univariate_script.R Wed Jul 27 11:44:34 2016 -0400 @@ -0,0 +1,148 @@ +univariateF <- function(datMN, + samDF, + varDF, + facC, + tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1], + adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], + thrN = 0.05) { + + + ## Option + ##--------------- + + strAsFacL <- options()$stringsAsFactors + options(stingsAsFactors = FALSE) + options(warn = -1) + + if(mode(samDF[, facC]) == "character") { + facFcVn <- factor(samDF[, facC]) + facLevVc <- levels(facFcVn) + } else + facFcVn <- samDF[, facC] + + cat("\nPerforming '", tesC, "'\n", sep="") + + varPfxC <- paste0(make.names(facC), "_", tesC, "_") + + if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) { + + switch(tesC, + ttest = { + staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE))) + tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]] + }, + wilcoxon = { + staF <- function(y) diff(tapply(y, facFcVn, function(x) median(x, na.rm = TRUE))) + tesF <- function(y) wilcox.test(y ~ facFcVn)[["p.value"]] + }, + pearson = { + staF <- function(y) cor(facFcVn, y, method = "pearson", use = "pairwise.complete.obs") + tesF <- function(y) cor.test(facFcVn, y, method = "pearson", use = "pairwise.complete.obs")[["p.value"]] + }, + spearman = { + staF <- function(y) cor(facFcVn, y, method = "spearman", use = "pairwise.complete.obs") + tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]] + }) + + staVn <- apply(datMN, 2, staF) + + fdrVn <- p.adjust(apply(datMN, + 2, + tesF), + method = adjC) + + sigVn <- as.numeric(fdrVn < thrN) + + if(tesC %in% c("ttest", "wilcoxon")) + varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "-"), "_") + + varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn + + varDF[, paste0(varPfxC, adjC)] <- fdrVn + + varDF[, paste0(varPfxC, "sig")] <- sigVn + + } else if(tesC == "anova") { + + ## getting the names of the pairwise comparisons 'class1Vclass2' + prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]]) + + aovMN <- t(apply(datMN, 2, function(varVn) { + + aovMod <- aov(varVn ~ facFcVn) + pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"] + hsdMN <- TukeyHSD(aovMod)[["facFcVn"]] + c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN)) + + })) + + aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC) + sigVn <- as.numeric(aovMN[, 1] < thrN) + aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)]) + ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA + colnames(aovMN) <- paste0(varPfxC, + c(adjC, + "sig", + paste0(prwVc, "_dif"), + paste0(prwVc, "_pva"), + paste0(prwVc, "_sig"))) + aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA + + varDF <- cbind.data.frame(varDF, as.data.frame(aovMN)) + + } else if(tesC == "kruskal") { + + ## getting the names of the pairwise comparisons 'class1Vclass2' + nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]] + nemVl <- c(lower.tri(nemMN, diag = TRUE)) + nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl], + colnames(nemMN)[c(col(nemMN))][nemVl]) + nemNamVc <- paste0(nemClaMC[, 1], "-", nemClaMC[, 2]) + nemNamVc <- paste0(varPfxC, nemNamVc) + + nemMN <- t(apply(datMN, 2, function(varVn) { + + pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]] + varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]] + c(pvaN, c(varNemMN)) + + })) + pvaVn <- nemMN[, 1] + fdrVn <- p.adjust(pvaVn, method = adjC) + sigVn <- as.numeric(fdrVn < thrN) + nemMN <- nemMN[, c(FALSE, nemVl)] + colnames(nemMN) <- paste0(nemNamVc, "_pva") + nemSigMN <- nemMN < thrN + mode(nemSigMN) <- "numeric" + colnames(nemSigMN) <- paste0(nemNamVc, "_sig") + nemMN[sigVn < 1, ] <- NA + nemSigMN[sigVn < 1, ] <- NA + + difMN <- sapply(1:nrow(nemClaMC), function(prwI) { + prwVc <- nemClaMC[prwI, ] + prwVi <- which(facFcVn %in% prwVc) + prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc) + apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE))))) + }) + colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN)) + + nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN) + colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) + + varDF <- cbind.data.frame(varDF, as.data.frame(nemMN)) + + } + + names(sigVn) <- rownames(varDF) + sigSumN <- sum(sigVn, na.rm = TRUE) + if(sigSumN) { + cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "") + cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "") + } else + cat("\nNo significant variable found at the selected ", thrN, " level\n", sep = "") + + options(stingsAsFactors = strAsFacL) + + return(varDF) + +}