Mercurial > repos > ethevenot > univariate
diff univariate_script.R @ 3:140290de7986 draft
planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit 27bc6157f43574f038b3fb1be1f46ce4786e24b1
author | ethevenot |
---|---|
date | Sun, 30 Oct 2016 14:17:09 -0400 |
parents | 09799fc16bc6 |
children |
line wrap: on
line diff
--- a/univariate_script.R Sat Aug 06 12:42:42 2016 -0400 +++ b/univariate_script.R Sun Oct 30 14:17:09 2016 -0400 @@ -4,16 +4,18 @@ facC, tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1], adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], - thrN = 0.05) { + thrN = 0.05, + pdfC) { ## Option - ##--------------- strAsFacL <- options()$stringsAsFactors options(stingsAsFactors = FALSE) options(warn = -1) + ## Getting the response (either a factor or a numeric) + if(mode(samDF[, facC]) == "character") { facFcVn <- factor(samDF[, facC]) facLevVc <- levels(facFcVn) @@ -24,8 +26,10 @@ varPfxC <- paste0(make.names(facC), "_", tesC, "_") + if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) { + switch(tesC, ttest = { staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE))) @@ -46,61 +50,168 @@ staVn <- apply(datMN, 2, staF) - fdrVn <- p.adjust(apply(datMN, + adjVn <- p.adjust(apply(datMN, 2, tesF), method = adjC) - sigVn <- as.numeric(fdrVn < thrN) + sigVn <- as.numeric(adjVn < thrN) if(tesC %in% c("ttest", "wilcoxon")) varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_") varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn - varDF[, paste0(varPfxC, adjC)] <- fdrVn + varDF[, paste0(varPfxC, adjC)] <- adjVn varDF[, paste0(varPfxC, "sig")] <- sigVn + ## graphic + + pdf(pdfC, onefile = TRUE) + + varVi <- which(sigVn > 0) + + if(tesC %in% c("ttest", "wilcoxon")) { + + facVc <- as.character(facFcVn) + names(facVc) <- rownames(samDF) + + for(varI in varVi) { + + varC <- rownames(varDF)[varI] + + boxF(facFcVn, + datMN[, varI], + paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"), + facVc) + + } + + } else { ## pearson or spearman + + for(varI in varVi) { + + varC <- rownames(varDF)[varI] + + mod <- lm(datMN[, varI] ~ facFcVn) + + plot(facFcVn, datMN[, varI], + xlab = facC, + ylab = "", + pch = 18, + main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ", R2 = ", signif(summary(mod)$r.squared, 2), ")")) + + abline(mod, col = "red") + + } + + } + + dev.off() + + } else if(tesC == "anova") { + ## getting the names of the pairwise comparisons 'class1Vclass2' prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]]) prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1 + ## omnibus and post-hoc tests + aovMN <- t(apply(datMN, 2, function(varVn) { aovMod <- aov(varVn ~ facFcVn) pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"] hsdMN <- TukeyHSD(aovMod)[["facFcVn"]] - c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN)) + c(pvaN, c(hsdMN[, c("diff", "p adj")])) })) - aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC) - sigVn <- as.numeric(aovMN[, 1] < thrN) - aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)]) - ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA - colnames(aovMN) <- paste0(varPfxC, - c(adjC, - "sig", - paste0(prwVc, "_dif"), - paste0(prwVc, "_pva"), - paste0(prwVc, "_sig"))) - aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA + difVi <- 1:length(prwVc) + 1 + + ## difference of the means for each pairwise comparison + + difMN <- aovMN[, difVi] + colnames(difMN) <- paste0(varPfxC, prwVc, "_dif") + + ## correction for multiple testing + + aovMN <- aovMN[, -difVi, drop = FALSE] + aovMN <- apply(aovMN, 2, function(pvaVn) p.adjust(pvaVn, method = adjC)) + + ## significance coding (0 = not significant, 1 = significant) + + adjVn <- aovMN[, 1] + sigVn <- as.numeric(adjVn < thrN) + + aovMN <- aovMN[, -1, drop = FALSE] + colnames(aovMN) <- paste0(varPfxC, prwVc, "_", adjC) + + aovSigMN <- aovMN < thrN + mode(aovSigMN) <- "numeric" + colnames(aovSigMN) <- paste0(varPfxC, prwVc, "_sig") + + ## final aggregated table + + resMN <- cbind(adjVn, sigVn, difMN, aovMN, aovSigMN) + colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) + + varDF <- cbind.data.frame(varDF, as.data.frame(resMN)) + + ## graphic - varDF <- cbind.data.frame(varDF, as.data.frame(aovMN)) + pdf(pdfC, onefile = TRUE) + + for(varI in 1:nrow(varDF)) { + + if(sum(aovSigMN[varI, ]) > 0) { + + varC <- rownames(varDF)[varI] + boxplot(datMN[, varI] ~ facFcVn, + main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")")) + + for(prwI in 1:length(prwVc)) { + + if(aovSigMN[varI, paste0(varPfxC, prwVc[prwI], "_sig")] == 1) { + + claVc <- unlist(strsplit(prwVc[prwI], ".", fixed = TRUE)) + aovClaVl <- facFcVn %in% claVc + aovFc <- facFcVn[aovClaVl, drop = TRUE] + aovVc <- as.character(aovFc) + names(aovVc) <- rownames(samDF)[aovClaVl] + boxF(aovFc, + datMN[aovClaVl, varI], + paste0(varC, " (", adjC, " = ", signif(aovMN[varI, paste0(varPfxC, prwVc[prwI], "_", adjC)], 2), ")"), + aovVc) + + } + + } + + } + + } + + dev.off() + + } else if(tesC == "kruskal") { + - ## getting the names of the pairwise comparisons 'class1Vclass2' + ## getting the names of the pairwise comparisons 'class1.class2' + nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]] nemVl <- c(lower.tri(nemMN, diag = TRUE)) nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl], colnames(nemMN)[c(col(nemMN))][nemVl]) nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2]) - nemNamVc <- paste0(varPfxC, nemNamVc) + pfxNemVc <- paste0(varPfxC, nemNamVc) + + ## omnibus and post-hoc tests nemMN <- t(apply(datMN, 2, function(varVn) { @@ -109,17 +220,24 @@ c(pvaN, c(varNemMN)) })) - pvaVn <- nemMN[, 1] - fdrVn <- p.adjust(pvaVn, method = adjC) - sigVn <- as.numeric(fdrVn < thrN) + + ## correction for multiple testing + + nemMN <- apply(nemMN, 2, + function(pvaVn) p.adjust(pvaVn, method = adjC)) + adjVn <- nemMN[, 1] + sigVn <- as.numeric(adjVn < thrN) nemMN <- nemMN[, c(FALSE, nemVl)] - colnames(nemMN) <- paste0(nemNamVc, "_pva") + colnames(nemMN) <- paste0(pfxNemVc, "_", adjC) + + ## significance coding (0 = not significant, 1 = significant) + nemSigMN <- nemMN < thrN mode(nemSigMN) <- "numeric" - colnames(nemSigMN) <- paste0(nemNamVc, "_sig") - nemMN[sigVn < 1, ] <- NA - nemSigMN[sigVn < 1, ] <- NA + colnames(nemSigMN) <- paste0(pfxNemVc, "_sig") + ## difference of the medians for each pairwise comparison + difMN <- sapply(1:nrow(nemClaMC), function(prwI) { prwVc <- nemClaMC[prwI, ] prwVi <- which(facFcVn %in% prwVc) @@ -128,13 +246,52 @@ }) colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN)) - nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN) - colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) + ## final aggregated table + + resMN <- cbind(adjVn, sigVn, difMN, nemMN, nemSigMN) + colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) + + varDF <- cbind.data.frame(varDF, as.data.frame(resMN)) + + ## graphic + + pdf(pdfC, onefile = TRUE) + + for(varI in 1:nrow(varDF)) { + + if(sum(nemSigMN[varI, ]) > 0) { + + varC <- rownames(varDF)[varI] - varDF <- cbind.data.frame(varDF, as.data.frame(nemMN)) + boxplot(datMN[, varI] ~ facFcVn, + main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")")) + + for(nemI in 1:length(nemNamVc)) { + + if(nemSigMN[varI, paste0(varPfxC, nemNamVc[nemI], "_sig")] == 1) { + + nemClaVc <- nemClaMC[nemI, ] + nemClaVl <- facFcVn %in% nemClaVc + nemFc <- facFcVn[nemClaVl, drop = TRUE] + nemVc <- as.character(nemFc) + names(nemVc) <- rownames(samDF)[nemClaVl] + boxF(nemFc, + datMN[nemClaVl, varI], + paste0(varC, " (", adjC, " = ", signif(nemMN[varI, paste0(varPfxC, nemNamVc[nemI], "_", adjC)], 2), ")"), + nemVc) + + } + + } + + } + + } + dev.off() + } - + names(sigVn) <- rownames(varDF) sigSumN <- sum(sigVn, na.rm = TRUE) if(sigSumN) { @@ -148,3 +305,28 @@ return(varDF) } + + +boxF <- function(xFc, + yVn, + maiC, + xVc) { + + boxLs <- boxplot(yVn ~ xFc, + main = maiC) + + outVn <- boxLs[["out"]] + + if(length(outVn)) { + + for(outI in 1:length(outVn)) { + levI <- which(levels(xFc) == xVc[names(outVn)[outI]]) + text(levI, + outVn[outI], + labels = names(outVn)[outI], + pos = ifelse(levI == 2, 2, 4)) + } + + } + +}