view univariate_script.R @ 0:ef64d3752050 draft

planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit ca0e312e1c986c45310f37effe031f60009fbcab
author ethevenot
date Wed, 27 Jul 2016 11:44:34 -0400
parents
children 09799fc16bc6
line wrap: on
line source

univariateF <- function(datMN,
                        samDF,
                        varDF,
                        facC,
                        tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1],
                        adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7],
                        thrN = 0.05) {


    ## Option
    ##---------------

    strAsFacL <- options()$stringsAsFactors
    options(stingsAsFactors = FALSE)
    options(warn = -1)

    if(mode(samDF[, facC]) == "character") {
        facFcVn <- factor(samDF[, facC])
        facLevVc <- levels(facFcVn)
    } else
        facFcVn <- samDF[, facC]

    cat("\nPerforming '", tesC, "'\n", sep="")

    varPfxC <- paste0(make.names(facC), "_", tesC, "_")

    if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) {

        switch(tesC,
               ttest = {
                   staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE)))
                   tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]]
               },
               wilcoxon = {
                   staF <- function(y) diff(tapply(y, facFcVn, function(x) median(x, na.rm = TRUE)))
                   tesF <- function(y) wilcox.test(y ~ facFcVn)[["p.value"]]
               },
               pearson = {
                   staF <- function(y) cor(facFcVn, y, method = "pearson", use = "pairwise.complete.obs")
                   tesF <- function(y) cor.test(facFcVn, y, method = "pearson", use = "pairwise.complete.obs")[["p.value"]]
               },
               spearman = {
                   staF <- function(y) cor(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")
                   tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]]
               })

        staVn <- apply(datMN, 2, staF)

        fdrVn <- p.adjust(apply(datMN,
                                2,
                                tesF),
                          method = adjC)

        sigVn <- as.numeric(fdrVn < thrN)

        if(tesC %in% c("ttest", "wilcoxon"))
            varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "-"), "_")

        varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn

        varDF[, paste0(varPfxC, adjC)] <- fdrVn

        varDF[, paste0(varPfxC, "sig")] <- sigVn

    } else if(tesC == "anova") {

        ## getting the names of the pairwise comparisons 'class1Vclass2'
        prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]])

        aovMN <- t(apply(datMN, 2, function(varVn) {

            aovMod <- aov(varVn ~ facFcVn)
            pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"]
            hsdMN <- TukeyHSD(aovMod)[["facFcVn"]]
            c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN))

        }))

        aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC)
        sigVn <-  as.numeric(aovMN[, 1] < thrN)
        aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)])
        ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA
        colnames(aovMN) <- paste0(varPfxC,
                                  c(adjC,
                                    "sig",
                                    paste0(prwVc, "_dif"),
                                    paste0(prwVc, "_pva"),
                                    paste0(prwVc, "_sig")))
        aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA

        varDF <- cbind.data.frame(varDF, as.data.frame(aovMN))

    } else if(tesC == "kruskal") {

        ## getting the names of the pairwise comparisons 'class1Vclass2'
        nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]]
        nemVl <- c(lower.tri(nemMN, diag = TRUE))
        nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl],
                          colnames(nemMN)[c(col(nemMN))][nemVl])
        nemNamVc <- paste0(nemClaMC[, 1], "-", nemClaMC[, 2])
        nemNamVc <- paste0(varPfxC, nemNamVc)

        nemMN <- t(apply(datMN, 2, function(varVn) {

            pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]]
            varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]]
            c(pvaN, c(varNemMN))

        }))
        pvaVn <- nemMN[, 1]
        fdrVn <- p.adjust(pvaVn, method = adjC)
        sigVn <- as.numeric(fdrVn < thrN)
        nemMN <- nemMN[, c(FALSE, nemVl)]
        colnames(nemMN) <- paste0(nemNamVc, "_pva")
        nemSigMN <- nemMN < thrN
        mode(nemSigMN) <- "numeric"
        colnames(nemSigMN) <- paste0(nemNamVc, "_sig")
        nemMN[sigVn < 1, ] <- NA
        nemSigMN[sigVn < 1, ] <- NA

        difMN <- sapply(1:nrow(nemClaMC), function(prwI) {
            prwVc <- nemClaMC[prwI, ]
            prwVi <- which(facFcVn %in% prwVc)
            prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc)
            apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE)))))
        })
        colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN))

        nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN)
        colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))

        varDF <- cbind.data.frame(varDF, as.data.frame(nemMN))

    }

    names(sigVn) <- rownames(varDF)
    sigSumN <- sum(sigVn, na.rm = TRUE)
    if(sigSumN) {
        cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "")
        cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "")
    } else
        cat("\nNo significant variable found at the selected ", thrN, " level\n", sep = "")

    options(stingsAsFactors = strAsFacL)

    return(varDF)

}