diff univariate_script.R @ 3:140290de7986 draft

planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit 27bc6157f43574f038b3fb1be1f46ce4786e24b1
author ethevenot
date Sun, 30 Oct 2016 14:17:09 -0400
parents 09799fc16bc6
children
line wrap: on
line diff
--- a/univariate_script.R	Sat Aug 06 12:42:42 2016 -0400
+++ b/univariate_script.R	Sun Oct 30 14:17:09 2016 -0400
@@ -4,16 +4,18 @@
                         facC,
                         tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1],
                         adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7],
-                        thrN = 0.05) {
+                        thrN = 0.05,
+                        pdfC) {
 
 
     ## Option
-    ##---------------
 
     strAsFacL <- options()$stringsAsFactors
     options(stingsAsFactors = FALSE)
     options(warn = -1)
 
+    ## Getting the response (either a factor or a numeric)
+
     if(mode(samDF[, facC]) == "character") {
         facFcVn <- factor(samDF[, facC])
         facLevVc <- levels(facFcVn)
@@ -24,8 +26,10 @@
 
     varPfxC <- paste0(make.names(facC), "_", tesC, "_")
 
+    
     if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) {
 
+        
         switch(tesC,
                ttest = {
                    staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE)))
@@ -46,61 +50,168 @@
 
         staVn <- apply(datMN, 2, staF)
 
-        fdrVn <- p.adjust(apply(datMN,
+        adjVn <- p.adjust(apply(datMN,
                                 2,
                                 tesF),
                           method = adjC)
 
-        sigVn <- as.numeric(fdrVn < thrN)
+        sigVn <- as.numeric(adjVn < thrN)
 
         if(tesC %in% c("ttest", "wilcoxon"))
             varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_")
 
         varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn
 
-        varDF[, paste0(varPfxC, adjC)] <- fdrVn
+        varDF[, paste0(varPfxC, adjC)] <- adjVn
 
         varDF[, paste0(varPfxC, "sig")] <- sigVn
 
+        ## graphic
+
+        pdf(pdfC, onefile = TRUE)
+
+        varVi <- which(sigVn > 0)
+
+        if(tesC %in% c("ttest", "wilcoxon")) {
+
+            facVc <- as.character(facFcVn)
+            names(facVc) <- rownames(samDF)
+
+            for(varI in varVi) {
+                
+                varC <- rownames(varDF)[varI]
+                
+                boxF(facFcVn,
+                     datMN[, varI],
+                     paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"),
+                     facVc)
+                
+            }
+
+        } else { ## pearson or spearman
+
+            for(varI in varVi) {
+
+                varC <- rownames(varDF)[varI]
+
+                mod <- lm(datMN[, varI] ~  facFcVn)
+
+                plot(facFcVn, datMN[, varI],
+                     xlab = facC,
+                     ylab = "",
+                     pch = 18,
+                     main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ", R2 = ", signif(summary(mod)$r.squared, 2), ")"))
+            
+                abline(mod, col = "red")
+
+                }
+
+        }
+
+        dev.off()
+
+        
     } else if(tesC == "anova") {
 
+        
         ## getting the names of the pairwise comparisons 'class1Vclass2'
         prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]])
 
         prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1
 
+        ## omnibus and post-hoc tests
+        
         aovMN <- t(apply(datMN, 2, function(varVn) {
 
             aovMod <- aov(varVn ~ facFcVn)
             pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"]
             hsdMN <- TukeyHSD(aovMod)[["facFcVn"]]
-            c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN))
+            c(pvaN, c(hsdMN[, c("diff", "p adj")]))
 
         }))
 
-        aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC)
-        sigVn <-  as.numeric(aovMN[, 1] < thrN)
-        aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)])
-        ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA
-        colnames(aovMN) <- paste0(varPfxC,
-                                  c(adjC,
-                                    "sig",
-                                    paste0(prwVc, "_dif"),
-                                    paste0(prwVc, "_pva"),
-                                    paste0(prwVc, "_sig")))
-        aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA
+        difVi <- 1:length(prwVc) + 1
+
+        ## difference of the means for each pairwise comparison
+        
+        difMN <- aovMN[, difVi]
+        colnames(difMN) <- paste0(varPfxC, prwVc, "_dif")
+
+        ## correction for multiple testing
+        
+        aovMN <- aovMN[, -difVi, drop = FALSE]
+        aovMN <- apply(aovMN, 2, function(pvaVn) p.adjust(pvaVn, method = adjC))
+
+        ## significance coding (0 = not significant, 1 = significant)
+        
+        adjVn <- aovMN[, 1]
+        sigVn <-  as.numeric(adjVn < thrN)
+
+        aovMN <- aovMN[, -1, drop = FALSE]
+        colnames(aovMN) <- paste0(varPfxC, prwVc, "_", adjC)
+
+        aovSigMN <- aovMN < thrN
+        mode(aovSigMN) <- "numeric"
+        colnames(aovSigMN) <- paste0(varPfxC, prwVc, "_sig")
+
+        ## final aggregated table
+
+        resMN <- cbind(adjVn, sigVn, difMN, aovMN, aovSigMN)
+        colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))
+
+        varDF <- cbind.data.frame(varDF, as.data.frame(resMN))
+
+        ## graphic
 
-        varDF <- cbind.data.frame(varDF, as.data.frame(aovMN))
+        pdf(pdfC, onefile = TRUE)
+        
+        for(varI in 1:nrow(varDF)) {
+            
+            if(sum(aovSigMN[varI, ]) > 0) {
+                
+                varC <- rownames(varDF)[varI]
 
+                boxplot(datMN[, varI] ~ facFcVn,
+                        main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"))
+                
+                for(prwI in 1:length(prwVc)) {
+                    
+                    if(aovSigMN[varI, paste0(varPfxC, prwVc[prwI], "_sig")] == 1) {
+                        
+                        claVc <- unlist(strsplit(prwVc[prwI], ".", fixed = TRUE))
+                        aovClaVl <- facFcVn %in% claVc
+                        aovFc <- facFcVn[aovClaVl, drop = TRUE]
+                        aovVc <- as.character(aovFc)
+                        names(aovVc) <- rownames(samDF)[aovClaVl]
+                        boxF(aovFc,
+                             datMN[aovClaVl, varI],
+                             paste0(varC, " (", adjC, " = ", signif(aovMN[varI, paste0(varPfxC, prwVc[prwI], "_", adjC)], 2), ")"),
+                             aovVc)
+                        
+                    }
+                       
+                }
+                
+            }
+            
+        }
+
+        dev.off()
+
+        
     } else if(tesC == "kruskal") {
+        
 
-        ## getting the names of the pairwise comparisons 'class1Vclass2'
+        ## getting the names of the pairwise comparisons 'class1.class2'
+        
         nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]]
         nemVl <- c(lower.tri(nemMN, diag = TRUE))
         nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl],
                           colnames(nemMN)[c(col(nemMN))][nemVl])
         nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2])
-        nemNamVc <- paste0(varPfxC, nemNamVc)
+        pfxNemVc <- paste0(varPfxC, nemNamVc)
+
+        ## omnibus and post-hoc tests
         
         nemMN <- t(apply(datMN, 2, function(varVn) {
 
@@ -109,17 +220,24 @@
             c(pvaN, c(varNemMN))
 
         }))
-        pvaVn <- nemMN[, 1]
-        fdrVn <- p.adjust(pvaVn, method = adjC)
-        sigVn <- as.numeric(fdrVn < thrN)
+
+        ## correction for multiple testing
+        
+        nemMN <- apply(nemMN, 2,
+                       function(pvaVn) p.adjust(pvaVn, method = adjC))
+        adjVn <- nemMN[, 1]
+        sigVn <- as.numeric(adjVn < thrN)
         nemMN <- nemMN[, c(FALSE, nemVl)]
-        colnames(nemMN) <- paste0(nemNamVc, "_pva")
+        colnames(nemMN) <- paste0(pfxNemVc, "_", adjC)
+
+        ## significance coding (0 = not significant, 1 = significant)
+        
         nemSigMN <- nemMN < thrN
         mode(nemSigMN) <- "numeric"
-        colnames(nemSigMN) <- paste0(nemNamVc, "_sig")
-        nemMN[sigVn < 1, ] <- NA
-        nemSigMN[sigVn < 1, ] <- NA
+        colnames(nemSigMN) <- paste0(pfxNemVc, "_sig")
 
+        ## difference of the medians for each pairwise comparison
+        
         difMN <- sapply(1:nrow(nemClaMC), function(prwI) {
             prwVc <- nemClaMC[prwI, ]
             prwVi <- which(facFcVn %in% prwVc)
@@ -128,13 +246,52 @@
         })
         colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN))
 
-        nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN)
-        colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))
+        ## final aggregated table
+        
+        resMN <- cbind(adjVn, sigVn, difMN, nemMN, nemSigMN)
+        colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))
+
+        varDF <- cbind.data.frame(varDF, as.data.frame(resMN))
+
+        ## graphic
+
+        pdf(pdfC, onefile = TRUE)
+        
+        for(varI in 1:nrow(varDF)) {
+            
+            if(sum(nemSigMN[varI, ]) > 0) {
+                
+                varC <- rownames(varDF)[varI]
 
-        varDF <- cbind.data.frame(varDF, as.data.frame(nemMN))
+                boxplot(datMN[, varI] ~ facFcVn,
+                        main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"))
+                
+                for(nemI in 1:length(nemNamVc)) {
+                    
+                    if(nemSigMN[varI, paste0(varPfxC, nemNamVc[nemI], "_sig")] == 1) {
+                        
+                        nemClaVc <- nemClaMC[nemI, ]
+                        nemClaVl <- facFcVn %in% nemClaVc
+                        nemFc <- facFcVn[nemClaVl, drop = TRUE]
+                        nemVc <- as.character(nemFc)
+                        names(nemVc) <- rownames(samDF)[nemClaVl]
+                        boxF(nemFc,
+                             datMN[nemClaVl, varI],
+                             paste0(varC, " (", adjC, " = ", signif(nemMN[varI, paste0(varPfxC, nemNamVc[nemI], "_", adjC)], 2), ")"),
+                             nemVc)
+                        
+                    }
+                       
+                }
+                
+            }
+            
+        }
 
+        dev.off()
+        
     }
-
+    
     names(sigVn) <- rownames(varDF)
     sigSumN <- sum(sigVn, na.rm = TRUE)
     if(sigSumN) {
@@ -148,3 +305,28 @@
     return(varDF)
 
 }
+
+
+boxF <- function(xFc,
+                 yVn,
+                 maiC,
+                 xVc) {
+    
+    boxLs <- boxplot(yVn ~  xFc,
+                     main = maiC)
+    
+    outVn <- boxLs[["out"]]
+    
+    if(length(outVn)) {
+        
+        for(outI in 1:length(outVn)) {
+            levI <- which(levels(xFc) == xVc[names(outVn)[outI]])
+            text(levI,
+                 outVn[outI],
+                 labels = names(outVn)[outI],
+                 pos = ifelse(levI == 2, 2, 4))
+            }
+        
+    }
+    
+}