comparison univariate_script.R @ 0:ef64d3752050 draft

planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit ca0e312e1c986c45310f37effe031f60009fbcab
author ethevenot
date Wed, 27 Jul 2016 11:44:34 -0400
parents
children 09799fc16bc6
comparison
equal deleted inserted replaced
-1:000000000000 0:ef64d3752050
1 univariateF <- function(datMN,
2 samDF,
3 varDF,
4 facC,
5 tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1],
6 adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7],
7 thrN = 0.05) {
8
9
10 ## Option
11 ##---------------
12
13 strAsFacL <- options()$stringsAsFactors
14 options(stingsAsFactors = FALSE)
15 options(warn = -1)
16
17 if(mode(samDF[, facC]) == "character") {
18 facFcVn <- factor(samDF[, facC])
19 facLevVc <- levels(facFcVn)
20 } else
21 facFcVn <- samDF[, facC]
22
23 cat("\nPerforming '", tesC, "'\n", sep="")
24
25 varPfxC <- paste0(make.names(facC), "_", tesC, "_")
26
27 if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) {
28
29 switch(tesC,
30 ttest = {
31 staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE)))
32 tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]]
33 },
34 wilcoxon = {
35 staF <- function(y) diff(tapply(y, facFcVn, function(x) median(x, na.rm = TRUE)))
36 tesF <- function(y) wilcox.test(y ~ facFcVn)[["p.value"]]
37 },
38 pearson = {
39 staF <- function(y) cor(facFcVn, y, method = "pearson", use = "pairwise.complete.obs")
40 tesF <- function(y) cor.test(facFcVn, y, method = "pearson", use = "pairwise.complete.obs")[["p.value"]]
41 },
42 spearman = {
43 staF <- function(y) cor(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")
44 tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]]
45 })
46
47 staVn <- apply(datMN, 2, staF)
48
49 fdrVn <- p.adjust(apply(datMN,
50 2,
51 tesF),
52 method = adjC)
53
54 sigVn <- as.numeric(fdrVn < thrN)
55
56 if(tesC %in% c("ttest", "wilcoxon"))
57 varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "-"), "_")
58
59 varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn
60
61 varDF[, paste0(varPfxC, adjC)] <- fdrVn
62
63 varDF[, paste0(varPfxC, "sig")] <- sigVn
64
65 } else if(tesC == "anova") {
66
67 ## getting the names of the pairwise comparisons 'class1Vclass2'
68 prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]])
69
70 aovMN <- t(apply(datMN, 2, function(varVn) {
71
72 aovMod <- aov(varVn ~ facFcVn)
73 pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"]
74 hsdMN <- TukeyHSD(aovMod)[["facFcVn"]]
75 c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN))
76
77 }))
78
79 aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC)
80 sigVn <- as.numeric(aovMN[, 1] < thrN)
81 aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)])
82 ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA
83 colnames(aovMN) <- paste0(varPfxC,
84 c(adjC,
85 "sig",
86 paste0(prwVc, "_dif"),
87 paste0(prwVc, "_pva"),
88 paste0(prwVc, "_sig")))
89 aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA
90
91 varDF <- cbind.data.frame(varDF, as.data.frame(aovMN))
92
93 } else if(tesC == "kruskal") {
94
95 ## getting the names of the pairwise comparisons 'class1Vclass2'
96 nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]]
97 nemVl <- c(lower.tri(nemMN, diag = TRUE))
98 nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl],
99 colnames(nemMN)[c(col(nemMN))][nemVl])
100 nemNamVc <- paste0(nemClaMC[, 1], "-", nemClaMC[, 2])
101 nemNamVc <- paste0(varPfxC, nemNamVc)
102
103 nemMN <- t(apply(datMN, 2, function(varVn) {
104
105 pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]]
106 varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]]
107 c(pvaN, c(varNemMN))
108
109 }))
110 pvaVn <- nemMN[, 1]
111 fdrVn <- p.adjust(pvaVn, method = adjC)
112 sigVn <- as.numeric(fdrVn < thrN)
113 nemMN <- nemMN[, c(FALSE, nemVl)]
114 colnames(nemMN) <- paste0(nemNamVc, "_pva")
115 nemSigMN <- nemMN < thrN
116 mode(nemSigMN) <- "numeric"
117 colnames(nemSigMN) <- paste0(nemNamVc, "_sig")
118 nemMN[sigVn < 1, ] <- NA
119 nemSigMN[sigVn < 1, ] <- NA
120
121 difMN <- sapply(1:nrow(nemClaMC), function(prwI) {
122 prwVc <- nemClaMC[prwI, ]
123 prwVi <- which(facFcVn %in% prwVc)
124 prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc)
125 apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE)))))
126 })
127 colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN))
128
129 nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN)
130 colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))
131
132 varDF <- cbind.data.frame(varDF, as.data.frame(nemMN))
133
134 }
135
136 names(sigVn) <- rownames(varDF)
137 sigSumN <- sum(sigVn, na.rm = TRUE)
138 if(sigSumN) {
139 cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "")
140 cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "")
141 } else
142 cat("\nNo significant variable found at the selected ", thrN, " level\n", sep = "")
143
144 options(stingsAsFactors = strAsFacL)
145
146 return(varDF)
147
148 }