Mercurial > repos > ethevenot > univariate
comparison univariate_script.R @ 3:140290de7986 draft
planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit 27bc6157f43574f038b3fb1be1f46ce4786e24b1
author | ethevenot |
---|---|
date | Sun, 30 Oct 2016 14:17:09 -0400 |
parents | 09799fc16bc6 |
children |
comparison
equal
deleted
inserted
replaced
2:09799fc16bc6 | 3:140290de7986 |
---|---|
2 samDF, | 2 samDF, |
3 varDF, | 3 varDF, |
4 facC, | 4 facC, |
5 tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1], | 5 tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1], |
6 adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], | 6 adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], |
7 thrN = 0.05) { | 7 thrN = 0.05, |
8 pdfC) { | |
8 | 9 |
9 | 10 |
10 ## Option | 11 ## Option |
11 ##--------------- | |
12 | 12 |
13 strAsFacL <- options()$stringsAsFactors | 13 strAsFacL <- options()$stringsAsFactors |
14 options(stingsAsFactors = FALSE) | 14 options(stingsAsFactors = FALSE) |
15 options(warn = -1) | 15 options(warn = -1) |
16 | |
17 ## Getting the response (either a factor or a numeric) | |
16 | 18 |
17 if(mode(samDF[, facC]) == "character") { | 19 if(mode(samDF[, facC]) == "character") { |
18 facFcVn <- factor(samDF[, facC]) | 20 facFcVn <- factor(samDF[, facC]) |
19 facLevVc <- levels(facFcVn) | 21 facLevVc <- levels(facFcVn) |
20 } else | 22 } else |
22 | 24 |
23 cat("\nPerforming '", tesC, "'\n", sep="") | 25 cat("\nPerforming '", tesC, "'\n", sep="") |
24 | 26 |
25 varPfxC <- paste0(make.names(facC), "_", tesC, "_") | 27 varPfxC <- paste0(make.names(facC), "_", tesC, "_") |
26 | 28 |
29 | |
27 if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) { | 30 if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) { |
28 | 31 |
32 | |
29 switch(tesC, | 33 switch(tesC, |
30 ttest = { | 34 ttest = { |
31 staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE))) | 35 staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE))) |
32 tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]] | 36 tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]] |
33 }, | 37 }, |
44 tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]] | 48 tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]] |
45 }) | 49 }) |
46 | 50 |
47 staVn <- apply(datMN, 2, staF) | 51 staVn <- apply(datMN, 2, staF) |
48 | 52 |
49 fdrVn <- p.adjust(apply(datMN, | 53 adjVn <- p.adjust(apply(datMN, |
50 2, | 54 2, |
51 tesF), | 55 tesF), |
52 method = adjC) | 56 method = adjC) |
53 | 57 |
54 sigVn <- as.numeric(fdrVn < thrN) | 58 sigVn <- as.numeric(adjVn < thrN) |
55 | 59 |
56 if(tesC %in% c("ttest", "wilcoxon")) | 60 if(tesC %in% c("ttest", "wilcoxon")) |
57 varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_") | 61 varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_") |
58 | 62 |
59 varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn | 63 varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn |
60 | 64 |
61 varDF[, paste0(varPfxC, adjC)] <- fdrVn | 65 varDF[, paste0(varPfxC, adjC)] <- adjVn |
62 | 66 |
63 varDF[, paste0(varPfxC, "sig")] <- sigVn | 67 varDF[, paste0(varPfxC, "sig")] <- sigVn |
64 | 68 |
69 ## graphic | |
70 | |
71 pdf(pdfC, onefile = TRUE) | |
72 | |
73 varVi <- which(sigVn > 0) | |
74 | |
75 if(tesC %in% c("ttest", "wilcoxon")) { | |
76 | |
77 facVc <- as.character(facFcVn) | |
78 names(facVc) <- rownames(samDF) | |
79 | |
80 for(varI in varVi) { | |
81 | |
82 varC <- rownames(varDF)[varI] | |
83 | |
84 boxF(facFcVn, | |
85 datMN[, varI], | |
86 paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"), | |
87 facVc) | |
88 | |
89 } | |
90 | |
91 } else { ## pearson or spearman | |
92 | |
93 for(varI in varVi) { | |
94 | |
95 varC <- rownames(varDF)[varI] | |
96 | |
97 mod <- lm(datMN[, varI] ~ facFcVn) | |
98 | |
99 plot(facFcVn, datMN[, varI], | |
100 xlab = facC, | |
101 ylab = "", | |
102 pch = 18, | |
103 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ", R2 = ", signif(summary(mod)$r.squared, 2), ")")) | |
104 | |
105 abline(mod, col = "red") | |
106 | |
107 } | |
108 | |
109 } | |
110 | |
111 dev.off() | |
112 | |
113 | |
65 } else if(tesC == "anova") { | 114 } else if(tesC == "anova") { |
66 | 115 |
116 | |
67 ## getting the names of the pairwise comparisons 'class1Vclass2' | 117 ## getting the names of the pairwise comparisons 'class1Vclass2' |
68 prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]]) | 118 prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]]) |
69 | 119 |
70 prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1 | 120 prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1 |
71 | 121 |
122 ## omnibus and post-hoc tests | |
123 | |
72 aovMN <- t(apply(datMN, 2, function(varVn) { | 124 aovMN <- t(apply(datMN, 2, function(varVn) { |
73 | 125 |
74 aovMod <- aov(varVn ~ facFcVn) | 126 aovMod <- aov(varVn ~ facFcVn) |
75 pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"] | 127 pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"] |
76 hsdMN <- TukeyHSD(aovMod)[["facFcVn"]] | 128 hsdMN <- TukeyHSD(aovMod)[["facFcVn"]] |
77 c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN)) | 129 c(pvaN, c(hsdMN[, c("diff", "p adj")])) |
78 | 130 |
79 })) | 131 })) |
80 | 132 |
81 aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC) | 133 difVi <- 1:length(prwVc) + 1 |
82 sigVn <- as.numeric(aovMN[, 1] < thrN) | 134 |
83 aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)]) | 135 ## difference of the means for each pairwise comparison |
84 ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA | 136 |
85 colnames(aovMN) <- paste0(varPfxC, | 137 difMN <- aovMN[, difVi] |
86 c(adjC, | 138 colnames(difMN) <- paste0(varPfxC, prwVc, "_dif") |
87 "sig", | 139 |
88 paste0(prwVc, "_dif"), | 140 ## correction for multiple testing |
89 paste0(prwVc, "_pva"), | 141 |
90 paste0(prwVc, "_sig"))) | 142 aovMN <- aovMN[, -difVi, drop = FALSE] |
91 aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA | 143 aovMN <- apply(aovMN, 2, function(pvaVn) p.adjust(pvaVn, method = adjC)) |
92 | 144 |
93 varDF <- cbind.data.frame(varDF, as.data.frame(aovMN)) | 145 ## significance coding (0 = not significant, 1 = significant) |
94 | 146 |
147 adjVn <- aovMN[, 1] | |
148 sigVn <- as.numeric(adjVn < thrN) | |
149 | |
150 aovMN <- aovMN[, -1, drop = FALSE] | |
151 colnames(aovMN) <- paste0(varPfxC, prwVc, "_", adjC) | |
152 | |
153 aovSigMN <- aovMN < thrN | |
154 mode(aovSigMN) <- "numeric" | |
155 colnames(aovSigMN) <- paste0(varPfxC, prwVc, "_sig") | |
156 | |
157 ## final aggregated table | |
158 | |
159 resMN <- cbind(adjVn, sigVn, difMN, aovMN, aovSigMN) | |
160 colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) | |
161 | |
162 varDF <- cbind.data.frame(varDF, as.data.frame(resMN)) | |
163 | |
164 ## graphic | |
165 | |
166 pdf(pdfC, onefile = TRUE) | |
167 | |
168 for(varI in 1:nrow(varDF)) { | |
169 | |
170 if(sum(aovSigMN[varI, ]) > 0) { | |
171 | |
172 varC <- rownames(varDF)[varI] | |
173 | |
174 boxplot(datMN[, varI] ~ facFcVn, | |
175 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")")) | |
176 | |
177 for(prwI in 1:length(prwVc)) { | |
178 | |
179 if(aovSigMN[varI, paste0(varPfxC, prwVc[prwI], "_sig")] == 1) { | |
180 | |
181 claVc <- unlist(strsplit(prwVc[prwI], ".", fixed = TRUE)) | |
182 aovClaVl <- facFcVn %in% claVc | |
183 aovFc <- facFcVn[aovClaVl, drop = TRUE] | |
184 aovVc <- as.character(aovFc) | |
185 names(aovVc) <- rownames(samDF)[aovClaVl] | |
186 boxF(aovFc, | |
187 datMN[aovClaVl, varI], | |
188 paste0(varC, " (", adjC, " = ", signif(aovMN[varI, paste0(varPfxC, prwVc[prwI], "_", adjC)], 2), ")"), | |
189 aovVc) | |
190 | |
191 } | |
192 | |
193 } | |
194 | |
195 } | |
196 | |
197 } | |
198 | |
199 dev.off() | |
200 | |
201 | |
95 } else if(tesC == "kruskal") { | 202 } else if(tesC == "kruskal") { |
96 | 203 |
97 ## getting the names of the pairwise comparisons 'class1Vclass2' | 204 |
205 ## getting the names of the pairwise comparisons 'class1.class2' | |
206 | |
98 nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]] | 207 nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]] |
99 nemVl <- c(lower.tri(nemMN, diag = TRUE)) | 208 nemVl <- c(lower.tri(nemMN, diag = TRUE)) |
100 nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl], | 209 nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl], |
101 colnames(nemMN)[c(col(nemMN))][nemVl]) | 210 colnames(nemMN)[c(col(nemMN))][nemVl]) |
102 nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2]) | 211 nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2]) |
103 nemNamVc <- paste0(varPfxC, nemNamVc) | 212 pfxNemVc <- paste0(varPfxC, nemNamVc) |
213 | |
214 ## omnibus and post-hoc tests | |
104 | 215 |
105 nemMN <- t(apply(datMN, 2, function(varVn) { | 216 nemMN <- t(apply(datMN, 2, function(varVn) { |
106 | 217 |
107 pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]] | 218 pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]] |
108 varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]] | 219 varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]] |
109 c(pvaN, c(varNemMN)) | 220 c(pvaN, c(varNemMN)) |
110 | 221 |
111 })) | 222 })) |
112 pvaVn <- nemMN[, 1] | 223 |
113 fdrVn <- p.adjust(pvaVn, method = adjC) | 224 ## correction for multiple testing |
114 sigVn <- as.numeric(fdrVn < thrN) | 225 |
226 nemMN <- apply(nemMN, 2, | |
227 function(pvaVn) p.adjust(pvaVn, method = adjC)) | |
228 adjVn <- nemMN[, 1] | |
229 sigVn <- as.numeric(adjVn < thrN) | |
115 nemMN <- nemMN[, c(FALSE, nemVl)] | 230 nemMN <- nemMN[, c(FALSE, nemVl)] |
116 colnames(nemMN) <- paste0(nemNamVc, "_pva") | 231 colnames(nemMN) <- paste0(pfxNemVc, "_", adjC) |
232 | |
233 ## significance coding (0 = not significant, 1 = significant) | |
234 | |
117 nemSigMN <- nemMN < thrN | 235 nemSigMN <- nemMN < thrN |
118 mode(nemSigMN) <- "numeric" | 236 mode(nemSigMN) <- "numeric" |
119 colnames(nemSigMN) <- paste0(nemNamVc, "_sig") | 237 colnames(nemSigMN) <- paste0(pfxNemVc, "_sig") |
120 nemMN[sigVn < 1, ] <- NA | 238 |
121 nemSigMN[sigVn < 1, ] <- NA | 239 ## difference of the medians for each pairwise comparison |
122 | 240 |
123 difMN <- sapply(1:nrow(nemClaMC), function(prwI) { | 241 difMN <- sapply(1:nrow(nemClaMC), function(prwI) { |
124 prwVc <- nemClaMC[prwI, ] | 242 prwVc <- nemClaMC[prwI, ] |
125 prwVi <- which(facFcVn %in% prwVc) | 243 prwVi <- which(facFcVn %in% prwVc) |
126 prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc) | 244 prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc) |
127 apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE))))) | 245 apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE))))) |
128 }) | 246 }) |
129 colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN)) | 247 colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN)) |
130 | 248 |
131 nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN) | 249 ## final aggregated table |
132 colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) | 250 |
133 | 251 resMN <- cbind(adjVn, sigVn, difMN, nemMN, nemSigMN) |
134 varDF <- cbind.data.frame(varDF, as.data.frame(nemMN)) | 252 colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) |
135 | 253 |
254 varDF <- cbind.data.frame(varDF, as.data.frame(resMN)) | |
255 | |
256 ## graphic | |
257 | |
258 pdf(pdfC, onefile = TRUE) | |
259 | |
260 for(varI in 1:nrow(varDF)) { | |
261 | |
262 if(sum(nemSigMN[varI, ]) > 0) { | |
263 | |
264 varC <- rownames(varDF)[varI] | |
265 | |
266 boxplot(datMN[, varI] ~ facFcVn, | |
267 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")")) | |
268 | |
269 for(nemI in 1:length(nemNamVc)) { | |
270 | |
271 if(nemSigMN[varI, paste0(varPfxC, nemNamVc[nemI], "_sig")] == 1) { | |
272 | |
273 nemClaVc <- nemClaMC[nemI, ] | |
274 nemClaVl <- facFcVn %in% nemClaVc | |
275 nemFc <- facFcVn[nemClaVl, drop = TRUE] | |
276 nemVc <- as.character(nemFc) | |
277 names(nemVc) <- rownames(samDF)[nemClaVl] | |
278 boxF(nemFc, | |
279 datMN[nemClaVl, varI], | |
280 paste0(varC, " (", adjC, " = ", signif(nemMN[varI, paste0(varPfxC, nemNamVc[nemI], "_", adjC)], 2), ")"), | |
281 nemVc) | |
282 | |
283 } | |
284 | |
285 } | |
286 | |
287 } | |
288 | |
289 } | |
290 | |
291 dev.off() | |
292 | |
136 } | 293 } |
137 | 294 |
138 names(sigVn) <- rownames(varDF) | 295 names(sigVn) <- rownames(varDF) |
139 sigSumN <- sum(sigVn, na.rm = TRUE) | 296 sigSumN <- sum(sigVn, na.rm = TRUE) |
140 if(sigSumN) { | 297 if(sigSumN) { |
141 cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "") | 298 cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "") |
142 cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "") | 299 cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "") |
146 options(stingsAsFactors = strAsFacL) | 303 options(stingsAsFactors = strAsFacL) |
147 | 304 |
148 return(varDF) | 305 return(varDF) |
149 | 306 |
150 } | 307 } |
308 | |
309 | |
310 boxF <- function(xFc, | |
311 yVn, | |
312 maiC, | |
313 xVc) { | |
314 | |
315 boxLs <- boxplot(yVn ~ xFc, | |
316 main = maiC) | |
317 | |
318 outVn <- boxLs[["out"]] | |
319 | |
320 if(length(outVn)) { | |
321 | |
322 for(outI in 1:length(outVn)) { | |
323 levI <- which(levels(xFc) == xVc[names(outVn)[outI]]) | |
324 text(levI, | |
325 outVn[outI], | |
326 labels = names(outVn)[outI], | |
327 pos = ifelse(levI == 2, 2, 4)) | |
328 } | |
329 | |
330 } | |
331 | |
332 } |