Mercurial > repos > ethevenot > multivariate
comparison multivariate_wrapper.R @ 0:fafba524dca6 draft
planemo upload for repository https://github.com/workflow4metabolomics/multivariate.git commit 6596dbd39d20ee1962d9ebdd87eec04821239760
author | ethevenot |
---|---|
date | Wed, 27 Jul 2016 11:22:56 -0400 |
parents | |
children | da272496b32d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fafba524dca6 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 library(batch) ## parseCommandArgs | |
4 | |
5 ######## | |
6 # MAIN # | |
7 ######## | |
8 | |
9 argVc <- unlist(parseCommandArgs(evaluate=FALSE)) | |
10 | |
11 | |
12 #### Start_of_tested_code <- function() {} | |
13 | |
14 | |
15 ##------------------------------ | |
16 ## Initializing | |
17 ##------------------------------ | |
18 | |
19 ## options | |
20 ##-------- | |
21 | |
22 strAsFacL <- options()$stringsAsFactors | |
23 options(stringsAsFactors = FALSE) | |
24 | |
25 ## libraries | |
26 ##---------- | |
27 | |
28 suppressMessages(library(ropls)) | |
29 | |
30 if(packageVersion("ropls") < "1.4.0") | |
31 cat("\nWarning: new version of the 'ropls' package is available\n", sep="") | |
32 | |
33 ## constants | |
34 ##---------- | |
35 | |
36 modNamC <- "Multivariate" ## module name | |
37 | |
38 topEnvC <- environment() | |
39 flgC <- "\n" | |
40 | |
41 ## functions | |
42 ##---------- | |
43 | |
44 flgF <- function(tesC, | |
45 envC = topEnvC, | |
46 txtC = NA) { ## management of warning and error messages | |
47 | |
48 tesL <- eval(parse(text = tesC), envir = envC) | |
49 | |
50 if(!tesL) { | |
51 | |
52 sink(NULL) | |
53 stpTxtC <- ifelse(is.na(txtC), | |
54 paste0(tesC, " is FALSE"), | |
55 txtC) | |
56 | |
57 stop(stpTxtC, | |
58 call. = FALSE) | |
59 | |
60 } | |
61 | |
62 } ## flgF | |
63 | |
64 | |
65 ## log file | |
66 ##--------- | |
67 | |
68 sink(argVc["information"]) | |
69 | |
70 cat("\nStart of the '", modNamC, "' Galaxy module call: ", | |
71 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") | |
72 | |
73 | |
74 ## arguments | |
75 ##---------- | |
76 | |
77 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"], | |
78 check.names = FALSE, | |
79 header = TRUE, | |
80 row.names = 1, | |
81 sep = "\t"))) | |
82 | |
83 samDF <- read.table(argVc["sampleMetadata_in"], | |
84 check.names = FALSE, | |
85 header = TRUE, | |
86 row.names = 1, | |
87 sep = "\t") | |
88 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") | |
89 | |
90 varDF <- read.table(argVc["variableMetadata_in"], | |
91 check.names = FALSE, | |
92 header = TRUE, | |
93 row.names = 1, | |
94 sep = "\t") | |
95 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") | |
96 | |
97 flgF("argVc['respC'] == 'none' || (argVc['respC'] %in% colnames(samDF))", | |
98 txtC = paste0("Y Response argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
99 if(argVc["respC"] != "none") { | |
100 yMCN <- matrix(samDF[, argVc['respC']], ncol = 1, dimnames = list(rownames(xMN), argVc['respC'])) | |
101 } else | |
102 yMCN <- NULL | |
103 | |
104 if(argVc["testL"] == "TRUE") { | |
105 flgF("!is.null(yMCN)", | |
106 txtC = "Predictions cannot be peformed with PCA models") | |
107 flgF("'test.' %in% colnames(samDF)", | |
108 txtC = "No 'test.' column found in the sample metadata") | |
109 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))", | |
110 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else") | |
111 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))", | |
112 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else") | |
113 flgF("!any(is.na(yMCN[samDF[, 'test.'] == 'no', ]))", | |
114 txtC = "samples for model training (i.e. 'no' value in the 'test.' column) should not contain NA in the response") | |
115 tesVl <- samDF[, "test."] == "yes" | |
116 xTesMN <- xMN[tesVl, , drop = FALSE] | |
117 xMN <- xMN[!tesVl, , drop = FALSE] | |
118 yMCN <- yMCN[!tesVl, , drop = FALSE] | |
119 } else | |
120 tesVl <- NULL | |
121 | |
122 if(!('parAsColC' %in% names(argVc))) | |
123 argVc["parAsColC"] <- "none" | |
124 flgF("argVc['parAsColC'] == 'none' || argVc['parAsColC'] %in% colnames(samDF)", txtC = paste0("Sample color argument (", argVc['parAsColC'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
125 if(argVc["parAsColC"] != "none") { | |
126 parAsColFcVn <- samDF[, argVc['parAsColC']] | |
127 if(is.character(parAsColFcVn)) | |
128 parAsColFcVn <- factor(parAsColFcVn) | |
129 } else | |
130 parAsColFcVn <- NA | |
131 | |
132 if(!('parMahalC' %in% names(argVc)) || argVc["parMahalC"] == "NA") { | |
133 if(!is.null(yMCN) && ncol(yMCN) == 1 && mode(yMCN) == "character") | |
134 argVc["parMahalC"] <- argVc["respC"] | |
135 else | |
136 argVc["parMahalC"] <- "none" | |
137 } | |
138 flgF("argVc['parMahalC'] == 'none' || (argVc['parMahalC'] %in% colnames(samDF))", | |
139 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must be either 'NA', 'none' or one of the column names (first row) of your sample metadata")) | |
140 if(argVc["parMahalC"] == "none") { | |
141 parEllipsesL <- FALSE | |
142 } else { | |
143 if(is.null(yMCN)) { ## PCA case | |
144 flgF("mode(samDF[, argVc['parMahalC']]) == 'character'", | |
145 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must correspond to a column of characters in your sampleMetadata")) | |
146 parAsColFcVn <- factor(samDF[, argVc["parMahalC"]]) | |
147 parEllipsesL <- TRUE | |
148 } else { ## (O)PLS-DA case | |
149 flgF("identical(as.character(argVc['respC']), as.character(argVc['parMahalC']))", | |
150 txtC = paste0("The Mahalanobis argument (", argVc['parMahalC'], ") must be identical to the Y response argument (", argVc['respC'], ")")) | |
151 parEllipsesL <- TRUE | |
152 } | |
153 } | |
154 | |
155 if(!('parLabVc' %in% names(argVc))) | |
156 argVc["parLabVc"] <- "none" | |
157 flgF("argVc['parLabVc'] == 'none' || (argVc['parLabVc'] %in% colnames(samDF))", | |
158 txtC = paste0("Sample labels argument (", argVc['parLabVc'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
159 if('parLabVc' %in% names(argVc)) | |
160 if(argVc["parLabVc"] != "none") { | |
161 flgF("mode(samDF[, argVc['parLabVc']]) == 'character'", | |
162 txtC = paste0("The sample label argument (", argVc['parLabVc'], ") must correspond to a sample metadata column of characters (not numerics)")) | |
163 parLabVc <- samDF[, argVc['parLabVc']] | |
164 } else | |
165 parLabVc <- NA | |
166 | |
167 if('parPc1I' %in% names(argVc)) { | |
168 parCompVi <- as.numeric(c(argVc["parPc1I"], argVc["parPc2I"])) | |
169 } else | |
170 parCompVi <- c(1, 2) | |
171 | |
172 | |
173 ## checking | |
174 ##--------- | |
175 | |
176 | |
177 flgF("argVc['predI'] == 'NA' || argVc['orthoI'] == 'NA' || as.numeric(argVc['orthoI']) > 0 || parCompVi[2] <= as.numeric(argVc['predI'])", | |
178 txtC = paste0("The highest component to display (", parCompVi[2], ") must not exceed the number of predictive components of the model (", argVc['predI'], ")")) | |
179 | |
180 | |
181 if(argVc["orthoI"] == "NA" || argVc["orthoI"] != "0") | |
182 if(argVc["predI"] == "NA" || argVc["predI"] != "0") { | |
183 argVc["predI"] <- "1" | |
184 cat("\nWarning: OPLS: number of predictive components ('predI' argument) set to 1\n", sep = "") | |
185 } | |
186 | |
187 if(argVc["predI"] != "NA") | |
188 if(as.numeric(argVc["predI"]) > min(nrow(xMN), ncol(xMN))) { | |
189 argVc["predI"] <- as.character(min(nrow(xMN), ncol(xMN))) | |
190 cat("\nWarning: 'predI' set to the minimum of the dataMatrix dimensions: ", as.numeric(argVc["predI"]), "\n", sep = "") | |
191 } | |
192 | |
193 if("algoC" %in% names(argVc) && argVc["algoC"] == "svd" && length(which(is.na(c(xMN)))) > 0) { | |
194 minN <- min(c(xMN[!is.na(xMN)])) / 2 | |
195 cat("\nWarning: Missing values set to ", round(minN, 1), " (half minimum value) for 'svd' algorithm to be used\n", sep = "") | |
196 } | |
197 | |
198 | |
199 ##------------------------------ | |
200 ## Computation and plot | |
201 ##------------------------------ | |
202 | |
203 | |
204 sink() | |
205 | |
206 optWrnN <- options()$warn | |
207 options(warn = -1) | |
208 | |
209 ropLs <- opls(x = xMN, | |
210 y = yMCN, | |
211 predI = ifelse(argVc["predI"] == "NA", NA, as.numeric(argVc["predI"])), | |
212 orthoI = ifelse(argVc["orthoI"] == "NA", NA, as.numeric(argVc["orthoI"])), | |
213 algoC = ifelse('algoC' %in% names(argVc), argVc["algoC"], "default"), | |
214 crossvalI = ifelse('crossvalI' %in% names(argVc), as.numeric(argVc["crossvalI"]), 7), | |
215 log10L = ifelse('log10L' %in% names(argVc), as.logical(argVc["log10L"]), FALSE), | |
216 permI = ifelse('permI' %in% names(argVc), as.numeric(argVc["permI"]), 20), | |
217 scaleC = ifelse('scaleC' %in% names(argVc), argVc["scaleC"], "standard"), | |
218 subset = NULL, | |
219 printL = FALSE, | |
220 plotL = FALSE, | |
221 .sinkC = argVc['information']) | |
222 | |
223 modC <- ropLs@typeC | |
224 sumDF <- getSummaryDF(ropLs) | |
225 desMC <- ropLs@descriptionMC | |
226 scoreMN <- getScoreMN(ropLs) | |
227 loadingMN <- getLoadingMN(ropLs) | |
228 | |
229 vipVn <- coeMN <- orthoScoreMN <- orthoLoadingMN <- orthoVipVn <- NULL | |
230 | |
231 if(grepl("PLS", modC)) { | |
232 | |
233 vipVn <- getVipVn(ropLs) | |
234 coeMN <- coef(ropLs) | |
235 | |
236 if(grepl("OPLS", modC)) { | |
237 orthoScoreMN <- getScoreMN(ropLs, orthoL = TRUE) | |
238 orthoLoadingMN <- getLoadingMN(ropLs, orthoL = TRUE) | |
239 orthoVipVn <- getVipVn(ropLs, orthoL = TRUE) | |
240 } | |
241 | |
242 } | |
243 | |
244 ploC <- ifelse('typeC' %in% names(argVc), argVc["typeC"], "summary") | |
245 | |
246 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) { | |
247 if(!(ploC %in% c("permutation", "overview"))) { | |
248 ploC <- "summary" | |
249 plotWarnL <- TRUE | |
250 } | |
251 } else | |
252 plotWarnL <- FALSE | |
253 | |
254 plot(ropLs, | |
255 typeVc = ploC, | |
256 parAsColFcVn = parAsColFcVn, | |
257 parCexN = ifelse('parCexN' %in% names(argVc), as.numeric(argVc["parCexN"]), 0.8), | |
258 parCompVi = parCompVi, | |
259 parEllipsesL = parEllipsesL, | |
260 parLabVc = parLabVc, | |
261 file.pdfC = argVc['figure'], | |
262 .sinkC = argVc['information']) | |
263 | |
264 options(warn = optWrnN) | |
265 | |
266 | |
267 ##------------------------------ | |
268 ## Print | |
269 ##------------------------------ | |
270 | |
271 | |
272 sink(argVc["information"], append = TRUE) | |
273 | |
274 if(plotWarnL) | |
275 cat("\nWarning: For single component models, only 'overview' (and 'permutation' in case of single response (O)PLS(-DA)) plot(s) are available\n", sep = "") | |
276 | |
277 | |
278 cat("\n", modC, "\n", sep = "") | |
279 | |
280 cat("\n", desMC["samples", ], | |
281 " samples x ", | |
282 desMC["X_variables", ], | |
283 " variables", | |
284 ifelse(modC != "PCA", | |
285 " and 1 response", | |
286 ""), | |
287 "\n", sep = "") | |
288 | |
289 cat("\n", ropLs@suppLs[["scaleC"]], " scaling of dataMatrix", | |
290 ifelse(modC == "PCA", | |
291 "", | |
292 paste0(" and ", | |
293 ifelse(mode(ropLs@suppLs[["yMCN"]]) == "character" && ropLs@suppLs[["scaleC"]] != "standard", | |
294 "standard scaling of ", | |
295 ""), | |
296 "response\n")), sep = "") | |
297 | |
298 if(substr(desMC["missing_values", ], 1, 1) != "0") | |
299 cat("\n", desMC["missing_values", ], " NAs\n", sep = "") | |
300 | |
301 if(substr(desMC["near_zero_excluded_X_variables", ], 1, 1) != "0") | |
302 cat("\n", desMC["near_zero_excluded_X_variables", ], | |
303 " excluded variables during model building (because of near zero variance)\n", sep = "") | |
304 | |
305 cat("\n") | |
306 | |
307 optDigN <- options()[["digits"]] | |
308 options(digits = 3) | |
309 print(ropLs@modelDF) | |
310 options(digits = optDigN) | |
311 | |
312 | |
313 ##------------------------------ | |
314 ## Ending | |
315 ##------------------------------ | |
316 | |
317 | |
318 ## Saving | |
319 ##------- | |
320 | |
321 | |
322 rspModC <- gsub("-", "", modC) | |
323 if(rspModC != "PCA") | |
324 rspModC <- paste0(make.names(argVc['respC']), "_", rspModC) | |
325 | |
326 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) { | |
327 | |
328 tCompMN <- scoreMN | |
329 pCompMN <- loadingMN | |
330 | |
331 } else { | |
332 | |
333 if(sumDF[, "ort"] > 0) { | |
334 if(parCompVi[2] > sumDF[, "ort"] + 1) | |
335 stop("Selected orthogonal component for plotting (ordinate) exceeds the total number of orthogonal components of the model", call. = FALSE) | |
336 tCompMN <- cbind(scoreMN[, 1], orthoScoreMN[, parCompVi[2] - 1]) | |
337 pCompMN <- cbind(loadingMN[, 1], orthoLoadingMN[, parCompVi[2] - 1]) | |
338 colnames(pCompMN) <- colnames(tCompMN) <- c("h1", paste("o", parCompVi[2] - 1, sep = "")) | |
339 } else { | |
340 if(max(parCompVi) > sumDF[, "pre"]) | |
341 stop("Selected component for plotting as ordinate exceeds the total number of predictive components of the model", call. = FALSE) | |
342 tCompMN <- scoreMN[, parCompVi, drop = FALSE] | |
343 pCompMN <- loadingMN[, parCompVi, drop = FALSE] | |
344 } | |
345 | |
346 } | |
347 | |
348 ## x-scores and prediction | |
349 | |
350 colnames(tCompMN) <- paste0(rspModC, "_XSCOR-", colnames(tCompMN)) | |
351 tCompDF <- as.data.frame(tCompMN)[rownames(samDF), , drop = FALSE] | |
352 | |
353 if(modC != "PCA") { | |
354 | |
355 if(!is.null(tesVl)) { | |
356 tCompFulMN <- matrix(NA, | |
357 nrow = nrow(samDF), | |
358 ncol = ncol(tCompMN), | |
359 dimnames = list(rownames(samDF), colnames(tCompMN))) | |
360 mode(tCompFulMN) <- "numeric" | |
361 tCompFulMN[rownames(tCompMN), ] <- tCompMN | |
362 tCompMN <- tCompFulMN | |
363 | |
364 fitMCN <- fitted(ropLs) | |
365 fitFulMCN <- matrix(NA, | |
366 nrow = nrow(samDF), | |
367 ncol = 1, | |
368 dimnames = list(rownames(samDF), NULL)) | |
369 mode(fitFulMCN) <- mode(fitMCN) | |
370 fitFulMCN[rownames(fitMCN), ] <- fitMCN | |
371 yPreMCN <- predict(ropLs, newdata = as.data.frame(xTesMN)) | |
372 fitFulMCN[rownames(yPreMCN), ] <- yPreMCN | |
373 fitMCN <- fitFulMCN | |
374 | |
375 } else | |
376 fitMCN <- fitted(ropLs) | |
377 | |
378 colnames(fitMCN) <- paste0(rspModC, | |
379 "_predictions") | |
380 fitDF <- as.data.frame(fitMCN)[rownames(samDF), , drop = FALSE] | |
381 | |
382 tCompDF <- cbind.data.frame(tCompDF, fitDF) | |
383 } | |
384 | |
385 samDF <- cbind.data.frame(samDF, tCompDF) | |
386 | |
387 ## x-loadings and VIP | |
388 | |
389 colnames(pCompMN) <- paste0(rspModC, "_XLOAD-", colnames(pCompMN)) | |
390 if(!is.null(vipVn)) { | |
391 pCompMN <- cbind(pCompMN, vipVn) | |
392 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, | |
393 "_VIP", | |
394 ifelse(!is.null(orthoVipVn), | |
395 "_pred", | |
396 "")) | |
397 if(!is.null(orthoVipVn)) { | |
398 pCompMN <- cbind(pCompMN, orthoVipVn) | |
399 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, | |
400 "_VIP_ortho") | |
401 } | |
402 } | |
403 if(!is.null(coeMN)) { | |
404 pCompMN <- cbind(pCompMN, coeMN) | |
405 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, "_COEFF") | |
406 } | |
407 pCompDF <- as.data.frame(pCompMN)[rownames(varDF), , drop = FALSE] | |
408 varDF <- cbind.data.frame(varDF, pCompDF) | |
409 | |
410 ## sampleMetadata | |
411 | |
412 samDF <- cbind.data.frame(sampleMetadata = rownames(samDF), | |
413 samDF) | |
414 write.table(samDF, | |
415 file = argVc["sampleMetadata_out"], | |
416 quote = FALSE, | |
417 row.names = FALSE, | |
418 sep = "\t") | |
419 | |
420 ## variableMetadata | |
421 | |
422 varDF <- cbind.data.frame(variableMetadata = rownames(varDF), | |
423 varDF) | |
424 write.table(varDF, | |
425 file = argVc["variableMetadata_out"], | |
426 quote = FALSE, | |
427 row.names = FALSE, | |
428 sep = "\t") | |
429 | |
430 # Output ropLs | |
431 if ( ! is.null(argVc['ropls_out'])) | |
432 save(ropLs, file = argVc['ropls_out']) | |
433 | |
434 ## Closing | |
435 ##-------- | |
436 | |
437 cat("\nEnd of '", modNamC, "' Galaxy module call: ", | |
438 as.character(Sys.time()), "\n", sep = "") | |
439 | |
440 sink() | |
441 | |
442 options(stringsAsFactors = strAsFacL) | |
443 | |
444 | |
445 #### End_of_tested_code <- function() {} | |
446 | |
447 | |
448 rm(list = ls()) |