comparison multivariate_wrapper.R @ 0:fafba524dca6 draft

planemo upload for repository https://github.com/workflow4metabolomics/multivariate.git commit 6596dbd39d20ee1962d9ebdd87eec04821239760
author ethevenot
date Wed, 27 Jul 2016 11:22:56 -0400
parents
children da272496b32d
comparison
equal deleted inserted replaced
-1:000000000000 0:fafba524dca6
1 #!/usr/bin/env Rscript
2
3 library(batch) ## parseCommandArgs
4
5 ########
6 # MAIN #
7 ########
8
9 argVc <- unlist(parseCommandArgs(evaluate=FALSE))
10
11
12 #### Start_of_tested_code <- function() {}
13
14
15 ##------------------------------
16 ## Initializing
17 ##------------------------------
18
19 ## options
20 ##--------
21
22 strAsFacL <- options()$stringsAsFactors
23 options(stringsAsFactors = FALSE)
24
25 ## libraries
26 ##----------
27
28 suppressMessages(library(ropls))
29
30 if(packageVersion("ropls") < "1.4.0")
31 cat("\nWarning: new version of the 'ropls' package is available\n", sep="")
32
33 ## constants
34 ##----------
35
36 modNamC <- "Multivariate" ## module name
37
38 topEnvC <- environment()
39 flgC <- "\n"
40
41 ## functions
42 ##----------
43
44 flgF <- function(tesC,
45 envC = topEnvC,
46 txtC = NA) { ## management of warning and error messages
47
48 tesL <- eval(parse(text = tesC), envir = envC)
49
50 if(!tesL) {
51
52 sink(NULL)
53 stpTxtC <- ifelse(is.na(txtC),
54 paste0(tesC, " is FALSE"),
55 txtC)
56
57 stop(stpTxtC,
58 call. = FALSE)
59
60 }
61
62 } ## flgF
63
64
65 ## log file
66 ##---------
67
68 sink(argVc["information"])
69
70 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
71 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
72
73
74 ## arguments
75 ##----------
76
77 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
78 check.names = FALSE,
79 header = TRUE,
80 row.names = 1,
81 sep = "\t")))
82
83 samDF <- read.table(argVc["sampleMetadata_in"],
84 check.names = FALSE,
85 header = TRUE,
86 row.names = 1,
87 sep = "\t")
88 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
89
90 varDF <- read.table(argVc["variableMetadata_in"],
91 check.names = FALSE,
92 header = TRUE,
93 row.names = 1,
94 sep = "\t")
95 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
96
97 flgF("argVc['respC'] == 'none' || (argVc['respC'] %in% colnames(samDF))",
98 txtC = paste0("Y Response argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
99 if(argVc["respC"] != "none") {
100 yMCN <- matrix(samDF[, argVc['respC']], ncol = 1, dimnames = list(rownames(xMN), argVc['respC']))
101 } else
102 yMCN <- NULL
103
104 if(argVc["testL"] == "TRUE") {
105 flgF("!is.null(yMCN)",
106 txtC = "Predictions cannot be peformed with PCA models")
107 flgF("'test.' %in% colnames(samDF)",
108 txtC = "No 'test.' column found in the sample metadata")
109 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))",
110 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else")
111 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))",
112 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else")
113 flgF("!any(is.na(yMCN[samDF[, 'test.'] == 'no', ]))",
114 txtC = "samples for model training (i.e. 'no' value in the 'test.' column) should not contain NA in the response")
115 tesVl <- samDF[, "test."] == "yes"
116 xTesMN <- xMN[tesVl, , drop = FALSE]
117 xMN <- xMN[!tesVl, , drop = FALSE]
118 yMCN <- yMCN[!tesVl, , drop = FALSE]
119 } else
120 tesVl <- NULL
121
122 if(!('parAsColC' %in% names(argVc)))
123 argVc["parAsColC"] <- "none"
124 flgF("argVc['parAsColC'] == 'none' || argVc['parAsColC'] %in% colnames(samDF)", txtC = paste0("Sample color argument (", argVc['parAsColC'], ") must be either none or one of the column names (first row) of your sample metadata"))
125 if(argVc["parAsColC"] != "none") {
126 parAsColFcVn <- samDF[, argVc['parAsColC']]
127 if(is.character(parAsColFcVn))
128 parAsColFcVn <- factor(parAsColFcVn)
129 } else
130 parAsColFcVn <- NA
131
132 if(!('parMahalC' %in% names(argVc)) || argVc["parMahalC"] == "NA") {
133 if(!is.null(yMCN) && ncol(yMCN) == 1 && mode(yMCN) == "character")
134 argVc["parMahalC"] <- argVc["respC"]
135 else
136 argVc["parMahalC"] <- "none"
137 }
138 flgF("argVc['parMahalC'] == 'none' || (argVc['parMahalC'] %in% colnames(samDF))",
139 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must be either 'NA', 'none' or one of the column names (first row) of your sample metadata"))
140 if(argVc["parMahalC"] == "none") {
141 parEllipsesL <- FALSE
142 } else {
143 if(is.null(yMCN)) { ## PCA case
144 flgF("mode(samDF[, argVc['parMahalC']]) == 'character'",
145 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must correspond to a column of characters in your sampleMetadata"))
146 parAsColFcVn <- factor(samDF[, argVc["parMahalC"]])
147 parEllipsesL <- TRUE
148 } else { ## (O)PLS-DA case
149 flgF("identical(as.character(argVc['respC']), as.character(argVc['parMahalC']))",
150 txtC = paste0("The Mahalanobis argument (", argVc['parMahalC'], ") must be identical to the Y response argument (", argVc['respC'], ")"))
151 parEllipsesL <- TRUE
152 }
153 }
154
155 if(!('parLabVc' %in% names(argVc)))
156 argVc["parLabVc"] <- "none"
157 flgF("argVc['parLabVc'] == 'none' || (argVc['parLabVc'] %in% colnames(samDF))",
158 txtC = paste0("Sample labels argument (", argVc['parLabVc'], ") must be either none or one of the column names (first row) of your sample metadata"))
159 if('parLabVc' %in% names(argVc))
160 if(argVc["parLabVc"] != "none") {
161 flgF("mode(samDF[, argVc['parLabVc']]) == 'character'",
162 txtC = paste0("The sample label argument (", argVc['parLabVc'], ") must correspond to a sample metadata column of characters (not numerics)"))
163 parLabVc <- samDF[, argVc['parLabVc']]
164 } else
165 parLabVc <- NA
166
167 if('parPc1I' %in% names(argVc)) {
168 parCompVi <- as.numeric(c(argVc["parPc1I"], argVc["parPc2I"]))
169 } else
170 parCompVi <- c(1, 2)
171
172
173 ## checking
174 ##---------
175
176
177 flgF("argVc['predI'] == 'NA' || argVc['orthoI'] == 'NA' || as.numeric(argVc['orthoI']) > 0 || parCompVi[2] <= as.numeric(argVc['predI'])",
178 txtC = paste0("The highest component to display (", parCompVi[2], ") must not exceed the number of predictive components of the model (", argVc['predI'], ")"))
179
180
181 if(argVc["orthoI"] == "NA" || argVc["orthoI"] != "0")
182 if(argVc["predI"] == "NA" || argVc["predI"] != "0") {
183 argVc["predI"] <- "1"
184 cat("\nWarning: OPLS: number of predictive components ('predI' argument) set to 1\n", sep = "")
185 }
186
187 if(argVc["predI"] != "NA")
188 if(as.numeric(argVc["predI"]) > min(nrow(xMN), ncol(xMN))) {
189 argVc["predI"] <- as.character(min(nrow(xMN), ncol(xMN)))
190 cat("\nWarning: 'predI' set to the minimum of the dataMatrix dimensions: ", as.numeric(argVc["predI"]), "\n", sep = "")
191 }
192
193 if("algoC" %in% names(argVc) && argVc["algoC"] == "svd" && length(which(is.na(c(xMN)))) > 0) {
194 minN <- min(c(xMN[!is.na(xMN)])) / 2
195 cat("\nWarning: Missing values set to ", round(minN, 1), " (half minimum value) for 'svd' algorithm to be used\n", sep = "")
196 }
197
198
199 ##------------------------------
200 ## Computation and plot
201 ##------------------------------
202
203
204 sink()
205
206 optWrnN <- options()$warn
207 options(warn = -1)
208
209 ropLs <- opls(x = xMN,
210 y = yMCN,
211 predI = ifelse(argVc["predI"] == "NA", NA, as.numeric(argVc["predI"])),
212 orthoI = ifelse(argVc["orthoI"] == "NA", NA, as.numeric(argVc["orthoI"])),
213 algoC = ifelse('algoC' %in% names(argVc), argVc["algoC"], "default"),
214 crossvalI = ifelse('crossvalI' %in% names(argVc), as.numeric(argVc["crossvalI"]), 7),
215 log10L = ifelse('log10L' %in% names(argVc), as.logical(argVc["log10L"]), FALSE),
216 permI = ifelse('permI' %in% names(argVc), as.numeric(argVc["permI"]), 20),
217 scaleC = ifelse('scaleC' %in% names(argVc), argVc["scaleC"], "standard"),
218 subset = NULL,
219 printL = FALSE,
220 plotL = FALSE,
221 .sinkC = argVc['information'])
222
223 modC <- ropLs@typeC
224 sumDF <- getSummaryDF(ropLs)
225 desMC <- ropLs@descriptionMC
226 scoreMN <- getScoreMN(ropLs)
227 loadingMN <- getLoadingMN(ropLs)
228
229 vipVn <- coeMN <- orthoScoreMN <- orthoLoadingMN <- orthoVipVn <- NULL
230
231 if(grepl("PLS", modC)) {
232
233 vipVn <- getVipVn(ropLs)
234 coeMN <- coef(ropLs)
235
236 if(grepl("OPLS", modC)) {
237 orthoScoreMN <- getScoreMN(ropLs, orthoL = TRUE)
238 orthoLoadingMN <- getLoadingMN(ropLs, orthoL = TRUE)
239 orthoVipVn <- getVipVn(ropLs, orthoL = TRUE)
240 }
241
242 }
243
244 ploC <- ifelse('typeC' %in% names(argVc), argVc["typeC"], "summary")
245
246 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) {
247 if(!(ploC %in% c("permutation", "overview"))) {
248 ploC <- "summary"
249 plotWarnL <- TRUE
250 }
251 } else
252 plotWarnL <- FALSE
253
254 plot(ropLs,
255 typeVc = ploC,
256 parAsColFcVn = parAsColFcVn,
257 parCexN = ifelse('parCexN' %in% names(argVc), as.numeric(argVc["parCexN"]), 0.8),
258 parCompVi = parCompVi,
259 parEllipsesL = parEllipsesL,
260 parLabVc = parLabVc,
261 file.pdfC = argVc['figure'],
262 .sinkC = argVc['information'])
263
264 options(warn = optWrnN)
265
266
267 ##------------------------------
268 ## Print
269 ##------------------------------
270
271
272 sink(argVc["information"], append = TRUE)
273
274 if(plotWarnL)
275 cat("\nWarning: For single component models, only 'overview' (and 'permutation' in case of single response (O)PLS(-DA)) plot(s) are available\n", sep = "")
276
277
278 cat("\n", modC, "\n", sep = "")
279
280 cat("\n", desMC["samples", ],
281 " samples x ",
282 desMC["X_variables", ],
283 " variables",
284 ifelse(modC != "PCA",
285 " and 1 response",
286 ""),
287 "\n", sep = "")
288
289 cat("\n", ropLs@suppLs[["scaleC"]], " scaling of dataMatrix",
290 ifelse(modC == "PCA",
291 "",
292 paste0(" and ",
293 ifelse(mode(ropLs@suppLs[["yMCN"]]) == "character" && ropLs@suppLs[["scaleC"]] != "standard",
294 "standard scaling of ",
295 ""),
296 "response\n")), sep = "")
297
298 if(substr(desMC["missing_values", ], 1, 1) != "0")
299 cat("\n", desMC["missing_values", ], " NAs\n", sep = "")
300
301 if(substr(desMC["near_zero_excluded_X_variables", ], 1, 1) != "0")
302 cat("\n", desMC["near_zero_excluded_X_variables", ],
303 " excluded variables during model building (because of near zero variance)\n", sep = "")
304
305 cat("\n")
306
307 optDigN <- options()[["digits"]]
308 options(digits = 3)
309 print(ropLs@modelDF)
310 options(digits = optDigN)
311
312
313 ##------------------------------
314 ## Ending
315 ##------------------------------
316
317
318 ## Saving
319 ##-------
320
321
322 rspModC <- gsub("-", "", modC)
323 if(rspModC != "PCA")
324 rspModC <- paste0(make.names(argVc['respC']), "_", rspModC)
325
326 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) {
327
328 tCompMN <- scoreMN
329 pCompMN <- loadingMN
330
331 } else {
332
333 if(sumDF[, "ort"] > 0) {
334 if(parCompVi[2] > sumDF[, "ort"] + 1)
335 stop("Selected orthogonal component for plotting (ordinate) exceeds the total number of orthogonal components of the model", call. = FALSE)
336 tCompMN <- cbind(scoreMN[, 1], orthoScoreMN[, parCompVi[2] - 1])
337 pCompMN <- cbind(loadingMN[, 1], orthoLoadingMN[, parCompVi[2] - 1])
338 colnames(pCompMN) <- colnames(tCompMN) <- c("h1", paste("o", parCompVi[2] - 1, sep = ""))
339 } else {
340 if(max(parCompVi) > sumDF[, "pre"])
341 stop("Selected component for plotting as ordinate exceeds the total number of predictive components of the model", call. = FALSE)
342 tCompMN <- scoreMN[, parCompVi, drop = FALSE]
343 pCompMN <- loadingMN[, parCompVi, drop = FALSE]
344 }
345
346 }
347
348 ## x-scores and prediction
349
350 colnames(tCompMN) <- paste0(rspModC, "_XSCOR-", colnames(tCompMN))
351 tCompDF <- as.data.frame(tCompMN)[rownames(samDF), , drop = FALSE]
352
353 if(modC != "PCA") {
354
355 if(!is.null(tesVl)) {
356 tCompFulMN <- matrix(NA,
357 nrow = nrow(samDF),
358 ncol = ncol(tCompMN),
359 dimnames = list(rownames(samDF), colnames(tCompMN)))
360 mode(tCompFulMN) <- "numeric"
361 tCompFulMN[rownames(tCompMN), ] <- tCompMN
362 tCompMN <- tCompFulMN
363
364 fitMCN <- fitted(ropLs)
365 fitFulMCN <- matrix(NA,
366 nrow = nrow(samDF),
367 ncol = 1,
368 dimnames = list(rownames(samDF), NULL))
369 mode(fitFulMCN) <- mode(fitMCN)
370 fitFulMCN[rownames(fitMCN), ] <- fitMCN
371 yPreMCN <- predict(ropLs, newdata = as.data.frame(xTesMN))
372 fitFulMCN[rownames(yPreMCN), ] <- yPreMCN
373 fitMCN <- fitFulMCN
374
375 } else
376 fitMCN <- fitted(ropLs)
377
378 colnames(fitMCN) <- paste0(rspModC,
379 "_predictions")
380 fitDF <- as.data.frame(fitMCN)[rownames(samDF), , drop = FALSE]
381
382 tCompDF <- cbind.data.frame(tCompDF, fitDF)
383 }
384
385 samDF <- cbind.data.frame(samDF, tCompDF)
386
387 ## x-loadings and VIP
388
389 colnames(pCompMN) <- paste0(rspModC, "_XLOAD-", colnames(pCompMN))
390 if(!is.null(vipVn)) {
391 pCompMN <- cbind(pCompMN, vipVn)
392 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC,
393 "_VIP",
394 ifelse(!is.null(orthoVipVn),
395 "_pred",
396 ""))
397 if(!is.null(orthoVipVn)) {
398 pCompMN <- cbind(pCompMN, orthoVipVn)
399 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC,
400 "_VIP_ortho")
401 }
402 }
403 if(!is.null(coeMN)) {
404 pCompMN <- cbind(pCompMN, coeMN)
405 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, "_COEFF")
406 }
407 pCompDF <- as.data.frame(pCompMN)[rownames(varDF), , drop = FALSE]
408 varDF <- cbind.data.frame(varDF, pCompDF)
409
410 ## sampleMetadata
411
412 samDF <- cbind.data.frame(sampleMetadata = rownames(samDF),
413 samDF)
414 write.table(samDF,
415 file = argVc["sampleMetadata_out"],
416 quote = FALSE,
417 row.names = FALSE,
418 sep = "\t")
419
420 ## variableMetadata
421
422 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
423 varDF)
424 write.table(varDF,
425 file = argVc["variableMetadata_out"],
426 quote = FALSE,
427 row.names = FALSE,
428 sep = "\t")
429
430 # Output ropLs
431 if ( ! is.null(argVc['ropls_out']))
432 save(ropLs, file = argVc['ropls_out'])
433
434 ## Closing
435 ##--------
436
437 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
438 as.character(Sys.time()), "\n", sep = "")
439
440 sink()
441
442 options(stringsAsFactors = strAsFacL)
443
444
445 #### End_of_tested_code <- function() {}
446
447
448 rm(list = ls())