comparison biosigner_wrapper.R @ 0:48e4be935243 draft

planemo upload for repository https://github.com/workflow4metabolomics/biosigner.git commit b8af709c9fd6ed283fc4e4249dcf692556927b2d
author ethevenot
date Wed, 27 Jul 2016 11:40:20 -0400
parents
children b2414be87d4b
comparison
equal deleted inserted replaced
-1:000000000000 0:48e4be935243
1 #!/usr/bin/env Rscript
2
3 library(batch) ## parseCommandArgs
4
5 argVc <- unlist(parseCommandArgs(evaluate=FALSE))
6
7
8 #### Start_of_tested_code <- function() {}
9
10
11 ##------------------------------
12 ## Initializing
13 ##------------------------------
14
15 ## options
16 ##--------
17
18 strAsFacL <- options()$stringsAsFactors
19 options(stringsAsFactors = FALSE)
20
21 ## libraries
22 ##----------
23
24 suppressMessages(library(biosigner))
25
26 if(packageVersion("biosigner") < "1.0.0")
27 stop("Please use 'biosigner' versions of 1.0.0 and above")
28 if(packageVersion("ropls") < "1.4.0")
29 stop("Please use 'ropls' versions of 1.4.0 and above")
30
31 ## constants
32 ##----------
33
34 modNamC <- "Biosigner" ## module name
35
36 topEnvC <- environment()
37 flgC <- "\n"
38
39 ## functions
40 ##----------
41
42 flgF <- function(tesC,
43 envC = topEnvC,
44 txtC = NA) { ## management of warning and error messages
45
46 tesL <- eval(parse(text = tesC), envir = envC)
47
48 if(!tesL) {
49
50 sink(NULL)
51 stpTxtC <- ifelse(is.na(txtC),
52 paste0(tesC, " is FALSE"),
53 txtC)
54
55 stop(stpTxtC,
56 call. = FALSE)
57
58 }
59
60 } ## flgF
61
62
63 ## log file
64 ##---------
65
66 sink(argVc["information"])
67
68 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
69 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
70
71
72 ## arguments
73 ##----------
74
75 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
76 check.names = FALSE,
77 header = TRUE,
78 row.names = 1,
79 sep = "\t")))
80
81 samDF <- read.table(argVc["sampleMetadata_in"],
82 check.names = FALSE,
83 header = TRUE,
84 row.names = 1,
85 sep = "\t")
86 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
87
88 varDF <- read.table(argVc["variableMetadata_in"],
89 check.names = FALSE,
90 header = TRUE,
91 row.names = 1,
92 sep = "\t")
93 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
94
95 flgF("argVc['respC'] %in% colnames(samDF)",
96 txtC = paste0("Class argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
97 respVc <- samDF[, argVc["respC"]]
98 flgF("mode(respVc) == 'character'",
99 txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only characters"))
100 respFc <- factor(respVc)
101 flgF("length(levels(respFc)) == 2",
102 txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only 2 types of characters (e.g., 'case' and 'control')"))
103 tierMaxC <- ifelse("tierC" %in% names(argVc), argVc["tierC"], "S")
104 pvalN <- ifelse("pvalN" %in% names(argVc), as.numeric(argVc["pvalN"]), 0.05)
105
106
107 ##------------------------------
108 ## Computation and plot
109 ##------------------------------
110
111
112 sink()
113
114 optWrnN <- options()$warn
115 options(warn = -1)
116
117 if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
118 set.seed(as.integer(argVc["seedI"]))
119
120 bsnLs <- biosign(x = xMN,
121 y = respFc,
122 methodVc = ifelse("methodC" %in% names(argVc), argVc["methodC"], "all"),
123 bootI = ifelse("bootI" %in% names(argVc), as.numeric(argVc["bootI"]), 50),
124 pvalN = pvalN,
125 printL = FALSE,
126 plotL = FALSE,
127 .sinkC = argVc["information"])
128
129 if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
130 set.seed(NULL)
131
132 tierMC <- bsnLs@tierMC
133
134 if(!is.null(tierMC)) {
135 plot(bsnLs,
136 tierMaxC = tierMaxC,
137 file.pdfC = argVc["figure_tier"],
138 .sinkC = argVc["information"])
139 plot(bsnLs,
140 tierMaxC = tierMaxC,
141 typeC = "boxplot",
142 file.pdfC = argVc["figure_boxplot"],
143 .sinkC = argVc["information"])
144 } else {
145 pdf(argVc["figure_tier"])
146 plot(1, bty = "n", type = "n",
147 xaxt = "n", yaxt = "n", xlab = "", ylab = "")
148 text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
149 labels = "No significant variable to display")
150 dev.off()
151 pdf(argVc["figure_boxplot"])
152 plot(1, bty = "n", type = "n",
153 xaxt = "n", yaxt = "n", xlab = "", ylab = "")
154 text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
155 labels = "No significant variable to display")
156 dev.off()
157 }
158
159
160 options(warn = optWrnN)
161
162
163 ##------------------------------
164 ## Print
165 ##------------------------------
166
167 sink(argVc["information"], append = TRUE)
168
169 tierFullVc <- c("S", LETTERS[1:5])
170 tierVc <- tierFullVc[1:which(tierFullVc == tierMaxC)]
171
172 if(sum(tierMC %in% tierVc)) {
173 cat("\nSignificant features from '", paste(tierVc, collapse = "', '"), "' tiers:\n", sep = "")
174 print(tierMC[apply(tierMC, 1, function(rowVc) sum(rowVc %in% tierVc) > 0), ,
175 drop = FALSE])
176 cat("\nAccuracy:\n")
177 print(round(getAccuracyMN(bsnLs), 3))
178 } else
179 cat("\nNo significant variable found for any classifier\n")
180
181
182 ##------------------------------
183 ## Ending
184 ##------------------------------
185
186 ## Saving
187 ##-------
188
189 if(!is.null(tierMC)) {
190 tierDF <- data.frame(tier = sapply(rownames(varDF),
191 function(varC) {
192 varTirVc <- tierMC[varC, ]
193 varTirVc <- names(varTirVc)[varTirVc %in% tierVc]
194 paste(varTirVc, collapse = "|")
195 }),
196 stringsAsFactors = FALSE)
197 colnames(tierDF) <- paste(argVc["respC"],
198 colnames(tierDF),
199 paste(tierVc, collapse = ""),
200 sep = "_")
201 varDF <- cbind.data.frame(varDF, tierDF)
202 }
203
204 ## variableMetadata
205
206 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
207 varDF)
208 write.table(varDF,
209 file = argVc["variableMetadata_out"],
210 quote = FALSE,
211 row.names = FALSE,
212 sep = "\t")
213
214
215 ## Closing
216 ##--------
217
218 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
219 as.character(Sys.time()), "\n", sep = "")
220
221 sink()
222
223 options(stringsAsFactors = strAsFacL)
224
225
226 #### End_of_tested_code <- function() {}
227
228
229 rm(list = ls())
230