comparison batch_correction_all_loess_wrapper.R @ 0:71d83d8920bf draft

planemo upload for repository https://github.com/workflow4metabolomics/batchcorrection.git commit de79117e6ab856420b87efca3675c7963688f975
author melpetera
date Tue, 09 Aug 2016 06:47:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:71d83d8920bf
1 #!/usr/bin/env Rscript
2
3 library(batch) ## necessary for parseCommandArgs function
4 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
5
6 source_local <- function(fname){
7 argv <- commandArgs(trailingOnly = FALSE)
8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
9 source(paste(base_dir, fname, sep="/"))
10 }
11
12 ## Import the different functions
13 source_local("batch_correction_all_loess_script.R")
14
15 argVc <- unlist(args)
16
17 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample'
18 ## alternative version developped by CEA
19 ## all variables are treated with loess
20 ## the reference observations for loess are either 'pool'
21 ## ('all_loess_pool') or 'sample' ('all_loess_sample')
22
23
24 ##------------------------------
25 ## Initializing
26 ##------------------------------
27
28 ## options
29 ##--------
30
31 strAsFacL <- options()$stringsAsFactors
32 options(stringsAsFactors = FALSE)
33
34 ## libraries
35 ##----------
36
37 suppressMessages(library(ropls))
38
39 if(packageVersion("ropls") < "1.4.0")
40 stop("Please use 'ropls' versions of 1.4.0 and above")
41
42 ## constants
43 ##----------
44
45 modNamC <- "Batch correction" ## module name
46
47 ## log file
48 ##---------
49
50 ## sink(argVc["information"]) ## not implemented
51
52 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
53 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
54
55 ## loading
56 ##--------
57
58 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"],
59 header = TRUE,
60 row.names = 1,
61 sep = "\t")))
62
63 samDF <- read.table(argVc["sampleMetadata"],
64 header = TRUE,
65 row.names = 1,
66 sep = "\t")
67
68 varDF <- read.table(argVc["variableMetadata"],
69 check.names = FALSE,
70 header = TRUE,
71 row.names = 1,
72 sep = "\t") ## not used; for compatibility only
73
74 refC <- tolower(gsub("all_loess_", "", argVc["method"]))
75
76 spnN <- as.numeric(argVc["span"])
77
78
79 ## checking
80 ##---------
81
82 stopifnot(refC %in% c("pool", "sample"))
83
84 if(refC == "pool" &&
85 !any("pool" %in% samDF[, "sampleType"]))
86 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead")
87
88 refMN <- rawMN[samDF[, "sampleType"] == refC, ]
89 refNasZerVl <- apply(refMN, 2,
90 function(refVn)
91 all(sapply(refVn,
92 function(refN) {is.na(refN) || refN == 0})))
93
94 if(sum(refNasZerVl)) {
95
96 refNasZerVi <- which(refNasZerVl)
97 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
98 print(refNasZerVi)
99 rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
100 varDF <- varDF[!refNasZerVl, , drop = FALSE]
101
102 }
103
104 ##------------------------------
105 ## Computation
106 ##------------------------------
107
108 ## ordering (batch and injection order)
109 ##-------------------------------------
110
111 samDF[, "ordIniVi"] <- 1:nrow(rawMN)
112 ordBatInjVi <- order(samDF[, "batch"], samDF[, "injectionOrder"])
113 rawMN <- rawMN[ordBatInjVi, ]
114 samDF <- samDF[ordBatInjVi, ]
115
116 ## signal drift and batch-effect correction
117 ##-----------------------------------------
118
119 nrmMN <- shiftBatchCorrectF(rawMN,
120 samDF,
121 refC,
122 spnN)
123
124 ## figure
125 ##-------
126
127 cat("\nPlotting\n")
128
129 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7)
130 plotBatchF(rawMN, samDF, spnN)
131 plotBatchF(nrmMN, samDF, spnN)
132 dev.off()
133
134 ## returning to initial order
135 ##---------------------------
136
137 ordIniVi <- order(samDF[, "ordIniVi"])
138 nrmMN <- nrmMN[ordIniVi, ]
139 samDF <- samDF[ordIniVi, ]
140 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE]
141
142
143 ##------------------------------
144 ## Ending
145 ##------------------------------
146
147
148 ## saving
149 ##-------
150
151 datMN <- nrmMN
152
153 datDF <- cbind.data.frame(dataMatrix = colnames(datMN),
154 as.data.frame(t(datMN)))
155 write.table(datDF,
156 file = argVc["dataMatrix_out"],
157 quote = FALSE,
158 row.names = FALSE,
159 sep = "\t")
160
161 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
162 varDF) ## not modified; for compatibility only
163 write.table(varDF,
164 file = argVc["variableMetadata_out"],
165 quote = FALSE,
166 row.names = FALSE,
167 sep = "\t")
168
169 simDF <- cbind.data.frame(samDF, as.data.frame(datMN)) ## for compatibility
170 simDF <- cbind.data.frame(names = rownames(simDF),
171 simDF)
172 write.table(simDF,
173 file = argVc["variable_for_simca"],
174 quote = FALSE,
175 row.names = FALSE,
176 sep = "\t")
177
178 res <- list(dataMatrix_raw = rawMN,
179 dataMatrix_normalized = nrmMN,
180 sampleMetadata = samDF)
181 save(res,
182 file = argVc["rdata_output"]) ## for compatibility
183
184 ## closing
185 ##--------
186
187 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
188 as.character(Sys.time()), "\n", sep = "")
189
190 ## sink()
191
192 options(stringsAsFactors = strAsFacL)
193
194
195 rm(argVc)
196