3
|
1 #!/usr/bin/env Rscript
|
|
2
|
|
3 library(batch) ## necessary for parseCommandArgs function
|
|
4 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
|
|
5
|
|
6 source_local <- function(fname){
|
|
7 argv <- commandArgs(trailingOnly = FALSE)
|
|
8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
|
|
9 source(paste(base_dir, fname, sep="/"))
|
|
10 }
|
|
11
|
|
12 ## Import the different functions
|
|
13 source_local("batch_correction_all_loess_script.R")
|
|
14
|
|
15 argVc <- unlist(args)
|
|
16
|
|
17 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample'
|
|
18 ## alternative version developped by CEA
|
|
19 ## all variables are treated with loess
|
|
20 ## the reference observations for loess are either 'pool'
|
|
21 ## ('all_loess_pool') or 'sample' ('all_loess_sample')
|
|
22
|
|
23
|
|
24 ##------------------------------
|
|
25 ## Initializing
|
|
26 ##------------------------------
|
|
27
|
|
28 ## options
|
|
29 ##--------
|
|
30
|
|
31 strAsFacL <- options()$stringsAsFactors
|
|
32 options(stringsAsFactors = FALSE)
|
|
33
|
|
34 ## libraries
|
|
35 ##----------
|
|
36
|
|
37 suppressMessages(library(ropls))
|
|
38
|
|
39 if(packageVersion("ropls") < "1.4.0")
|
|
40 stop("Please use 'ropls' versions of 1.4.0 and above")
|
|
41
|
|
42 ## constants
|
|
43 ##----------
|
|
44
|
|
45 modNamC <- "Batch correction" ## module name
|
|
46
|
|
47 ## log file
|
|
48 ##---------
|
|
49
|
|
50 ## sink(argVc["information"]) ## not implemented
|
|
51
|
|
52 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
|
|
53 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
|
|
54
|
|
55 ## loading
|
|
56 ##--------
|
|
57
|
|
58 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"],
|
|
59 header = TRUE,
|
|
60 row.names = 1,
|
|
61 sep = "\t")))
|
|
62
|
|
63 samDF <- read.table(argVc["sampleMetadata"],
|
|
64 header = TRUE,
|
|
65 row.names = 1,
|
|
66 sep = "\t")
|
|
67
|
|
68 varDF <- read.table(argVc["variableMetadata"],
|
|
69 check.names = FALSE,
|
|
70 header = TRUE,
|
|
71 row.names = 1,
|
|
72 sep = "\t") ## not used; for compatibility only
|
|
73
|
|
74 refC <- tolower(gsub("all_loess_", "", argVc["method"]))
|
|
75
|
|
76 spnN <- as.numeric(argVc["span"])
|
|
77
|
|
78 ## checking
|
|
79 ##---------
|
|
80
|
|
81 stopifnot(refC %in% c("pool", "sample"))
|
|
82
|
|
83 if(refC == "pool" &&
|
|
84 !any("pool" %in% samDF[, "sampleType"]))
|
|
85 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead")
|
|
86
|
|
87 refMN <- rawMN[samDF[, "sampleType"] == refC, ]
|
|
88 refNasZerVl <- apply(refMN, 2,
|
|
89 function(refVn)
|
|
90 all(sapply(refVn,
|
|
91 function(refN) {is.na(refN) || refN == 0})))
|
|
92
|
|
93 if(sum(refNasZerVl)) {
|
|
94
|
|
95 refNasZerVi <- which(refNasZerVl)
|
|
96 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
|
|
97 print(refNasZerVi)
|
|
98 rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
|
|
99 varDF <- varDF[!refNasZerVl, , drop = FALSE]
|
|
100
|
|
101 }
|
|
102
|
|
103 ##------------------------------
|
|
104 ## Computation
|
|
105 ##------------------------------
|
|
106
|
|
107
|
|
108 ## ordering (batch and injection order)
|
|
109 ##-------------------------------------
|
|
110
|
|
111 samDF[, "ordIniVi"] <- 1:nrow(rawMN)
|
|
112 ordBatInjVi <- order(samDF[, "batch"], samDF[, "injectionOrder"])
|
|
113 rawMN <- rawMN[ordBatInjVi, ]
|
|
114 samDF <- samDF[ordBatInjVi, ]
|
|
115
|
|
116 ## signal drift and batch-effect correction
|
|
117 ##-----------------------------------------
|
|
118
|
|
119 nrmMN <- shiftBatchCorrectF(rawMN,
|
|
120 samDF,
|
|
121 refC,
|
|
122 spnN)
|
|
123
|
|
124 ## figure
|
|
125 ##-------
|
|
126
|
|
127 cat("\nPlotting\n")
|
|
128
|
|
129 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7)
|
|
130 plotBatchF(rawMN, samDF, spnN)
|
|
131 plotBatchF(nrmMN, samDF, spnN)
|
|
132 dev.off()
|
|
133
|
|
134 ## returning to initial order
|
|
135 ##---------------------------
|
|
136
|
|
137 ordIniVi <- order(samDF[, "ordIniVi"])
|
|
138 nrmMN <- nrmMN[ordIniVi, ]
|
|
139 samDF <- samDF[ordIniVi, ]
|
|
140 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE]
|
|
141
|
|
142
|
|
143 ##------------------------------
|
|
144 ## Ending
|
|
145 ##------------------------------
|
|
146
|
|
147
|
|
148 ## saving
|
|
149 ##-------
|
|
150
|
|
151 datMN <- nrmMN
|
|
152
|
|
153 datDF <- cbind.data.frame(dataMatrix = colnames(datMN),
|
|
154 as.data.frame(t(datMN)))
|
|
155 write.table(datDF,
|
|
156 file = argVc["dataMatrix_out"],
|
|
157 quote = FALSE,
|
|
158 row.names = FALSE,
|
|
159 sep = "\t")
|
|
160
|
|
161 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
|
|
162 varDF) ## not modified; for compatibility only
|
|
163 write.table(varDF,
|
|
164 file = argVc["variableMetadata_out"],
|
|
165 quote = FALSE,
|
|
166 row.names = FALSE,
|
|
167 sep = "\t")
|
|
168
|
|
169
|
|
170 res <- list(dataMatrix_raw = rawMN,
|
|
171 dataMatrix_normalized = nrmMN,
|
|
172 sampleMetadata = samDF)
|
|
173 save(res,
|
|
174 file = argVc["rdata_output"]) ## for compatibility
|
|
175
|
|
176 ## closing
|
|
177 ##--------
|
|
178
|
|
179 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
|
|
180 as.character(Sys.time()), "\n", sep = "")
|
|
181
|
|
182 ## sink()
|
|
183
|
|
184 options(stringsAsFactors = strAsFacL)
|
|
185
|
|
186
|
|
187 rm(argVc)
|
|
188
|