4
|
1 #!/usr/bin/env Rscript
|
|
2
|
|
3 library(batch) ## necessary for parseCommandArgs function
|
|
4
|
|
5 ##------------------------------
|
|
6 ## test help option
|
|
7 ##------------------------------
|
|
8
|
|
9 # Prog. constants
|
|
10 argv.help <- commandArgs(trailingOnly = FALSE)
|
|
11 script.path <- sub("--file=", "", argv.help[grep("--file=", argv.help)])
|
|
12 prog.name <- basename(script.path)
|
|
13
|
|
14 # Test Help
|
|
15 if (length(grep('-h', argv.help)) > 0) {
|
|
16 cat("Usage: Rscript ",
|
|
17 prog.name,
|
|
18 "{args} \n",
|
|
19 "parameters: \n",
|
|
20 "\tdataMatrix {file}: set the input data matrix file (mandatory) \n",
|
|
21 "\tsampleMetadata {file}: set the input sample metadata file (mandatory) \n",
|
|
22 "\tvariableMetadata {file}: set the input variable metadata file (mandatory) \n",
|
|
23 "\tmethod {opt}: set the method; can set to \"all_loess_pool\" or \"all_loess_sample\" (mandatory) \n",
|
|
24 "\tspan {condition}: set the span condition; (mandatory) \n",
|
|
25 "\tdataMatrix_out {file}: set the output data matrix file (mandatory) \n",
|
|
26 "\tvariableMetadata_out {file}: set the output variable metadata file (mandatory) \n",
|
|
27 "\tgraph_output {file}: set the output graph file (mandatory) \n",
|
|
28 "\trdata_output {file}: set the output Rdata file (mandatory) \n",
|
|
29 "\tbatch_col_name {val}: the column name for batch. Default value is \"batch\".\n",
|
|
30 "\tinjection_order_col_name {val}: the column name for the injection order. Default value is \"injectionOrder\".\n",
|
|
31 "\tsample_type_col_name {val}: the column name for the sample types. Default value is \"sampleType\".\n",
|
|
32 "\tsample_type_tags {val}: the tags used inside the sample type column, defined as key/value pairs separated by commas (example: blank=blank,pool=pool,sample=sample).\n",
|
|
33 "\n")
|
|
34 quit(status = 0)
|
|
35 }
|
|
36
|
|
37 ##------------------------------
|
|
38 ## init. params
|
|
39 ##------------------------------
|
|
40
|
|
41 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
|
|
42
|
|
43 # Set default col names
|
|
44 if ( ! 'batch_col_name' %in% names(args))
|
|
45 args[['batch_col_name']] <- 'batch'
|
|
46 if ( ! 'injection_order_col_name' %in% names(args))
|
|
47 args[['injection_order_col_name']] <- 'injectionOrder'
|
|
48 if ( ! 'sample_type_col_name' %in% names(args))
|
|
49 args[['sample_type_col_name']] <- 'sampleType'
|
|
50 if ( ! 'sample_type_tags' %in% names(args))
|
|
51 args[['sample_type_tags']] <- 'blank=blank,pool=pool,sample=sample'
|
|
52
|
|
53 # Parse sample type tags
|
|
54 sample.type.tags <- list()
|
|
55 for (kv in strsplit(strsplit(args$sample_type_tags, ',')[[1]], '='))
|
|
56 sample.type.tags[[kv[[1]]]] <- kv[[2]]
|
|
57 if ( ! all(c('pool', 'blank', 'sample') %in% names(sample.type.tags)))
|
|
58 stop("All tags pool, blank and sample must be defined in option sampleTypeTags.")
|
|
59 args$sample_type_tags <- sample.type.tags
|
|
60
|
|
61 ##------------------------------
|
|
62 ## init. functions
|
|
63 ##------------------------------
|
|
64
|
|
65 source_local <- function(fname){
|
|
66 argv <- commandArgs(trailingOnly = FALSE)
|
|
67 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
|
|
68 source(paste(base_dir, fname, sep="/"))
|
|
69 }
|
|
70
|
|
71 ## Import the different functions
|
|
72 source_local("batch_correction_all_loess_script.R")
|
|
73
|
|
74 argVc <- unlist(args)
|
|
75
|
|
76 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample'
|
|
77 ## alternative version developped by CEA
|
|
78 ## all variables are treated with loess
|
|
79 ## the reference observations for loess are either 'pool'
|
|
80 ## ('all_loess_pool') or 'sample' ('all_loess_sample')
|
|
81
|
|
82
|
|
83 ##------------------------------
|
|
84 ## Initializing
|
|
85 ##------------------------------
|
|
86
|
|
87 ## options
|
|
88 ##--------
|
|
89
|
|
90 strAsFacL <- options()$stringsAsFactors
|
|
91 options(stringsAsFactors = FALSE)
|
|
92
|
|
93 ## libraries
|
|
94 ##----------
|
|
95
|
|
96 suppressMessages(library(ropls))
|
|
97
|
|
98 if(packageVersion("ropls") < "1.4.0")
|
|
99 stop("Please use 'ropls' versions of 1.4.0 and above")
|
|
100
|
|
101 ## constants
|
|
102 ##----------
|
|
103
|
|
104 modNamC <- "Batch correction" ## module name
|
|
105
|
|
106 ## log file
|
|
107 ##---------
|
|
108
|
|
109 ## sink(argVc["information"]) ## not implemented
|
|
110
|
|
111 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
|
|
112 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
|
|
113
|
|
114 ## loading
|
|
115 ##--------
|
|
116
|
|
117 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"],
|
|
118 header = TRUE,
|
|
119 comment.char = '',
|
|
120 row.names = 1,
|
|
121 sep = "\t")))
|
|
122
|
|
123 samDF <- read.table(argVc["sampleMetadata"],
|
|
124 header = TRUE,
|
|
125 comment.char = '',
|
|
126 row.names = 1,
|
|
127 sep = "\t")
|
|
128
|
|
129 varDF <- read.table(argVc["variableMetadata"],
|
|
130 check.names = FALSE,
|
|
131 header = TRUE,
|
|
132 comment.char = '',
|
|
133 row.names = 1,
|
|
134 sep = "\t") ## not used; for compatibility only
|
|
135
|
|
136 refC <- tolower(gsub("all_loess_", "", argVc["method"]))
|
|
137
|
|
138 spnN <- as.numeric(argVc["span"])
|
|
139
|
|
140 ## checking
|
|
141 ##---------
|
|
142
|
|
143 stopifnot(refC %in% c('pool', 'sample'))
|
|
144 refC <- args$sample_type_tags[[refC]]
|
|
145
|
|
146 if(refC == args$sample_type_tags$pool &&
|
|
147 !any(args$sample_type_tags$pool %in% samDF[, args$sample_type_col_name]))
|
|
148 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead")
|
|
149
|
|
150 refMN <- rawMN[samDF[, args$sample_type_col_name] == refC, ]
|
|
151 refNasZerVl <- apply(refMN, 2,
|
|
152 function(refVn)
|
|
153 all(sapply(refVn,
|
|
154 function(refN) {is.na(refN) || refN == 0})))
|
|
155
|
|
156 if(sum(refNasZerVl)) {
|
|
157
|
|
158 refNasZerVi <- which(refNasZerVl)
|
|
159 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
|
|
160 rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
|
|
161 varDF <- varDF[!refNasZerVl, , drop = FALSE]
|
|
162
|
|
163 }
|
|
164
|
|
165 ##------------------------------
|
|
166 ## Computation
|
|
167 ##------------------------------
|
|
168
|
|
169
|
|
170 ## ordering (batch and injection order)
|
|
171 ##-------------------------------------
|
|
172
|
|
173 samDF[, "ordIniVi"] <- 1:nrow(rawMN)
|
|
174 ordBatInjVi <- order(samDF[, args$batch_col_name], samDF[, args$injection_order_col_name])
|
|
175 rawMN <- rawMN[ordBatInjVi, ]
|
|
176 samDF <- samDF[ordBatInjVi, ]
|
|
177
|
|
178 ## signal drift and batch-effect correction
|
|
179 ##-----------------------------------------
|
|
180
|
|
181 nrmMN <- shiftBatchCorrectF(rawMN,
|
|
182 samDF,
|
|
183 refC,
|
|
184 spnN)
|
|
185
|
|
186 ## figure
|
|
187 ##-------
|
|
188
|
|
189 cat("\nPlotting\n")
|
|
190
|
|
191 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7)
|
|
192 plotBatchF(rawMN, samDF, spnN)
|
|
193 plotBatchF(nrmMN, samDF, spnN)
|
|
194 dev.off()
|
|
195
|
|
196 ## returning to initial order
|
|
197 ##---------------------------
|
|
198
|
|
199 ordIniVi <- order(samDF[, "ordIniVi"])
|
|
200 nrmMN <- nrmMN[ordIniVi, ]
|
|
201 samDF <- samDF[ordIniVi, ]
|
|
202 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE]
|
|
203
|
|
204
|
|
205 ##------------------------------
|
|
206 ## Ending
|
|
207 ##------------------------------
|
|
208
|
|
209
|
|
210 ## saving
|
|
211 ##-------
|
|
212
|
|
213 datMN <- nrmMN
|
|
214
|
|
215 datDF <- cbind.data.frame(dataMatrix = colnames(datMN),
|
|
216 as.data.frame(t(datMN)))
|
|
217 write.table(datDF,
|
|
218 file = argVc["dataMatrix_out"],
|
|
219 quote = FALSE,
|
|
220 row.names = FALSE,
|
|
221 sep = "\t")
|
|
222
|
|
223 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
|
|
224 varDF) ## not modified; for compatibility only
|
|
225 write.table(varDF,
|
|
226 file = argVc["variableMetadata_out"],
|
|
227 quote = FALSE,
|
|
228 row.names = FALSE,
|
|
229 sep = "\t")
|
|
230
|
|
231
|
|
232 res <- list(dataMatrix_raw = rawMN,
|
|
233 dataMatrix_normalized = nrmMN,
|
|
234 sampleMetadata = samDF)
|
|
235 save(res,
|
|
236 file = argVc["rdata_output"]) ## for compatibility
|
|
237
|
|
238 ## closing
|
|
239 ##--------
|
|
240
|
|
241 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
|
|
242 as.character(Sys.time()), "\n", sep = "")
|
|
243
|
|
244 ## sink()
|
|
245
|
|
246 options(stringsAsFactors = strAsFacL)
|
|
247
|
|
248
|
|
249 rm(argVc)
|
|
250
|