comparison BC/batch_correction_all_loess_wrapper.R @ 4:23314e1192d4 draft default tip

Uploaded
author melpetera
date Thu, 14 Jan 2021 09:56:58 +0000
parents
children
comparison
equal deleted inserted replaced
3:73892ef177e3 4:23314e1192d4
1 #!/usr/bin/env Rscript
2
3 library(batch) ## necessary for parseCommandArgs function
4
5 ##------------------------------
6 ## test help option
7 ##------------------------------
8
9 # Prog. constants
10 argv.help <- commandArgs(trailingOnly = FALSE)
11 script.path <- sub("--file=", "", argv.help[grep("--file=", argv.help)])
12 prog.name <- basename(script.path)
13
14 # Test Help
15 if (length(grep('-h', argv.help)) > 0) {
16 cat("Usage: Rscript ",
17 prog.name,
18 "{args} \n",
19 "parameters: \n",
20 "\tdataMatrix {file}: set the input data matrix file (mandatory) \n",
21 "\tsampleMetadata {file}: set the input sample metadata file (mandatory) \n",
22 "\tvariableMetadata {file}: set the input variable metadata file (mandatory) \n",
23 "\tmethod {opt}: set the method; can set to \"all_loess_pool\" or \"all_loess_sample\" (mandatory) \n",
24 "\tspan {condition}: set the span condition; (mandatory) \n",
25 "\tdataMatrix_out {file}: set the output data matrix file (mandatory) \n",
26 "\tvariableMetadata_out {file}: set the output variable metadata file (mandatory) \n",
27 "\tgraph_output {file}: set the output graph file (mandatory) \n",
28 "\trdata_output {file}: set the output Rdata file (mandatory) \n",
29 "\tbatch_col_name {val}: the column name for batch. Default value is \"batch\".\n",
30 "\tinjection_order_col_name {val}: the column name for the injection order. Default value is \"injectionOrder\".\n",
31 "\tsample_type_col_name {val}: the column name for the sample types. Default value is \"sampleType\".\n",
32 "\tsample_type_tags {val}: the tags used inside the sample type column, defined as key/value pairs separated by commas (example: blank=blank,pool=pool,sample=sample).\n",
33 "\n")
34 quit(status = 0)
35 }
36
37 ##------------------------------
38 ## init. params
39 ##------------------------------
40
41 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
42
43 # Set default col names
44 if ( ! 'batch_col_name' %in% names(args))
45 args[['batch_col_name']] <- 'batch'
46 if ( ! 'injection_order_col_name' %in% names(args))
47 args[['injection_order_col_name']] <- 'injectionOrder'
48 if ( ! 'sample_type_col_name' %in% names(args))
49 args[['sample_type_col_name']] <- 'sampleType'
50 if ( ! 'sample_type_tags' %in% names(args))
51 args[['sample_type_tags']] <- 'blank=blank,pool=pool,sample=sample'
52
53 # Parse sample type tags
54 sample.type.tags <- list()
55 for (kv in strsplit(strsplit(args$sample_type_tags, ',')[[1]], '='))
56 sample.type.tags[[kv[[1]]]] <- kv[[2]]
57 if ( ! all(c('pool', 'blank', 'sample') %in% names(sample.type.tags)))
58 stop("All tags pool, blank and sample must be defined in option sampleTypeTags.")
59 args$sample_type_tags <- sample.type.tags
60
61 ##------------------------------
62 ## init. functions
63 ##------------------------------
64
65 source_local <- function(fname){
66 argv <- commandArgs(trailingOnly = FALSE)
67 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
68 source(paste(base_dir, fname, sep="/"))
69 }
70
71 ## Import the different functions
72 source_local("batch_correction_all_loess_script.R")
73
74 argVc <- unlist(args)
75
76 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample'
77 ## alternative version developped by CEA
78 ## all variables are treated with loess
79 ## the reference observations for loess are either 'pool'
80 ## ('all_loess_pool') or 'sample' ('all_loess_sample')
81
82
83 ##------------------------------
84 ## Initializing
85 ##------------------------------
86
87 ## options
88 ##--------
89
90 strAsFacL <- options()$stringsAsFactors
91 options(stringsAsFactors = FALSE)
92
93 ## libraries
94 ##----------
95
96 suppressMessages(library(ropls))
97
98 if(packageVersion("ropls") < "1.4.0")
99 stop("Please use 'ropls' versions of 1.4.0 and above")
100
101 ## constants
102 ##----------
103
104 modNamC <- "Batch correction" ## module name
105
106 ## log file
107 ##---------
108
109 ## sink(argVc["information"]) ## not implemented
110
111 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
112 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
113
114 ## loading
115 ##--------
116
117 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"],
118 header = TRUE,
119 comment.char = '',
120 row.names = 1,
121 sep = "\t")))
122
123 samDF <- read.table(argVc["sampleMetadata"],
124 header = TRUE,
125 comment.char = '',
126 row.names = 1,
127 sep = "\t")
128
129 varDF <- read.table(argVc["variableMetadata"],
130 check.names = FALSE,
131 header = TRUE,
132 comment.char = '',
133 row.names = 1,
134 sep = "\t") ## not used; for compatibility only
135
136 refC <- tolower(gsub("all_loess_", "", argVc["method"]))
137
138 spnN <- as.numeric(argVc["span"])
139
140 ## checking
141 ##---------
142
143 stopifnot(refC %in% c('pool', 'sample'))
144 refC <- args$sample_type_tags[[refC]]
145
146 if(refC == args$sample_type_tags$pool &&
147 !any(args$sample_type_tags$pool %in% samDF[, args$sample_type_col_name]))
148 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead")
149
150 refMN <- rawMN[samDF[, args$sample_type_col_name] == refC, ]
151 refNasZerVl <- apply(refMN, 2,
152 function(refVn)
153 all(sapply(refVn,
154 function(refN) {is.na(refN) || refN == 0})))
155
156 if(sum(refNasZerVl)) {
157
158 refNasZerVi <- which(refNasZerVl)
159 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
160 rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
161 varDF <- varDF[!refNasZerVl, , drop = FALSE]
162
163 }
164
165 ##------------------------------
166 ## Computation
167 ##------------------------------
168
169
170 ## ordering (batch and injection order)
171 ##-------------------------------------
172
173 samDF[, "ordIniVi"] <- 1:nrow(rawMN)
174 ordBatInjVi <- order(samDF[, args$batch_col_name], samDF[, args$injection_order_col_name])
175 rawMN <- rawMN[ordBatInjVi, ]
176 samDF <- samDF[ordBatInjVi, ]
177
178 ## signal drift and batch-effect correction
179 ##-----------------------------------------
180
181 nrmMN <- shiftBatchCorrectF(rawMN,
182 samDF,
183 refC,
184 spnN)
185
186 ## figure
187 ##-------
188
189 cat("\nPlotting\n")
190
191 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7)
192 plotBatchF(rawMN, samDF, spnN)
193 plotBatchF(nrmMN, samDF, spnN)
194 dev.off()
195
196 ## returning to initial order
197 ##---------------------------
198
199 ordIniVi <- order(samDF[, "ordIniVi"])
200 nrmMN <- nrmMN[ordIniVi, ]
201 samDF <- samDF[ordIniVi, ]
202 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE]
203
204
205 ##------------------------------
206 ## Ending
207 ##------------------------------
208
209
210 ## saving
211 ##-------
212
213 datMN <- nrmMN
214
215 datDF <- cbind.data.frame(dataMatrix = colnames(datMN),
216 as.data.frame(t(datMN)))
217 write.table(datDF,
218 file = argVc["dataMatrix_out"],
219 quote = FALSE,
220 row.names = FALSE,
221 sep = "\t")
222
223 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
224 varDF) ## not modified; for compatibility only
225 write.table(varDF,
226 file = argVc["variableMetadata_out"],
227 quote = FALSE,
228 row.names = FALSE,
229 sep = "\t")
230
231
232 res <- list(dataMatrix_raw = rawMN,
233 dataMatrix_normalized = nrmMN,
234 sampleMetadata = samDF)
235 save(res,
236 file = argVc["rdata_output"]) ## for compatibility
237
238 ## closing
239 ##--------
240
241 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
242 as.character(Sys.time()), "\n", sep = "")
243
244 ## sink()
245
246 options(stringsAsFactors = strAsFacL)
247
248
249 rm(argVc)
250