Mercurial > repos > melpetera > batchcorrection
comparison BC/batch_correction_all_loess_wrapper.R @ 4:23314e1192d4 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 14 Jan 2021 09:56:58 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:73892ef177e3 | 4:23314e1192d4 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 library(batch) ## necessary for parseCommandArgs function | |
4 | |
5 ##------------------------------ | |
6 ## test help option | |
7 ##------------------------------ | |
8 | |
9 # Prog. constants | |
10 argv.help <- commandArgs(trailingOnly = FALSE) | |
11 script.path <- sub("--file=", "", argv.help[grep("--file=", argv.help)]) | |
12 prog.name <- basename(script.path) | |
13 | |
14 # Test Help | |
15 if (length(grep('-h', argv.help)) > 0) { | |
16 cat("Usage: Rscript ", | |
17 prog.name, | |
18 "{args} \n", | |
19 "parameters: \n", | |
20 "\tdataMatrix {file}: set the input data matrix file (mandatory) \n", | |
21 "\tsampleMetadata {file}: set the input sample metadata file (mandatory) \n", | |
22 "\tvariableMetadata {file}: set the input variable metadata file (mandatory) \n", | |
23 "\tmethod {opt}: set the method; can set to \"all_loess_pool\" or \"all_loess_sample\" (mandatory) \n", | |
24 "\tspan {condition}: set the span condition; (mandatory) \n", | |
25 "\tdataMatrix_out {file}: set the output data matrix file (mandatory) \n", | |
26 "\tvariableMetadata_out {file}: set the output variable metadata file (mandatory) \n", | |
27 "\tgraph_output {file}: set the output graph file (mandatory) \n", | |
28 "\trdata_output {file}: set the output Rdata file (mandatory) \n", | |
29 "\tbatch_col_name {val}: the column name for batch. Default value is \"batch\".\n", | |
30 "\tinjection_order_col_name {val}: the column name for the injection order. Default value is \"injectionOrder\".\n", | |
31 "\tsample_type_col_name {val}: the column name for the sample types. Default value is \"sampleType\".\n", | |
32 "\tsample_type_tags {val}: the tags used inside the sample type column, defined as key/value pairs separated by commas (example: blank=blank,pool=pool,sample=sample).\n", | |
33 "\n") | |
34 quit(status = 0) | |
35 } | |
36 | |
37 ##------------------------------ | |
38 ## init. params | |
39 ##------------------------------ | |
40 | |
41 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects | |
42 | |
43 # Set default col names | |
44 if ( ! 'batch_col_name' %in% names(args)) | |
45 args[['batch_col_name']] <- 'batch' | |
46 if ( ! 'injection_order_col_name' %in% names(args)) | |
47 args[['injection_order_col_name']] <- 'injectionOrder' | |
48 if ( ! 'sample_type_col_name' %in% names(args)) | |
49 args[['sample_type_col_name']] <- 'sampleType' | |
50 if ( ! 'sample_type_tags' %in% names(args)) | |
51 args[['sample_type_tags']] <- 'blank=blank,pool=pool,sample=sample' | |
52 | |
53 # Parse sample type tags | |
54 sample.type.tags <- list() | |
55 for (kv in strsplit(strsplit(args$sample_type_tags, ',')[[1]], '=')) | |
56 sample.type.tags[[kv[[1]]]] <- kv[[2]] | |
57 if ( ! all(c('pool', 'blank', 'sample') %in% names(sample.type.tags))) | |
58 stop("All tags pool, blank and sample must be defined in option sampleTypeTags.") | |
59 args$sample_type_tags <- sample.type.tags | |
60 | |
61 ##------------------------------ | |
62 ## init. functions | |
63 ##------------------------------ | |
64 | |
65 source_local <- function(fname){ | |
66 argv <- commandArgs(trailingOnly = FALSE) | |
67 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
68 source(paste(base_dir, fname, sep="/")) | |
69 } | |
70 | |
71 ## Import the different functions | |
72 source_local("batch_correction_all_loess_script.R") | |
73 | |
74 argVc <- unlist(args) | |
75 | |
76 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample' | |
77 ## alternative version developped by CEA | |
78 ## all variables are treated with loess | |
79 ## the reference observations for loess are either 'pool' | |
80 ## ('all_loess_pool') or 'sample' ('all_loess_sample') | |
81 | |
82 | |
83 ##------------------------------ | |
84 ## Initializing | |
85 ##------------------------------ | |
86 | |
87 ## options | |
88 ##-------- | |
89 | |
90 strAsFacL <- options()$stringsAsFactors | |
91 options(stringsAsFactors = FALSE) | |
92 | |
93 ## libraries | |
94 ##---------- | |
95 | |
96 suppressMessages(library(ropls)) | |
97 | |
98 if(packageVersion("ropls") < "1.4.0") | |
99 stop("Please use 'ropls' versions of 1.4.0 and above") | |
100 | |
101 ## constants | |
102 ##---------- | |
103 | |
104 modNamC <- "Batch correction" ## module name | |
105 | |
106 ## log file | |
107 ##--------- | |
108 | |
109 ## sink(argVc["information"]) ## not implemented | |
110 | |
111 cat("\nStart of the '", modNamC, "' Galaxy module call: ", | |
112 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") | |
113 | |
114 ## loading | |
115 ##-------- | |
116 | |
117 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"], | |
118 header = TRUE, | |
119 comment.char = '', | |
120 row.names = 1, | |
121 sep = "\t"))) | |
122 | |
123 samDF <- read.table(argVc["sampleMetadata"], | |
124 header = TRUE, | |
125 comment.char = '', | |
126 row.names = 1, | |
127 sep = "\t") | |
128 | |
129 varDF <- read.table(argVc["variableMetadata"], | |
130 check.names = FALSE, | |
131 header = TRUE, | |
132 comment.char = '', | |
133 row.names = 1, | |
134 sep = "\t") ## not used; for compatibility only | |
135 | |
136 refC <- tolower(gsub("all_loess_", "", argVc["method"])) | |
137 | |
138 spnN <- as.numeric(argVc["span"]) | |
139 | |
140 ## checking | |
141 ##--------- | |
142 | |
143 stopifnot(refC %in% c('pool', 'sample')) | |
144 refC <- args$sample_type_tags[[refC]] | |
145 | |
146 if(refC == args$sample_type_tags$pool && | |
147 !any(args$sample_type_tags$pool %in% samDF[, args$sample_type_col_name])) | |
148 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead") | |
149 | |
150 refMN <- rawMN[samDF[, args$sample_type_col_name] == refC, ] | |
151 refNasZerVl <- apply(refMN, 2, | |
152 function(refVn) | |
153 all(sapply(refVn, | |
154 function(refN) {is.na(refN) || refN == 0}))) | |
155 | |
156 if(sum(refNasZerVl)) { | |
157 | |
158 refNasZerVi <- which(refNasZerVl) | |
159 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "") | |
160 rawMN <- rawMN[, !refNasZerVl, drop = FALSE] | |
161 varDF <- varDF[!refNasZerVl, , drop = FALSE] | |
162 | |
163 } | |
164 | |
165 ##------------------------------ | |
166 ## Computation | |
167 ##------------------------------ | |
168 | |
169 | |
170 ## ordering (batch and injection order) | |
171 ##------------------------------------- | |
172 | |
173 samDF[, "ordIniVi"] <- 1:nrow(rawMN) | |
174 ordBatInjVi <- order(samDF[, args$batch_col_name], samDF[, args$injection_order_col_name]) | |
175 rawMN <- rawMN[ordBatInjVi, ] | |
176 samDF <- samDF[ordBatInjVi, ] | |
177 | |
178 ## signal drift and batch-effect correction | |
179 ##----------------------------------------- | |
180 | |
181 nrmMN <- shiftBatchCorrectF(rawMN, | |
182 samDF, | |
183 refC, | |
184 spnN) | |
185 | |
186 ## figure | |
187 ##------- | |
188 | |
189 cat("\nPlotting\n") | |
190 | |
191 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7) | |
192 plotBatchF(rawMN, samDF, spnN) | |
193 plotBatchF(nrmMN, samDF, spnN) | |
194 dev.off() | |
195 | |
196 ## returning to initial order | |
197 ##--------------------------- | |
198 | |
199 ordIniVi <- order(samDF[, "ordIniVi"]) | |
200 nrmMN <- nrmMN[ordIniVi, ] | |
201 samDF <- samDF[ordIniVi, ] | |
202 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE] | |
203 | |
204 | |
205 ##------------------------------ | |
206 ## Ending | |
207 ##------------------------------ | |
208 | |
209 | |
210 ## saving | |
211 ##------- | |
212 | |
213 datMN <- nrmMN | |
214 | |
215 datDF <- cbind.data.frame(dataMatrix = colnames(datMN), | |
216 as.data.frame(t(datMN))) | |
217 write.table(datDF, | |
218 file = argVc["dataMatrix_out"], | |
219 quote = FALSE, | |
220 row.names = FALSE, | |
221 sep = "\t") | |
222 | |
223 varDF <- cbind.data.frame(variableMetadata = rownames(varDF), | |
224 varDF) ## not modified; for compatibility only | |
225 write.table(varDF, | |
226 file = argVc["variableMetadata_out"], | |
227 quote = FALSE, | |
228 row.names = FALSE, | |
229 sep = "\t") | |
230 | |
231 | |
232 res <- list(dataMatrix_raw = rawMN, | |
233 dataMatrix_normalized = nrmMN, | |
234 sampleMetadata = samDF) | |
235 save(res, | |
236 file = argVc["rdata_output"]) ## for compatibility | |
237 | |
238 ## closing | |
239 ##-------- | |
240 | |
241 cat("\nEnd of '", modNamC, "' Galaxy module call: ", | |
242 as.character(Sys.time()), "\n", sep = "") | |
243 | |
244 ## sink() | |
245 | |
246 options(stringsAsFactors = strAsFacL) | |
247 | |
248 | |
249 rm(argVc) | |
250 |