comparison batchcorrection-57edfd3943ab/batch_correction_all_loess_wrapper.R @ 3:73892ef177e3 draft

Uploaded
author melpetera
date Tue, 02 May 2017 09:47:22 -0400
parents
children
comparison
equal deleted inserted replaced
2:016780b192a6 3:73892ef177e3
1 #!/usr/bin/env Rscript
2
3 library(batch) ## necessary for parseCommandArgs function
4 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
5
6 source_local <- function(fname){
7 argv <- commandArgs(trailingOnly = FALSE)
8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
9 source(paste(base_dir, fname, sep="/"))
10 }
11
12 ## Import the different functions
13 source_local("batch_correction_all_loess_script.R")
14
15 argVc <- unlist(args)
16
17 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample'
18 ## alternative version developped by CEA
19 ## all variables are treated with loess
20 ## the reference observations for loess are either 'pool'
21 ## ('all_loess_pool') or 'sample' ('all_loess_sample')
22
23
24 ##------------------------------
25 ## Initializing
26 ##------------------------------
27
28 ## options
29 ##--------
30
31 strAsFacL <- options()$stringsAsFactors
32 options(stringsAsFactors = FALSE)
33
34 ## libraries
35 ##----------
36
37 suppressMessages(library(ropls))
38
39 if(packageVersion("ropls") < "1.4.0")
40 stop("Please use 'ropls' versions of 1.4.0 and above")
41
42 ## constants
43 ##----------
44
45 modNamC <- "Batch correction" ## module name
46
47 ## log file
48 ##---------
49
50 ## sink(argVc["information"]) ## not implemented
51
52 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
53 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
54
55 ## loading
56 ##--------
57
58 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"],
59 header = TRUE,
60 row.names = 1,
61 sep = "\t")))
62
63 samDF <- read.table(argVc["sampleMetadata"],
64 header = TRUE,
65 row.names = 1,
66 sep = "\t")
67
68 varDF <- read.table(argVc["variableMetadata"],
69 check.names = FALSE,
70 header = TRUE,
71 row.names = 1,
72 sep = "\t") ## not used; for compatibility only
73
74 refC <- tolower(gsub("all_loess_", "", argVc["method"]))
75
76 spnN <- as.numeric(argVc["span"])
77
78 ## checking
79 ##---------
80
81 stopifnot(refC %in% c("pool", "sample"))
82
83 if(refC == "pool" &&
84 !any("pool" %in% samDF[, "sampleType"]))
85 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead")
86
87 refMN <- rawMN[samDF[, "sampleType"] == refC, ]
88 refNasZerVl <- apply(refMN, 2,
89 function(refVn)
90 all(sapply(refVn,
91 function(refN) {is.na(refN) || refN == 0})))
92
93 if(sum(refNasZerVl)) {
94
95 refNasZerVi <- which(refNasZerVl)
96 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
97 print(refNasZerVi)
98 rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
99 varDF <- varDF[!refNasZerVl, , drop = FALSE]
100
101 }
102
103 ##------------------------------
104 ## Computation
105 ##------------------------------
106
107
108 ## ordering (batch and injection order)
109 ##-------------------------------------
110
111 samDF[, "ordIniVi"] <- 1:nrow(rawMN)
112 ordBatInjVi <- order(samDF[, "batch"], samDF[, "injectionOrder"])
113 rawMN <- rawMN[ordBatInjVi, ]
114 samDF <- samDF[ordBatInjVi, ]
115
116 ## signal drift and batch-effect correction
117 ##-----------------------------------------
118
119 nrmMN <- shiftBatchCorrectF(rawMN,
120 samDF,
121 refC,
122 spnN)
123
124 ## figure
125 ##-------
126
127 cat("\nPlotting\n")
128
129 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7)
130 plotBatchF(rawMN, samDF, spnN)
131 plotBatchF(nrmMN, samDF, spnN)
132 dev.off()
133
134 ## returning to initial order
135 ##---------------------------
136
137 ordIniVi <- order(samDF[, "ordIniVi"])
138 nrmMN <- nrmMN[ordIniVi, ]
139 samDF <- samDF[ordIniVi, ]
140 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE]
141
142
143 ##------------------------------
144 ## Ending
145 ##------------------------------
146
147
148 ## saving
149 ##-------
150
151 datMN <- nrmMN
152
153 datDF <- cbind.data.frame(dataMatrix = colnames(datMN),
154 as.data.frame(t(datMN)))
155 write.table(datDF,
156 file = argVc["dataMatrix_out"],
157 quote = FALSE,
158 row.names = FALSE,
159 sep = "\t")
160
161 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
162 varDF) ## not modified; for compatibility only
163 write.table(varDF,
164 file = argVc["variableMetadata_out"],
165 quote = FALSE,
166 row.names = FALSE,
167 sep = "\t")
168
169
170 res <- list(dataMatrix_raw = rawMN,
171 dataMatrix_normalized = nrmMN,
172 sampleMetadata = samDF)
173 save(res,
174 file = argVc["rdata_output"]) ## for compatibility
175
176 ## closing
177 ##--------
178
179 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
180 as.character(Sys.time()), "\n", sep = "")
181
182 ## sink()
183
184 options(stringsAsFactors = strAsFacL)
185
186
187 rm(argVc)
188