annotate batchcorrection-57edfd3943ab/batch_correction_all_loess_wrapper.R @ 3:73892ef177e3 draft

Uploaded
author melpetera
date Tue, 02 May 2017 09:47:22 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
1 #!/usr/bin/env Rscript
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
2
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
3 library(batch) ## necessary for parseCommandArgs function
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## Parse the command line into a named R list; 'evaluate = FALSE' is passed so
## that the parsed values are only returned, not evaluated
args <- parseCommandArgs(evaluate = FALSE)
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
5
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## Source an R file located in the same directory as the running script.
##
## The script directory is taken from the '--file=' entry of the full command
## line returned by commandArgs(trailingOnly = FALSE).
##
## @param fname character(1): name of the file to source, relative to the
##   directory of the running script
## @return the (invisible) value of source(); called for its side effect
source_local <- function(fname){
    argv <- commandArgs(trailingOnly = FALSE)
    ## anchored match: only a genuine '--file=' option is accepted, not any
    ## argument that merely contains that text
    script_path <- sub("^--file=", "", grep("^--file=", argv, value = TRUE))
    if(length(script_path) == 0)
        stop("Cannot locate the running script: no '--file=' argument found on the command line",
             call. = FALSE)
    ## keep the first match so that a single path is always passed to source()
    base_dir <- dirname(script_path[1])
    source(file.path(base_dir, fname))
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
11
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## Load the functions defined in the companion script (same directory as this
## wrapper)
source_local(fname = "batch_correction_all_loess_script.R")

## Flatten the parsed argument list into a single named vector
argVc <- unlist(x = args)
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
16
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
17 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample'
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
18 ## alternative version developed by CEA
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
19 ## all variables are treated with loess
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
20 ## the reference observations for loess are either 'pool'
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
21 ## ('all_loess_pool') or 'sample' ('all_loess_sample')
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
22
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
23
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
##------------------------------
## Initializing
##------------------------------

## options
##--------

## keep the current setting so that it can be restored at the end of the script
strAsFacL <- getOption("stringsAsFactors")
options(stringsAsFactors = FALSE)

## libraries
##----------

suppressMessages(library(ropls))

## refuse to run with a 'ropls' release older than 1.4.0
if (packageVersion("ropls") < "1.4.0") {
    stop("Please use 'ropls' versions of 1.4.0 and above")
}

## constants
##----------

modNamC <- "Batch correction" ## module name

## log file
##---------

## sink(argVc["information"]) ## not implemented

## start-of-run banner with a time stamp
cat(paste0("\nStart of the '", modNamC, "' Galaxy module call: ",
           format(Sys.time(), "%a %d %b %Y %X"), "\n"))
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
54
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## loading
##--------

## data matrix: read as a table, then converted to a matrix and transposed
rawMN <- read.table(file = argVc["dataMatrix"],
                    sep = "\t",
                    header = TRUE,
                    row.names = 1)
rawMN <- t(as.matrix(rawMN))

## sample metadata: first column used as row names
samDF <- read.table(file = argVc["sampleMetadata"],
                    sep = "\t",
                    header = TRUE,
                    row.names = 1)

## variable metadata: column names are kept exactly as written in the file
varDF <- read.table(file = argVc["variableMetadata"],
                    sep = "\t",
                    header = TRUE,
                    row.names = 1,
                    check.names = FALSE) ## not used; for compatibility only

## reference type: the method name without its 'all_loess_' part, lower-cased
refC <- tolower(gsub(pattern = "all_loess_",
                     replacement = "",
                     x = argVc["method"]))

## span argument converted to a number
spnN <- as.numeric(argVc["span"])
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
77
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## checking
##---------

stopifnot(refC %in% c("pool", "sample"))

## rows of the reference type; '%in%' (rather than '==') so that a missing
## 'sampleType' value counts as "not a reference" instead of giving an NA index
refVl <- samDF[, "sampleType"] %in% refC

if(!any(refVl)) {
    if(refC == "pool")
        stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead")
    stop("No 'sample' found in the 'sampleType' column")
}

## drop = FALSE keeps a matrix even when there is a single reference
## observation (a plain vector would make the column-wise check fail)
refMN <- rawMN[refVl, , drop = FALSE]

## TRUE for the variables whose reference values are all 'NA' or 0
refNasZerVl <- colSums(!(is.na(refMN) | refMN == 0)) == 0

if(any(refNasZerVl)) {

    refNasZerVi <- which(refNasZerVl)
    cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
    print(refNasZerVi)
    ## the same variables are removed from the data matrix (columns) and from
    ## the variable metadata (rows)
    rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
    varDF <- varDF[!refNasZerVl, , drop = FALSE]

}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
102
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
##------------------------------
## Computation
##------------------------------


## ordering (batch and injection order)
##-------------------------------------

## record the initial row positions so that the original order can be restored
## after the correction (seq_len is safe even for an empty matrix)
samDF[, "ordIniVi"] <- seq_len(nrow(rawMN))
ordBatInjVi <- order(samDF[, "batch"], samDF[, "injectionOrder"])
## drop = FALSE keeps a matrix even when a single variable is left
rawMN <- rawMN[ordBatInjVi, , drop = FALSE]
samDF <- samDF[ordBatInjVi, ]

## signal drift and batch-effect correction
##-----------------------------------------

## shiftBatchCorrectF is not defined in this file
nrmMN <- shiftBatchCorrectF(rawMN,
                            samDF,
                            refC,
                            spnN)
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
123
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## figure
##-------

cat("\nPlotting\n")

## one multi-page pdf: plotBatchF (not defined in this file) is called first on
## the raw data, then on the corrected data
pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7)
plotBatchF(rawMN, samDF, spnN)
plotBatchF(nrmMN, samDF, spnN)
dev.off()

## returning to initial order
##---------------------------

ordIniVi <- order(samDF[, "ordIniVi"])
## the raw matrix is restored as well, so that every object handled afterwards
## (raw data, corrected data, sample metadata) has its rows in the same order;
## drop = FALSE keeps matrices even when a single variable is left
rawMN <- rawMN[ordIniVi, , drop = FALSE]
nrmMN <- nrmMN[ordIniVi, , drop = FALSE]
samDF <- samDF[ordIniVi, ]
## remove the temporary column that stored the initial positions
samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE]
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
141
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
142
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
##------------------------------
## Ending
##------------------------------


## saving
##-------

datMN <- nrmMN

## corrected data: transposed back, with the column names of 'datMN' written as
## a first column called 'dataMatrix'
datDF <- cbind.data.frame(dataMatrix = colnames(datMN),
                          as.data.frame(t(datMN)))
write.table(x = datDF,
            file = argVc["dataMatrix_out"],
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

## variable metadata: row names written as a first column called
## 'variableMetadata'
varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
                          varDF) ## not modified; for compatibility only
write.table(x = varDF,
            file = argVc["variableMetadata_out"],
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)


## R object gathering the raw data, the corrected data and the sample metadata
res <- list(dataMatrix_raw = rawMN,
            dataMatrix_normalized = nrmMN,
            sampleMetadata = samDF)
save(list = "res",
     file = argVc["rdata_output"]) ## for compatibility
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
175
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
## closing
##--------

## end-of-run banner with a time stamp
cat(paste0("\nEnd of '", modNamC, "' Galaxy module call: ",
           as.character(Sys.time()), "\n"))

## sink()

## put back the 'stringsAsFactors' setting saved during initialization
options(stringsAsFactors = strAsFacL)


rm(list = "argVc")
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
188