Mercurial > repos > ethevenot > batchcorrection
comparison batch_correction_all_loess_wrapper.R @ 0:b74d1d533dea draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/batchcorrection.git commit 241fb99a843e13195c5054cd9731e1561f039bde
author | ethevenot |
---|---|
date | Thu, 04 Aug 2016 11:40:35 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b74d1d533dea |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 library(batch) ## necessary for parseCommandArgs function | |
4 args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects | |
5 | |
6 source_local <- function(fname){ | |
7 argv <- commandArgs(trailingOnly = FALSE) | |
8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
9 source(paste(base_dir, fname, sep="/")) | |
10 } | |
11 | |
12 ## Import the different functions | |
13 source_local("batch_correction_all_loess_script.R") | |
14 | |
15 argVc <- unlist(args) | |
16 | |
17 ## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample' | |
18 ## alternative version developped by CEA | |
19 ## all variables are treated with loess | |
20 ## the reference observations for loess are either 'pool' | |
21 ## ('all_loess_pool') or 'sample' ('all_loess_sample') | |
22 | |
23 | |
24 ##------------------------------ | |
25 ## Initializing | |
26 ##------------------------------ | |
27 | |
28 ## options | |
29 ##-------- | |
30 | |
31 strAsFacL <- options()$stringsAsFactors | |
32 options(stringsAsFactors = FALSE) | |
33 | |
34 ## libraries | |
35 ##---------- | |
36 | |
37 suppressMessages(library(ropls)) | |
38 | |
39 if(packageVersion("ropls") < "1.4.0") | |
40 stop("Please use 'ropls' versions of 1.4.0 and above") | |
41 | |
42 ## constants | |
43 ##---------- | |
44 | |
45 modNamC <- "Batch correction" ## module name | |
46 | |
47 ## log file | |
48 ##--------- | |
49 | |
50 ## sink(argVc["information"]) ## not implemented | |
51 | |
52 cat("\nStart of the '", modNamC, "' Galaxy module call: ", | |
53 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") | |
54 | |
55 ## loading | |
56 ##-------- | |
57 | |
58 rawMN <- t(as.matrix(read.table(argVc["dataMatrix"], | |
59 header = TRUE, | |
60 row.names = 1, | |
61 sep = "\t"))) | |
62 | |
63 samDF <- read.table(argVc["sampleMetadata"], | |
64 header = TRUE, | |
65 row.names = 1, | |
66 sep = "\t") | |
67 | |
68 varDF <- read.table(argVc["variableMetadata"], | |
69 check.names = FALSE, | |
70 header = TRUE, | |
71 row.names = 1, | |
72 sep = "\t") ## not used; for compatibility only | |
73 | |
74 refC <- tolower(gsub("all_loess_", "", argVc["method"])) | |
75 | |
76 spnN <- as.numeric(argVc["span"]) | |
77 | |
78 | |
79 ## checking | |
80 ##--------- | |
81 | |
82 stopifnot(refC %in% c("pool", "sample")) | |
83 | |
84 if(refC == "pool" && | |
85 !any("pool" %in% samDF[, "sampleType"])) | |
86 stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead") | |
87 | |
88 refMN <- rawMN[samDF[, "sampleType"] == refC, ] | |
89 refNasZerVl <- apply(refMN, 2, | |
90 function(refVn) | |
91 all(sapply(refVn, | |
92 function(refN) {is.na(refN) || refN == 0}))) | |
93 | |
94 if(sum(refNasZerVl)) { | |
95 | |
96 refNasZerVi <- which(refNasZerVl) | |
97 cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "") | |
98 print(refNasZerVi) | |
99 rawMN <- rawMN[, !refNasZerVl, drop = FALSE] | |
100 varDF <- varDF[!refNasZerVl, , drop = FALSE] | |
101 | |
102 } | |
103 | |
104 ##------------------------------ | |
105 ## Computation | |
106 ##------------------------------ | |
107 | |
108 ## ordering (batch and injection order) | |
109 ##------------------------------------- | |
110 | |
111 samDF[, "ordIniVi"] <- 1:nrow(rawMN) | |
112 ordBatInjVi <- order(samDF[, "batch"], samDF[, "injectionOrder"]) | |
113 rawMN <- rawMN[ordBatInjVi, ] | |
114 samDF <- samDF[ordBatInjVi, ] | |
115 | |
116 ## signal drift and batch-effect correction | |
117 ##----------------------------------------- | |
118 | |
119 nrmMN <- shiftBatchCorrectF(rawMN, | |
120 samDF, | |
121 refC, | |
122 spnN) | |
123 | |
124 ## figure | |
125 ##------- | |
126 | |
127 cat("\nPlotting\n") | |
128 | |
129 pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7) | |
130 plotBatchF(rawMN, samDF, spnN) | |
131 plotBatchF(nrmMN, samDF, spnN) | |
132 dev.off() | |
133 | |
134 ## returning to initial order | |
135 ##--------------------------- | |
136 | |
137 ordIniVi <- order(samDF[, "ordIniVi"]) | |
138 nrmMN <- nrmMN[ordIniVi, ] | |
139 samDF <- samDF[ordIniVi, ] | |
140 samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE] | |
141 | |
142 | |
143 ##------------------------------ | |
144 ## Ending | |
145 ##------------------------------ | |
146 | |
147 | |
148 ## saving | |
149 ##------- | |
150 | |
151 datMN <- nrmMN | |
152 | |
153 datDF <- cbind.data.frame(dataMatrix = colnames(datMN), | |
154 as.data.frame(t(datMN))) | |
155 write.table(datDF, | |
156 file = argVc["dataMatrix_out"], | |
157 quote = FALSE, | |
158 row.names = FALSE, | |
159 sep = "\t") | |
160 | |
161 varDF <- cbind.data.frame(variableMetadata = rownames(varDF), | |
162 varDF) ## not modified; for compatibility only | |
163 write.table(varDF, | |
164 file = argVc["variableMetadata_out"], | |
165 quote = FALSE, | |
166 row.names = FALSE, | |
167 sep = "\t") | |
168 | |
169 simDF <- cbind.data.frame(samDF, as.data.frame(datMN)) ## for compatibility | |
170 simDF <- cbind.data.frame(names = rownames(simDF), | |
171 simDF) | |
172 write.table(simDF, | |
173 file = argVc["variable_for_simca"], | |
174 quote = FALSE, | |
175 row.names = FALSE, | |
176 sep = "\t") | |
177 | |
178 res <- list(dataMatrix_raw = rawMN, | |
179 dataMatrix_normalized = nrmMN, | |
180 sampleMetadata = samDF) | |
181 save(res, | |
182 file = argVc["rdata_output"]) ## for compatibility | |
183 | |
184 ## closing | |
185 ##-------- | |
186 | |
187 cat("\nEnd of '", modNamC, "' Galaxy module call: ", | |
188 as.character(Sys.time()), "\n", sep = "") | |
189 | |
190 ## sink() | |
191 | |
192 options(stringsAsFactors = strAsFacL) | |
193 | |
194 | |
195 rm(argVc) | |
196 |