Mercurial > repos > ethevenot > batchcorrection
diff batch_correction_all_loess_wrapper.R @ 0:b74d1d533dea draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/batchcorrection.git commit 241fb99a843e13195c5054cd9731e1561f039bde
author | ethevenot |
---|---|
date | Thu, 04 Aug 2016 11:40:35 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/batch_correction_all_loess_wrapper.R Thu Aug 04 11:40:35 2016 -0400 @@ -0,0 +1,196 @@ +#!/usr/bin/env Rscript + +library(batch) ## necessary for parseCommandArgs function +args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects + +source_local <- function(fname){ + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep="/")) +} + +## Import the different functions +source_local("batch_correction_all_loess_script.R") + +argVc <- unlist(args) + +## argVc["method"] is either 'all_loess_pool' or 'all_loess_sample' +## alternative version developped by CEA +## all variables are treated with loess +## the reference observations for loess are either 'pool' +## ('all_loess_pool') or 'sample' ('all_loess_sample') + + +##------------------------------ +## Initializing +##------------------------------ + +## options +##-------- + +strAsFacL <- options()$stringsAsFactors +options(stringsAsFactors = FALSE) + +## libraries +##---------- + +suppressMessages(library(ropls)) + +if(packageVersion("ropls") < "1.4.0") + stop("Please use 'ropls' versions of 1.4.0 and above") + +## constants +##---------- + +modNamC <- "Batch correction" ## module name + +## log file +##--------- + +## sink(argVc["information"]) ## not implemented + +cat("\nStart of the '", modNamC, "' Galaxy module call: ", + format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") + +## loading +##-------- + +rawMN <- t(as.matrix(read.table(argVc["dataMatrix"], + header = TRUE, + row.names = 1, + sep = "\t"))) + +samDF <- read.table(argVc["sampleMetadata"], + header = TRUE, + row.names = 1, + sep = "\t") + +varDF <- read.table(argVc["variableMetadata"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t") ## not used; for compatibility only + +refC <- tolower(gsub("all_loess_", "", argVc["method"])) + +spnN <- as.numeric(argVc["span"]) + + +## checking +##--------- + +stopifnot(refC %in% c("pool", "sample")) + +if(refC == "pool" && + !any("pool" %in% samDF[, "sampleType"])) + stop("No 'pool' found in the 'sampleType' column; use the samples as normalization reference instead") + +refMN <- rawMN[samDF[, "sampleType"] == refC, ] +refNasZerVl <- apply(refMN, 2, + function(refVn) + all(sapply(refVn, + function(refN) {is.na(refN) || refN == 0}))) + +if(sum(refNasZerVl)) { + + refNasZerVi <- which(refNasZerVl) + cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "") + print(refNasZerVi) + rawMN <- rawMN[, !refNasZerVl, drop = FALSE] + varDF <- varDF[!refNasZerVl, , drop = FALSE] + +} + +##------------------------------ +## Computation +##------------------------------ + +## ordering (batch and injection order) +##------------------------------------- + +samDF[, "ordIniVi"] <- 1:nrow(rawMN) +ordBatInjVi <- order(samDF[, "batch"], samDF[, "injectionOrder"]) +rawMN <- rawMN[ordBatInjVi, ] +samDF <- samDF[ordBatInjVi, ] + +## signal drift and batch-effect correction +##----------------------------------------- + +nrmMN <- shiftBatchCorrectF(rawMN, + samDF, + refC, + spnN) + +## figure +##------- + +cat("\nPlotting\n") + +pdf(argVc["graph_output"], onefile = TRUE, width = 11, height = 7) +plotBatchF(rawMN, samDF, spnN) +plotBatchF(nrmMN, samDF, spnN) +dev.off() + +## returning to initial order +##--------------------------- + +ordIniVi <- order(samDF[, "ordIniVi"]) +nrmMN <- nrmMN[ordIniVi, ] +samDF <- samDF[ordIniVi, ] +samDF <- samDF[, colnames(samDF) != "ordIniVi", drop=FALSE] + + +##------------------------------ +## Ending +##------------------------------ + + +## saving +##------- + +datMN <- nrmMN + +datDF <- cbind.data.frame(dataMatrix = colnames(datMN), + as.data.frame(t(datMN))) +write.table(datDF, + file = argVc["dataMatrix_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +varDF <- cbind.data.frame(variableMetadata = rownames(varDF), + varDF) ## not modified; for compatibility only +write.table(varDF, + file = argVc["variableMetadata_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +simDF <- cbind.data.frame(samDF, as.data.frame(datMN)) ## for compatibility +simDF <- cbind.data.frame(names = rownames(simDF), + simDF) +write.table(simDF, + file = argVc["variable_for_simca"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +res <- list(dataMatrix_raw = rawMN, + dataMatrix_normalized = nrmMN, + sampleMetadata = samDF) +save(res, + file = argVc["rdata_output"]) ## for compatibility + +## closing +##-------- + +cat("\nEnd of '", modNamC, "' Galaxy module call: ", + as.character(Sys.time()), "\n", sep = "") + +## sink() + +options(stringsAsFactors = strAsFacL) + + +rm(argVc) +