view batch_correction_wrapper.R @ 0:b74d1d533dea draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/batchcorrection.git commit 241fb99a843e13195c5054cd9731e1561f039bde
author ethevenot
date Thu, 04 Aug 2016 11:40:35 -0400
parents
children
line wrap: on
line source

#!/usr/bin/Rscript --vanilla --slave --no-site-file

################################################################################################
# batch_correction_wrapper                                                                     #
#                                                                                              #
# Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera                                 #
# User: Galaxy                                                                                 #
# Original data: --                                                                            #
# Starting date: 22-07-2014                                                                    #
# Version 1: 22-07-2014                                                                        #
# Version 2: 08-12-2014                                                                        #
# Version 2.1: 09-01-2015 modification in Error message of sample matching                     #
# Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters             #
#                                                                                              #
#                                                                                              #
# Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC)            #
# Output files: graph_output.pdf ; corrected table ; diagnostic table                          #
#                                                                                              #
################################################################################################


# The 'batch' package supplies parseCommandArgs(), used on the next line.
library(batch)
# Gather the command-line arguments into a list accessed by name (args$...);
# evaluate=FALSE is passed so the values are not evaluated by the parser.
args <- parseCommandArgs(evaluate = FALSE)

# Source R files whose paths are given relative to the running script's directory.
#
# Args:
#   ...: character paths, relative to the directory of the script passed to R
#        through its "--file=<path>" argument, of the files to source.
# Returns:
#   NULL, invisibly. Called for its side effect: each file is source()d.
source_local <- function(...){
	argv <- commandArgs(trailingOnly = FALSE)
	# "--file=" is 7 characters long, so the script path starts at character 8.
	base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
	# Evaluate the dots once and iterate over the elements directly: unlike
	# 1:length(list(...)), this is a no-op (not an error) when no file is given.
	files <- list(...)
	for(f in files){source(paste(base_dir, f, sep="/"))}
	invisible(NULL)
}
#Import the different functions
# The helpers called below (match2, stockID, check.err, reproduceID, norm_QCpool,
# plotsituation) are not defined in this file; presumably they come from these
# three sourced scripts -- confirm against those files.
source_local("Normalisation_QCpool.r","easyrlibrary-lib/RcheckLibrary.R","easyrlibrary-lib/miniTools.R")


## Reading of input files
# TRUE/FALSE are spelled out (not T/F) because T and F are ordinary variables
# that sourced code could reassign.
idsample <- read.table(args$sampleMetadata, header=TRUE, sep='\t', check.names=FALSE)
iddata <- read.table(args$dataMatrix, header=TRUE, sep='\t', check.names=FALSE)

### Table match check
# The result is accumulated in table.check and handed to check.err() later on.
table.check <- match2(iddata,idsample,"sample")

### StockID
# stockID() returns a list; its three components replace the working tables and
# keep the identifier mapping that reproduceID() needs when writing the output.
samp.id <- stockID(iddata,idsample,"sample")
iddata <- samp.id$dataMatrix
idsample <- samp.id$Metadata
samp.id <- samp.id$id.match

### Checking mandatory variables
mand.check <- ""
for(mandcol in c("sampleType","injectionOrder","batch")){
	if(!(mandcol%in%colnames(idsample))){
		mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n",
		                "Note: table must include this exact column name (it is case-sensitive).\n")
	}
}
# mand.check starts as a single empty string, so a length above 1 means that at
# least one mandatory column is missing.
if(length(mand.check)>1){check.err(paste(table.check,mand.check,sep=""))}

### Formatting
idsample[[1]] <- make.names(idsample[[1]])
dimnames(iddata)[[1]] <- iddata[[1]]

### Transposition of ions data
# drop=FALSE keeps a data frame (and its column name) even when the data matrix
# holds a single sample column; with several columns the result is unchanged.
idTdata <- t(iddata[, 2:ncol(iddata), drop=FALSE])
idTdata <- data.frame(dimnames(idTdata)[[1]],idTdata)

### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
id <- merge(idsample, idTdata, by.x=1, by.y=1)

id$batch <- as.factor(id$batch)
# %in% never yields NA, so rows with a missing sampleType are dropped instead of
# being turned into all-NA rows as happens with `==` inside a row index.
ids <- id[id$sampleType %in% c("pool","sample"),]
nbid <- dim(idsample)[2]

### Checking the number of samples and pools

# at least 2 samples
if(length(which(ids$sampleType == "sample"))<2){
	table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.",
	       "\nMake sure this is not due to errors in sampleType coding.\n")
}

# count the pools of every batch
# (the loop must visit every batch level; iterating over the bare
# length(levels(...)) only ever filled the entry of the last batch)
batch.levels <- levels(ids$batch)
B <- rep(0,length(batch.levels))
for(nbB in seq_along(batch.levels)){
	in.batch <- which(ids$batch==batch.levels[nbB])
	B[nbB] <- length(which(ids$sampleType[in.batch] == "pool"))
}
# An error is reported only when no batch at all reaches 2 pools.
# NOTE(review): if every batch is required to have 2 pools, this condition
# should be any(B<2) instead -- confirm the intended rule before changing it.
if(length(which(B>1))==0){
	table.check <- c(table.check,"\nError: less than 2 pools specified in each batch in sample metadata.",
	       "\nMake sure this is not due to errors in sampleType coding.\n")
}

### Factor of interest
factbio <- args$ref_factor


if(args$analyse == "batch_correction") {
	## Reading of Metadata Ions file
	metaion <- read.table(args$variableMetadata, header=TRUE, sep='\t', check.names=FALSE)
	## Table match check
	table.check <- c(table.check,match2(iddata,metaion,"variable"))
	check.err(table.check)

	## variables
	detail <- args$detail
	method <- args$method

	## outputs
	outfic <- args$variable_for_simca
	outlog <- args$graph_output

	## Launch
	res <- norm_QCpool(ids,nbid,outfic,outlog,factbio,metaion,detail,FALSE,FALSE,method,args$span)
	save(res, file=args$rdata_output)
	# res[[1]] and res[[3]] go through reproduceID() together with the stored
	# identifier mapping; res[[2]] is written as the variable metadata output.
	write.table(reproduceID(res[[1]],res[[3]],"sample",samp.id)$dataMatrix, file=args$dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE)
	write.table(res[[2]], file=args$variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE)
}else{
	## error check
	check.err(table.check)

	## outputs
	out_graph_pdf <- args$out_graph_pdf
	out_preNormSummary <- args$out_preNormSummary

	## Launch
	plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,args$span)
}

rm(args)