Mercurial > repos > melpetera > batchcorrection
diff BC/batch_correction_3Llauncher.R @ 4:23314e1192d4 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 14 Jan 2021 09:56:58 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BC/batch_correction_3Llauncher.R Thu Jan 14 09:56:58 2021 +0000 @@ -0,0 +1,146 @@ +############################################################################################################### +# batch_correction_3Llauncher # +# # +# Authors: Jean-Francois MARTIN / Melanie Petera # +# Starting date: 04-08-2020 # +# Based on batch_correction_wrapper.R version 2.91 # +# Version 1: 02-10-2020 # +# - split of tool-linked code and script-linked one # +# - handling of sample tags' parameters # +# - accepting samples beyond pools and samples # +# - dealing with special characters in IDs and column names # +# - adding a min.norm argument to the function # +# # +# Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only) # +# Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) # +# # +############################################################################################################### + +meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags, + factbio,analyse,metaion,detail,method,outlog,span,valnull, + rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){ + +## Import function +tab.import <- function(tested.file,tabtype){ + tab.res <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = ''), error=conditionMessage) + if(length(tab.res)==1){ + stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res)) + }else{ + tab.comp <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = '',quote=""), error=conditionMessage) + if((length(tab.comp)!=1)&&(dim(tab.res)!=dim(tab.comp))){ # wrong original import due to quotes inside a column name + return(tab.comp) + }else{ return(tab.res) } + } +} + +## Reading of input files +idsample=tab.import(idsample,"sampleMetadata") +iddata=tab.import(iddata,"dataMatrix") + +### Table match check +table.check <- match2(iddata,idsample,"sample") +if(length(table.check)>1){check.err(table.check)} + +### StockID +samp.id <- stockID(iddata,idsample,"sample") +iddata<-samp.id$dataMatrix ; idsample<-samp.id$Metadata ; samp.id<-samp.id$id.match + +### Checking mandatory variables +mand.check <- "" +for(mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)){ + if(!(mandcol%in%colnames(idsample))){ + mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n", + "Note: column names are case-sensitive.\n") + } +} +if(length(mand.check)>1){ + mand.check <- c(mand.check,"\nFor more information, see the help section or:", + "\n http://workflow4metabolomics.org/sites/", + "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n") + check.err(mand.check) +} + +if(analyse == "batch_correction") { + ## Reading of Metadata Ions file + metaion=read.table(metaion,header=T,sep='\t',check.names=FALSE,comment.char = '') + ## Table match check + table.check <- c(table.check,match2(iddata,metaion,"variable")) + ## StockID + var.id <- stockID(iddata,metaion,"variable") + iddata<-var.id$dataMatrix ; metaion<-var.id$Metadata ; var.id<-var.id$id.match +} + +### Formating +idsample[[1]]=make.names(idsample[[1]]) +dimnames(iddata)[[1]]=iddata[[1]] + +### Transposition of ions data +idTdata=t(iddata[,2:dim(iddata)[2]]) +idTdata=data.frame(dimnames(idTdata)[[1]],idTdata) + +### Merge of 2 files (ok even if the two dataframe are not sorted on the same key) +ids=merge(idsample, idTdata, by.x=1, by.y=1) + +ids[[batch_col_name]]=as.factor(ids[[batch_col_name]]) +nbid=dim(idsample)[2] + +### Checking the number of sample and pool + +# least 2 samples +if(length(which(ids[[sample_type_col_name]] %in% sample_type_tags$sample))<2){ + table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.", + "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") +} + +# least 2 pools per batch for all batchs +B <- rep(0,length(levels(ids[[batch_col_name]]))) +for(nbB in 1:length(levels(ids[[batch_col_name]]))){ + B[nbB]<-length(which(ids[which(ids[[batch_col_name]]==(levels(ids[[batch_col_name]])[nbB])),,drop=FALSE][[sample_type_col_name]] %in% sample_type_tags$pool)) +} +if(length(which(B>1))==0){ + table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.", + "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") +} + +### Checking the unicity of samples and variables +uni.check <- function(tested.tab,tabtype,err.obj){ + unicity <- duplicated(tested.tab[,1]) + if(sum(unicity)>0){ + #Sending back an explicit error + duptable <- t(t(table(tested.tab[,1][unicity])+1)) + err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"), + paste(rownames(duptable),duptable,sep=": ",collapse="\n"), + "\nSince identifiers are meant to be unique, please check your data.\n-------\n") + } + return(err.obj) +} +table.check <- uni.check(ids,"sample",table.check) +if(analyse == "batch_correction"){table.check <- uni.check(metaion,"variable",table.check)} + +## error check +check.err(table.check) + + +### BC/DBC-specific processing + +# Gathering mandatory information in a single object +sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags) + +if(analyse == "batch_correction") { + ## Launch + res = norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm) + ## Get back original IDs + var.id <- reproduceID(res[[1]],res[[2]],"variable",var.id) + res[[1]] <- var.id$dataMatrix ; res[[2]] <- var.id$Metadata + samp.id <- reproduceID(res[[1]],res[[3]],"sample",samp.id) + res[[1]] <- samp.id$dataMatrix ; res[[3]] <- samp.id$Metadata + ## Save files + save(res, file=rdata_output) + write.table(res[[1]], file=dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE) + write.table(res[[2]], file=variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE) +}else{ + ## Launch + plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta) +} + +}#end of meth3L \ No newline at end of file