Mercurial > repos > melpetera > batchcorrection
view BC/batch_correction_3Llauncher.R @ 4:23314e1192d4 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 14 Jan 2021 09:56:58 +0000 |
parents | |
children |
line wrap: on
line source
############################################################################################################### # batch_correction_3Llauncher # # # # Authors: Jean-Francois MARTIN / Melanie Petera # # Starting date: 04-08-2020 # # Based on batch_correction_wrapper.R version 2.91 # # Version 1: 02-10-2020 # # - split of tool-linked code and script-linked one # # - handling of sample tags' parameters # # - accepting samples beyond pools and samples # # - dealing with special characters in IDs and column names # # - adding a min.norm argument to the function # # # # Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only) # # Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) # # # ############################################################################################################### meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags, factbio,analyse,metaion,detail,method,outlog,span,valnull, rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){ ## Import function tab.import <- function(tested.file,tabtype){ tab.res <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = ''), error=conditionMessage) if(length(tab.res)==1){ stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res)) }else{ tab.comp <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = '',quote=""), error=conditionMessage) if((length(tab.comp)!=1)&&(dim(tab.res)!=dim(tab.comp))){ # wrong original import due to quotes inside a column name return(tab.comp) }else{ return(tab.res) } } } ## Reading of input files idsample=tab.import(idsample,"sampleMetadata") iddata=tab.import(iddata,"dataMatrix") ### Table match check table.check <- match2(iddata,idsample,"sample") if(length(table.check)>1){check.err(table.check)} ### StockID samp.id <- stockID(iddata,idsample,"sample") iddata<-samp.id$dataMatrix ; idsample<-samp.id$Metadata ; samp.id<-samp.id$id.match ### Checking mandatory variables mand.check <- "" for(mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)){ if(!(mandcol%in%colnames(idsample))){ mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n", "Note: column names are case-sensitive.\n") } } if(length(mand.check)>1){ mand.check <- c(mand.check,"\nFor more information, see the help section or:", "\n http://workflow4metabolomics.org/sites/", "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n") check.err(mand.check) } if(analyse == "batch_correction") { ## Reading of Metadata Ions file metaion=read.table(metaion,header=T,sep='\t',check.names=FALSE,comment.char = '') ## Table match check table.check <- c(table.check,match2(iddata,metaion,"variable")) ## StockID var.id <- stockID(iddata,metaion,"variable") iddata<-var.id$dataMatrix ; metaion<-var.id$Metadata ; var.id<-var.id$id.match } ### Formating idsample[[1]]=make.names(idsample[[1]]) dimnames(iddata)[[1]]=iddata[[1]] ### Transposition of ions data idTdata=t(iddata[,2:dim(iddata)[2]]) idTdata=data.frame(dimnames(idTdata)[[1]],idTdata) ### Merge of 2 files (ok even if the two dataframe are not sorted on the same key) ids=merge(idsample, idTdata, by.x=1, by.y=1) ids[[batch_col_name]]=as.factor(ids[[batch_col_name]]) nbid=dim(idsample)[2] ### Checking the number of sample and pool # least 2 samples if(length(which(ids[[sample_type_col_name]] %in% sample_type_tags$sample))<2){ table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.", "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") } # least 2 pools per batch for all batchs B <- rep(0,length(levels(ids[[batch_col_name]]))) for(nbB in 1:length(levels(ids[[batch_col_name]]))){ B[nbB]<-length(which(ids[which(ids[[batch_col_name]]==(levels(ids[[batch_col_name]])[nbB])),,drop=FALSE][[sample_type_col_name]] %in% sample_type_tags$pool)) } if(length(which(B>1))==0){ table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.", "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") } ### Checking the unicity of samples and variables uni.check <- function(tested.tab,tabtype,err.obj){ unicity <- duplicated(tested.tab[,1]) if(sum(unicity)>0){ #Sending back an explicit error duptable <- t(t(table(tested.tab[,1][unicity])+1)) err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"), paste(rownames(duptable),duptable,sep=": ",collapse="\n"), "\nSince identifiers are meant to be unique, please check your data.\n-------\n") } return(err.obj) } table.check <- uni.check(ids,"sample",table.check) if(analyse == "batch_correction"){table.check <- uni.check(metaion,"variable",table.check)} ## error check check.err(table.check) ### BC/DBC-specific processing # Gathering mandatory information in a single object sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags) if(analyse == "batch_correction") { ## Launch res = norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm) ## Get back original IDs var.id <- reproduceID(res[[1]],res[[2]],"variable",var.id) res[[1]] <- var.id$dataMatrix ; res[[2]] <- var.id$Metadata samp.id <- reproduceID(res[[1]],res[[3]],"sample",samp.id) res[[1]] <- samp.id$dataMatrix ; res[[3]] <- samp.id$Metadata ## Save files save(res, file=rdata_output) write.table(res[[1]], file=dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE) write.table(res[[2]], file=variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE) }else{ ## Launch plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta) } }#end of meth3L