view BC/batch_correction_3Llauncher.R @ 4:23314e1192d4 draft default tip

Uploaded
author melpetera
date Thu, 14 Jan 2021 09:56:58 +0000
parents
children
line wrap: on
line source

###############################################################################################################
# batch_correction_3Llauncher                                                                                 #
#                                                                                                             #
# Authors: Jean-Francois MARTIN / Melanie Petera                                                              #
# Starting date: 04-08-2020                                                                                   #
# Based on batch_correction_wrapper.R version 2.91                                                            #
# Version 1: 02-10-2020                                                                                       #
#            - split of tool-linked code and script-linked one                                                #
#            - handling of sample tags' parameters                                                            #
#            - accepting samples beyond pools and samples                                                     #
#            - dealing with special characters in IDs and column names                                        #
#            - adding a min.norm argument to the function                                                     #
#                                                                                                             #
# Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only)                             #
# Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) #
#                                                                                                             #
###############################################################################################################

meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags,
                   factbio,analyse,metaion,detail,method,outlog,span,valnull,
                   rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){

## Import function
tab.import <- function(tested.file,tabtype){
  tab.res <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = ''), error=conditionMessage)
  if(length(tab.res)==1){
    stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res))
  }else{
    tab.comp <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = '',quote=""), error=conditionMessage)
    if((length(tab.comp)!=1)&&(dim(tab.res)!=dim(tab.comp))){ # wrong original import due to quotes inside a column name
      return(tab.comp)
    }else{ return(tab.res) }
  }
}

## Reading of input files
idsample=tab.import(idsample,"sampleMetadata")
iddata=tab.import(iddata,"dataMatrix")

### Table match check 
table.check <- match2(iddata,idsample,"sample")
if(length(table.check)>1){check.err(table.check)}

### StockID
samp.id <- stockID(iddata,idsample,"sample")
iddata<-samp.id$dataMatrix ; idsample<-samp.id$Metadata ; samp.id<-samp.id$id.match

### Checking mandatory variables
mand.check <- ""
for(mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)){
  if(!(mandcol%in%colnames(idsample))){
    mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n",
                    "Note: column names are case-sensitive.\n")
  }
}
if(length(mand.check)>1){
  mand.check <- c(mand.check,"\nFor more information, see the help section or:",
                  "\n http://workflow4metabolomics.org/sites/",
                  "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n")
  check.err(mand.check)
}

if(analyse == "batch_correction") {
    ## Reading of Metadata Ions file
    metaion=read.table(metaion,header=T,sep='\t',check.names=FALSE,comment.char = '')
    ## Table match check 
    table.check <- c(table.check,match2(iddata,metaion,"variable"))
    ## StockID
    var.id <- stockID(iddata,metaion,"variable")
    iddata<-var.id$dataMatrix ; metaion<-var.id$Metadata ; var.id<-var.id$id.match
}

### Formating
idsample[[1]]=make.names(idsample[[1]])
dimnames(iddata)[[1]]=iddata[[1]]

### Transposition of ions data
idTdata=t(iddata[,2:dim(iddata)[2]])
idTdata=data.frame(dimnames(idTdata)[[1]],idTdata)

### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
ids=merge(idsample, idTdata, by.x=1, by.y=1)

ids[[batch_col_name]]=as.factor(ids[[batch_col_name]])
nbid=dim(idsample)[2]

### Checking the number of sample and pool

# least 2 samples
if(length(which(ids[[sample_type_col_name]] %in% sample_type_tags$sample))<2){
    table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.",
           "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n")
}

# least 2 pools per batch for all batchs
B <- rep(0,length(levels(ids[[batch_col_name]])))
for(nbB in 1:length(levels(ids[[batch_col_name]]))){
    B[nbB]<-length(which(ids[which(ids[[batch_col_name]]==(levels(ids[[batch_col_name]])[nbB])),,drop=FALSE][[sample_type_col_name]] %in% sample_type_tags$pool))
}
if(length(which(B>1))==0){
    table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.",
           "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n")
}

### Checking the unicity of samples and variables
uni.check <- function(tested.tab,tabtype,err.obj){
  unicity <- duplicated(tested.tab[,1])
  if(sum(unicity)>0){
    #Sending back an explicit error
    duptable <- t(t(table(tested.tab[,1][unicity])+1))
    err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"),
                 paste(rownames(duptable),duptable,sep=": ",collapse="\n"),
                 "\nSince identifiers are meant to be unique, please check your data.\n-------\n")
  }
  return(err.obj)
}
table.check <- uni.check(ids,"sample",table.check)
if(analyse == "batch_correction"){table.check <- uni.check(metaion,"variable",table.check)}

## error check
check.err(table.check)


### BC/DBC-specific processing

# Gathering mandatory information in a single object
sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags)

if(analyse == "batch_correction") {
    ## Launch
    res = norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm)
    ## Get back original IDs
    var.id <- reproduceID(res[[1]],res[[2]],"variable",var.id)
    res[[1]] <- var.id$dataMatrix ; res[[2]] <- var.id$Metadata
    samp.id <- reproduceID(res[[1]],res[[3]],"sample",samp.id)
    res[[1]] <- samp.id$dataMatrix ; res[[3]] <- samp.id$Metadata
    ## Save files
    save(res, file=rdata_output)
    write.table(res[[1]], file=dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE)
    write.table(res[[2]], file=variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE)
}else{
    ## Launch
    plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta)
}

}#end of meth3L