view IDchoice/IDchoice_script.R @ 0:b7a6a88f518a draft

Uploaded
author melpetera
date Thu, 11 Oct 2018 05:47:29 -0400
parents
children bb19b1d15732
line wrap: on
line source

################################################################################################
# ID CHOICE                                                                                    #
#                                                                                              #
# User: Galaxy                                                                                 #
# Starting date: 01-06-2017                                                                    #
# V-0.1: First version of code                                                                 #
#                                                                                              #
#                                                                                              #
# Input files: dataMatrix ; Metadata file                                                      #
# Output files: dataMatrix ; Metadata file                                                     #
#                                                                                              #
# Dependencies: RcheckLibrary.R ; miniTools.R  (easyRlibrary)                                  #
#                                                                                              #
################################################################################################

# Parameters (for dev)
if(FALSE){
  DM.name <- "CaracSpe_dataMatrix.txt"
  meta.name <- "CaracSpe_variableMetadata.txt"
  metype <- "variable"
  #coloname <- "namecustom"
  coloname <- "B"
  makeun <- "yes"
  DMout <- "ID_DM.txt"
  metaout <- paste0("ID_",metype,"meta.txt")
}


id.choice <- function(DM.name,meta.name,metype,coloname,makeun,DMout,metaout){
  # This function allows to replace original IDs with other ones from one metadata table.
  #
  # Parameters:
  # - DM.name, meta.name: dataMatrix and metadata files' access respectively
  # - metype: "sample" or "variable" depending on metadata content
  # - coloname: name of the metadata column to be used as new ID
  # - makeun: "yes" or "no" depending on user choice if new IDs are not unique ("yes"=conversion to unique ID)
  # - DMout, metaout: output files' access
  
  
# Input --------------------------------------------------------------

DM <- read.table(DM.name,header=TRUE,sep="\t",check.names=FALSE)
meta <- read.table(meta.name,header=TRUE,sep="\t",check.names=FALSE,colClasses="character")

# Table match check 
table.check <- match2(DM,meta,metype)
check.err(table.check)


# Checking unicity of new IDs ----------------------------------------

numcol <- which(colnames(meta)==coloname)
if(length(numcol)==0) {
      stop(paste0("\n-------\nWarning: no '",coloname,"' column detected in ",metype," metadata!",
                  "\nPlease check your metadata file (column names are case-sensitive).\n-------\n")) 
}

unicity <- duplicated(meta[,numcol])

if(sum(unicity)>0){
  if(makeun=="no"){
    #Sending back an explicit error
	duptable <- t(t(table(meta[,numcol][unicity])+1))
	stop(paste0("\n-------\nYour '",coloname,"' column contains duplicates:\n"),
		paste(rownames(duptable),duptable,sep=": ",collapse="\n"),paste0("\nSince identifiers are meant to be unique, ",
		"please check your data or use the 'Force unicity' option to force unicity.\n-------\n"))
	
  }else{
    #Making unique names
	meta <- cbind(meta,newID=make.unique(meta[,numcol],sep="_"),ori=c(1:nrow(meta)))
  }
}else{
  #No unicity problem
  meta <- cbind(meta,newID=meta[,numcol],ori=c(1:nrow(meta)))
}


# Merging tables -----------------------------------------------------

#Transposing the dataMatrix if necessary
if(metype=="sample"){
  rownames(DM) <- DM[,1]
  DM <- DM[,-1]
  DM <- t(DM)
  DM <- data.frame(sample=row.names(DM),DM,check.names=FALSE)
  rownames(DM) <- NULL
}

comb.data <- merge(x=meta,y=DM,by.x=1,by.y=1)
comb.data <- comb.data[order(comb.data$ori),]


# Changing IDs -------------------------------------------------------

DM <- comb.data[,-c(1:(ncol(meta)-2),ncol(meta))]
if(makeun=="no"){
  comb.data <- comb.data[,c(numcol,which(colnames(meta)!=coloname))]
  meta <- comb.data[,c(1:(ncol(meta)-2))]
}else{
  meta <- comb.data[,c(ncol(meta)-1,1:(ncol(meta)-2))]
}

#Transposing back the dataMatrix if necessary
if(metype=="sample"){
  rownames(DM) <- DM[,1]
  DM <- DM[,-1]
  DM <- t(DM)
  DM <- data.frame(sample=row.names(DM),DM,check.names=FALSE)
  rownames(DM) <- NULL
}


# Output -------------------------------------------------------------

# Writing the table
write.table(DM,DMout,sep="\t",quote=FALSE,row.names=FALSE)
write.table(meta,metaout,sep="\t",quote=FALSE,row.names=FALSE)


} # End of id.choice


# Typical function call
# id.choice(DM.name,meta.name,metype,coloname,makeun,DMout,metaout)