diff IDchoice/IDchoice_script.R @ 0:b7a6a88f518a draft

Uploaded
author melpetera
date Thu, 11 Oct 2018 05:47:29 -0400
parents
children bb19b1d15732
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/IDchoice/IDchoice_script.R	Thu Oct 11 05:47:29 2018 -0400
@@ -0,0 +1,124 @@
+################################################################################################
+# ID CHOICE                                                                                    #
+#                                                                                              #
+# User: Galaxy                                                                                 #
+# Starting date: 01-06-2017                                                                    #
+# V-0.1: First version of code                                                                 #
+#                                                                                              #
+#                                                                                              #
+# Input files: dataMatrix ; Metadata file                                                      #
+# Output files: dataMatrix ; Metadata file                                                     #
+#                                                                                              #
+# Dependencies: RcheckLibrary.R ; miniTools.R  (easyRlibrary)                                  #
+#                                                                                              #
+################################################################################################
+
+# Parameters (for dev)
+if(FALSE){
+  DM.name <- "CaracSpe_dataMatrix.txt"
+  meta.name <- "CaracSpe_variableMetadata.txt"
+  metype <- "variable"
+  #coloname <- "namecustom"
+  coloname <- "B"
+  makeun <- "yes"
+  DMout <- "ID_DM.txt"
+  metaout <- paste0("ID_",metype,"meta.txt")
+}
+
+
+id.choice <- function(DM.name,meta.name,metype,coloname,makeun,DMout,metaout){
+  # This function allows to replace original IDs with other ones from one metadata table.
+  #
+  # Parameters:
+  # - DM.name, meta.name: dataMatrix and metadata files' access respectively
+  # - metype: "sample" or "variable" depending on metadata content
+  # - coloname: name of the metadata column to be used as new ID
+  # - makeun: "yes" or "no" depending on user choice if new IDs are not unique ("yes"=conversion to unique ID)
+  # - DMout, metaout: output files' access
+  
+  
+# Input --------------------------------------------------------------
+
+DM <- read.table(DM.name,header=TRUE,sep="\t",check.names=FALSE)
+meta <- read.table(meta.name,header=TRUE,sep="\t",check.names=FALSE,colClasses="character")
+
+# Table match check 
+table.check <- match2(DM,meta,metype)
+check.err(table.check)
+
+
+# Checking unicity of new IDs ----------------------------------------
+
+numcol <- which(colnames(meta)==coloname)
+if(length(numcol)==0) {
+      stop(paste0("\n-------\nWarning: no '",coloname,"' column detected in ",metype," metadata!",
+                  "\nPlease check your metadata file (column names are case-sensitive).\n-------\n")) 
+}
+
+unicity <- duplicated(meta[,numcol])
+
+if(sum(unicity)>0){
+  if(makeun=="no"){
+    #Sending back an explicit error
+	duptable <- t(t(table(meta[,numcol][unicity])+1))
+	stop(paste0("\n-------\nYour '",coloname,"' column contains duplicates:\n"),
+		paste(rownames(duptable),duptable,sep=": ",collapse="\n"),paste0("\nSince identifiers are meant to be unique, ",
+		"please check your data or use the 'Force unicity' option to force unicity.\n-------\n"))
+	
+  }else{
+    #Making unique names
+	meta <- cbind(meta,newID=make.unique(meta[,numcol],sep="_"),ori=c(1:nrow(meta)))
+  }
+}else{
+  #No unicity problem
+  meta <- cbind(meta,newID=meta[,numcol],ori=c(1:nrow(meta)))
+}
+
+
+# Merging tables -----------------------------------------------------
+
+#Transposing the dataMatrix if necessary
+if(metype=="sample"){
+  rownames(DM) <- DM[,1]
+  DM <- DM[,-1]
+  DM <- t(DM)
+  DM <- data.frame(sample=row.names(DM),DM,check.names=FALSE)
+  rownames(DM) <- NULL
+}
+
+comb.data <- merge(x=meta,y=DM,by.x=1,by.y=1)
+comb.data <- comb.data[order(comb.data$ori),]
+
+
+# Changing IDs -------------------------------------------------------
+
+DM <- comb.data[,-c(1:(ncol(meta)-2),ncol(meta))]
+if(makeun=="no"){
+  comb.data <- comb.data[,c(numcol,which(colnames(meta)!=coloname))]
+  meta <- comb.data[,c(1:(ncol(meta)-2))]
+}else{
+  meta <- comb.data[,c(ncol(meta)-1,1:(ncol(meta)-2))]
+}
+
+#Transposing back the dataMatrix if necessary
+if(metype=="sample"){
+  rownames(DM) <- DM[,1]
+  DM <- DM[,-1]
+  DM <- t(DM)
+  DM <- data.frame(sample=row.names(DM),DM,check.names=FALSE)
+  rownames(DM) <- NULL
+}
+
+
+# Output -------------------------------------------------------------
+
+# Writing the table
+write.table(DM,DMout,sep="\t",quote=FALSE,row.names=FALSE)
+write.table(meta,metaout,sep="\t",quote=FALSE,row.names=FALSE)
+
+
+} # End of id.choice
+
+
+# Typical function call
+# id.choice(DM.name,meta.name,metype,coloname,makeun,DMout,metaout)