Mercurial > repos > proteore > proteore_id_converter

diff id_converter_UniProt.R @ 14:659f1248f535 draft
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
author: proteore
date: Wed, 19 Sep 2018 04:45:04 -0400
parents: 0584344186eb
--- a/id_converter_UniProt.R	Mon Aug 27 06:12:21 2018 -0400
+++ b/id_converter_UniProt.R	Wed Sep 19 04:45:04 2018 -0400
@@ -48,7 +48,7 @@
   if("--help" %in% args) {
     cat("Selection and Annotation HPA
     Arguments:
-        --ref_file: path to reference file (human_id_mapping_file.txt)
+        --ref_file: path to reference file (id_mapping_file.txt)
         --input_type: type of input (list of id or filename)
         --id_type: type of input IDs
         --input: list of IDs (text or filename)
@@ -64,33 +64,32 @@
   argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1
-
+  
   input_id_type = args$id_type # Uniprot, ENSG....
   list_id_input_type = args$input_type # list or file
   options = strsplit(args$target_ids, ",")[[1]]
   output = args$output
-  human_id_mapping_file = args$ref_file
+  id_mapping_file = args$ref_file
     
   # Extract input IDs
   if (list_id_input_type == "list") {
     print(args$input)
-    list_id = strsplit(args$input, "[ \t\n]+")[[1]]
+    list_id = trimws(strsplit(args$input, ",")[[1]])
+    list_id = list_id[list_id != ""]    #remove empty entry
     # Remove isoform accession number (e.g. "-2")
     list_id = gsub("-.+", "", list_id)
-  }
-  else if (list_id_input_type == "file") {
+  } else if (list_id_input_type == "file") {
     filename = args$input
     column_number = as.numeric(gsub("c", "" ,args$column_number))
     header = args$header
     file_all = readfile(filename, header)
-    list_id = c()
-    list_id = sapply(strsplit(file_all[,column_number], ";"), "[", 1)
+    list_id = trimws(gsub("[$,\xc2\xa0]","",sapply(strsplit(file_all[,column_number], ";"), "[", 1)))
     # Remove isoform accession number (e.g. "-2")
     list_id = gsub("-.+", "", list_id)
   }
 
   # Extract ID maps
-  human_id_map = read.table(human_id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "")
+  id_map = read.table(id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "")
   
   names = c()
     
@@ -99,7 +98,7 @@
 
   for (opt in options) {
     names = c(names, opt)
-    mapped = human_id_map[match(list_id, human_id_map[input_id_type][,]),][opt][,]
+    mapped = id_map[match(list_id, id_map[input_id_type][,]),][opt][,]
     res = cbind(res, matrix(mapped))
   }
      
@@ -112,6 +111,9 @@
   }
   else if (list_id_input_type == "file") {
     names(res) = options
+    if (all(names(file_all) == file_all[1,1:length(names(file_all))])){ #if header of file is the same as the first line of file
+      names(file_all)[column_number] = input_id_type
+    }
     names = c(names(file_all), names)
     output_content = cbind(file_all, res)
     colnames(output_content) = names
author	proteore
date	Wed, 19 Sep 2018 04:45:04 -0400
parents	0584344186eb
children