proteore_id_converter: id_converter

comparison id_converter_UniProt.R @ 14:659f1248f535 draft

planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty

author	proteore
date	Wed, 19 Sep 2018 04:45:04 -0400
parents	0584344186eb
children

comparison

Legend: equal | deleted | inserted | replaced

-:4fcc5e5c0902
+:659f1248f535
 # Help section
 if("--help" %in% args) {
 cat("Selection and Annotation HPA
 Arguments:
---ref_file: path to reference file (human_id_mapping_file.txt)
+--ref_file: path to reference file (id_mapping_file.txt)
 --input_type: type of input (list of id or filename)
 --id_type: type of input IDs
 --input: list of IDs (text or filename)
 --column_number: the column number which contains list of input IDs
 --header: true/false if your file contains a header
 # Parse arguments
 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
 args <- as.list(as.character(argsDF$V2))
 names(args) <- argsDF$V1
 input_id_type = args$id_type # Uniprot, ENSG....
 list_id_input_type = args$input_type # list or file
 options = strsplit(args$target_ids, ",")[[1]]
 output = args$output
-human_id_mapping_file = args$ref_file
+id_mapping_file = args$ref_file
 # Extract input IDs
 if (list_id_input_type == "list") {
 print(args$input)
-list_id = strsplit(args$input, "[ \t\n]+")[[1]]
+list_id = trimws(strsplit(args$input, ",")[[1]])
+list_id = list_id[list_id != ""]    #remove empty entry
 # Remove isoform accession number (e.g. "-2")
 list_id = gsub("-.+", "", list_id)
-}
+} else if (list_id_input_type == "file") {
-else if (list_id_input_type == "file") {
 filename = args$input
 column_number = as.numeric(gsub("c", "" ,args$column_number))
 header = args$header
 file_all = readfile(filename, header)
-list_id = c()
+list_id = trimws(gsub("[$,\xc2\xa0]","",sapply(strsplit(file_all[,column_number], ";"), "[", 1)))
-list_id = sapply(strsplit(file_all[,column_number], ";"), "[", 1)
 # Remove isoform accession number (e.g. "-2")
 list_id = gsub("-.+", "", list_id)
 }
 # Extract ID maps
-human_id_map = read.table(human_id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "")
+id_map = read.table(id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "")
 names = c()
 # Map IDs
 res = matrix(nrow=length(list_id), ncol=0)
 for (opt in options) {
 names = c(names, opt)
-mapped = human_id_map[match(list_id, human_id_map[input_id_type][,]),][opt][,]
+mapped = id_map[match(list_id, id_map[input_id_type][,]),][opt][,]
 res = cbind(res, matrix(mapped))
 }
 # Write output
 if (list_id_input_type == "list") {
 colnames(res) = names
 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE)
 }
 else if (list_id_input_type == "file") {
 names(res) = options
+if (all(names(file_all) == file_all[1,1:length(names(file_all))])){ #if header of file is the same as the first line of file
+names(file_all)[column_number] = input_id_type
+}
 names = c(names(file_all), names)
 output_content = cbind(file_all, res)
 colnames(output_content) = names
 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE)
 }

Mercurial > repos > proteore > proteore_id_converter

comparison id_converter_UniProt.R @ 14:659f1248f535 draft