comparison id_converter_UniProt.R @ 14:659f1248f535 draft

planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
author proteore
date Wed, 19 Sep 2018 04:45:04 -0400
parents 0584344186eb
children
comparison
equal deleted inserted replaced
13:4fcc5e5c0902 14:659f1248f535
46 46
47 # Help section 47 # Help section
48 if("--help" %in% args) { 48 if("--help" %in% args) {
49 cat("Selection and Annotation HPA 49 cat("Selection and Annotation HPA
50 Arguments: 50 Arguments:
51 --ref_file: path to reference file (human_id_mapping_file.txt) 51 --ref_file: path to reference file (id_mapping_file.txt)
52 --input_type: type of input (list of id or filename) 52 --input_type: type of input (list of id or filename)
53 --id_type: type of input IDs 53 --id_type: type of input IDs
54 --input: list of IDs (text or filename) 54 --input: list of IDs (text or filename)
55 --column_number: the column number which contains list of input IDs 55 --column_number: the column number which contains list of input IDs
56 --header: true/false if your file contains a header 56 --header: true/false if your file contains a header
62 # Parse arguments 62 # Parse arguments
63 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 63 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
64 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 64 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
65 args <- as.list(as.character(argsDF$V2)) 65 args <- as.list(as.character(argsDF$V2))
66 names(args) <- argsDF$V1 66 names(args) <- argsDF$V1
67 67
68 input_id_type = args$id_type # Uniprot, ENSG.... 68 input_id_type = args$id_type # Uniprot, ENSG....
69 list_id_input_type = args$input_type # list or file 69 list_id_input_type = args$input_type # list or file
70 options = strsplit(args$target_ids, ",")[[1]] 70 options = strsplit(args$target_ids, ",")[[1]]
71 output = args$output 71 output = args$output
72 human_id_mapping_file = args$ref_file 72 id_mapping_file = args$ref_file
73 73
74 # Extract input IDs 74 # Extract input IDs
75 if (list_id_input_type == "list") { 75 if (list_id_input_type == "list") {
76 print(args$input) 76 print(args$input)
77 list_id = strsplit(args$input, "[ \t\n]+")[[1]] 77 list_id = trimws(strsplit(args$input, ",")[[1]])
78 list_id = list_id[list_id != ""] #remove empty entry
78 # Remove isoform accession number (e.g. "-2") 79 # Remove isoform accession number (e.g. "-2")
79 list_id = gsub("-.+", "", list_id) 80 list_id = gsub("-.+", "", list_id)
80 } 81 } else if (list_id_input_type == "file") {
81 else if (list_id_input_type == "file") {
82 filename = args$input 82 filename = args$input
83 column_number = as.numeric(gsub("c", "" ,args$column_number)) 83 column_number = as.numeric(gsub("c", "" ,args$column_number))
84 header = args$header 84 header = args$header
85 file_all = readfile(filename, header) 85 file_all = readfile(filename, header)
86 list_id = c() 86 list_id = trimws(gsub("[$,\xc2\xa0]","",sapply(strsplit(file_all[,column_number], ";"), "[", 1)))
87 list_id = sapply(strsplit(file_all[,column_number], ";"), "[", 1)
88 # Remove isoform accession number (e.g. "-2") 87 # Remove isoform accession number (e.g. "-2")
89 list_id = gsub("-.+", "", list_id) 88 list_id = gsub("-.+", "", list_id)
90 } 89 }
91 90
92 # Extract ID maps 91 # Extract ID maps
93 human_id_map = read.table(human_id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "") 92 id_map = read.table(id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "")
94 93
95 names = c() 94 names = c()
96 95
97 # Map IDs 96 # Map IDs
98 res = matrix(nrow=length(list_id), ncol=0) 97 res = matrix(nrow=length(list_id), ncol=0)
99 98
100 for (opt in options) { 99 for (opt in options) {
101 names = c(names, opt) 100 names = c(names, opt)
102 mapped = human_id_map[match(list_id, human_id_map[input_id_type][,]),][opt][,] 101 mapped = id_map[match(list_id, id_map[input_id_type][,]),][opt][,]
103 res = cbind(res, matrix(mapped)) 102 res = cbind(res, matrix(mapped))
104 } 103 }
105 104
106 # Write output 105 # Write output
107 if (list_id_input_type == "list") { 106 if (list_id_input_type == "list") {
110 colnames(res) = names 109 colnames(res) = names
111 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) 110 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE)
112 } 111 }
113 else if (list_id_input_type == "file") { 112 else if (list_id_input_type == "file") {
114 names(res) = options 113 names(res) = options
114 if (all(names(file_all) == file_all[1,1:length(names(file_all))])){ #if header of file is the same as the first line of file
115 names(file_all)[column_number] = input_id_type
116 }
115 names = c(names(file_all), names) 117 names = c(names(file_all), names)
116 output_content = cbind(file_all, res) 118 output_content = cbind(file_all, res)
117 colnames(output_content) = names 119 colnames(output_content) = names
118 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) 120 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE)
119 } 121 }