Mercurial > repos > proteore > proteore_id_converter
comparison id_converter_UniProt.R @ 14:659f1248f535 draft
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
author | proteore |
---|---|
date | Wed, 19 Sep 2018 04:45:04 -0400 |
parents | 0584344186eb |
children |
comparison
equal
deleted
inserted
replaced
13:4fcc5e5c0902 | 14:659f1248f535 |
---|---|
46 | 46 |
47 # Help section | 47 # Help section |
48 if("--help" %in% args) { | 48 if("--help" %in% args) { |
49 cat("Selection and Annotation HPA | 49 cat("Selection and Annotation HPA |
50 Arguments: | 50 Arguments: |
51 --ref_file: path to reference file (human_id_mapping_file.txt) | 51 --ref_file: path to reference file (id_mapping_file.txt) |
52 --input_type: type of input (list of id or filename) | 52 --input_type: type of input (list of id or filename) |
53 --id_type: type of input IDs | 53 --id_type: type of input IDs |
54 --input: list of IDs (text or filename) | 54 --input: list of IDs (text or filename) |
55 --column_number: the column number which contains list of input IDs | 55 --column_number: the column number which contains list of input IDs |
56 --header: true/false if your file contains a header | 56 --header: true/false if your file contains a header |
62 # Parse arguments | 62 # Parse arguments |
63 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 63 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
64 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 64 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
65 args <- as.list(as.character(argsDF$V2)) | 65 args <- as.list(as.character(argsDF$V2)) |
66 names(args) <- argsDF$V1 | 66 names(args) <- argsDF$V1 |
67 | 67 |
68 input_id_type = args$id_type # Uniprot, ENSG.... | 68 input_id_type = args$id_type # Uniprot, ENSG.... |
69 list_id_input_type = args$input_type # list or file | 69 list_id_input_type = args$input_type # list or file |
70 options = strsplit(args$target_ids, ",")[[1]] | 70 options = strsplit(args$target_ids, ",")[[1]] |
71 output = args$output | 71 output = args$output |
72 human_id_mapping_file = args$ref_file | 72 id_mapping_file = args$ref_file |
73 | 73 |
74 # Extract input IDs | 74 # Extract input IDs |
75 if (list_id_input_type == "list") { | 75 if (list_id_input_type == "list") { |
76 print(args$input) | 76 print(args$input) |
77 list_id = strsplit(args$input, "[ \t\n]+")[[1]] | 77 list_id = trimws(strsplit(args$input, ",")[[1]]) |
78 list_id = list_id[list_id != ""] #remove empty entry | |
78 # Remove isoform accession number (e.g. "-2") | 79 # Remove isoform accession number (e.g. "-2") |
79 list_id = gsub("-.+", "", list_id) | 80 list_id = gsub("-.+", "", list_id) |
80 } | 81 } else if (list_id_input_type == "file") { |
81 else if (list_id_input_type == "file") { | |
82 filename = args$input | 82 filename = args$input |
83 column_number = as.numeric(gsub("c", "" ,args$column_number)) | 83 column_number = as.numeric(gsub("c", "" ,args$column_number)) |
84 header = args$header | 84 header = args$header |
85 file_all = readfile(filename, header) | 85 file_all = readfile(filename, header) |
86 list_id = c() | 86 list_id = trimws(gsub("[$,\xc2\xa0]","",sapply(strsplit(file_all[,column_number], ";"), "[", 1))) |
87 list_id = sapply(strsplit(file_all[,column_number], ";"), "[", 1) | |
88 # Remove isoform accession number (e.g. "-2") | 87 # Remove isoform accession number (e.g. "-2") |
89 list_id = gsub("-.+", "", list_id) | 88 list_id = gsub("-.+", "", list_id) |
90 } | 89 } |
91 | 90 |
92 # Extract ID maps | 91 # Extract ID maps |
93 human_id_map = read.table(human_id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "") | 92 id_map = read.table(id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "") |
94 | 93 |
95 names = c() | 94 names = c() |
96 | 95 |
97 # Map IDs | 96 # Map IDs |
98 res = matrix(nrow=length(list_id), ncol=0) | 97 res = matrix(nrow=length(list_id), ncol=0) |
99 | 98 |
100 for (opt in options) { | 99 for (opt in options) { |
101 names = c(names, opt) | 100 names = c(names, opt) |
102 mapped = human_id_map[match(list_id, human_id_map[input_id_type][,]),][opt][,] | 101 mapped = id_map[match(list_id, id_map[input_id_type][,]),][opt][,] |
103 res = cbind(res, matrix(mapped)) | 102 res = cbind(res, matrix(mapped)) |
104 } | 103 } |
105 | 104 |
106 # Write output | 105 # Write output |
107 if (list_id_input_type == "list") { | 106 if (list_id_input_type == "list") { |
110 colnames(res) = names | 109 colnames(res) = names |
111 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) | 110 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) |
112 } | 111 } |
113 else if (list_id_input_type == "file") { | 112 else if (list_id_input_type == "file") { |
114 names(res) = options | 113 names(res) = options |
114 if (all(names(file_all) == file_all[1,1:length(names(file_all))])){ #if header of file is the same as the first line of file | |
115 names(file_all)[column_number] = input_id_type | |
116 } | |
115 names = c(names(file_all), names) | 117 names = c(names(file_all), names) |
116 output_content = cbind(file_all, res) | 118 output_content = cbind(file_all, res) |
117 colnames(output_content) = names | 119 colnames(output_content) = names |
118 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) | 120 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) |
119 } | 121 } |