Mercurial > repos > proteore > proteore_id_converter
comparison id_converter_UniProt.R @ 5:0584344186eb draft
planemo upload commit 6e27a0eaf8172154128960497271e8f54341acd7-dirty
author | proteore |
---|---|
date | Tue, 20 Mar 2018 10:51:41 -0400 |
parents | 134949593a3b |
children | 659f1248f535 |
comparison
legend: equal, deleted, inserted, replaced
4:134949593a3b | 5:0584344186eb |
---|---|
37 # Ensembl_ENSG: Ensembl gene identifiers (e.g. ENSG00000166913) | 37 # Ensembl_ENSG: Ensembl gene identifiers (e.g. ENSG00000166913) |
38 # Ensembl_ENST: Ensembl transcript identifiers (e.g. ENST00000353703; ENST00000372839) | 38 # Ensembl_ENST: Ensembl transcript identifiers (e.g. ENST00000353703; ENST00000372839) |
39 # Ensembl_ENSP: Ensembl protein identifiers (e.g. ENSP00000300161; ENSP00000361930) | 39 # Ensembl_ENSP: Ensembl protein identifiers (e.g. ENSP00000300161; ENSP00000361930) |
40 | 40 |
41 mapping = function() { | 41 mapping = function() { |
42 # Extract arguments | 42 args <- commandArgs(TRUE) |
43 args = commandArgs(trailingOnly = TRUE) | 43 if(length(args)<1) { |
44 #print(args) | 44 args <- c("--help") |
45 if (length(args) != 6) { | |
46 stop("Not enough/Too many arguments", call. = FALSE) | |
47 } | 45 } |
48 else { | 46 |
49 input_id_type = args[1] | 47 # Help section |
50 list_id = args[2] | 48 if("--help" %in% args) { |
51 list_id_input_type = args[3] | 49 cat("Selection and Annotation HPA |
52 options = strsplit(args[4], ",")[[1]] | 50 Arguments: |
53 output = args[5] | 51 --ref_file: path to reference file (human_id_mapping_file.txt) |
54 human_id_mapping_file = args[6] | 52 --input_type: type of input (list of id or filename) |
53 --id_type: type of input IDs | |
54 --input: list of IDs (text or filename) | |
55 --column_number: the column number which contains list of input IDs | |
56 --header: true/false if your file contains a header | |
57 --target_ids: target IDs to map to | |
58 --output: output filename \n") | |
59 q(save="no") | |
60 } | |
61 | |
62 # Parse arguments | |
63 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
64 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
65 args <- as.list(as.character(argsDF$V2)) | |
66 names(args) <- argsDF$V1 | |
67 | |
68 input_id_type = args$id_type # Uniprot, ENSG.... | |
69 list_id_input_type = args$input_type # list or file | |
70 options = strsplit(args$target_ids, ",")[[1]] | |
71 output = args$output | |
72 human_id_mapping_file = args$ref_file | |
55 | 73 |
56 # Extract ID maps | 74 # Extract input IDs |
57 human_id_map = read.table(human_id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "") | 75 if (list_id_input_type == "list") { |
76 print(args$input) | |
77 list_id = strsplit(args$input, "[ \t\n]+")[[1]] | |
78 # Remove isoform accession number (e.g. "-2") | |
79 list_id = gsub("-.+", "", list_id) | |
80 } | |
81 else if (list_id_input_type == "file") { | |
82 filename = args$input | |
83 column_number = as.numeric(gsub("c", "" ,args$column_number)) | |
84 header = args$header | |
85 file_all = readfile(filename, header) | |
86 list_id = c() | |
87 list_id = sapply(strsplit(file_all[,column_number], ";"), "[", 1) | |
88 # Remove isoform accession number (e.g. "-2") | |
89 list_id = gsub("-.+", "", list_id) | |
90 } | |
91 | |
92 # Extract ID maps | |
93 human_id_map = read.table(human_id_mapping_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings = "", quote = "") | |
94 | |
95 names = c() | |
58 | 96 |
59 # Extract input IDs | 97 # Map IDs |
60 if (list_id_input_type == "list") { | 98 res = matrix(nrow=length(list_id), ncol=0) |
61 list_id = strsplit(args[2], " ")[[1]] | |
62 # Remove isoform accession number (e.g. "-2") | |
63 list_id = gsub("-.+", "", list_id) | |
64 } | |
65 else if (list_id_input_type == "file") { | |
66 filename = as.character(strsplit(list_id, ",")[[1]][1]) | |
67 column_number = as.numeric(gsub("c", "" ,strsplit(list_id, ",")[[1]][2])) | |
68 header = strsplit(list_id, ",")[[1]][3] | |
69 file_all = readfile(filename, header) | |
70 print(class(file_all)) | |
71 str(file_all) | |
72 print(class(file_all[,1])) | |
73 list_id = c() | |
74 list_id = sapply(strsplit(file_all[,column_number], ";"), "[", 1) | |
75 # Remove isoform accession number (e.g. "-2") | |
76 list_id = gsub("-.+", "", list_id) | |
77 } | |
78 names = c() | |
79 | |
80 # Map IDs | |
81 res = matrix(nrow=length(list_id), ncol=0) | |
82 | 99 |
83 for (opt in options) { | 100 for (opt in options) { |
84 names = c(names, opt) | 101 names = c(names, opt) |
85 mapped = human_id_map[match(list_id, human_id_map[input_id_type][,]),][opt][,] | 102 mapped = human_id_map[match(list_id, human_id_map[input_id_type][,]),][opt][,] |
86 res = cbind(res, matrix(mapped)) | 103 res = cbind(res, matrix(mapped)) |
87 } | 104 } |
88 | 105 |
89 # Write output | 106 # Write output |
90 if (list_id_input_type == "list") { | 107 if (list_id_input_type == "list") { |
91 res = cbind(as.matrix(list_id), res) | 108 res = cbind(as.matrix(list_id), res) |
92 names = c(input_id_type, names) | 109 names = c(input_id_type, names) |
93 colnames(res) = names | 110 colnames(res) = names |
94 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) | 111 write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) |
95 } | 112 } |
96 else if (list_id_input_type == "file") { | 113 else if (list_id_input_type == "file") { |
97 names(res) = options | 114 names(res) = options |
98 names = c(names(file_all), names) | 115 names = c(names(file_all), names) |
99 output_content = cbind(file_all, res) | 116 output_content = cbind(file_all, res) |
100 colnames(output_content) = names | 117 colnames(output_content) = names |
101 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) | 118 write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) |
102 } | |
103 } | 119 } |
104 } | 120 } |
105 | 121 |
106 mapping() | 122 mapping() |
107 | 123 |