comparison add_expression_HPA.R @ 12:dbeabf9bf091 draft

planemo upload commit 51fc514a85c1055cab5bb6e76c90f3da7e648101-dirty
author proteore
date Thu, 07 Mar 2019 09:08:33 -0500
parents 5c260bd3552e
children 133309fd6875
comparison
equal deleted inserted replaced
11:e109cacd75b2 12:dbeabf9bf091
14 return (TRUE) 14 return (TRUE)
15 }else if (any(is.element(c("f","false"),tolower(x)))){ 15 }else if (any(is.element(c("f","false"),tolower(x)))){
16 return (FALSE) 16 return (FALSE)
17 }else{ 17 }else{
18 return(NULL) 18 return(NULL)
19 }
20 }
21
22 stopQuietly <- function(...) {
23 blankMsg <- sprintf("\r%s\r", paste(rep(" ", getOption("width")-1L), collapse=" "));
24 stop(simpleError(blankMsg));
25 } # stopQuietly()
26
27 check_ensembl_geneids <- function(vector,type) {
28 ensembl_geneid_pattern = "^ENS[A-Z]+[0-9]{11}$|^[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?$|^CG[0-9]+$|^[A-Z0-9]+[.][0-9]+$|^YM[A-Z][0-9]{3}[a-z][0-9]$"
29 res = grepl(ensembl_geneid_pattern,vector)
30 if (all(!res)){
31 cat("No Ensembl geneIDs found in entered ids")
32 stopQuietly()
33 } else if (any(!res)) {
34 cat(paste(sep="",collapse = " ",c(sum(!res, na.rm=TRUE),'IDs are not ENSG IDs, please check:\n')))
35 not_geneids <- sapply(vector[which(!res)], function(x) paste(sep="",collapse = "",x,"\n"),USE.NAMES = F)
36 cat(not_geneids)
19 } 37 }
20 } 38 }
21 39
22 add_expression = function(input, atlas, options) { 40 add_expression = function(input, atlas, options) {
23 input <- unique(input[!is.na(input)]) 41 input <- unique(input[!is.na(input)])
87 colnames(res)=colnames(tab) 105 colnames(res)=colnames(tab)
88 } 106 }
89 return(res) 107 return(res)
90 } 108 }
91 109
92 main = function() { 110 get_args <- function(){
93 args <- commandArgs(TRUE) 111 args <- commandArgs(TRUE)
94 if(length(args)<1) { 112 if(length(args)<1) {
95 args <- c("--help") 113 args <- c("--help")
96 } 114 }
97 115
114 # Parse arguments 132 # Parse arguments
115 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 133 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
116 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 134 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
117 args <- as.list(as.character(argsDF$V2)) 135 args <- as.list(as.character(argsDF$V2))
118 names(args) <- argsDF$V1 136 names(args) <- argsDF$V1
137
138 return(args)
139 }
140
141 is_col_in_file <- function(file,ncol) {
142 is_in_file = (ncol <= ncol(file) && ncol > 0)
143 if (!is_in_file){
144 cat(paste(sep = "", collapse = " ", c("Column",ncol,"not found in file") ))
145 stopQuietly()
146 }
147 }
148
149 main = function() {
150
151 args = get_args()
119 152
120 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") 153 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda")
121 #load("/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") 154 #load("/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda")
122 155
123 inputtype = args$inputtype 156 inputtype = args$inputtype
124 if (inputtype == "copypaste") { 157 if (inputtype == "copypaste") {
125 input = strsplit(args$input, "[ \t\n]+")[[1]] 158 ids = strsplit(args$input, "[ \t\n]+")[[1]]
126 } else if (inputtype == "tabfile") { 159 } else if (inputtype == "tabfile") {
127 filename = args$input 160 filename = args$input
128 ncol = args$column 161 ncol = args$column
129 # Check ncol 162 # Check ncol
130 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { 163 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
132 } else { 165 } else {
133 ncol = as.numeric(gsub("c", "", ncol)) 166 ncol = as.numeric(gsub("c", "", ncol))
134 } 167 }
135 header = str2bool(args$header) 168 header = str2bool(args$header)
136 file = read_file(filename, header) 169 file = read_file(filename, header)
170 is_col_in_file(file,ncol)
137 file = one_id_one_line(file,ncol) 171 file = one_id_one_line(file,ncol)
138 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) 172 ids = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
139 input = input[which(!is.na(input))] 173 ids = ids[which(!is.na(ids))]
140 } 174 }
175 check_ensembl_geneids(ids)
141 176
142 # Read protein atlas 177 # Read protein atlas
143 protein_atlas = args$atlas 178 protein_atlas = args$atlas
144 protein_atlas = read_file(protein_atlas, T) 179 protein_atlas = read_file(protein_atlas, T)
145 180
146 # Add expression 181 # Add expression
147 output = args$output 182 output = args$output
148 options = strsplit(args$select, ",")[[1]] 183 options = strsplit(args$select, ",")[[1]]
149 res = add_expression(input, protein_atlas, options) 184 res = add_expression(ids, protein_atlas, options)
150 185
151 # Write output 186 # Write output
152 if (is.null(res)) { 187 if (is.null(res)) {
153 write.table("None of the input ENSG ids are can be found in HPA data file",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) 188 write.table("None of the ENSG ids entered can be found in HPA data file",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE)
154 } else { 189 } else {
155 if (inputtype == "copypaste") { 190 if (inputtype == "copypaste") {
156 input <- data.frame(input) 191 ids <- data.frame(ids)
157 output_content = merge(input,res,by.x=1,by.y="row.names",incomparables = NA, all.x=T) 192 output_content = merge(ids,res,by.x=1,by.y="row.names",incomparables = NA, all.x=T)
158 colnames(output_content)[1] = "Ensembl" 193 colnames(output_content)[1] = "Ensembl"
159 } else if (inputtype == "tabfile") { 194 } else if (inputtype == "tabfile") {
160 output_content = merge(file, res, by.x=ncol, by.y="row.names", incomparables = NA, all.x=T) 195 output_content = merge(file, res, by.x=ncol, by.y="row.names", incomparables = NA, all.x=T)
161 output_content = order_columns(output_content,ncol) 196 output_content = order_columns(output_content,ncol)
162 } 197 }