Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
comparison add_expression_HPA.R @ 12:dbeabf9bf091 draft
planemo upload commit 51fc514a85c1055cab5bb6e76c90f3da7e648101-dirty
author | proteore |
---|---|
date | Thu, 07 Mar 2019 09:08:33 -0500 |
parents | 5c260bd3552e |
children | 133309fd6875 |
comparison
equal
deleted
inserted
replaced
11:e109cacd75b2 | 12:dbeabf9bf091 |
---|---|
14 return (TRUE) | 14 return (TRUE) |
15 }else if (any(is.element(c("f","false"),tolower(x)))){ | 15 }else if (any(is.element(c("f","false"),tolower(x)))){ |
16 return (FALSE) | 16 return (FALSE) |
17 }else{ | 17 }else{ |
18 return(NULL) | 18 return(NULL) |
19 } | |
20 } | |
21 | |
22 stopQuietly <- function(...) { | |
23 blankMsg <- sprintf("\r%s\r", paste(rep(" ", getOption("width")-1L), collapse=" ")); | |
24 stop(simpleError(blankMsg)); | |
25 } # stopQuietly() | |
26 | |
27 check_ensembl_geneids <- function(vector,type) { | |
28 ensembl_geneid_pattern = "^ENS[A-Z]+[0-9]{11}$|^[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?$|^CG[0-9]+$|^[A-Z0-9]+[.][0-9]+$|^YM[A-Z][0-9]{3}[a-z][0-9]$" | |
29 res = grepl(ensembl_geneid_pattern,vector) | |
30 if (all(!res)){ | |
31 cat("No Ensembl geneIDs found in entered ids") | |
32 stopQuietly() | |
33 } else if (any(!res)) { | |
34 cat(paste(sep="",collapse = " ",c(sum(!res, na.rm=TRUE),'IDs are not ENSG IDs, please check:\n'))) | |
35 not_geneids <- sapply(vector[which(!res)], function(x) paste(sep="",collapse = "",x,"\n"),USE.NAMES = F) | |
36 cat(not_geneids) | |
19 } | 37 } |
20 } | 38 } |
21 | 39 |
22 add_expression = function(input, atlas, options) { | 40 add_expression = function(input, atlas, options) { |
23 input <- unique(input[!is.na(input)]) | 41 input <- unique(input[!is.na(input)]) |
87 colnames(res)=colnames(tab) | 105 colnames(res)=colnames(tab) |
88 } | 106 } |
89 return(res) | 107 return(res) |
90 } | 108 } |
91 | 109 |
92 main = function() { | 110 get_args <- function(){ |
93 args <- commandArgs(TRUE) | 111 args <- commandArgs(TRUE) |
94 if(length(args)<1) { | 112 if(length(args)<1) { |
95 args <- c("--help") | 113 args <- c("--help") |
96 } | 114 } |
97 | 115 |
114 # Parse arguments | 132 # Parse arguments |
115 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 133 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
116 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 134 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
117 args <- as.list(as.character(argsDF$V2)) | 135 args <- as.list(as.character(argsDF$V2)) |
118 names(args) <- argsDF$V1 | 136 names(args) <- argsDF$V1 |
137 | |
138 return(args) | |
139 } | |
140 | |
141 is_col_in_file <- function(file,ncol) { | |
142 is_in_file = (ncol <= ncol(file) && ncol > 0) | |
143 if (!is_in_file){ | |
144 cat(paste(sep = "", collapse = " ", c("Column",ncol,"not found in file") )) | |
145 stopQuietly() | |
146 } | |
147 } | |
148 | |
149 main = function() { | |
150 | |
151 args = get_args() | |
119 | 152 |
120 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") | 153 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") |
121 #load("/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") | 154 #load("/home/dchristiany/proteore_project/ProteoRE/tools/add_expression_data_HPA/args.rda") |
122 | 155 |
123 inputtype = args$inputtype | 156 inputtype = args$inputtype |
124 if (inputtype == "copypaste") { | 157 if (inputtype == "copypaste") { |
125 input = strsplit(args$input, "[ \t\n]+")[[1]] | 158 ids = strsplit(args$input, "[ \t\n]+")[[1]] |
126 } else if (inputtype == "tabfile") { | 159 } else if (inputtype == "tabfile") { |
127 filename = args$input | 160 filename = args$input |
128 ncol = args$column | 161 ncol = args$column |
129 # Check ncol | 162 # Check ncol |
130 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 163 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
132 } else { | 165 } else { |
133 ncol = as.numeric(gsub("c", "", ncol)) | 166 ncol = as.numeric(gsub("c", "", ncol)) |
134 } | 167 } |
135 header = str2bool(args$header) | 168 header = str2bool(args$header) |
136 file = read_file(filename, header) | 169 file = read_file(filename, header) |
170 is_col_in_file(file,ncol) | |
137 file = one_id_one_line(file,ncol) | 171 file = one_id_one_line(file,ncol) |
138 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) | 172 ids = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) |
139 input = input[which(!is.na(input))] | 173 ids = ids[which(!is.na(ids))] |
140 } | 174 } |
175 check_ensembl_geneids(ids) | |
141 | 176 |
142 # Read protein atlas | 177 # Read protein atlas |
143 protein_atlas = args$atlas | 178 protein_atlas = args$atlas |
144 protein_atlas = read_file(protein_atlas, T) | 179 protein_atlas = read_file(protein_atlas, T) |
145 | 180 |
146 # Add expression | 181 # Add expression |
147 output = args$output | 182 output = args$output |
148 options = strsplit(args$select, ",")[[1]] | 183 options = strsplit(args$select, ",")[[1]] |
149 res = add_expression(input, protein_atlas, options) | 184 res = add_expression(ids, protein_atlas, options) |
150 | 185 |
151 # Write output | 186 # Write output |
152 if (is.null(res)) { | 187 if (is.null(res)) { |
153 write.table("None of the input ENSG ids are can be found in HPA data file",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) | 188 write.table("None of the ENSG ids entered can be found in HPA data file",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) |
154 } else { | 189 } else { |
155 if (inputtype == "copypaste") { | 190 if (inputtype == "copypaste") { |
156 input <- data.frame(input) | 191 ids <- data.frame(ids) |
157 output_content = merge(input,res,by.x=1,by.y="row.names",incomparables = NA, all.x=T) | 192 output_content = merge(ids,res,by.x=1,by.y="row.names",incomparables = NA, all.x=T) |
158 colnames(output_content)[1] = "Ensembl" | 193 colnames(output_content)[1] = "Ensembl" |
159 } else if (inputtype == "tabfile") { | 194 } else if (inputtype == "tabfile") { |
160 output_content = merge(file, res, by.x=ncol, by.y="row.names", incomparables = NA, all.x=T) | 195 output_content = merge(file, res, by.x=ncol, by.y="row.names", incomparables = NA, all.x=T) |
161 output_content = order_columns(output_content,ncol) | 196 output_content = order_columns(output_content,ncol) |
162 } | 197 } |