comparison GO-enrich.R @ 10:d951677a50d4 draft

planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
author proteore
date Fri, 28 Jun 2019 05:08:48 -0400
parents 2f67202ffdb3
children f6107b8ae8f8
comparison
equal deleted inserted replaced
9:2f67202ffdb3 10:d951677a50d4
42 width=1000 42 width=1000
43 } 43 }
44 return (width) 44 return (width)
45 } 45 }
46 46
47 repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { 47 repartition_GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
48 ggo<-groupGO(gene=geneid, 48 ggo<-groupGO(gene=geneid,
49 OrgDb = orgdb, 49 OrgDb = orgdb,
50 ont=ontology, 50 ont=ontology,
51 level=level, 51 level=level,
52 readable=TRUE) 52 readable=TRUE)
64 return(ggo) 64 return(ggo)
65 } 65 }
66 } 66 }
67 67
68 # GO over-representation test 68 # GO over-representation test
69 enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { 69 enrich_GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
70 ego<-enrichGO(gene=geneid, 70 ego<-enrichGO(gene=geneid,
71 universe=universe, 71 universe=universe,
72 OrgDb=orgdb, 72 OrgDb=orgdb,
73 ont=ontology, 73 ont=ontology,
74 pAdjustMethod="BH", 74 pAdjustMethod="BH",
105 } else { 105 } else {
106 warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE) 106 warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE)
107 } 107 }
108 } 108 }
109 109
110 clean_ids <- function(ids){
111 ids = gsub(" ","",ids)
112 ids = ids[which(ids!="")]
113 ids = ids[which(ids!="NA")]
114 ids = ids[!is.na(ids)]
115
116 return(ids)
117 }
118
110 check_ids <- function(vector,type) { 119 check_ids <- function(vector,type) {
111 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" 120 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
112 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" 121 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
113 if (type == "entrez") 122 if (type == "entrez")
114 return(grepl(entrez_id,vector)) 123 return(grepl(entrez_id,vector))
115 else if (type == "uniprot") { 124 else if (type == "uniprot") {
116 return(grepl(uniprot_pattern,vector)) 125 return(grepl(uniprot_pattern,vector))
117 } 126 }
118 } 127 }
119 128
120 clusterProfiler = function() { 129 get_args <- function(){
121 args <- commandArgs(TRUE) 130 args <- commandArgs(TRUE)
122 if(length(args)<1) { 131 if(length(args)<1) {
123 args <- c("--help") 132 args <- c("--help")
124 } 133 }
125 134
151 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 160 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
152 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 161 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
153 args <- as.list(as.character(argsDF$V2)) 162 args <- as.list(as.character(argsDF$V2))
154 names(args) <- argsDF$V1 163 names(args) <- argsDF$V1
155 164
165 return(args)
166 }
167
168
169 main <- function() {
170
171 #get args from command
172 args <- get_args()
173
156 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") 174 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
157 #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") 175 #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
158
159 176
160 go_represent=str2bool(args$go_represent) 177 go_represent=str2bool(args$go_represent)
161 go_enrich=str2bool(args$go_enrich) 178 go_enrich=str2bool(args$go_enrich)
162 if (go_enrich){ 179 if (go_enrich){
163 plot = unlist(strsplit(args$plot,",")) 180 plot = unlist(strsplit(args$plot,","))
177 # Extract input IDs 194 # Extract input IDs
178 input_type = args$input_type 195 input_type = args$input_type
179 id_type = args$id_type 196 id_type = args$id_type
180 197
181 if (input_type == "text") { 198 if (input_type == "text") {
182 input = strsplit(args$input, "[ \t\n]+")[[1]] 199 input = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";"))
183 } else if (input_type == "file") { 200 } else if (input_type == "file") {
184 filename = args$input 201 filename = args$input
185 ncol = args$ncol 202 ncol = args$ncol
186 # Check ncol 203 # Check ncol
187 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { 204 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
191 } 208 }
192 header = str2bool(args$header) # Get file content 209 header = str2bool(args$header) # Get file content
193 file = read_file(filename, header) # Extract Protein IDs list 210 file = read_file(filename, header) # Extract Protein IDs list
194 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) 211 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
195 } 212 }
196 213 input = clean_ids(input)
197 214
198 ## Get input gene list from input IDs 215 ## Get input gene list from input IDs
199 #ID format Conversion 216 #ID format Conversion
200 #This case : from UNIPROT (protein id) to ENTREZ (gene id) 217 #This case : from UNIPROT (protein id) to ENTREZ (gene id)
201 #bitr = conversion function from clusterProfiler 218 #bitr = conversion function from clusterProfiler
220 qval_cutoff <- as.numeric(args$qval_cutoff) 237 qval_cutoff <- as.numeric(args$qval_cutoff)
221 # Extract universe background genes (same as input file) 238 # Extract universe background genes (same as input file)
222 if (!is.null(args$universe_type)) { 239 if (!is.null(args$universe_type)) {
223 universe_type = args$universe_type 240 universe_type = args$universe_type
224 if (universe_type == "text") { 241 if (universe_type == "text") {
225 universe = strsplit(args$universe, "[ \t\n]+")[[1]] 242 universe = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";"))
226 } else if (universe_type == "file") { 243 } else if (universe_type == "file") {
227 universe_filename = args$universe 244 universe_filename = args$universe
228 universe_ncol = args$uncol 245 universe_ncol = args$uncol
229 # Check ncol 246 # Check ncol
230 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) { 247 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) {
236 # Get file content 253 # Get file content
237 universe_file = read_file(universe_filename, universe_header) 254 universe_file = read_file(universe_filename, universe_header)
238 # Extract Protein IDs list 255 # Extract Protein IDs list
239 universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) 256 universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
240 } 257 }
258 universe = clean_ids(input)
241 universe_id_type = args$universe_id_type 259 universe_id_type = args$universe_id_type
242 ##to initialize 260 ##to initialize
243 if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) { 261 if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) {
244 idFrom<-"UNIPROT" 262 idFrom<-"UNIPROT"
245 idTo<-"ENTREZID" 263 idTo<-"ENTREZID"
263 } 281 }
264 282
265 ##enrichGO : GO over-representation test 283 ##enrichGO : GO over-representation test
266 for (onto in ontology) { 284 for (onto in ontology) {
267 if (go_represent) { 285 if (go_represent) {
268 ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) 286 ggo<-repartition_GO(gene, orgdb, onto, level, readable=TRUE)
269 if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA 287 if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA
270 output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="") 288 output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="")
271 write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE ) 289 write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE )
272 } 290 }
273 291
274 if (go_enrich) { 292 if (go_enrich) {
275 ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) 293 ego<-enrich_GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
276 if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA 294 if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA
277 output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="") 295 output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="")
278 write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE ) 296 write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE )
279 } 297 }
280 } 298 }
281 } 299 }
282 300
283 clusterProfiler() 301 if(!interactive()) {
302 main()
303 }