comparison enrichment_v3.R @ 9:70c0c8757f5f draft

planemo upload commit 9d3e0b226140b566fc529fd0ffe7aa9e8388c6e5-dirty
author proteore
date Fri, 21 Sep 2018 05:32:38 -0400
parents ddaa0c318d65
children
comparison
equal deleted inserted replaced
8:ddaa0c318d65 9:70c0c8757f5f
28 # OUTPUT : 28 # OUTPUT :
29 # - outputs commanded by the user named respectively result.tsv for the text 29 # - outputs commanded by the user named respectively result.tsv for the text
30 # results file, barplot.png for the barplot image file and dotplot.png for the 30 # results file, barplot.png for the barplot image file and dotplot.png for the
31 # dotplot image file 31 # dotplot image file
32 32
33 options(warn=-1) #TURN OFF WARNINGS !!!!!!
33 34
34 # loading topGO library 35 # loading topGO library
35 library(topGO) 36 suppressMessages(library(topGO))
36 37
37 # Read file and return file content as data.frame 38 # Read file and return file content as data.frame
38 readfile = function(filename, header) { 39 readfile = function(filename, header) {
39 if (header == "true") { 40 if (header == "true") {
40 # Read only first line of the file as header: 41 # Read only first line of the file as header:
52 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] 53 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
53 } 54 }
54 return(file) 55 return(file)
55 } 56 }
56 57
58 check_ens_ids <- function(vector) {
59 ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$"
60 return(grepl(ens_pattern,vector))
61 }
62
57 '%!in%' <- function(x,y)!('%in%'(x,y)) 63 '%!in%' <- function(x,y)!('%in%'(x,y))
58 64
59 65
60 # Parse command line arguments 66 # Parse command line arguments
61 67
80 86
81 87
82 if (length(options.args) != 12) { 88 if (length(options.args) != 12) {
83 stop("Not enough/Too many arguments", call. = FALSE) 89 stop("Not enough/Too many arguments", call. = FALSE)
84 } 90 }
91
92 #save(options.args,file="/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.Rda")
93 #load("/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.Rda")
94
85 95
86 typeinput = options.args[1] 96 typeinput = options.args[1]
87 listfile = options.args[2] 97 listfile = options.args[2]
88 onto = as.character(options.args[3]) 98 onto = as.character(options.args[3])
89 option = as.character(options.args[4]) 99 option = as.character(options.args[4])
106 sample = readfile(listfile, "true") 116 sample = readfile(listfile, "true")
107 }else{ 117 }else{
108 sample = readfile(listfile, "false") 118 sample = readfile(listfile, "false")
109 } 119 }
110 sample = sample[,column] 120 sample = sample[,column]
111 121 }
112 } 122
123 #check of ENS ids
124 if (! any(check_ens_ids(sample))){
125 print("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file")
126 stop()
127 }
128
113 # Launch enrichment analysis and return result data from the analysis or the null 129 # Launch enrichment analysis and return result data from the analysis or the null
114 # object if the enrichment could not be done. 130 # object if the enrichment could not be done.
115 goEnrichment = function(geneuniverse,sample,onto){ 131 goEnrichment = function(geneuniverse,sample,onto){
116 132
117 # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their 133 # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their
237 geneRatio = data$Significant/data$Annotated 253 geneRatio = data$Significant/data$Annotated
238 goTerms = data$Term 254 goTerms = data$Term
239 count = data$Significant 255 count = data$Significant
240 256
241 labely = paste("GO terms",onto,sep=" ") 257 labely = paste("GO terms",onto,sep=" ")
242 png(filename="dotplot.png",res=300, width = 3200, height = 3200, units = "px") 258 ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" )
243 sp1 = ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point() + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n") 259 ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
244
245 plot(sp1)
246 dev.off()
247 } 260 }
248 261
249 createBarPlot = function(data, onto){ 262 createBarPlot = function(data, onto){
250 263
251 264
253 values = roundValues(values) 266 values = roundValues(values)
254 267
255 values = as.numeric(values) 268 values = as.numeric(values)
256 goTerms = data$Term 269 goTerms = data$Term
257 count = data$Significant 270 count = data$Significant
258 png(filename="barplot.png",res=300, width = 3200, height = 3200, units = "px")
259 271
260 labely = paste("GO terms",onto,sep=" ") 272 labely = paste("GO terms",onto,sep=" ")
261 p<-ggplot(data, aes(x=goTerms, y=count,fill=values)) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") 273 ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n")
262 plot(p) 274 ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
263 dev.off()
264 } 275 }
265 276
266 277
267 # Produce the different outputs 278 # Produce the different outputs
268 createOutputs = function(result, cut_result,text, barplot, dotplot, onto){ 279 createOutputs = function(result, cut_result,text, barplot, dotplot, onto){
340 351
341 if (dotplot=="TRUE"){ 352 if (dotplot=="TRUE"){
342 353
343 createDotPlot(cut_result, onto) 354 createDotPlot(cut_result, onto)
344 } 355 }
345 return(TRUE)
346 } 356 }
347 357
348 358
349 359
350 # Load R library ggplot2 to plot graphs 360 # Load R library ggplot2 to plot graphs
351 library(ggplot2) 361 suppressMessages(library(ggplot2))
352 362
353 # Launch enrichment analysis 363 # Launch enrichment analysis
354 allresult = goEnrichment(geneuniverse,sample,onto) 364 allresult = suppressMessages(goEnrichment(geneuniverse,sample,onto))
355 result = allresult[1][[1]] 365 result = allresult[1][[1]]
356 myGOdata = allresult[2][[1]] 366 myGOdata = allresult[2][[1]]
357 if (!is.null(result)){ 367 if (!is.null(result)){
358 368
359 # Adjust the result with a multiple testing correction or not and with the user 369 # Adjust the result with a multiple testing correction or not and with the user