annotate topGO_enrichment.R @ 10:e3430084c996 draft

planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
author proteore
date Tue, 18 Dec 2018 10:06:00 -0500
parents
children fa2e27165d5d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
1 options(warn=-1) #TURN OFF WARNINGS !!!!!!
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
2
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
3 suppressMessages(library(ggplot2))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
4 suppressMessages(library(topGO))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
5
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
6 get_args <- function(){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
7
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
8 ## Collect arguments
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
9 args <- commandArgs(TRUE)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
10
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
11 ## Default setting when no arguments passed
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
12 if(length(args) < 1) {
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
13 args <- c("--help")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
14 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
15
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
16 ## Help section
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
17 if("--help" %in% args) {
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
18 cat("Pathview R script
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
19 Arguments:
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
20 --help Print this test
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
21 --input_type
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
22 --onto
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
23 --option
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
24 --correction
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
25 --threshold
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
26 --text
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
27 --plot
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
28 --column
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
29 --geneuniverse
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
30 --header
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
31
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
32 Example:
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
33 Rscript --vanilla enrichment_v3.R --inputtype=tabfile (or copypaste) --input=file.txt --ontology='BP/CC/MF' --option=option
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
34 (e.g : classic/elim...) --threshold=threshold --correction=correction --textoutput=text --barplotoutput=barplot --dotplotoutput=dotplot
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
35 --column=column --geneuniver=human \n\n")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
36
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
37 q(save="no")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
38 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
39
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
40 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
41 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
42 args <- as.list(as.character(argsDF$V2))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
43 names(args) <- argsDF$V1
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
44
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
45 return(args)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
46 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
47
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
48 read_file <- function(path,header){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
49 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
50 if (inherits(file,"try-error")){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
51 stop("File not found !")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
52 }else{
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
53 return(file)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
54 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
55 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
56
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
57 get_list_from_cp <-function(list){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
58 list = gsub(";"," ",list)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
59 list = strsplit(list, "[ \t\n]+")[[1]]
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
60 list = list[list != ""] #remove empty entry
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
61 list = gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
62 return(list)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
63 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
64
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
65 check_ens_ids <- function(vector) {
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
66 ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$"
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
67 return(grepl(ens_pattern,vector))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
68 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
69
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
70 str2bool <- function(x){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
71 if (any(is.element(c("t","true"),tolower(x)))){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
72 return (TRUE)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
73 }else if (any(is.element(c("f","false"),tolower(x)))){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
74 return (FALSE)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
75 }else{
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
76 return(NULL)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
77 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
78 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
79
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
80 # Some libraries such as GOsummaries won't be able to treat the values such as
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
81 # "< 1e-30" produced by topGO. As such it is important to delete the < char
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
82 # with the deleteInfChar function. Nevertheless the user will have access to the original results in the text output.
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
83 deleteInfChar = function(values){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
84
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
85 lines = grep("<",values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
86 if (length(lines)!=0){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
87 for (line in lines){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
88 values[line]=gsub("<","",values[line])
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
89 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
90 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
91 return(values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
92 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
93
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
94 corrMultipleTesting = function(result, myGOdata,correction,threshold){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
95
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
96 # adjust for multiple testing
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
97 if (correction!="none"){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
98 # GenTable : transforms the result object into a list. Filters can be applied
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
99 # (e.g : with the topNodes argument, to get for instance only the n first
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
100 # GO terms with the lowest pvalues), but as we want to apply a correction we
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
101 # take all the GO terms, no matter their pvalues
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
102 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=length(attributes(result)$score))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
103 # Some pvalues given by topGO are not numeric (e.g : "<1e-30). As such, these
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
104 # values are converted to 1e-30 to be able to correct the pvalues
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
105 pvaluestmp = deleteInfChar(allRes$test)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
106
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
107 # the correction is done from the modified pvalues
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
108 allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), n = length(pvaluestmp))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
109 allRes = as.data.frame(allRes)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
110
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
111 # Rename the test column by pvalues, so that is more explicit
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
112 nb = which(names(allRes) %in% c("test"))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
113
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
114 names(allRes)[nb] = "pvalues"
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
115
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
116 allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold),]
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
117 if (length(allRes$pvalues)==0){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
118 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
119 return(NULL)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
120 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
121 allRes = allRes[order(allRes$qvalues),]
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
122 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
123
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
124 if (correction=="none"){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
125 # get all the go terms under user threshold
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
126 mysummary <- summary(attributes(result)$score <= threshold)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
127 numsignif <- as.integer(mysummary[[3]])
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
128 # get all significant nodes
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
129 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=numsignif)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
130
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
131
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
132 allRes = as.data.frame(allRes)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
133 # Rename the test column by pvalues, so that is more explicit
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
134 nb = which(names(allRes) %in% c("test"))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
135 names(allRes)[nb] = "pvalues"
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
136 if (numsignif==0){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
137
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
138 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
139 return(NULL)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
140 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
141
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
142 allRes = allRes[order(allRes$pvalues),]
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
143 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
144
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
145 return(allRes)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
146 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
147
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
148 # roundValues will simplify the results by rounding down the values. For instance 1.1e-17 becomes 1e-17
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
149 roundValues = function(values){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
150 for (line in 1:length(values)){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
151 values[line]=as.numeric(gsub(".*e","1e",as.character(values[line])))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
152 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
153 return(values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
154 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
155
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
156 createDotPlot = function(data, onto){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
157
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
158 values = deleteInfChar(data$pvalues)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
159 values = roundValues(values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
160 values = as.numeric(values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
161
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
162 geneRatio = data$Significant/data$Annotated
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
163 goTerms = data$Term
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
164 count = data$Significant
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
165
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
166 labely = paste("GO terms",onto,sep=" ")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
167 ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" )
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
168 ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
169 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
170
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
171 createBarPlot = function(data, onto){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
172
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
173
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
174 values = deleteInfChar(data$pvalues)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
175 values = roundValues(values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
176 values = as.numeric(values)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
177
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
178 goTerms = data$Term
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
179 count = data$Significant
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
180
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
181 labely = paste("GO terms",onto,sep=" ")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
182 ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
183 ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
184 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
185
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
186 # Produce the different outputs
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
187 createOutputs = function(result, cut_result,text, barplot, dotplot, onto){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
188
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
189 if (is.null(result)){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
190 if (text){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
191 err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)."
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
192 write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = T, row.names = F)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
193 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
194 if (barplot){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
195 png(filename="barplot.png")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
196 plot.new()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
197 #text(0,0,err_msg)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
198 dev.off()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
199 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
200 if (dotplot){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
201 png(filename="dotplot.png")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
202 plot.new()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
203 #text(0,0,err_msg)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
204 dev.off()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
205 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
206 opt <- options(show.error.messages=FALSE)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
207 on.exit(options(opt))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
208 stop("null result")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
209 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
210
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
211 if (is.null(cut_result)){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
212 if (text){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
213 err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value."
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
214 write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = T, row.names = F)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
215 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
216 if (barplot){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
217 png(filename="barplot.png")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
218 plot.new()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
219 text(0,0,err_msg)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
220 dev.off()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
221 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
222 if (dotplot){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
223 png(filename="dotplot.png")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
224 plot.new()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
225 text(0,0,err_msg)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
226 dev.off()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
227 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
228 opt <- options(show.error.messages=FALSE)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
229 on.exit(options(opt))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
230 stop("null cut_result")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
231 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
232
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
233 if (text){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
234 write.table(cut_result, file='result', quote=FALSE, sep='\t', col.names = T, row.names = F)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
235 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
236
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
237 if (barplot){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
238 createBarPlot(cut_result, onto)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
239 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
240
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
241 if (dotplot){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
242 createDotPlot(cut_result, onto)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
243 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
244 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
245
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
246 # Launch enrichment analysis and return result data from the analysis or the null
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
247 # object if the enrichment could not be done.
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
248 goEnrichment = function(geneuniverse,sample,background_sample,onto){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
249
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
250 if (is.null(background_sample)){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
251 xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl") # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their associated ensembl ids according to the org package
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
252 allGenes = unique(unlist(xx)) # check if the genes given by the user can be found in the org package (gene universe), that is in allGenes
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
253 } else {
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
254 allGenes = background_sample
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
255 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
256
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
257 if (length(intersect(sample,allGenes))==0){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
258 print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
259 return(c(NULL,NULL))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
260 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
261
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
262 geneList = factor(as.integer(allGenes %in% sample))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
263 if (length(levels(geneList)) == 1 ){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
264 stop("All or none of the background genes are found in tested genes dataset, enrichment analysis can't be done")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
265 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
266 names(geneList) <- allGenes
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
267
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
268 #topGO enrichment
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
269
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
270 # Creation of a topGOdata object
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
271 # It will contain : the list of genes of interest, the GO annotations and the GO hierarchy
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
272 # Parameters :
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
273 # ontology : character string specifying the ontology of interest (BP, CC, MF)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
274 # allGenes : named vector of type numeric or factor
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
275 # annot : tells topGO how to map genes to GO annotations.
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
276 # argument not used here : nodeSize : at which minimal number of GO annotations
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
277 # do we consider a gene
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
278
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
279 myGOdata = new("topGOdata", description="SEA with TopGO", ontology=onto, allGenes=geneList, annot = annFUN.org, mapping=geneuniverse,ID="ensembl")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
280
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
281 # Performing enrichment tests
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
282 result <- runTest(myGOdata, algorithm=option, statistic="fisher")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
283 return(c(result,myGOdata))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
284 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
285
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
286 args <- get_args()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
287
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
288 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.rda")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
289 #load("/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.rda")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
290
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
291 input_type = args$inputtype
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
292 input = args$input
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
293 onto = args$ontology
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
294 option = args$option
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
295 correction = args$correction
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
296 threshold = as.numeric(args$threshold)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
297 text = str2bool(args$textoutput)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
298 barplot = "barplot" %in% unlist(strsplit(args$plot,","))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
299 dotplot = "dotplot" %in% unlist(strsplit(args$plot,","))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
300 column = as.numeric(gsub("c","",args$column))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
301 geneuniverse = args$geneuniverse
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
302 header = str2bool(args$header)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
303 background = str2bool(args$background)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
304 if (background){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
305 background_genes = args$background_genes
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
306 background_input_type = args$background_input_type
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
307 background_header = str2bool(args$background_header)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
308 background_column = as.numeric(gsub("c","",args$background_column))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
309 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
310
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
311 #get input
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
312 if (input_type=="copy_paste"){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
313 sample <- get_list_from_cp(input)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
314 } else if (input_type=="file"){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
315 tab=read_file(input,header)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
316 sample = trimws(unlist(strsplit(tab[,column],";")))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
317 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
318
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
319 #check of ENS ids
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
320 if (! any(check_ens_ids(sample))){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
321 print("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
322 stop()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
323 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
324
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
325 #get input if background genes
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
326 if (background){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
327 if (background_input_type=="copy_paste"){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
328 background_sample <- get_list_from_cp(background_genes)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
329 } else if (background_input_type=="file"){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
330 background_tab=read_file(background_genes,background_header)
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
331 background_sample = unique(trimws(unlist(strsplit(background_tab[,background_column],";"))))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
332 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
333 #check of ENS ids
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
334 if (! any(check_ens_ids(background_sample))){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
335 print("no ensembl gene ids found in your background ids list, please check your IDs in input or the selected column of your input file")
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
336 stop()
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
337 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
338 } else {
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
339 background_sample=NULL
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
340 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
341
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
342 # Launch enrichment analysis
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
343 allresult = suppressMessages(goEnrichment(geneuniverse,sample,background_sample,onto))
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
344 result = allresult[1][[1]]
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
345 myGOdata = allresult[2][[1]]
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
346 if (!is.null(result)){
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
347 cut_result = corrMultipleTesting(result,myGOdata, correction,threshold) # Adjust the result with a multiple testing correction or not and with the user, p-value cutoff
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
348 }else{
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
349 cut_result=NULL
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
350 }
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
351
e3430084c996 planemo upload commit ad5f1c5a1a71d7fa2bc8bac408856aa80b0fc2a3
proteore
parents:
diff changeset
352 createOutputs(result, cut_result,text, barplot, dotplot, onto)