annotate enrichment_v3.R @ 0:aade04e750fa draft default tip

planemo upload
author lnguyen
date Fri, 15 Sep 2017 10:38:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
1 # enrichment_v3.R
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
2 # Usage : Rscript --vanilla enrichment_v3.R --inputtype tabfile (or
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
3 # copypaste) --input file.txt --ontology "BP/CC/MF" --option option (e.g
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
4 # : classic/elim...) --threshold threshold --correction correction --textoutput
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
5 # text --barplotoutput barplot
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
6 # --dotplotoutput dotplot --column column --geneuniver human
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
7 # e.g : Rscript --vanilla enrichment_v3.R --inputtype tabfile --input file.txt
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
8 # --ontology BP --option classic --threshold 1e-15 --correction holm
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
9 # --textoutput TRUE
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
10 # --barplotoutput TRUE --dotplotoutput TRUE --column c1 --geneuniverse
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
11 # org.Hs.eg.db
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
12 # INPUT :
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
13 # - type of input. Can be ids separated by a blank space (copypast), or a text
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
14 # file (tabfile)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
15 # - file with at least one column of ensembl ids
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
16 # - gene ontology category : Biological Process (BP), Cellular Component (CC), Molecular Function (MF)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
17 # - test option (relative to topGO algorithms) : elim, weight01, parentchild, or no option (classic)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
18 # - threshold for enriched GO term pvalues (e.g : 1e-15)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
19 # - correction for multiple testing (see p.adjust options : holm, hochberg, hommel, bonferroni, BH, BY,fdr,none
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
20 # - outputs wanted in this order text, barplot, dotplot with boolean value (e.g
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
21 # : TRUE TRUE TRUE ).
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
22 # Declare the output not wanted as none
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
23 # - column containing the ensembl ids if the input file is a tabfile
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
24 # - gene universe reference for the user chosen specie
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
25 # - header : if the input is a text file, does this text file have a header
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
26 # (TRUE/FALSE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
27 #
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
28 # OUTPUT :
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
29 # - outputs commanded by the user named respectively result.tsv for the text
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
30 # results file, barplot.png for the barplot image file and dotplot.png for the
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
31 # dotplot image file
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
32
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
33
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
34 # loading topGO library
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
35 library("topGO")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
36
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
37 '%!in%' <- function(x,y)!('%in%'(x,y))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
38
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
39
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
40 # Parse command line arguments
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
41
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
42 args = commandArgs(trailingOnly = TRUE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
43
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
44 # create a list of the arguments from the command line, separated by a blank space
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
45 hh <- paste(unlist(args),collapse=' ')
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
46
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
47 # delete the first element of the list which is always a blank space
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
48 listoptions <- unlist(strsplit(hh,'--'))[-1]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
49
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
50 # for each input, split the arguments with blank space as separator, unlist,
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
51 # and delete the first element which is the input name (e.g --inputtype)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
52 options.args <- sapply(listoptions,function(x){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
53 unlist(strsplit(x, ' '))[-1]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
54 })
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
55 # same as the step above, except that only the names are kept
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
56 options.names <- sapply(listoptions,function(x){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
57 option <- unlist(strsplit(x, ' '))[1]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
58 })
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
59 names(options.args) <- unlist(options.names)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
60
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
61
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
62 if (length(options.args) != 12) {
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
63 stop("Not enough/Too many arguments", call. = FALSE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
64 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
65
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
66 typeinput = options.args[1]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
67 listfile = options.args[2]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
68 onto = as.character(options.args[3])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
69 option = as.character(options.args[4])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
70 correction = as.character(options.args[6])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
71 threshold = as.numeric(options.args[5])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
72 text = as.character(options.args[7])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
73 barplot = as.character(options.args[8])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
74 dotplot = as.character(options.args[9])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
75 column = as.numeric(gsub("c","",options.args[10]))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
76 geneuniverse = as.character(options.args[11])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
77 header = as.character(options.args[12])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
78
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
79 if (typeinput=="copypaste"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
80 sample = as.data.frame(unlist(listfile))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
81 sample = sample[,column]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
82 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
83 if (typeinput=="tabfile"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
84
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
85 if (header=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
86 sample = read.table(listfile,header=TRUE,sep="\t",na.strings="NA",fill=TRUE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
87 }else{
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
88 sample = read.table(listfile,header=FALSE,sep="\t",na.strings="NA",fill=TRUE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
89 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
90 sample = sample[,column]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
91
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
92 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
93 # Launch enrichment analysis and return result data from the analysis or the null
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
94 # object if the enrichment could not be done.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
95 goEnrichment = function(geneuniverse,sample,onto){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
96
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
97 # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
98 # associated ensembl ids according to the org package
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
99 xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
100 allGenes = unique(unlist(xx))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
101 # check if the genes given by the user can be found in the org package (gene
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
102 # universe), that is in
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
103 # allGenes
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
104 if (length(intersect(sample,allGenes))==0){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
105
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
106 print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
107 return(c(NULL,NULL))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
108
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
109 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
110
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
111 geneList = factor(as.integer(allGenes %in% sample))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
112 names(geneList) <- allGenes
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
113
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
114
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
115 #topGO enrichment
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
116
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
117
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
118 # Creation of a topGOdata object
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
119 # It will contain : the list of genes of interest, the GO annotations and the GO hierarchy
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
120 # Parameters :
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
121 # ontology : character string specifying the ontology of interest (BP, CC, MF)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
122 # allGenes : named vector of type numeric or factor
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
123 # annot : tells topGO how to map genes to GO annotations.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
124 # argument not used here : nodeSize : at which minimal number of GO annotations
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
125 # do we consider a gene
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
126
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
127 myGOdata = new("topGOdata", description="SEA with TopGO", ontology=onto, allGenes=geneList, annot = annFUN.org, mapping=geneuniverse,ID="ensembl")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
128
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
129
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
130 # Performing enrichment tests
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
131 result <- runTest(myGOdata, algorithm=option, statistic="fisher")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
132 return(c(result,myGOdata))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
133 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
134
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
135 # Some libraries such as GOsummaries won't be able to treat the values such as
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
136 # "< 1e-30" produced by topGO. As such it is important to delete the < char
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
137 # with the deleteInfChar function. Nevertheless the user will have access to the original results in the text output.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
138 deleteInfChar = function(values){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
139
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
140 lines = grep("<",values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
141 if (length(lines)!=0){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
142 for (line in lines){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
143 values[line]=gsub("<","",values[line])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
144 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
145 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
146 return(values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
147 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
148
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
149 corrMultipleTesting = function(result, myGOdata,correction,threshold){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
150
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
151 # adjust for multiple testing
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
152 if (correction!="none"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
153 # GenTable : transforms the result object into a list. Filters can be applied
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
154 # (e.g : with the topNodes argument, to get for instance only the n first
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
155 # GO terms with the lowest pvalues), but as we want to apply a correction we
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
156 # take all the GO terms, no matter their pvalues
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
157 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=length(attributes(result)$score))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
158 # Some pvalues given by topGO are not numeric (e.g : "<1e-30). As such, these
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
159 # values are converted to 1e-30 to be able to correct the pvalues
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
160 pvaluestmp = deleteInfChar(allRes$test)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
161
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
162 # the correction is done from the modified pvalues
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
163 allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), n = length(pvaluestmp))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
164 allRes = as.data.frame(allRes)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
165
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
166 # Rename the test column by pvalues, so that is more explicit
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
167 nb = which(names(allRes) %in% c("test"))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
168
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
169 names(allRes)[nb] = "pvalues"
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
170
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
171 allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold),]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
172 if (length(allRes$pvalues)==0){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
173 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
174 return(NULL)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
175 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
176 allRes = allRes[order(allRes$qvalues),]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
177 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
178
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
179 if (correction=="none"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
180 # get all the go terms under user threshold
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
181 mysummary <- summary(attributes(result)$score <= threshold)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
182 numsignif <- as.integer(mysummary[[3]])
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
183 # get all significant nodes
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
184 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=numsignif)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
185
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
186
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
187 allRes = as.data.frame(allRes)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
188 # Rename the test column by pvalues, so that is more explicit
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
189 nb = which(names(allRes) %in% c("test"))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
190 names(allRes)[nb] = "pvalues"
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
191 if (numsignif==0){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
192
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
193 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
194 return(NULL)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
195 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
196
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
197 allRes = allRes[order(allRes$pvalues),]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
198 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
199
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
200 return(allRes)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
201 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
202
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
203 # roundValues will simplify the results by rounding down the values. For instance 1.1e-17 becomes 1e-17
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
204 roundValues = function(values){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
205 for (line in 1:length(values)){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
206 values[line]=as.numeric(gsub(".*e","1e",as.character(values[line])))
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
207 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
208 return(values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
209 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
210
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
211 createDotPlot = function(data){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
212
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
213 values = deleteInfChar(data$pvalues)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
214 values = roundValues(values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
215 values = as.numeric(values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
216
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
217 geneRatio = data$Significant/data$Annotated
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
218 goTerms = data$Term
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
219 count = data$Significant
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
220
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
221 png(filename="dotplot.png",res=300, width = 3200, height = 3200, units = "px")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
222 sp1 = ggplot(data,aes(x=geneRatio,y=goTerms,xlabel ="Ratio" ,ylabel = "GO terms", color=values,size=count)) +geom_point() + scale_colour_gradientn(colours=c("red","violet","blue")) + labs(color="p-values\n")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
223
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
224 plot(sp1)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
225 dev.off()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
226 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
227
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
228 createBarPlot = function(data){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
229
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
230
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
231 values = deleteInfChar(data$pvalues)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
232 values = roundValues(values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
233
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
234 values = as.numeric(values)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
235 goTerms = data$Term
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
236 count = data$Significant
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
237 png(filename="barplot.png",res=300, width = 3200, height = 3200, units = "px")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
238
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
239 p<-ggplot(data, aes(x=goTerms, y=count,fill=values)) + geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
240 plot(p)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
241 dev.off()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
242 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
243
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
244
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
245 # Produce the different outputs
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
246 createOutputs = function(result, cut_result,text, barplot,dotplot){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
247
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
248
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
249 if (is.null(result)){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
250
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
251 if (text=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
252
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
253 err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)."
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
254 write.table(err_msg, file='result.csv', quote=FALSE, sep='\t', col.names = T, row.names = F)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
255
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
256 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
257
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
258 if (barplot=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
259
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
260 png(filename="barplot.png")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
261 plot.new()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
262 #text(0,0,err_msg)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
263 dev.off()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
264 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
265
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
266 if (dotplot=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
267
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
268 png(filename="dotplot.png")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
269 plot.new()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
270 #text(0,0,err_msg)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
271 dev.off()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
272
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
273 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
274 return(TRUE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
275 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
276
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
277
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
278 if (is.null(cut_result)){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
279
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
280
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
281 if (text=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
282
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
283 err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value."
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
284 write.table(err_msg, file='result.csv', quote=FALSE, sep='\t', col.names = T, row.names = F)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
285
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
286 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
287
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
288 if (barplot=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
289
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
290 png(filename="barplot.png")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
291 plot.new()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
292 text(0,0,err_msg)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
293 dev.off()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
294 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
295
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
296 if (dotplot=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
297
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
298 png(filename="dotplot.png")
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
299 plot.new()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
300 text(0,0,err_msg)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
301 dev.off()
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
302
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
303 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
304 return(TRUE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
305
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
306
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
307
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
308 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
309
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
310 if (text=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
311 write.table(cut_result, file='result.csv', quote=FALSE, sep='\t', col.names = T, row.names = F)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
312 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
313
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
314 if (barplot=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
315
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
316 createBarPlot(cut_result)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
317 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
318
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
319 if (dotplot=="TRUE"){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
320
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
321 createDotPlot(cut_result)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
322 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
323 return(TRUE)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
324 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
325
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
326
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
327
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
328 # Load R library ggplot2 to plot graphs
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
329 library(ggplot2)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
330
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
331 # Launch enrichment analysis
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
332 allresult = goEnrichment(geneuniverse,sample,onto)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
333 result = allresult[1][[1]]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
334 myGOdata = allresult[2][[1]]
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
335 if (!is.null(result)){
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
336
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
337 # Adjust the result with a multiple testing correction or not and with the user
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
338 # p-value cutoff
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
339 cut_result = corrMultipleTesting(result,myGOdata, correction,threshold)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
340 }else{
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
341
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
342 cut_result=NULL
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
343
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
344 }
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
345
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
346
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
347 createOutputs(result, cut_result,text, barplot,dotplot)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
348