topgo: enrichment_v3.R annotate

annotate enrichment_v3.R @ 0:aade04e750fa draft default tip

planemo upload

author	lnguyen
date	Fri, 15 Sep 2017 10:38:28 -0400
parents
children

rev	line source
0 aade04e750fa planemo upload lnguyen parents: diff changeset	1 # enrichment_v3.R
aade04e750fa planemo upload lnguyen parents: diff changeset	2 # Usage : Rscript --vanilla enrichment_v3.R --inputtype tabfile (or
aade04e750fa planemo upload lnguyen parents: diff changeset	3 # copypaste) --input file.txt --ontology "BP/CC/MF" --option option (e.g
aade04e750fa planemo upload lnguyen parents: diff changeset	4 # : classic/elim...) --threshold threshold --correction correction --textoutput
aade04e750fa planemo upload lnguyen parents: diff changeset	5 # text --barplotoutput barplot
aade04e750fa planemo upload lnguyen parents: diff changeset	6 # --dotplotoutput dotplot --column column --geneuniver human
aade04e750fa planemo upload lnguyen parents: diff changeset	7 # e.g : Rscript --vanilla enrichment_v3.R --inputtype tabfile --input file.txt
aade04e750fa planemo upload lnguyen parents: diff changeset	8 # --ontology BP --option classic --threshold 1e-15 --correction holm
aade04e750fa planemo upload lnguyen parents: diff changeset	9 # --textoutput TRUE
aade04e750fa planemo upload lnguyen parents: diff changeset	10 # --barplotoutput TRUE --dotplotoutput TRUE --column c1 --geneuniverse
aade04e750fa planemo upload lnguyen parents: diff changeset	11 # org.Hs.eg.db
aade04e750fa planemo upload lnguyen parents: diff changeset	12 # INPUT :
aade04e750fa planemo upload lnguyen parents: diff changeset	13 # - type of input. Can be ids separated by a blank space (copypast), or a text
aade04e750fa planemo upload lnguyen parents: diff changeset	14 # file (tabfile)
aade04e750fa planemo upload lnguyen parents: diff changeset	15 # - file with at least one column of ensembl ids
aade04e750fa planemo upload lnguyen parents: diff changeset	16 # - gene ontology category : Biological Process (BP), Cellular Component (CC), Molecular Function (MF)
aade04e750fa planemo upload lnguyen parents: diff changeset	17 # - test option (relative to topGO algorithms) : elim, weight01, parentchild, or no option (classic)
aade04e750fa planemo upload lnguyen parents: diff changeset	18 # - threshold for enriched GO term pvalues (e.g : 1e-15)
aade04e750fa planemo upload lnguyen parents: diff changeset	19 # - correction for multiple testing (see p.adjust options : holm, hochberg, hommel, bonferroni, BH, BY,fdr,none
aade04e750fa planemo upload lnguyen parents: diff changeset	20 # - outputs wanted in this order text, barplot, dotplot with boolean value (e.g
aade04e750fa planemo upload lnguyen parents: diff changeset	21 # : TRUE TRUE TRUE ).
aade04e750fa planemo upload lnguyen parents: diff changeset	22 # Declare the output not wanted as none
aade04e750fa planemo upload lnguyen parents: diff changeset	23 # - column containing the ensembl ids if the input file is a tabfile
aade04e750fa planemo upload lnguyen parents: diff changeset	24 # - gene universe reference for the user chosen specie
aade04e750fa planemo upload lnguyen parents: diff changeset	25 # - header : if the input is a text file, does this text file have a header
aade04e750fa planemo upload lnguyen parents: diff changeset	26 # (TRUE/FALSE)
aade04e750fa planemo upload lnguyen parents: diff changeset	27 #
aade04e750fa planemo upload lnguyen parents: diff changeset	28 # OUTPUT :
aade04e750fa planemo upload lnguyen parents: diff changeset	29 # - outputs commanded by the user named respectively result.tsv for the text
aade04e750fa planemo upload lnguyen parents: diff changeset	30 # results file, barplot.png for the barplot image file and dotplot.png for the
aade04e750fa planemo upload lnguyen parents: diff changeset	31 # dotplot image file
aade04e750fa planemo upload lnguyen parents: diff changeset	32
aade04e750fa planemo upload lnguyen parents: diff changeset	33
aade04e750fa planemo upload lnguyen parents: diff changeset	34 # loading topGO library
aade04e750fa planemo upload lnguyen parents: diff changeset	35 library("topGO")
aade04e750fa planemo upload lnguyen parents: diff changeset	36
aade04e750fa planemo upload lnguyen parents: diff changeset	37 '%!in%' <- function(x,y)!('%in%'(x,y))
aade04e750fa planemo upload lnguyen parents: diff changeset	38
aade04e750fa planemo upload lnguyen parents: diff changeset	39
aade04e750fa planemo upload lnguyen parents: diff changeset	40 # Parse command line arguments
aade04e750fa planemo upload lnguyen parents: diff changeset	41
aade04e750fa planemo upload lnguyen parents: diff changeset	42 args = commandArgs(trailingOnly = TRUE)
aade04e750fa planemo upload lnguyen parents: diff changeset	43
aade04e750fa planemo upload lnguyen parents: diff changeset	44 # create a list of the arguments from the command line, separated by a blank space
aade04e750fa planemo upload lnguyen parents: diff changeset	45 hh <- paste(unlist(args),collapse=' ')
aade04e750fa planemo upload lnguyen parents: diff changeset	46
aade04e750fa planemo upload lnguyen parents: diff changeset	47 # delete the first element of the list which is always a blank space
aade04e750fa planemo upload lnguyen parents: diff changeset	48 listoptions <- unlist(strsplit(hh,'--'))[-1]
aade04e750fa planemo upload lnguyen parents: diff changeset	49
aade04e750fa planemo upload lnguyen parents: diff changeset	50 # for each input, split the arguments with blank space as separator, unlist,
aade04e750fa planemo upload lnguyen parents: diff changeset	51 # and delete the first element which is the input name (e.g --inputtype)
aade04e750fa planemo upload lnguyen parents: diff changeset	52 options.args <- sapply(listoptions,function(x){
aade04e750fa planemo upload lnguyen parents: diff changeset	53 unlist(strsplit(x, ' '))[-1]
aade04e750fa planemo upload lnguyen parents: diff changeset	54 })
aade04e750fa planemo upload lnguyen parents: diff changeset	55 # same as the step above, except that only the names are kept
aade04e750fa planemo upload lnguyen parents: diff changeset	56 options.names <- sapply(listoptions,function(x){
aade04e750fa planemo upload lnguyen parents: diff changeset	57 option <- unlist(strsplit(x, ' '))[1]
aade04e750fa planemo upload lnguyen parents: diff changeset	58 })
aade04e750fa planemo upload lnguyen parents: diff changeset	59 names(options.args) <- unlist(options.names)
aade04e750fa planemo upload lnguyen parents: diff changeset	60
aade04e750fa planemo upload lnguyen parents: diff changeset	61
aade04e750fa planemo upload lnguyen parents: diff changeset	62 if (length(options.args) != 12) {
aade04e750fa planemo upload lnguyen parents: diff changeset	63 stop("Not enough/Too many arguments", call. = FALSE)
aade04e750fa planemo upload lnguyen parents: diff changeset	64 }
aade04e750fa planemo upload lnguyen parents: diff changeset	65
aade04e750fa planemo upload lnguyen parents: diff changeset	66 typeinput = options.args[1]
aade04e750fa planemo upload lnguyen parents: diff changeset	67 listfile = options.args[2]
aade04e750fa planemo upload lnguyen parents: diff changeset	68 onto = as.character(options.args[3])
aade04e750fa planemo upload lnguyen parents: diff changeset	69 option = as.character(options.args[4])
aade04e750fa planemo upload lnguyen parents: diff changeset	70 correction = as.character(options.args[6])
aade04e750fa planemo upload lnguyen parents: diff changeset	71 threshold = as.numeric(options.args[5])
aade04e750fa planemo upload lnguyen parents: diff changeset	72 text = as.character(options.args[7])
aade04e750fa planemo upload lnguyen parents: diff changeset	73 barplot = as.character(options.args[8])
aade04e750fa planemo upload lnguyen parents: diff changeset	74 dotplot = as.character(options.args[9])
aade04e750fa planemo upload lnguyen parents: diff changeset	75 column = as.numeric(gsub("c","",options.args[10]))
aade04e750fa planemo upload lnguyen parents: diff changeset	76 geneuniverse = as.character(options.args[11])
aade04e750fa planemo upload lnguyen parents: diff changeset	77 header = as.character(options.args[12])
aade04e750fa planemo upload lnguyen parents: diff changeset	78
aade04e750fa planemo upload lnguyen parents: diff changeset	79 if (typeinput=="copypaste"){
aade04e750fa planemo upload lnguyen parents: diff changeset	80 sample = as.data.frame(unlist(listfile))
aade04e750fa planemo upload lnguyen parents: diff changeset	81 sample = sample[,column]
aade04e750fa planemo upload lnguyen parents: diff changeset	82 }
aade04e750fa planemo upload lnguyen parents: diff changeset	83 if (typeinput=="tabfile"){
aade04e750fa planemo upload lnguyen parents: diff changeset	84
aade04e750fa planemo upload lnguyen parents: diff changeset	85 if (header=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	86 sample = read.table(listfile,header=TRUE,sep="\t",na.strings="NA",fill=TRUE)
aade04e750fa planemo upload lnguyen parents: diff changeset	87 }else{
aade04e750fa planemo upload lnguyen parents: diff changeset	88 sample = read.table(listfile,header=FALSE,sep="\t",na.strings="NA",fill=TRUE)
aade04e750fa planemo upload lnguyen parents: diff changeset	89 }
aade04e750fa planemo upload lnguyen parents: diff changeset	90 sample = sample[,column]
aade04e750fa planemo upload lnguyen parents: diff changeset	91
aade04e750fa planemo upload lnguyen parents: diff changeset	92 }
aade04e750fa planemo upload lnguyen parents: diff changeset	93 # Launch enrichment analysis and return result data from the analysis or the null
aade04e750fa planemo upload lnguyen parents: diff changeset	94 # object if the enrichment could not be done.
aade04e750fa planemo upload lnguyen parents: diff changeset	95 goEnrichment = function(geneuniverse,sample,onto){
aade04e750fa planemo upload lnguyen parents: diff changeset	96
aade04e750fa planemo upload lnguyen parents: diff changeset	97 # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their
aade04e750fa planemo upload lnguyen parents: diff changeset	98 # associated ensembl ids according to the org package
aade04e750fa planemo upload lnguyen parents: diff changeset	99 xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl")
aade04e750fa planemo upload lnguyen parents: diff changeset	100 allGenes = unique(unlist(xx))
aade04e750fa planemo upload lnguyen parents: diff changeset	101 # check if the genes given by the user can be found in the org package (gene
aade04e750fa planemo upload lnguyen parents: diff changeset	102 # universe), that is in
aade04e750fa planemo upload lnguyen parents: diff changeset	103 # allGenes
aade04e750fa planemo upload lnguyen parents: diff changeset	104 if (length(intersect(sample,allGenes))==0){
aade04e750fa planemo upload lnguyen parents: diff changeset	105
aade04e750fa planemo upload lnguyen parents: diff changeset	106 print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.")
aade04e750fa planemo upload lnguyen parents: diff changeset	107 return(c(NULL,NULL))
aade04e750fa planemo upload lnguyen parents: diff changeset	108
aade04e750fa planemo upload lnguyen parents: diff changeset	109 }
aade04e750fa planemo upload lnguyen parents: diff changeset	110
aade04e750fa planemo upload lnguyen parents: diff changeset	111 geneList = factor(as.integer(allGenes %in% sample))
aade04e750fa planemo upload lnguyen parents: diff changeset	112 names(geneList) <- allGenes
aade04e750fa planemo upload lnguyen parents: diff changeset	113
aade04e750fa planemo upload lnguyen parents: diff changeset	114
aade04e750fa planemo upload lnguyen parents: diff changeset	115 #topGO enrichment
aade04e750fa planemo upload lnguyen parents: diff changeset	116
aade04e750fa planemo upload lnguyen parents: diff changeset	117
aade04e750fa planemo upload lnguyen parents: diff changeset	118 # Creation of a topGOdata object
aade04e750fa planemo upload lnguyen parents: diff changeset	119 # It will contain : the list of genes of interest, the GO annotations and the GO hierarchy
aade04e750fa planemo upload lnguyen parents: diff changeset	120 # Parameters :
aade04e750fa planemo upload lnguyen parents: diff changeset	121 # ontology : character string specifying the ontology of interest (BP, CC, MF)
aade04e750fa planemo upload lnguyen parents: diff changeset	122 # allGenes : named vector of type numeric or factor
aade04e750fa planemo upload lnguyen parents: diff changeset	123 # annot : tells topGO how to map genes to GO annotations.
aade04e750fa planemo upload lnguyen parents: diff changeset	124 # argument not used here : nodeSize : at which minimal number of GO annotations
aade04e750fa planemo upload lnguyen parents: diff changeset	125 # do we consider a gene
aade04e750fa planemo upload lnguyen parents: diff changeset	126
aade04e750fa planemo upload lnguyen parents: diff changeset	127 myGOdata = new("topGOdata", description="SEA with TopGO", ontology=onto, allGenes=geneList, annot = annFUN.org, mapping=geneuniverse,ID="ensembl")
aade04e750fa planemo upload lnguyen parents: diff changeset	128
aade04e750fa planemo upload lnguyen parents: diff changeset	129
aade04e750fa planemo upload lnguyen parents: diff changeset	130 # Performing enrichment tests
aade04e750fa planemo upload lnguyen parents: diff changeset	131 result <- runTest(myGOdata, algorithm=option, statistic="fisher")
aade04e750fa planemo upload lnguyen parents: diff changeset	132 return(c(result,myGOdata))
aade04e750fa planemo upload lnguyen parents: diff changeset	133 }
aade04e750fa planemo upload lnguyen parents: diff changeset	134
aade04e750fa planemo upload lnguyen parents: diff changeset	135 # Some libraries such as GOsummaries won't be able to treat the values such as
aade04e750fa planemo upload lnguyen parents: diff changeset	136 # "< 1e-30" produced by topGO. As such it is important to delete the < char
aade04e750fa planemo upload lnguyen parents: diff changeset	137 # with the deleteInfChar function. Nevertheless the user will have access to the original results in the text output.
aade04e750fa planemo upload lnguyen parents: diff changeset	138 deleteInfChar = function(values){
aade04e750fa planemo upload lnguyen parents: diff changeset	139
aade04e750fa planemo upload lnguyen parents: diff changeset	140 lines = grep("<",values)
aade04e750fa planemo upload lnguyen parents: diff changeset	141 if (length(lines)!=0){
aade04e750fa planemo upload lnguyen parents: diff changeset	142 for (line in lines){
aade04e750fa planemo upload lnguyen parents: diff changeset	143 values[line]=gsub("<","",values[line])
aade04e750fa planemo upload lnguyen parents: diff changeset	144 }
aade04e750fa planemo upload lnguyen parents: diff changeset	145 }
aade04e750fa planemo upload lnguyen parents: diff changeset	146 return(values)
aade04e750fa planemo upload lnguyen parents: diff changeset	147 }
aade04e750fa planemo upload lnguyen parents: diff changeset	148
aade04e750fa planemo upload lnguyen parents: diff changeset	149 corrMultipleTesting = function(result, myGOdata,correction,threshold){
aade04e750fa planemo upload lnguyen parents: diff changeset	150
aade04e750fa planemo upload lnguyen parents: diff changeset	151 # adjust for multiple testing
aade04e750fa planemo upload lnguyen parents: diff changeset	152 if (correction!="none"){
aade04e750fa planemo upload lnguyen parents: diff changeset	153 # GenTable : transforms the result object into a list. Filters can be applied
aade04e750fa planemo upload lnguyen parents: diff changeset	154 # (e.g : with the topNodes argument, to get for instance only the n first
aade04e750fa planemo upload lnguyen parents: diff changeset	155 # GO terms with the lowest pvalues), but as we want to apply a correction we
aade04e750fa planemo upload lnguyen parents: diff changeset	156 # take all the GO terms, no matter their pvalues
aade04e750fa planemo upload lnguyen parents: diff changeset	157 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=length(attributes(result)$score))
aade04e750fa planemo upload lnguyen parents: diff changeset	158 # Some pvalues given by topGO are not numeric (e.g : "<1e-30). As such, these
aade04e750fa planemo upload lnguyen parents: diff changeset	159 # values are converted to 1e-30 to be able to correct the pvalues
aade04e750fa planemo upload lnguyen parents: diff changeset	160 pvaluestmp = deleteInfChar(allRes$test)
aade04e750fa planemo upload lnguyen parents: diff changeset	161
aade04e750fa planemo upload lnguyen parents: diff changeset	162 # the correction is done from the modified pvalues
aade04e750fa planemo upload lnguyen parents: diff changeset	163 allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), n = length(pvaluestmp))
aade04e750fa planemo upload lnguyen parents: diff changeset	164 allRes = as.data.frame(allRes)
aade04e750fa planemo upload lnguyen parents: diff changeset	165
aade04e750fa planemo upload lnguyen parents: diff changeset	166 # Rename the test column by pvalues, so that is more explicit
aade04e750fa planemo upload lnguyen parents: diff changeset	167 nb = which(names(allRes) %in% c("test"))
aade04e750fa planemo upload lnguyen parents: diff changeset	168
aade04e750fa planemo upload lnguyen parents: diff changeset	169 names(allRes)[nb] = "pvalues"
aade04e750fa planemo upload lnguyen parents: diff changeset	170
aade04e750fa planemo upload lnguyen parents: diff changeset	171 allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold),]
aade04e750fa planemo upload lnguyen parents: diff changeset	172 if (length(allRes$pvalues)==0){
aade04e750fa planemo upload lnguyen parents: diff changeset	173 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
aade04e750fa planemo upload lnguyen parents: diff changeset	174 return(NULL)
aade04e750fa planemo upload lnguyen parents: diff changeset	175 }
aade04e750fa planemo upload lnguyen parents: diff changeset	176 allRes = allRes[order(allRes$qvalues),]
aade04e750fa planemo upload lnguyen parents: diff changeset	177 }
aade04e750fa planemo upload lnguyen parents: diff changeset	178
aade04e750fa planemo upload lnguyen parents: diff changeset	179 if (correction=="none"){
aade04e750fa planemo upload lnguyen parents: diff changeset	180 # get all the go terms under user threshold
aade04e750fa planemo upload lnguyen parents: diff changeset	181 mysummary <- summary(attributes(result)$score <= threshold)
aade04e750fa planemo upload lnguyen parents: diff changeset	182 numsignif <- as.integer(mysummary[[3]])
aade04e750fa planemo upload lnguyen parents: diff changeset	183 # get all significant nodes
aade04e750fa planemo upload lnguyen parents: diff changeset	184 allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=numsignif)
aade04e750fa planemo upload lnguyen parents: diff changeset	185
aade04e750fa planemo upload lnguyen parents: diff changeset	186
aade04e750fa planemo upload lnguyen parents: diff changeset	187 allRes = as.data.frame(allRes)
aade04e750fa planemo upload lnguyen parents: diff changeset	188 # Rename the test column by pvalues, so that is more explicit
aade04e750fa planemo upload lnguyen parents: diff changeset	189 nb = which(names(allRes) %in% c("test"))
aade04e750fa planemo upload lnguyen parents: diff changeset	190 names(allRes)[nb] = "pvalues"
aade04e750fa planemo upload lnguyen parents: diff changeset	191 if (numsignif==0){
aade04e750fa planemo upload lnguyen parents: diff changeset	192
aade04e750fa planemo upload lnguyen parents: diff changeset	193 print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
aade04e750fa planemo upload lnguyen parents: diff changeset	194 return(NULL)
aade04e750fa planemo upload lnguyen parents: diff changeset	195 }
aade04e750fa planemo upload lnguyen parents: diff changeset	196
aade04e750fa planemo upload lnguyen parents: diff changeset	197 allRes = allRes[order(allRes$pvalues),]
aade04e750fa planemo upload lnguyen parents: diff changeset	198 }
aade04e750fa planemo upload lnguyen parents: diff changeset	199
aade04e750fa planemo upload lnguyen parents: diff changeset	200 return(allRes)
aade04e750fa planemo upload lnguyen parents: diff changeset	201 }
aade04e750fa planemo upload lnguyen parents: diff changeset	202
aade04e750fa planemo upload lnguyen parents: diff changeset	203 # roundValues will simplify the results by rounding down the values. For instance 1.1e-17 becomes 1e-17
aade04e750fa planemo upload lnguyen parents: diff changeset	204 roundValues = function(values){
aade04e750fa planemo upload lnguyen parents: diff changeset	205 for (line in 1:length(values)){
aade04e750fa planemo upload lnguyen parents: diff changeset	206 values[line]=as.numeric(gsub(".*e","1e",as.character(values[line])))
aade04e750fa planemo upload lnguyen parents: diff changeset	207 }
aade04e750fa planemo upload lnguyen parents: diff changeset	208 return(values)
aade04e750fa planemo upload lnguyen parents: diff changeset	209 }
aade04e750fa planemo upload lnguyen parents: diff changeset	210
aade04e750fa planemo upload lnguyen parents: diff changeset	211 createDotPlot = function(data){
aade04e750fa planemo upload lnguyen parents: diff changeset	212
aade04e750fa planemo upload lnguyen parents: diff changeset	213 values = deleteInfChar(data$pvalues)
aade04e750fa planemo upload lnguyen parents: diff changeset	214 values = roundValues(values)
aade04e750fa planemo upload lnguyen parents: diff changeset	215 values = as.numeric(values)
aade04e750fa planemo upload lnguyen parents: diff changeset	216
aade04e750fa planemo upload lnguyen parents: diff changeset	217 geneRatio = data$Significant/data$Annotated
aade04e750fa planemo upload lnguyen parents: diff changeset	218 goTerms = data$Term
aade04e750fa planemo upload lnguyen parents: diff changeset	219 count = data$Significant
aade04e750fa planemo upload lnguyen parents: diff changeset	220
aade04e750fa planemo upload lnguyen parents: diff changeset	221 png(filename="dotplot.png",res=300, width = 3200, height = 3200, units = "px")
aade04e750fa planemo upload lnguyen parents: diff changeset	222 sp1 = ggplot(data,aes(x=geneRatio,y=goTerms,xlabel ="Ratio" ,ylabel = "GO terms", color=values,size=count)) +geom_point() + scale_colour_gradientn(colours=c("red","violet","blue")) + labs(color="p-values\n")
aade04e750fa planemo upload lnguyen parents: diff changeset	223
aade04e750fa planemo upload lnguyen parents: diff changeset	224 plot(sp1)
aade04e750fa planemo upload lnguyen parents: diff changeset	225 dev.off()
aade04e750fa planemo upload lnguyen parents: diff changeset	226 }
aade04e750fa planemo upload lnguyen parents: diff changeset	227
aade04e750fa planemo upload lnguyen parents: diff changeset	228 createBarPlot = function(data){
aade04e750fa planemo upload lnguyen parents: diff changeset	229
aade04e750fa planemo upload lnguyen parents: diff changeset	230
aade04e750fa planemo upload lnguyen parents: diff changeset	231 values = deleteInfChar(data$pvalues)
aade04e750fa planemo upload lnguyen parents: diff changeset	232 values = roundValues(values)
aade04e750fa planemo upload lnguyen parents: diff changeset	233
aade04e750fa planemo upload lnguyen parents: diff changeset	234 values = as.numeric(values)
aade04e750fa planemo upload lnguyen parents: diff changeset	235 goTerms = data$Term
aade04e750fa planemo upload lnguyen parents: diff changeset	236 count = data$Significant
aade04e750fa planemo upload lnguyen parents: diff changeset	237 png(filename="barplot.png",res=300, width = 3200, height = 3200, units = "px")
aade04e750fa planemo upload lnguyen parents: diff changeset	238
aade04e750fa planemo upload lnguyen parents: diff changeset	239 p<-ggplot(data, aes(x=goTerms, y=count,fill=values)) + geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n")
aade04e750fa planemo upload lnguyen parents: diff changeset	240 plot(p)
aade04e750fa planemo upload lnguyen parents: diff changeset	241 dev.off()
aade04e750fa planemo upload lnguyen parents: diff changeset	242 }
aade04e750fa planemo upload lnguyen parents: diff changeset	243
aade04e750fa planemo upload lnguyen parents: diff changeset	244
aade04e750fa planemo upload lnguyen parents: diff changeset	245 # Produce the different outputs
aade04e750fa planemo upload lnguyen parents: diff changeset	246 createOutputs = function(result, cut_result,text, barplot,dotplot){
aade04e750fa planemo upload lnguyen parents: diff changeset	247
aade04e750fa planemo upload lnguyen parents: diff changeset	248
aade04e750fa planemo upload lnguyen parents: diff changeset	249 if (is.null(result)){
aade04e750fa planemo upload lnguyen parents: diff changeset	250
aade04e750fa planemo upload lnguyen parents: diff changeset	251 if (text=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	252
aade04e750fa planemo upload lnguyen parents: diff changeset	253 err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)."
aade04e750fa planemo upload lnguyen parents: diff changeset	254 write.table(err_msg, file='result.csv', quote=FALSE, sep='\t', col.names = T, row.names = F)
aade04e750fa planemo upload lnguyen parents: diff changeset	255
aade04e750fa planemo upload lnguyen parents: diff changeset	256 }
aade04e750fa planemo upload lnguyen parents: diff changeset	257
aade04e750fa planemo upload lnguyen parents: diff changeset	258 if (barplot=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	259
aade04e750fa planemo upload lnguyen parents: diff changeset	260 png(filename="barplot.png")
aade04e750fa planemo upload lnguyen parents: diff changeset	261 plot.new()
aade04e750fa planemo upload lnguyen parents: diff changeset	262 #text(0,0,err_msg)
aade04e750fa planemo upload lnguyen parents: diff changeset	263 dev.off()
aade04e750fa planemo upload lnguyen parents: diff changeset	264 }
aade04e750fa planemo upload lnguyen parents: diff changeset	265
aade04e750fa planemo upload lnguyen parents: diff changeset	266 if (dotplot=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	267
aade04e750fa planemo upload lnguyen parents: diff changeset	268 png(filename="dotplot.png")
aade04e750fa planemo upload lnguyen parents: diff changeset	269 plot.new()
aade04e750fa planemo upload lnguyen parents: diff changeset	270 #text(0,0,err_msg)
aade04e750fa planemo upload lnguyen parents: diff changeset	271 dev.off()
aade04e750fa planemo upload lnguyen parents: diff changeset	272
aade04e750fa planemo upload lnguyen parents: diff changeset	273 }
aade04e750fa planemo upload lnguyen parents: diff changeset	274 return(TRUE)
aade04e750fa planemo upload lnguyen parents: diff changeset	275 }
aade04e750fa planemo upload lnguyen parents: diff changeset	276
aade04e750fa planemo upload lnguyen parents: diff changeset	277
aade04e750fa planemo upload lnguyen parents: diff changeset	278 if (is.null(cut_result)){
aade04e750fa planemo upload lnguyen parents: diff changeset	279
aade04e750fa planemo upload lnguyen parents: diff changeset	280
aade04e750fa planemo upload lnguyen parents: diff changeset	281 if (text=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	282
aade04e750fa planemo upload lnguyen parents: diff changeset	283 err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value."
aade04e750fa planemo upload lnguyen parents: diff changeset	284 write.table(err_msg, file='result.csv', quote=FALSE, sep='\t', col.names = T, row.names = F)
aade04e750fa planemo upload lnguyen parents: diff changeset	285
aade04e750fa planemo upload lnguyen parents: diff changeset	286 }
aade04e750fa planemo upload lnguyen parents: diff changeset	287
aade04e750fa planemo upload lnguyen parents: diff changeset	288 if (barplot=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	289
aade04e750fa planemo upload lnguyen parents: diff changeset	290 png(filename="barplot.png")
aade04e750fa planemo upload lnguyen parents: diff changeset	291 plot.new()
aade04e750fa planemo upload lnguyen parents: diff changeset	292 text(0,0,err_msg)
aade04e750fa planemo upload lnguyen parents: diff changeset	293 dev.off()
aade04e750fa planemo upload lnguyen parents: diff changeset	294 }
aade04e750fa planemo upload lnguyen parents: diff changeset	295
aade04e750fa planemo upload lnguyen parents: diff changeset	296 if (dotplot=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	297
aade04e750fa planemo upload lnguyen parents: diff changeset	298 png(filename="dotplot.png")
aade04e750fa planemo upload lnguyen parents: diff changeset	299 plot.new()
aade04e750fa planemo upload lnguyen parents: diff changeset	300 text(0,0,err_msg)
aade04e750fa planemo upload lnguyen parents: diff changeset	301 dev.off()
aade04e750fa planemo upload lnguyen parents: diff changeset	302
aade04e750fa planemo upload lnguyen parents: diff changeset	303 }
aade04e750fa planemo upload lnguyen parents: diff changeset	304 return(TRUE)
aade04e750fa planemo upload lnguyen parents: diff changeset	305
aade04e750fa planemo upload lnguyen parents: diff changeset	306
aade04e750fa planemo upload lnguyen parents: diff changeset	307
aade04e750fa planemo upload lnguyen parents: diff changeset	308 }
aade04e750fa planemo upload lnguyen parents: diff changeset	309
aade04e750fa planemo upload lnguyen parents: diff changeset	310 if (text=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	311 write.table(cut_result, file='result.csv', quote=FALSE, sep='\t', col.names = T, row.names = F)
aade04e750fa planemo upload lnguyen parents: diff changeset	312 }
aade04e750fa planemo upload lnguyen parents: diff changeset	313
aade04e750fa planemo upload lnguyen parents: diff changeset	314 if (barplot=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	315
aade04e750fa planemo upload lnguyen parents: diff changeset	316 createBarPlot(cut_result)
aade04e750fa planemo upload lnguyen parents: diff changeset	317 }
aade04e750fa planemo upload lnguyen parents: diff changeset	318
aade04e750fa planemo upload lnguyen parents: diff changeset	319 if (dotplot=="TRUE"){
aade04e750fa planemo upload lnguyen parents: diff changeset	320
aade04e750fa planemo upload lnguyen parents: diff changeset	321 createDotPlot(cut_result)
aade04e750fa planemo upload lnguyen parents: diff changeset	322 }
aade04e750fa planemo upload lnguyen parents: diff changeset	323 return(TRUE)
aade04e750fa planemo upload lnguyen parents: diff changeset	324 }
aade04e750fa planemo upload lnguyen parents: diff changeset	325
aade04e750fa planemo upload lnguyen parents: diff changeset	326
aade04e750fa planemo upload lnguyen parents: diff changeset	327
aade04e750fa planemo upload lnguyen parents: diff changeset	328 # Load R library ggplot2 to plot graphs
aade04e750fa planemo upload lnguyen parents: diff changeset	329 library(ggplot2)
aade04e750fa planemo upload lnguyen parents: diff changeset	330
aade04e750fa planemo upload lnguyen parents: diff changeset	331 # Launch enrichment analysis
aade04e750fa planemo upload lnguyen parents: diff changeset	332 allresult = goEnrichment(geneuniverse,sample,onto)
aade04e750fa planemo upload lnguyen parents: diff changeset	333 result = allresult[1][[1]]
aade04e750fa planemo upload lnguyen parents: diff changeset	334 myGOdata = allresult[2][[1]]
aade04e750fa planemo upload lnguyen parents: diff changeset	335 if (!is.null(result)){
aade04e750fa planemo upload lnguyen parents: diff changeset	336
aade04e750fa planemo upload lnguyen parents: diff changeset	337 # Adjust the result with a multiple testing correction or not and with the user
aade04e750fa planemo upload lnguyen parents: diff changeset	338 # p-value cutoff
aade04e750fa planemo upload lnguyen parents: diff changeset	339 cut_result = corrMultipleTesting(result,myGOdata, correction,threshold)
aade04e750fa planemo upload lnguyen parents: diff changeset	340 }else{
aade04e750fa planemo upload lnguyen parents: diff changeset	341
aade04e750fa planemo upload lnguyen parents: diff changeset	342 cut_result=NULL
aade04e750fa planemo upload lnguyen parents: diff changeset	343
aade04e750fa planemo upload lnguyen parents: diff changeset	344 }
aade04e750fa planemo upload lnguyen parents: diff changeset	345
aade04e750fa planemo upload lnguyen parents: diff changeset	346
aade04e750fa planemo upload lnguyen parents: diff changeset	347 createOutputs(result, cut_result,text, barplot,dotplot)
aade04e750fa planemo upload lnguyen parents: diff changeset	348

Mercurial > repos > lnguyen > topgo

annotate enrichment_v3.R @ 0:aade04e750fa draft default tip