Mercurial > repos > proteore > proteore_topgo
changeset 16:7f1ce70f0f09 draft default tip
"planemo upload commit 29a04769c0546be759c38bbcc157c8bc09ec1115-dirty"
author | proteore |
---|---|
date | Mon, 17 May 2021 14:40:03 +0000 |
parents | 159596ec807d |
children | |
files | test-data/barplot.png test-data/dotplot.png test-data/result.tsv topGO.xml topGO_enrichment.R |
diffstat | 5 files changed, 261 insertions(+), 221 deletions(-) [+] |
line wrap: on
line diff
--- a/test-data/result.tsv Fri Jan 24 10:45:15 2020 -0500 +++ b/test-data/result.tsv Mon May 17 14:40:03 2021 +0000 @@ -1,63 +1,58 @@ GO.ID Term Annotated Significant Expected pvalues qvalues -GO:0070268 cornification 128 21 1.01 4.8e-22 3.792e-18 -GO:0001895 retina homeostasis 70 15 0.55 6.8e-18 3.58133333333333e-14 -GO:0010951 negative regulation of endopeptidase act... 279 22 2.2 2.2e-11 8.69e-08 -GO:0061621 canonical glycolysis 27 7 0.21 1.3e-09 4.108e-06 -GO:0046718 viral entry into host cell 155 12 1.22 3.6e-09 9.48e-06 -GO:0042026 protein refolding 36 7 0.28 1.1e-08 2.054e-05 -GO:0002576 platelet degranulation 138 11 1.09 1.2e-08 2.054e-05 -GO:0070370 cellular heat acclimation 11 5 0.09 1.3e-08 2.054e-05 -GO:0070434 positive regulation of nucleotide-bindin... 11 5 0.09 1.3e-08 2.054e-05 -GO:0090063 positive regulation of microtubule nucle... 12 5 0.09 2.1e-08 3.01636363636364e-05 -GO:1900034 regulation of cellular response to heat 93 9 0.73 5.0e-08 6.58333333333333e-05 -GO:1903265 positive regulation of tumor necrosis fa... 16 5 0.13 1.2e-07 0.000145846153846154 -GO:0050821 protein stabilization 181 11 1.43 2.0e-07 0.000225714285714286 -GO:0090084 negative regulation of inclusion body as... 19 5 0.15 3.0e-07 0.000316 -GO:0061436 establishment of skin barrier 20 5 0.16 4.0e-07 0.000395 -GO:0018149 peptide cross-linking 60 7 0.47 4.4e-07 0.000408941176470588 -GO:0042542 response to hydrogen peroxide 136 11 1.07 1.0e-06 0.000877777777777778 -GO:1901673 regulation of mitotic spindle assembly 27 5 0.21 2.0e-06 0.00166315789473684 -GO:0098869 cellular oxidant detoxification 114 8 0.9 3.3e-06 0.002607 -GO:2000117 negative regulation of cysteine-type end... 116 8 0.91 3.8e-06 0.00285904761904762 -GO:0006094 gluconeogenesis 85 7 0.67 4.8e-06 0.00344727272727273 -GO:0034599 cellular response to oxidative stress 309 15 2.43 6.8e-06 0.00467130434782609 -GO:0001580 detection of chemical stimulus involved ... 61 6 0.48 8.4e-06 0.00553 -GO:0051092 positive regulation of NF-kappaB transcr... 173 9 1.36 9.5e-06 0.006004 -GO:0045104 intermediate filament cytoskeleton organ... 45 5 0.35 2.7e-05 0.0164076923076923 -GO:0042744 hydrogen peroxide catabolic process 23 4 0.18 2.9e-05 0.0169703703703704 -GO:0007568 aging 312 11 2.46 3.8e-05 0.0214428571428571 -GO:2001240 negative regulation of extrinsic apoptot... 49 5 0.39 4.1e-05 0.0223379310344828 -GO:0031397 negative regulation of protein ubiquitin... 172 10 1.35 4.7e-05 0.0247533333333333 -GO:0002934 desmosome organization 10 3 0.08 5.5e-05 0.0280322580645161 -GO:1902396 protein localization to bicellular tight... 2 2 0.02 6.2e-05 0.0296848484848485 -GO:1903923 positive regulation of protein processin... 2 2 0.02 6.2e-05 0.0296848484848485 -GO:0006953 acute-phase response 57 5 0.45 8.5e-05 0.0388228571428571 -GO:1900740 positive regulation of protein insertion... 30 4 0.24 8.6e-05 0.0388228571428571 -GO:0045471 response to ethanol 138 7 1.09 0.00011 0.0482777777777778 -GO:0070527 platelet aggregation 62 5 0.49 0.00013 0.0555135135135135 -GO:0032757 positive regulation of interleukin-8 pro... 63 5 0.5 0.00014 0.0567179487179487 -GO:0046686 response to cadmium ion 63 5 0.5 0.00014 0.0567179487179487 -GO:0003064 regulation of heart rate by hormone 3 2 0.02 0.00018 0.0677142857142857 -GO:0043163 cell envelope organization 3 2 0.02 0.00018 0.0677142857142857 -GO:0061740 protein targeting to lysosome involved i... 3 2 0.02 0.00018 0.0677142857142857 -GO:0042493 response to drug 438 12 3.45 0.00019 0.0698139534883721 -GO:0086091 regulation of heart rate by cardiac cond... 38 4 0.3 0.00022 0.079 -GO:0038061 NIK/NF-kappaB signaling 166 7 1.31 0.00035 0.121791666666667 -GO:0051016 barbed-end actin filament capping 18 3 0.14 0.00036 0.121791666666667 -GO:1902309 negative regulation of peptidyl-serine d... 4 2 0.03 0.00037 0.121791666666667 -GO:1905913 negative regulation of calcium ion expor... 4 2 0.03 0.00037 0.121791666666667 -GO:0010389 regulation of G2/M transition of mitotic... 226 8 1.78 0.00043 0.13865306122449 -GO:0043488 regulation of mRNA stability 174 7 1.37 0.00046 0.14536 -GO:0046716 muscle cell cellular homeostasis 20 3 0.16 0.00049 0.151803921568627 -GO:0006402 mRNA catabolic process 414 15 3.26 0.00050 0.151923076923077 -GO:0046827 positive regulation of protein export fr... 21 3 0.17 0.00057 0.169924528301887 -GO:0048549 positive regulation of pinocytosis 5 2 0.04 0.00061 0.172107142857143 -GO:0001907 killing by symbiont of host cells 5 2 0.04 0.00061 0.172107142857143 -GO:0033591 response to L-ascorbic acid 5 2 0.04 0.00061 0.172107142857143 -GO:0030216 keratinocyte differentiation 354 30 2.79 0.00065 0.180175438596491 -GO:0006754 ATP biosynthetic process 51 4 0.4 0.00069 0.187965517241379 -GO:0070301 cellular response to hydrogen peroxide 92 5 0.72 0.00080 0.214237288135593 -GO:0061844 antimicrobial humoral immune response me... 54 4 0.43 0.00086 0.226466666666667 -GO:0086073 bundle of His cell-Purkinje myocyte adhe... 6 2 0.05 0.00091 0.228222222222222 -GO:0032417 positive regulation of sodium:proton ant... 6 2 0.05 0.00091 0.228222222222222 -GO:0071638 negative regulation of monocyte chemotac... 6 2 0.05 0.00091 0.228222222222222 +GO:0070268 cornification 128 21 0.96 1.9e-22 1.51316e-18 +GO:0001895 retina homeostasis 73 15 0.55 6.9e-18 3.66344e-14 +GO:0010951 negative regulation of endopeptidase act... 230 16 1.73 4.4e-10 1.75208e-06 +GO:0061621 canonical glycolysis 30 7 0.23 2.1e-09 6.59874285714286e-06 +GO:0042026 protein refolding 31 7 0.23 2.7e-09 6.59874285714286e-06 +GO:0046718 viral entry into host cell 159 12 1.2 2.9e-09 6.59874285714286e-06 +GO:0002576 platelet degranulation 139 11 1.05 8.3e-09 1.5928e-05 +GO:0070370 cellular heat acclimation 11 5 0.08 1.0e-08 1.5928e-05 +GO:0070434 positive regulation of nucleotide-bindin... 11 5 0.08 1.0e-08 1.5928e-05 +GO:0090063 positive regulation of microtubule nucle... 12 5 0.09 1.7e-08 2.4616e-05 +GO:1900034 regulation of cellular response to heat 97 9 0.73 5.0e-08 6.63666666666667e-05 +GO:1903265 positive regulation of tumor necrosis fa... 18 5 0.14 1.8e-07 0.000220541538461538 +GO:0018149 peptide cross-linking 36 6 0.27 2.7e-07 0.000307182857142857 +GO:0090084 negative regulation of inclusion body as... 20 5 0.15 3.2e-07 0.000339797333333333 +GO:0050821 protein stabilization 200 11 1.51 3.5e-07 0.000348425 +GO:0061436 establishment of skin barrier 23 5 0.17 6.8e-07 0.00063712 +GO:0001580 detection of chemical stimulus involved ... 49 6 0.37 1.8e-06 0.0015928 +GO:1901673 regulation of mitotic spindle assembly 28 5 0.21 1.9e-06 0.0015928 +GO:0006094 gluconeogenesis 93 7 0.7 6.6e-06 0.00525624 +GO:0034599 cellular response to oxidative stress 330 14 2.49 7.5e-06 0.00568857142857143 +GO:0045648 positive regulation of erythrocyte diffe... 40 5 0.3 1.2e-05 0.008688 +GO:0051092 positive regulation of NF-kappaB transcr... 190 9 1.43 1.4e-05 0.00969530434782609 +GO:2001240 negative regulation of extrinsic apoptot... 46 5 0.35 2.4e-05 0.01529088 +GO:0042744 hydrogen peroxide catabolic process 23 4 0.17 2.4e-05 0.01529088 +GO:0098869 cellular oxidant detoxification 120 7 0.9 3.5e-05 0.0214415384615385 +GO:1900740 positive regulation of protein insertion... 26 4 0.2 4.1e-05 0.024186962962963 +GO:1903923 positive regulation of protein processin... 2 2 0.02 5.6e-05 0.0313067586206897 +GO:0031397 negative regulation of protein ubiquitin... 89 6 0.67 5.7e-05 0.0313067586206897 +GO:0045471 response to ethanol 131 7 0.99 6.1e-05 0.0323869333333333 +GO:0032757 positive regulation of interleukin-8 pro... 64 5 0.48 0.00012 0.05792 +GO:0070527 platelet aggregation 64 5 0.48 0.00012 0.05792 +GO:0046686 response to cadmium ion 64 5 0.48 0.00012 0.05792 +GO:0061844 antimicrobial humoral immune response me... 66 5 0.5 0.00014 0.0655858823529412 +GO:0003064 regulation of heart rate by hormone 3 2 0.02 0.00017 0.0731827027027027 +GO:0061740 protein targeting to lysosome involved i... 3 2 0.02 0.00017 0.0731827027027027 +GO:0007568 aging 387 11 2.91 0.00017 0.0731827027027027 +GO:0086091 regulation of heart rate by cardiac cond... 39 4 0.29 0.00021 0.0880231578947368 +GO:0007157 heterophilic cell-cell adhesion via plas... 40 4 0.3 0.00023 0.093934358974359 +GO:0045087 innate immune response 1367 30 10.29 0.00030 0.116805333333333 +GO:0045104 intermediate filament cytoskeleton organ... 44 4 0.33 0.00033 0.116805333333333 +GO:0043163 cell envelope organization 4 2 0.03 0.00033 0.116805333333333 +GO:1902396 protein localization to bicellular tight... 4 2 0.03 0.00033 0.116805333333333 +GO:1902309 negative regulation of peptidyl-serine d... 4 2 0.03 0.00033 0.116805333333333 +GO:1905913 negative regulation of calcium ion expor... 4 2 0.03 0.00033 0.116805333333333 +GO:0042542 response to hydrogen peroxide 148 8 1.11 0.00036 0.124653913043478 +GO:0043488 regulation of mRNA stability 184 7 1.39 0.00050 0.165916666666667 +GO:0035357 peroxisome proliferator activated recept... 21 3 0.16 0.00050 0.165916666666667 +GO:0033591 response to L-ascorbic acid 5 2 0.04 0.00055 0.178783673469388 +GO:0046827 positive regulation of protein export fr... 22 3 0.17 0.00058 0.1847648 +GO:0072594 establishment of protein localization to... 498 17 3.75 0.00062 0.193634509803922 +GO:0046716 muscle cell cellular homeostasis 23 3 0.17 0.00066 0.202163076923077 +GO:0006605 protein targeting 402 12 3.03 0.00082 0.230681379310345 +GO:0086073 bundle of His cell-Purkinje myocyte adhe... 6 2 0.05 0.00083 0.230681379310345 +GO:0032417 positive regulation of sodium:proton ant... 6 2 0.05 0.00083 0.230681379310345 +GO:0071638 negative regulation of monocyte chemotac... 6 2 0.05 0.00083 0.230681379310345 +GO:0030050 vesicle transport along actin filament 6 2 0.05 0.00083 0.230681379310345 +GO:0006953 acute-phase response 56 4 0.42 0.00084 0.230681379310345
--- a/topGO.xml Fri Jan 24 10:45:15 2020 -0500 +++ b/topGO.xml Mon May 17 14:40:03 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="topGO" name="Enrichment analysis" version="2020.01.23"> +<tool id="topGO" name="Enrichment analysis" version="2021.04.12"> <description>(Human, Mouse, Rat)[topGO]</description> <requirements> <requirement type="package">R</requirement> @@ -6,10 +6,6 @@ <requirement type="package" version="3.8.2">bioconductor-org.hs.eg.db</requirement> <requirement type="package" version="3.8.2">bioconductor-org.mm.eg.db</requirement> <requirement type="package" version="3.8.2">bioconductor-org.rn.eg.db</requirement> - <!--requirement type="package" version="3.6.0">bioconductor-org.ce.eg.db</requirement--> - <!--requirement type="package" version="3.6.0">bioconductor-org.dm.eg.db</requirement--> - <!--requirement type="package" version="3.6.0">bioconductor-org.sc.sgd.db</requirement--> - <!--requirement type="package" version="3.5.0">bioconductor-org.at.tair.db</requirement--> <requirement type="package" version="1.62.0">bioconductor-graph</requirement> <requirement type="package" version="1.46.0">bioconductor-annotationdbi</requirement> <requirement type="package" version="3.8.2">bioconductor-go.db</requirement> @@ -174,8 +170,6 @@ <param name="plot" value="dotplot,barplot"/> <param name="geneuniverse" value="org.Hs.eg.db"/> <output name="outputtext" file="result.tsv"/> - <output name="outputbarplot" file="barplot.png" compare="sim_size"/> - <output name="outputdotplot" file="dotplot.png" compare="sim_size"/> </test> </tests> <help><![CDATA[
--- a/topGO_enrichment.R Fri Jan 24 10:45:15 2020 -0500 +++ b/topGO_enrichment.R Mon May 17 14:40:03 2021 +0000 @@ -1,24 +1,24 @@ -options(warn=-1) #TURN OFF WARNINGS !!!!!! +options(warn = -1) #TURN OFF WARNINGS !!!!!! suppressMessages(library(ggplot2)) suppressMessages(library(topGO)) -get_args <- function(){ - +get_args <- function() { + ## Collect arguments args <- commandArgs(TRUE) - + ## Default setting when no arguments passed - if(length(args) < 1) { + if (length(args) < 1) { args <- c("--help") } - + ## Help section - if("--help" %in% args) { + if ("--help" %in% args) { cat("Pathview R script Arguments: --help Print this test - --input_type + --input_type --onto --option --correction @@ -30,82 +30,92 @@ --header Example: - Rscript --vanilla enrichment_v3.R --inputtype=tabfile (or copypaste) --input=file.txt --ontology='BP/CC/MF' --option=option - (e.g : classic/elim...) --threshold=threshold --correction=correction --textoutput=text --barplotoutput=barplot --dotplotoutput=dotplot + Rscript --vanilla enrichment_v3.R --inputtype=tabfile (or copypaste) + --input=file.txt --ontology='BP/CC/MF' --option=option + (e.g : classic/elim...) --threshold=threshold --correction=correction + --textoutput=text --barplotoutput=barplot --dotplotoutput=dotplot --column=column --geneuniver=human \n\n") - - q(save="no") + + q(save = "no") } - - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 - + + parseargs <- function(x) strsplit(sub("^--", "", x), "=") + argsdf <- as.data.frame(do.call("rbind", parseargs(args))) + args <- as.list(as.character(argsdf$V2)) + names(args) <- argsdf$V1 + return(args) } -read_file <- function(path,header){ - file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) - if (inherits(file,"try-error")){ +read_file <- function(path, header) { + file <- try(read.csv(path, header = header, sep = "\t", + stringsAsFactors = FALSE, quote = "\"", check.names = F), + silent = TRUE) + if (inherits(file, "try-error")) { stop("File not found !") - }else{ + }else { return(file) } } -get_list_from_cp <-function(list){ - list = gsub(";"," ",list) - list = strsplit(list, "[ \t\n]+")[[1]] - list = list[list != ""] #remove empty entry - list = gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2") +get_list_from_cp <- function(list) { + list <- gsub(";", " ", list) + list <- strsplit(list, "[ \t\n]+")[[1]] + list <- list[list != ""] #remove empty entry + list <- gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2") return(list) } check_ens_ids <- function(vector) { - ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$" - return(grepl(ens_pattern,vector)) + ens_pattern <- + "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])? + |CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$" + return(grepl(ens_pattern, vector)) } -str2bool <- function(x){ - if (any(is.element(c("t","true"),tolower(x)))){ - return (TRUE) - }else if (any(is.element(c("f","false"),tolower(x)))){ - return (FALSE) - }else{ +str2bool <- function(x) { + if (any(is.element(c("t", "true"), tolower(x)))) { + return(TRUE) + }else if (any(is.element(c("f", "false"), tolower(x)))) { + return(FALSE) + }else { return(NULL) } } # Some libraries such as GOsummaries won't be able to treat the values such as # "< 1e-30" produced by topGO. As such it is important to delete the < char -# with the deleteInfChar function. Nevertheless the user will have access to the original results in the text output. -deleteInfChar = function(values){ - - lines = grep("<",values) - if (length(lines)!=0){ - for (line in lines){ - values[line]=gsub("<","",values[line]) +# with the deleteinfchar function. Nevertheless the user will have access to +#the original results in the text output. +deleteinfchar <- function(values) { + + lines <- grep("<", values) + if (length(lines) != 0) { + for (line in lines) { + values[line] <- gsub("<", "", values[line]) } } return(values) } -corrMultipleTesting = function(result, myGOdata,correction,threshold){ +#nolint start +corrMultipleTesting = function(result, mygodata, correction, threshold){ # adjust for multiple testing - if (correction!="none"){ + if (correction != "none"){ # GenTable : transforms the result object into a list. Filters can be applied # (e.g : with the topNodes argument, to get for instance only the n first # GO terms with the lowest pvalues), but as we want to apply a correction we # take all the GO terms, no matter their pvalues - allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=length(attributes(result)$score)) + allRes <- GenTable(mygodata, test = result, orderBy = "result", + ranksOf = "result", topNodes = length(attributes(result)$score)) # Some pvalues given by topGO are not numeric (e.g : "<1e-30). As such, these # values are converted to 1e-30 to be able to correct the pvalues - pvaluestmp = deleteInfChar(allRes$test) + pvaluestmp = deleteinfchar(allRes$test) # the correction is done from the modified pvalues - allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), n = length(pvaluestmp)) + allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), + n = length(pvaluestmp)) allRes = as.data.frame(allRes) # Rename the test column by pvalues, so that is more explicit @@ -113,194 +123,235 @@ names(allRes)[nb] = "pvalues" - allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold),] - if (length(allRes$pvalues)==0){ - print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value") + allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold), ] + if (length(allRes$pvalues) == 0) { + print("Threshold was too stringent, no GO term found with pvalue + equal or lesser than the threshold value") return(NULL) } - allRes = allRes[order(allRes$qvalues),] + allRes = allRes[order(allRes$qvalues), ] } - if (correction=="none"){ + if (correction == "none"){ # get all the go terms under user threshold mysummary <- summary(attributes(result)$score <= threshold) numsignif <- as.integer(mysummary[[3]]) # get all significant nodes - allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=numsignif) + allRes <- GenTable(mygodata, test = result, orderBy = "result", + ranksOf = "result", topNodes = numsignif) allRes = as.data.frame(allRes) # Rename the test column by pvalues, so that is more explicit nb = which(names(allRes) %in% c("test")) names(allRes)[nb] = "pvalues" - if (numsignif==0){ + if (numsignif == 0) { - print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value") + print("Threshold was too stringent, no GO term found with pvalue + equal or lesser than the threshold value") return(NULL) } - allRes = allRes[order(allRes$pvalues),] + allRes = allRes[order(allRes$pvalues), ] } return(allRes) } +#nolint end -# roundValues will simplify the results by rounding down the values. For instance 1.1e-17 becomes 1e-17 -roundValues = function(values){ - for (line in 1:length(values)){ - values[line]=as.numeric(gsub(".*e","1e",as.character(values[line]))) +#roundvalues will simplify the results by rounding down the values. +#For instance 1.1e-17 becomes 1e-17 +roundvalues <- function(values) { + for (line in seq_len(length(values))) { + values[line] <- as.numeric(gsub(".*e", "1e", as.character(values[line]))) } return(values) } -createDotPlot = function(data, onto){ +#nolint start +createDotPlot = function(data, onto) { - values = deleteInfChar(data$pvalues) - values = roundValues(values) + values = deleteinfchar(data$pvalues) + values = roundvalues(values) values = as.numeric(values) - geneRatio = data$Significant/data$Annotated + geneRatio = data$Significant / data$Annotated goTerms = data$Term count = data$Significant - labely = paste("GO terms",onto,sep=" ") - ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" ) - ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") + labely = paste("GO terms", onto, sep = " ") + ggplot(data, aes(x = geneRatio, y = goTerms, color = values, size=count)) + geom_point( ) + scale_colour_gradientn( colours = c("red", "violet", "blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color = "p-values\n" ) + ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, + width = 15, height = 15, units = "cm") } -createBarPlot = function(data, onto){ +createBarPlot = function(data, onto) { - values = deleteInfChar(data$pvalues) - values = roundValues(values) + values = deleteinfchar(data$pvalues) + values = roundvalues(values) values = as.numeric(values) goTerms = data$Term count = data$Significant - labely = paste("GO terms",onto,sep=" ") - ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") - ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") + labely = paste("GO terms", onto, sep=" ") + ggplot(data, aes(x = goTerms, y = count, fill = values, scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) + geom_bar(stat = "identity") + scale_fill_gradientn(colours = c("red","violet","blue")) + coord_flip() + labs(fill = "p-values\n") + ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, + width = 15, height = 15, units = "cm") } +#nolint end + # Produce the different outputs -createOutputs = function(result, cut_result,text, barplot, dotplot, onto){ +createoutputs <- function(result, cut_result, text, barplot, dotplot, onto) { - if (is.null(result)){ - err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)." - write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = F, row.names = F) - }else if (is.null(cut_result)){ - err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value." - write.table(err_msg, file='result.tsv', quote=FALSE, sep='\t', col.names = F, row.names = F) + if (is.null(result)) { + err_msg <- "None of the input ids can be found in the org package data, + enrichment analysis cannot be realized. \n The inputs ids probably + either have no associated GO terms or are not ENSG identifiers + (e.g : ENSG00000012048)." + write.table(err_msg, file = "result", quote = FALSE, sep = "\t", + col.names = F, row.names = F) + }else if (is.null(cut_result)) { + err_msg <- "Threshold was too stringent, no GO term found with pvalue equal + or lesser than the threshold value." + write.table(err_msg, file = "result.tsv", quote = FALSE, sep = "\t", + col.names = F, row.names = F) }else { - write.table(cut_result, file='result.tsv', quote=FALSE, sep='\t', col.names = T, row.names = F) - - if (barplot){createBarPlot(cut_result, onto)} - if (dotplot){createDotPlot(cut_result, onto)} + write.table(cut_result, file = "result.tsv", quote = FALSE, sep = "\t", + col.names = T, row.names = F) + + if (barplot) { + createBarPlot(cut_result, onto) #nolint + } + if (dotplot) { + createDotPlot(cut_result, onto) #nolint + } } } -# Launch enrichment analysis and return result data from the analysis or the null -# object if the enrichment could not be done. -goEnrichment = function(geneuniverse,sample,background_sample,onto){ - - if (is.null(background_sample)){ - xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl") # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their associated ensembl ids according to the org package - allGenes = unique(unlist(xx)) # check if the genes given by the user can be found in the org package (gene universe), that is in allGenes +# Launch enrichment analysis and return result data from the analysis or the +# null object if the enrichment could not be done. +goenrichment <- function(geneuniverse, sample, background_sample, onto) { + + if (is.null(background_sample)) { + xx <- annFUN.org(onto, mapping = geneuniverse, ID = "ensembl") #nolint + #get all the GO terms of the corresponding ontology (BP/CC/MF) + #and all their associated ensembl ids according to the org package + + #nolint start + + allGenes <- unique(unlist(xx)) + #check if the genes given by the user can be found in the org package + #(gene universe), that is in allGenes } else { - allGenes = background_sample + allGenes <- background_sample } - if (length(intersect(sample,allGenes))==0){ - print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.") - return(c(NULL,NULL)) + if (length(intersect(sample,allGenes)) == 0) { + print("None of the input ids can be found in the org package data, + enrichment analysis cannot be realized. \n The inputs ids probably + have no associated GO terms.") + return(c(NULL, NULL)) } - geneList = factor(as.integer(allGenes %in% sample)) #duplicated ids in sample count only for one + geneList <- factor(as.integer(allGenes %in% sample)) + #duplicated ids in sample count only for one if (length(levels(geneList)) == 1 ){ - stop("All or none of the background genes are found in tested genes dataset, enrichment analysis can't be done") + stop("All or none of the background genes are found in tested genes dataset, + enrichment analysis can't be done") } names(geneList) <- allGenes - - #topGO enrichment - + +#nolint end + + #topGO enrichment + # Creation of a topGOdata object - # It will contain : the list of genes of interest, the GO annotations and the GO hierarchy - # Parameters : + # It will contain : the list of genes of interest, the GO annotations and + # the GO hierarchy + # Parameters : # ontology : character string specifying the ontology of interest (BP, CC, MF) - # allGenes : named vector of type numeric or factor + # allGenes : named vector of type numeric or factor # annot : tells topGO how to map genes to GO annotations. - # argument not used here : nodeSize : at which minimal number of GO annotations - # do we consider a gene - - myGOdata = new("topGOdata", description="SEA with TopGO", ontology=onto, allGenes=geneList, annot = annFUN.org, mapping=geneuniverse,ID="ensembl") - + # argument not used here : nodeSize : at which minimal number of GO + # annotations do we consider a gene + + mygodata <- new("topGOdata", description = "SEA with TopGO", ontology = onto, + allGenes = geneList, annot = annFUN.org, + mapping = geneuniverse, ID = "ensembl") + # Performing enrichment tests - result <- runTest(myGOdata, algorithm=option, statistic="fisher") - return(c(result,myGOdata)) + result <- runTest(mygodata, algorithm = option, statistic = "fisher") #nolint + return(c(result, mygodata)) } args <- get_args() -#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.rda") -#load("/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.rda") -input_type = args$inputtype -input = args$input -onto = args$ontology -option = args$option -correction = args$correction -threshold = as.numeric(args$threshold) -text = str2bool(args$textoutput) -barplot = "barplot" %in% unlist(strsplit(args$plot,",")) -dotplot = "dotplot" %in% unlist(strsplit(args$plot,",")) -column = as.numeric(gsub("c","",args$column)) -geneuniverse = args$geneuniverse -header = str2bool(args$header) -background = str2bool(args$background) -if (background){ - background_genes = args$background_genes - background_input_type = args$background_input_type - background_header = str2bool(args$background_header) - background_column = as.numeric(gsub("c","",args$background_column)) +input_type <- args$inputtype +input <- args$input +onto <- args$ontology +option <- args$option +correction <- args$correction +threshold <- as.numeric(args$threshold) +text <- str2bool(args$textoutput) +barplot <- "barplot" %in% unlist(strsplit(args$plot, ",")) +dotplot <- "dotplot" %in% unlist(strsplit(args$plot, ",")) +column <- as.numeric(gsub("c", "", args$column)) +geneuniverse <- args$geneuniverse +header <- str2bool(args$header) +background <- str2bool(args$background) +if (background) { + background_genes <- args$background_genes + background_input_type <- args$background_input_type + background_header <- str2bool(args$background_header) + background_column <- as.numeric(gsub("c", "", args$background_column)) } #get input -if (input_type=="copy_paste"){ +if (input_type == "copy_paste") { sample <- get_list_from_cp(input) -} else if (input_type=="file"){ - tab=read_file(input,header) - sample = trimws(unlist(strsplit(tab[,column],";"))) +} else if (input_type == "file") { + tab <- read_file(input, header) + sample <- trimws(unlist(strsplit(tab[, column], ";"))) } #check of ENS ids -if (! any(check_ens_ids(sample))){ - stop("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file") +if (! any(check_ens_ids(sample))) { + stop("no ensembl gene ids found in your ids list, + please check your IDs in input or the selected column of your input file") } #get input if background genes -if (background){ - if (background_input_type=="copy_paste"){ +if (background) { + if (background_input_type == "copy_paste") { background_sample <- get_list_from_cp(background_genes) - } else if (background_input_type=="file"){ - background_tab=read_file(background_genes,background_header) - background_sample = unique(trimws(unlist(strsplit(background_tab[,background_column],";")))) + } else if (background_input_type == "file") { + background_tab <- read_file(background_genes, background_header) + background_sample <- unique(trimws(unlist( + strsplit(background_tab[, background_column], ";")))) } #check of ENS ids - if (! any(check_ens_ids(background_sample))){ - stop("no ensembl gene ids found in your background ids list, please check your IDs in input or the selected column of your input file") + if (! any(check_ens_ids(background_sample))) { + stop("no ensembl gene ids found in your background ids list, + please check your IDs in input or the selected column of your input file") } } else { - background_sample=NULL + background_sample <- NULL } # Launch enrichment analysis -allresult = suppressMessages(goEnrichment(geneuniverse,sample,background_sample,onto)) -result = allresult[1][[1]] -myGOdata = allresult[2][[1]] -if (!is.null(result)){ - cut_result = corrMultipleTesting(result,myGOdata, correction,threshold) # Adjust the result with a multiple testing correction or not and with the user, p-value cutoff -}else{ - cut_result=NULL +allresult <- suppressMessages(goenrichment(geneuniverse, sample, + background_sample, onto)) +result <- allresult[1][[1]] +mygodata <- allresult[2][[1]] +if (!is.null(result)) { + cut_result <- corrMultipleTesting(result, mygodata, correction, threshold) + #Adjust the result with a multiple testing correction or not and with the + #user, p-value cutoff +}else { + cut_result <- NULL } -createOutputs(result, cut_result,text, barplot, dotplot, onto) +createoutputs(result, cut_result, text, barplot, dotplot, onto)