Mercurial > repos > proteore > proteore_topgo
changeset 11:fa2e27165d5d draft
planemo upload commit 4efc56eb769fbceb66c64181441ff8781d523454-dirty
author | proteore |
---|---|
date | Mon, 04 Mar 2019 08:37:49 -0500 |
parents | e3430084c996 |
children | 8eaa43ba1bfc |
files | topGO.xml topGO_enrichment.R |
diffstat | 2 files changed, 47 insertions(+), 89 deletions(-) [+] |
line wrap: on
line diff
--- a/topGO.xml Tue Dec 18 10:06:00 2018 -0500 +++ b/topGO.xml Mon Mar 04 08:37:49 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="topGO" name="Enrichment analysis for Gene Ontology" version="2018.12.17"> +<tool id="topGO" name="Enrichment analysis for Gene Ontology" version="2019.02.19"> <description>(Human, Mouse, Rat)[topGO]</description> <requirements> <requirement type="package" version="3.4.1">R</requirement> @@ -69,7 +69,9 @@ </when> <when value="file"> <param name="genelist" type="data" format="txt,tabular" label="Select your file" help=""/> - <param name="column" type="text" label="Column number of IDs" help="For example, fill in 'c1' if it is the first column, 'c2' if it is the second column and so on"/> + <param name="column" type="text" label="Column number of IDs" help="For example, fill in 'c1' if it is the first column, 'c2' if it is the second column and so on"> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> + </param> <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> </when> </conditional> @@ -96,7 +98,9 @@ <when value="file"> <param name="genelist" type="data" format="txt,tabular" label="Select file that contains your background IDs list" help=""/> <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header ?" /> - <param name="column" type="text" label="Column number of IDs" value="c1" help="For example, fill in 'c1' if it is the first column, 'c2' if it is the second column and so on"/> + <param name="column" type="text" label="Column number of IDs" value="c1" help="For example, fill in 'c1' if it is the first column, 'c2' if it is the second column and so on"> + <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> + </param> </when> </conditional> </when> @@ -141,16 +145,16 @@ </inputs> <outputs> - <data name="outputtext" format="tsv" label="Text output for topGO analysis $ontocat category" from_work_dir="result"> - <filter>textoutput</filter> + <data name="outputtext" format="tsv" label="Text output for topGO analysis $ontocat category" from_work_dir="result.tsv"> + <!--filter>textoutput</filter--> + </data> + + <data name="outputdotplot" format="png" label="Dotplot output for topGO analysis $ontocat category" from_work_dir="dotplot.png"> + <filter>'dotplot' in plot</filter> </data> <data name="outputbarplot" format="png" label="Barplot output for topGO analysis $ontocat category" from_work_dir="barplot.png"> - <filter>barplot</filter> - </data> - - <data name="outputdotplot" format="png" label="Dotplot output for topGO analysis $ontocat category" from_work_dir="dotplot.png"> - <filter>dotplot</filter> + <filter>'barplot' in plot</filter> </data> </outputs>
--- a/topGO_enrichment.R Tue Dec 18 10:06:00 2018 -0500 +++ b/topGO_enrichment.R Mon Mar 04 08:37:49 2019 -0500 @@ -155,91 +155,47 @@ createDotPlot = function(data, onto){ - values = deleteInfChar(data$pvalues) - values = roundValues(values) - values = as.numeric(values) - - geneRatio = data$Significant/data$Annotated - goTerms = data$Term - count = data$Significant - - labely = paste("GO terms",onto,sep=" ") - ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" ) - ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") + values = deleteInfChar(data$pvalues) + values = roundValues(values) + values = as.numeric(values) + + geneRatio = data$Significant/data$Annotated + goTerms = data$Term + count = data$Significant + + labely = paste("GO terms",onto,sep=" ") + ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" ) + ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") } createBarPlot = function(data, onto){ - - values = deleteInfChar(data$pvalues) - values = roundValues(values) - values = as.numeric(values) - - goTerms = data$Term - count = data$Significant - - labely = paste("GO terms",onto,sep=" ") - ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") - ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") + values = deleteInfChar(data$pvalues) + values = roundValues(values) + values = as.numeric(values) + + goTerms = data$Term + count = data$Significant + + labely = paste("GO terms",onto,sep=" ") + ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") + ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") } # Produce the different outputs createOutputs = function(result, cut_result,text, barplot, dotplot, onto){ - + if (is.null(result)){ - if (text){ - err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)." - write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = T, row.names = F) - } - if (barplot){ - png(filename="barplot.png") - plot.new() - #text(0,0,err_msg) - dev.off() - } - if (dotplot){ - png(filename="dotplot.png") - plot.new() - #text(0,0,err_msg) - dev.off() - } - opt <- options(show.error.messages=FALSE) - on.exit(options(opt)) - stop("null result") - } + err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)." + write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = F, row.names = F) + }else if (is.null(cut_result)){ + err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value." + write.table(err_msg, file='result.tsv', quote=FALSE, sep='\t', col.names = F, row.names = F) + }else { + write.table(cut_result, file='result.tsv', quote=FALSE, sep='\t', col.names = T, row.names = F) - if (is.null(cut_result)){ - if (text){ - err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value." - write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = T, row.names = F) - } - if (barplot){ - png(filename="barplot.png") - plot.new() - text(0,0,err_msg) - dev.off() - } - if (dotplot){ - png(filename="dotplot.png") - plot.new() - text(0,0,err_msg) - dev.off() - } - opt <- options(show.error.messages=FALSE) - on.exit(options(opt)) - stop("null cut_result") - } - - if (text){ - write.table(cut_result, file='result', quote=FALSE, sep='\t', col.names = T, row.names = F) - } - - if (barplot){ - createBarPlot(cut_result, onto) - } - - if (dotplot){ - createDotPlot(cut_result, onto) + if (barplot){createBarPlot(cut_result, onto)} + if (dotplot){createDotPlot(cut_result, onto)} } } @@ -318,8 +274,7 @@ #check of ENS ids if (! any(check_ens_ids(sample))){ - print("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file") - stop() + stop("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file") } #get input if background genes @@ -332,8 +287,7 @@ } #check of ENS ids if (! any(check_ens_ids(background_sample))){ - print("no ensembl gene ids found in your background ids list, please check your IDs in input or the selected column of your input file") - stop() + stop("no ensembl gene ids found in your background ids list, please check your IDs in input or the selected column of your input file") } } else { background_sample=NULL