Previous changeset 7:3e138d54c105 (2018-09-21) Next changeset 9:948fecb6a40b (2019-03-04) |
Commit message:
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b |
modified:
goprofiles.R goprofiles.xml |
b |
diff -r 3e138d54c105 -r 386145573c19 goprofiles.R --- a/goprofiles.R Fri Sep 21 10:08:02 2018 -0400 +++ b/goprofiles.R Tue Dec 18 09:54:57 2018 -0500 |
[ |
b'@@ -4,23 +4,24 @@\n suppressMessages(library(goProfiles,quietly = TRUE))\n \n # Read file and return file content as data.frame\n-readfile = function(filename, header) {\n- if (header == "true") {\n- # Read only first line of the file as header:\n- headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")\n- #Read the data of the files (skipping the first row)\n- file <- read.table(filename, skip = 1, header = FALSE, sep = "\\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")\n- # Remove empty rows\n- file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]\n- #And assign the header to the data\n- names(file) <- headers\n+read_file <- function(path,header){\n+ file <- try(read.csv(path,header=header, sep="\\t",stringsAsFactors = FALSE, quote="\\"", check.names = F),silent=TRUE)\n+ if (inherits(file,"try-error")){\n+ stop("File not found !")\n+ }else{\n+ return(file)\n }\n- else {\n- file <- read.table(filename, header = FALSE, sep = "\\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")\n- # Remove empty rows\n- file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]\n+}\n+\n+#convert a string to boolean\n+str2bool <- function(x){\n+ if (any(is.element(c("t","true"),tolower(x)))){\n+ return (TRUE)\n+ }else if (any(is.element(c("f","false"),tolower(x)))){\n+ return (FALSE)\n+ }else{\n+ return(NULL)\n }\n- return(file)\n }\n \n check_ids <- function(vector,type) {\n@@ -49,10 +50,10 @@\n package=org.Hs.eg.db\n } else if (species=="org.Mm.eg.db"){\n package=org.Mm.eg.db\n+ } else if (species=="org.Rn.eg.db"){\n+ package=org.Rn.eg.db\n }\n \n- \n- \n # Check if level is number\n if (! as.numeric(level) %% 1 == 0) {\n stop("Please enter an integer for level")\n@@ -75,8 +76,8 @@\n genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]\n # IDs that have NA ENTREZID\n NAs = id$UNIPROT[which(is.na(id$ENTREZID))]\n- print("IDs unable to convert to ENTREZID: ")\n- print(NAs)\n+ #print("IDs unable to convert to ENTREZID: ")\n+ #print(NAs)\n }\n \n # Create basic profiles\n@@ -91,77 +92,20 @@\n return(c(profile.CC, profile.MF, profile.BP, profile.ALL))\n }\n \n-# Plot profiles to PNG\n-plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {\n- if (!is.null(profile.CC)) {\n- png("profile.CC.png")\n- plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.BP)) {\n- png("profile.BP.png")\n- plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.MF)) {\n- png("profile.MF.png")\n- plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.ALL)) {\n- png("profile.ALL.png")\n- plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)\n- dev.off()\n- }\n-}\n-\n-# Plot profiles to JPEG\n-plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {\n- if (!is.null(profile.CC)) {\n- jpeg("profile.CC.jpeg")\n- plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.BP)) {\n- jpeg("profile.BP.jpeg")\n- plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n+make_plot <- function(profile,percent,title,onto,plot_opt){\n+ \n+ if (plot_opt == "PDF") {\n+ file_name=paste("profile_",onto,".pdf",collapse="",sep="")\n+ pdf(file_name)\n+ } else if (plot_opt == "JPEG"){\n+ file_name=paste("profile_",onto,".jpeg",collapse="",sep="")\n+ jpeg(file_name)\n+ } else if (plot_opt == "PNG"){\n+ file_name=paste("profile_",onto'..b'=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.ALL)) {\n- jpeg("profile.ALL.jpeg")\n- plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n-}\n-\n-# Plot profiles to PDF\n-plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {\n- if (!is.null(profile.CC)) {\n- pdf("profile.CC.pdf")\n- plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.BP)) {\n- pdf("profile.BP.pdf")\n- plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.MF)) {\n- pdf("profile.MF.pdf")\n- plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n- if (!is.null(profile.ALL)) {\n- #print("all")\n- pdf("profile.ALL.pdf")\n- plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)\n- dev.off()\n- }\n+ plotProfiles(profile, percentage=percent, multiplePlots=FALSE, aTitle=title)\n+ dev.off()\n }\n \n goprofiles = function() {\n@@ -212,9 +156,9 @@\n } else {\n ncol = as.numeric(gsub("c", "", ncol))\n }\n- header = args$header\n+ header = str2bool(args$header)\n # Get file content\n- file = readfile(filename, header)\n+ file = read_file(filename, header)\n # Extract Protein IDs list\n input = unlist(strsplit(as.character(file[,ncol]),";"))\n input = input [which(!is.na(input))]\n@@ -225,8 +169,7 @@\n }\n \n ontoopt = strsplit(args$onto_opt, ",")[[1]]\n- #print(ontoopt)\n- #plotopt = strsplit(args[3], ",")\n+ onto_pos = as.integer(gsub("BP",3,gsub("MF",2,gsub("CC",1,ontoopt))))\n plotopt = args$plot_opt\n level = args$level\n per = as.logical(args$per)\n@@ -236,51 +179,15 @@\n species=args$species\n \n profiles = getprofile(input, id_type, level, duplicate,species)\n- profile.CC = profiles[1]\n- #print(profile.CC)\n- profile.MF = profiles[2]\n- #print(profile.MF)\n- profile.BP = profiles[3]\n- #print(profile.BP)\n- profile.ALL = profiles[-3:-1]\n- #print(profile.ALL)\n- #c(profile.ALL, profile.CC, profile.MF, profile.BP)\n- \n- if ("CC" %in% ontoopt) {\n- write.table(profile.CC, text_output, append = TRUE, sep="\\t", row.names = FALSE, quote=FALSE)\n- if (grepl("PNG", plotopt)) {\n- plotPNG(profile.CC=profile.CC, per=per, title=title)\n- }\n- if (grepl("JPEG", plotopt)) {\n- plotJPEG(profile.CC = profile.CC, per=per, title=title)\n- }\n- if (grepl("PDF", plotopt)) {\n- plotPDF(profile.CC = profile.CC, per=per, title=title)\n- }\n- }\n- if ("MF" %in% ontoopt) {\n- write.table(profile.MF, text_output, append = TRUE, sep="\\t", row.names = FALSE, quote=FALSE)\n- if (grepl("PNG", plotopt)) {\n- plotPNG(profile.MF = profile.MF, per=per, title=title)\n- }\n- if (grepl("JPEG", plotopt)) {\n- plotJPEG(profile.MF = profile.MF, per=per, title=title)\n- }\n- if (grepl("PDF", plotopt)) {\n- plotPDF(profile.MF = profile.MF, per=per, title=title)\n- }\n- }\n- if ("BP" %in% ontoopt) {\n- write.table(profile.BP, text_output, append = TRUE, sep="\\t", row.names = FALSE, quote=FALSE)\n- if (grepl("PNG", plotopt)) {\n- plotPNG(profile.BP = profile.BP, per=per, title=title)\n- }\n- if (grepl("JPEG", plotopt)) {\n- plotJPEG(profile.BP = profile.BP, per=per, title=title)\n- }\n- if (grepl("PDF", plotopt)) {\n- plotPDF(profile.BP = profile.BP, per=per, title=title)\n- }\n+\n+ for (index in onto_pos) {\n+ onto = names(profiles[index])\n+ profile=profiles[index]\n+ make_plot(profile,per,title,onto,plotopt)\n+ text_output=paste("goProfiles_",onto,"_",title,".tsv",sep="",collapse="")\n+ profile = as.data.frame(profile)\n+ profile <- as.data.frame(apply(profile, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA\n+ write.table(profile, text_output, sep="\\t", row.names = FALSE, quote=FALSE, col.names = T)\n }\n }\n \n' |
b |
diff -r 3e138d54c105 -r 386145573c19 goprofiles.xml --- a/goprofiles.xml Fri Sep 21 10:08:02 2018 -0400 +++ b/goprofiles.xml Tue Dec 18 09:54:57 2018 -0500 |
[ |
b'@@ -1,9 +1,10 @@\n-<tool id="goProfiles" name="goProfiles" version="2018.09.21">\n- <description>Statistical analysis of functional profiles</description>\n+<tool id="goProfiles" name="Statistical analysis of functional profiles" version="2018.12.12">\n+ <description>(Human, Mouse) [goProfiles]</description>\n <requirements> \n <requirement type="package" version="3.4.1">R</requirement>\n <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>\n <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement>\n+ <!--requirement type="package" version="3.5.0">bioconductor-org.rn.eg.db</requirement-->\n <requirement type="package" version="1.40.0">bioconductor-annotationdbi</requirement>\n <requirement type="package" version="2.38.0">bioconductor-biobase</requirement>\n <requirement type="package" version="1.38.0">goprofiles</requirement>\n@@ -24,32 +25,24 @@\n #end if\n \n --id_type="$input.id_type"\n- \n --onto_opt="$onto_opt"\n- \n- --plot_opt="$opt.plot_opt"\n- \n+ --plot_opt="$plot_opt"\n --level="$level"\n- \n --per="$per"\n- \n --title="$title"\n- \n --duplicate="$duplicate"\n-\n --text_output="$text_output"\n-\n- --species="$species"\n+ --species="$species" > $log\n \n ]]></command>\n <inputs>\n <conditional name="input" >\n- <param name="ids" type="select" label="Enter your ID list (only Entrez Gene ID or UniProt accession number allowed" help="Copy/paste or ID list from a file (e.g. table)" >\n- <option value="text">Copy/paste your identifiers</option>\n- <option value="file" selected="true">Input file containing your identifiers</option>\n+ <param name="ids" type="select" label="Enter your IDs (Entrez Gene ID or UniProt Accession number)" help="Copy/paste or from a file (e.g. table)" >\n+ <option value="text">Copy/paste your IDs</option>\n+ <option value="file" selected="true">Input file containing your IDs</option>\n </param>\n <when value="text" >\n- <param name="text" type="text" label="Copy/paste your identifiers" help=\'IDs must be separated by spaces into the form field, for example: P31946 P62258\' >\n+ <param name="text" type="text" label="Copy/paste your IDs" help=\'IDs must be separated by spaces into the form field, for example: P31946 P62258\' >\n <sanitizer>\n <valid initial="string.printable">\n <remove value="'"/>\n@@ -59,53 +52,56 @@\n </mapping>\n </sanitizer>\n </param>\n- <param name="id_type" type="select" label="Please select the type of your IDs list" >\n+ <param name="id_type" type="select" label="Select type of IDs list" >\n <option value="Entrez">Entrez Gene ID</option>\n- <option value="UniProt">UniProt protein acession number</option>\n+ <option value="UniProt">UniProt Accession number</option>\n </param>\n </when>\n <when value="file" >\n- <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" />\n- <param name="ncol" type="text" value="c1" label="The column number of IDs to use" help=\'For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on\' />\n- <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />\n- <param name="id_type" type="select" label="Please select the type of your IDs list" >\n+ <param name="file" type="data" format="txt,tabular" label="Selec'..b'MF.pdf" file="profile.MF.pdf" ftype="pdf" compare="sim_size"/>\n </output_collection>\n- <output name="text_output" file="GO_Profile_text_output.txt"/>\n+ <output name="log" file="GO_Profile_text_output.tsv"/>\n </test>\n </tests>\n <help><![CDATA[\n-This tool, based on the goProfiles R package, performs statistical analysis of functional profiles. It is based on GO ontology and considers either a gene set (\'Entrez\xe2\x80\x99 Identifiers) or a protein set (Uniprot accession number) as input. \n+\n+**Description**\n+\n+This tool is based on the goProfiles R package; it performs statistical analysis of functional profiles based on Gene Ontology (GO). Functional profile at a given GO level is obtained by counting the\n+number of identifiers having a hit in each category of this level. \n \n-You can choose one or more GO categories: \n+-----\n+\n+**Input** \n+\n+Two modes are allowed: either by copy/pasting your IDs (separated by a space) or by supplying a tabular file (.csv, .tsv, .txt, .tab) including your IDs (identifiers).\n+Only entrez gene ID (e.g : 4151) or Uniprot accession number (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE. \n+\n+-----\n \n-* Biological Process (BP) \n-* Cellular Component (CC) \n-* Molecular Function (MF) \n+**Parameters**\n+\n+"Species": enter the sepcies you are working on; Homo sapiens and Mus musculus supported (Rattus norvegicus coming soon)\n+\n+"Select GO terms category": you can choose one or more GO categories which are Biological Process (BP), Cellular Component (CC) and Molecular Function (MF) \n+\n+"Ontology level (the higher this number, the deeper the GO level)": correspond to the level of GO hierarchy (from 1 to 3). In general the higher the level, the more semantically specific the term is.\n \n-Functional profile at a given GO level is obtained by counting the number of identifiers having a hit in each category of this level (2 by default). Results are displayed as bar plots (with absolute or relative frequencies) and can be exported in pdf, png and jpeg formats; textual output with GO terms and their computed frequencies is also provided. \n+-----\n+\n+**Ouput**\n \n-For more details about GoProfiles, please read: Salicr\xc3\xba et al. Comparison of lists of genes based on functional profiles. BMC Bioinformatics. 2011;12:401.(https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-401) \n+Diagram output: graphical output in the form of bar-plot or dot-plot (png, jpeg or pdf format), one figure for each GO category. \n \n-If your type of identifiers is not supported (i.e. different from Uniprot and Entrez), please use the **ID Converter** tool in the ProteoRE section to convert your list of IDs first.\n+text output: with the following information GO category description (e.g.BP.Description), GO term identifier (e.g. BP.GOID) and GO term frequency (e.g. BP.Frequency)\n \n -----\n \n@@ -153,7 +164,9 @@\n \n **Authors** \n \n-Sanchez A, Ocana J and Salicru M (2016). goProfiles: goProfiles: an R package for the statistical analysis of functional profiles. R package version 1.38.0.\n+Salicr\xc3\xba M, Oca\xc3\xb1a J, S\xc3\xa1nchez-Pla A. Comparison of lists of genes based on functional profiles. BMC Bioinformatics. 2011. 12:401. doi:10.1186/1471-2105-12-401. PubMed PMID: 21999355\n+\n+-----\n \n .. class:: infomark\n \n@@ -161,7 +174,9 @@\n \n T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR\n \n-Sandra D\xc3\xa9rozier, Olivier Ru\xc3\xa9, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit,Migale Bioinformatics platform,\n+Sandra D\xc3\xa9rozier, Olivier Ru\xc3\xa9, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit,Migale Bioinformatics platform, FR\n+\n+This work has been partially funded through the French National Agency for Research (ANR) IFB project.\n \n Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.\n \n' |