# HG changeset patch
# User proteore
# Date 1518770436 18000
# Node ID 1236ee08ccb8787f8e01630f60c751c4a6dbe81c
# Parent d89c09253c8d7708c295e44aa98f1536fa551373
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
diff -r d89c09253c8d -r 1236ee08ccb8 goprofiles.R
--- a/goprofiles.R Sun Nov 26 19:19:39 2017 -0500
+++ b/goprofiles.R Fri Feb 16 03:40:36 2018 -0500
@@ -5,16 +5,12 @@
# Read file and return file content as data.frame?
readfile = function(filename, header) {
if (header == "true") {
- # Read only the first two lines of the files as data (without headers):
+ # Read only the first line of the files as data (without headers):
headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
- #print("header")
- #print(headers)
- # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here :
- #headers_names <- sapply(headers, paste, collapse = "_")
- #print(headers_names)
- #Read the data of the files (skipping the first 2 rows):
+ #Read the data of the files (skipping the first row):
file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
- #print(file[1,])
+ # Remove empty rows
+ file <- file[!apply(is.na(file) | file == "", 1, all),]
#And assign the headers of step two to the data:
names(file) <- headers
}
@@ -24,10 +20,6 @@
return(file)
}
-#filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
-#test = readfile(filename)
-#str(test)
-#str(test$Gene.names)
getprofile = function(ids, id_type, level, duplicate) {
####################################################################
# Arguments
@@ -64,27 +56,6 @@
print("IDs unable to convert to ENTREZID: ")
print(NAs)
}
- #print(genes_ids)
- # Convert Protein IDs into entrez ids
-
- # for (i in 1:length(id$UNIPROT)) {
- # print(i)
- # if (is.na(id[[2]][i])) {
- # print(id[[2]][i])
- # }
- # }
- # a = id[which(id$ENTREZID == "NA"),]
- # print(a)
- # print(a$UNIPROT)
- #print(id[[1]][which(is.na(id$ENTREZID))])
- #print(genes_ids)
- # for (gene in genes) {
- # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
- # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
- # print(id)
- # genes_ids = append(genes_ids, id$ENTREZID)
- # }
- #print(genes_ids)
# Create basic profiles
profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
@@ -172,103 +143,117 @@
}
goprofiles = function() {
- args = commandArgs(trailingOnly = TRUE)
- #print(args)
- # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
- if (length(args) != 9) {
- stop("Not enough/Too many arguments", call. = FALSE)
+ # Entry point: parse the "--key=value" command-line arguments, build the
+ # list of input IDs (typed text or one column of a tab-separated file),
+ # compute the GO profiles with getprofile() and, for each ontology listed
+ # in --onto_opt, append the profile to --text_output and draw the plots
+ # in the formats named in --plot_opt. Takes no R arguments.
+ args <- commandArgs(TRUE)
+ if(length(args)<1) {
+ args <- c("--help")
}
- else {
- input_type = args[2]
- if (input_type == "text") {
- input = strsplit(args[1], "\\s+")[[1]]
- }
- else if (input_type == "file") {
- filename = strsplit(args[1], ",")[[1]][1]
- ncol = strsplit(args[1], ",")[[1]][2]
- # Check ncol
- if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
- stop("Please enter an integer for level")
- }
- else {
- ncol = as.numeric(gsub("c", "", ncol))
- }
- header = strsplit(args[1], ",")[[1]][3]
- # Get file content
- file = readfile(filename, header)
- # Extract Protein IDs list
- input = c()
- for (row in as.character(file[,ncol])) {
- input = c(input, strsplit(row, ";")[[1]][1])
- }
- }
- id_type = args[3]
- ontoopt = strsplit(args[4], ",")[[1]]
- #print(ontoopt)
- #plotopt = strsplit(args[3], ",")
- plotopt = args[5]
- level = args[6]
- per = as.logical(args[7])
- title = args[8]
- duplicate = args[9]
-
- profiles = getprofile(input, id_type, level, duplicate)
- profile.CC = profiles[1]
- #print(profile.CC)
- profile.MF = profiles[2]
- #print(profile.MF)
- profile.BP = profiles[3]
- #print(profile.BP)
- profile.ALL = profiles[-3:-1]
- #print(profile.ALL)
- #c(profile.ALL, profile.CC, profile.MF, profile.BP)
- if ("CC" %in% ontoopt) {
- if (grepl("PNG", plotopt)) {
- plotPNG(profile.CC=profile.CC, per=per, title=title)
- }
- if (grepl("JPEG", plotopt)) {
- plotJPEG(profile.CC = profile.CC, per=per, title=title)
- }
- if (grepl("PDF", plotopt)) {
- plotPDF(profile.CC = profile.CC, per=per, title=title)
- }
- }
- if ("MF" %in% ontoopt) {
- if (grepl("PNG", plotopt)) {
- plotPNG(profile.MF = profile.MF, per=per, title=title)
- }
- if (grepl("JPEG", plotopt)) {
- plotJPEG(profile.MF = profile.MF, per=per, title=title)
- }
- if (grepl("PDF", plotopt)) {
- plotPDF(profile.MF = profile.MF, per=per, title=title)
- }
- }
- if ("BP" %in% ontoopt) {
- if (grepl("PNG", plotopt)) {
- plotPNG(profile.BP = profile.BP, per=per, title=title)
- }
- if (grepl("JPEG", plotopt)) {
- plotJPEG(profile.BP = profile.BP, per=per, title=title)
- }
- if (grepl("PDF", plotopt)) {
- plotPDF(profile.BP = profile.BP, per=per, title=title)
- }
- }
-
- #if (grepl("PNG", plotopt)) {
- # plotPNG(profile.ALL = profile.ALL, per=per, title=title)
- #}
- #if (grepl("JPEG", plotopt)) {
- # plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
- #}
- #if (grepl("PDF", plotopt)) {
- # plotPDF(profile.ALL = profile.ALL, per=per, title=title)
- #}
+
+ # Help section
+ if("--help" %in% args) {
+ cat("goprofiles
+ Arguments:
+ --input_type: type of input (list of id or filename)
+ --input: input
+ --ncol: the column number which you would like to apply...
+ --header: true/false if your file contains a header
+ --id_type: the type of input IDs (UniProt/EntrezID)
+ --onto_opt: ontology options
+ --plot_opt: plot extension options (PDF/JPEG/PNG)
+ --level: 1-3
+ --per
+ --title: title of the plot
+ --duplicate: remove duplicate input IDs (true/false)
+ --text_output: text output filename \n")
+ q(save="no")
}
+ # Parse arguments: split each "--key=value" on its first "=" only
+ args <- sub("^--", "", args)
+ arg_names <- sub("=.*$", "", args)
+ args <- as.list(sub("^[^=]*=?", "", args))
+ names(args) <- arg_names
+
+ input_type = args$input_type
+ if (identical(input_type, "text")) {
+ input = strsplit(args$input, " ")[[1]]
+ }
+ else if (identical(input_type, "file")) {
+ filename = args$input
+ ncol = args$ncol
+ # Check ncol (any "c" is stripped first, so "c2" is read as 2)
+ ncol = suppressWarnings(as.numeric(gsub("c", "", ncol)))
+ if (length(ncol) != 1 || is.na(ncol) || ncol %% 1 != 0) {
+ # Also reached for a missing or non-numeric --ncol, which used to
+ # abort with an unrelated "missing value" / "length zero" error
+ stop("Please enter an integer for ncol")
+ }
+ header = args$header
+ # Get file content
+ file = readfile(filename, header)
+ # Extract Protein IDs list
+ # (first ID of each ";"-separated cell; vapply avoids growing a
+ # vector inside a loop)
+ input = vapply(strsplit(as.character(file[,ncol]), ";"),
+ function(ids) ids[1], character(1))
+ }
+ else {
+ stop("--input_type must be 'text' or 'file'")
+ }
+ id_type = args$id_type
+ ontoopt = strsplit(args$onto_opt, ",")[[1]]
+ plotopt = args$plot_opt
+ level = args$level
+ per = as.logical(args$per)
+ title = args$title
+ duplicate = args$duplicate
+ text_output = args$text_output
+
+ profiles = getprofile(input, id_type, level, duplicate)
+ profile.CC = profiles[1]
+ profile.MF = profiles[2]
+ profile.BP = profiles[3]
+
+ if ("CC" %in% ontoopt) {
+ write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+ if (grepl("PNG", plotopt)) {
+ plotPNG(profile.CC=profile.CC, per=per, title=title)
+ }
+ if (grepl("JPEG", plotopt)) {
+ plotJPEG(profile.CC = profile.CC, per=per, title=title)
+ }
+ if (grepl("PDF", plotopt)) {
+ plotPDF(profile.CC = profile.CC, per=per, title=title)
+ }
+ }
+ if ("MF" %in% ontoopt) {
+ write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+ if (grepl("PNG", plotopt)) {
+ plotPNG(profile.MF = profile.MF, per=per, title=title)
+ }
+ if (grepl("JPEG", plotopt)) {
+ plotJPEG(profile.MF = profile.MF, per=per, title=title)
+ }
+ if (grepl("PDF", plotopt)) {
+ plotPDF(profile.MF = profile.MF, per=per, title=title)
+ }
+ }
+ if ("BP" %in% ontoopt) {
+ write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+ if (grepl("PNG", plotopt)) {
+ plotPNG(profile.BP = profile.BP, per=per, title=title)
+ }
+ if (grepl("JPEG", plotopt)) {
+ plotJPEG(profile.BP = profile.BP, per=per, title=title)
+ }
+ if (grepl("PDF", plotopt)) {
+ plotPDF(profile.BP = profile.BP, per=per, title=title)
+ }
+ }
}
goprofiles()
-
-#Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"
diff -r d89c09253c8d -r 1236ee08ccb8 goprofiles.xml
--- a/goprofiles.xml Sun Nov 26 19:19:39 2017 -0500
+++ b/goprofiles.xml Fri Feb 16 03:40:36 2018 -0500
@@ -5,7 +5,7 @@
R
bioconductor-org.hs.eg.db
bioconductor-annotationdbi
- bioconductor-biobase
+ bioconductor-biobase
goprofiles
@@ -14,29 +14,35 @@
-
+
@@ -53,7 +59,7 @@
-
+
@@ -93,16 +99,18 @@
+
-
+
-
+
+
@@ -111,14 +119,15 @@
-
-
-
+
+
+
+