proteore_clusterprofiler: GO-enrich.R comparison

comparison GO-enrich.R @ 6:5e16cec55146 draft

planemo upload commit 2da0aec067fd35a8ec102ce27ec4bac8f54b1c30-dirty

author	proteore
date	Thu, 29 Mar 2018 11:43:28 -0400
parents	8a91f58782df
children	4609346d8108

comparison

equal deleted inserted replaced

-:8a91f58782df
+:5e16cec55146
 dev.off()
 return(ggo)
 }
 # GO over-representation test
-enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) {
+enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) {
 ego<-enrichGO(gene=geneid,
+universe=universe,
 OrgDb=orgdb,
 keytype="ENTREZID",
 ont=ontology,
 pAdjustMethod="BH",
 pvalueCutoff=pval_cutoff,
 qvalueCutoff=qval_cutoff,
 readable=TRUE)
+# Plot bar & dot plots
 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
 png(bar_name)
 p <- barplot(ego)
 print(p)
 dev.off()
 if("--help" %in% args) {
 cat("clusterProfiler Enrichment Analysis
 Arguments:
 --input_type: type of input (list of id or filename)
 --input: input
---ncol: the column number which you would like to apply...
+--ncol: the column number which contains list of input IDs
 --header: true/false if your file contains a header
 --id_type: the type of input IDs (UniProt/EntrezID)
+--universe_type: list or filename
+--universe: background IDs list
+--uncol: the column number which contains background IDs list
+--uheader: true/false if the background IDs file contains header
+--universe_id_type: the type of universe IDs (UniProt/EntrezID)
 --species
 --onto_opt: ontology options
 --go_function: groupGO/enrichGO
 --level: 1-3
 --pval_cutoff
 # Parse arguments
 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
 args <- as.list(as.character(argsDF$V2))
 names(args) <- argsDF$V1
+#print(args)
+# Extract OrgDb
+if (args$species=="human") {
+orgdb<-org.Hs.eg.db
+}
+else if (args$species=="mouse") {
+orgdb<-org.Mm.eg.db
+}
+else if (args$species=="rat") {
+orgdb<-org.Rn.eg.db
+}
+# Extract input IDs
 input_type = args$input_type
 if (input_type == "text") {
 input = strsplit(args$input, "[ \t\n]+")[[1]]
 }
 else if (input_type == "file") {
 filename = args$input
 ncol = args$ncol
 # Check ncol
 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
-stop("Please enter an integer for level")
+stop("Please enter the right format for column number: c[number]")
 }
 else {
 ncol = as.numeric(gsub("c", "", ncol))
 }
 header = args$header
 for (row in as.character(file[,ncol])) {
 input = c(input, strsplit(row, ";")[[1]][1])
 }
 }
 id_type = args$id_type
+## Get input gene list from input IDs
 #ID format Conversion
 #This case : from UNIPROT (protein id) to ENTREZ (gene id)
 #bitr = conversion function from clusterProfiler
-if (args$species=="human") {
-orgdb<-org.Hs.eg.db
-}
-else if (args$species=="mouse") {
-orgdb<-org.Mm.eg.db
-}
-else if (args$species=="rat") {
-orgdb<-org.Rn.eg.db
-}
-##to initialize
 if (id_type=="Uniprot") {
 idFrom<-"UNIPROT"
 idTo<-"ENTREZID"
 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb)
+gene<-unique(gene$ENTREZID)
 }
 else if (id_type=="Entrez") {
-gene<-input
+gene<-unique(input)
 }
 ontology <- strsplit(args$onto_opt, ",")[[1]]
+## Extract GGO/EGO arguments
 if (args$go_represent == "true") {
 go_represent <- args$go_represent
 level <- as.numeric(args$level)
 }
 if (args$go_enrich == "true") {
 go_enrich <- args$go_enrich
 pval_cutoff <- as.numeric(args$pval_cutoff)
 qval_cutoff <- as.numeric(args$qval_cutoff)
+# Extract universe background genes (same as input file)
+if (!is.null(args$universe_type)) {
+universe_type = args$universe_type
+if (universe_type == "text") {
+universe = strsplit(args$universe, "[ \t\n]+")[[1]]
+}
+else if (universe_type == "file") {
+universe_filename = args$universe
+universe_ncol = args$uncol
+# Check ncol
+if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) {
+stop("Please enter the right format for column number: c[number]")
+}
+else {
+universe_ncol = as.numeric(gsub("c", "", universe_ncol))
+}
+universe_header = args$uheader
+# Get file content
+universe_file = readfile(universe_filename, universe_header)
+# Extract Protein IDs list
+universe = c()
+for (row in as.character(universe_file[,universe_ncol])) {
+universe = c(universe, strsplit(row, ";")[[1]][1])
+}
+}
+universe_id_type = args$universe_id_type
+##to initialize
+if (universe_id_type=="Uniprot") {
+idFrom<-"UNIPROT"
+idTo<-"ENTREZID"
+universe_gene<-bitr(universe, fromType=idFrom, toType=idTo, OrgDb=orgdb)
+universe_gene<-unique(universe_gene$ENTREZID)
+}
+else if (universe_id_type=="Entrez") {
+universe_gene<-unique(universe)
+}
+}
+else {
+universe_gene = NULL
+}
 }
 ##enrichGO : GO over-representation test
 for (onto in ontology) {
 if (args$go_represent == "true") {
-ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE)
+ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
 }
 if (args$go_enrich == "true") {
-ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff)
+ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff)
 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
 }
 }
 }

Mercurial > repos > proteore > proteore_clusterprofiler

comparison GO-enrich.R @ 6:5e16cec55146 draft