diff GO-enrich.R @ 6:5e16cec55146 draft

planemo upload commit 2da0aec067fd35a8ec102ce27ec4bac8f54b1c30-dirty
author proteore
date Thu, 29 Mar 2018 11:43:28 -0400
parents 8a91f58782df
children 4609346d8108
line wrap: on
line diff
--- a/GO-enrich.R	Fri Mar 23 10:01:41 2018 -0400
+++ b/GO-enrich.R	Thu Mar 29 11:43:28 2018 -0400
@@ -39,8 +39,9 @@
 }
 
 # GO over-representation test
-enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) {
+enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) {
   ego<-enrichGO(gene=geneid,
+                universe=universe,
                 OrgDb=orgdb,
                 keytype="ENTREZID",
                 ont=ontology,
@@ -48,6 +49,7 @@
                 pvalueCutoff=pval_cutoff,
                 qvalueCutoff=qval_cutoff,
                 readable=TRUE)
+  # Plot bar & dot plots
   bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
   png(bar_name)
   p <- barplot(ego)
@@ -73,9 +75,14 @@
     Arguments:
         --input_type: type of input (list of id or filename)
         --input: input
-        --ncol: the column number which you would like to apply...
+        --ncol: the column number which contains list of input IDs
         --header: true/false if your file contains a header
         --id_type: the type of input IDs (UniProt/EntrezID)
+        --universe_type: list or filename
+        --universe: background IDs list
+        --uncol: the column number which contains background IDs list
+        --uheader: true/false if the background IDs file contains header
+        --universe_id_type: the type of universe IDs (UniProt/EntrezID)
         --species
         --onto_opt: ontology options
         --go_function: groupGO/enrichGO
@@ -90,7 +97,20 @@
   argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1
+  #print(args)
 
+  # Extract OrgDb
+  if (args$species=="human") {
+    orgdb<-org.Hs.eg.db
+  }
+  else if (args$species=="mouse") {
+    orgdb<-org.Mm.eg.db
+  }
+  else if (args$species=="rat") {
+    orgdb<-org.Rn.eg.db
+  }
+
+  # Extract input IDs
   input_type = args$input_type
   if (input_type == "text") {
     input = strsplit(args$input, "[ \t\n]+")[[1]]
@@ -100,7 +120,7 @@
     ncol = args$ncol
     # Check ncol
     if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
-      stop("Please enter an integer for level")
+      stop("Please enter the right format for column number: c[number]")
     }
     else {
       ncol = as.numeric(gsub("c", "", ncol))
@@ -115,33 +135,22 @@
     }
   }
   id_type = args$id_type
-
-  
+  ## Get input gene list from input IDs
   #ID format Conversion 
   #This case : from UNIPROT (protein id) to ENTREZ (gene id)
   #bitr = conversion function from clusterProfiler
-
-  if (args$species=="human") {
-    orgdb<-org.Hs.eg.db
-  }
-  else if (args$species=="mouse") {
-    orgdb<-org.Mm.eg.db
-  }
-  else if (args$species=="rat") {
-    orgdb<-org.Rn.eg.db
-  }
-  
-  ##to initialize
   if (id_type=="Uniprot") {
     idFrom<-"UNIPROT"
     idTo<-"ENTREZID"
     gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb)
+    gene<-unique(gene$ENTREZID)
   }
   else if (id_type=="Entrez") {
-    gene<-input
+    gene<-unique(input)
   }
 
   ontology <- strsplit(args$onto_opt, ",")[[1]]
+  ## Extract GGO/EGO arguments
   if (args$go_represent == "true") {
     go_represent <- args$go_represent
     level <- as.numeric(args$level)
@@ -150,16 +159,56 @@
     go_enrich <- args$go_enrich
     pval_cutoff <- as.numeric(args$pval_cutoff)
     qval_cutoff <- as.numeric(args$qval_cutoff)
+    # Extract universe background genes (same as input file)
+    if (!is.null(args$universe_type)) {
+      universe_type = args$universe_type
+      if (universe_type == "text") {
+        universe = strsplit(args$universe, "[ \t\n]+")[[1]]
+      }
+      else if (universe_type == "file") {
+        universe_filename = args$universe
+        universe_ncol = args$uncol
+        # Check ncol
+        if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) {
+          stop("Please enter the right format for column number: c[number]")
+        }
+        else {
+          universe_ncol = as.numeric(gsub("c", "", universe_ncol))
+        }
+        universe_header = args$uheader
+        # Get file content
+        universe_file = readfile(universe_filename, universe_header)
+        # Extract Protein IDs list
+        universe = c()
+        for (row in as.character(universe_file[,universe_ncol])) {
+          universe = c(universe, strsplit(row, ";")[[1]][1])
+        }
+      }
+      universe_id_type = args$universe_id_type
+      ##to initialize
+      if (universe_id_type=="Uniprot") {
+        idFrom<-"UNIPROT"
+        idTo<-"ENTREZID"
+        universe_gene<-bitr(universe, fromType=idFrom, toType=idTo, OrgDb=orgdb)
+        universe_gene<-unique(universe_gene$ENTREZID)
+      }
+      else if (universe_id_type=="Entrez") {
+        universe_gene<-unique(universe)
+      }
+    }
+    else {
+      universe_gene = NULL
+    }
   }
 
   ##enrichGO : GO over-representation test
   for (onto in ontology) {
     if (args$go_represent == "true") {
-      ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE)
+      ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
       write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
     }
     if (args$go_enrich == "true") {
-      ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff)
+      ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff)
       write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
     }
   }