Mercurial > repos > proteore > proteore_clusterprofiler
comparison GO-enrich.R @ 0:bd052861852b draft
planemo upload commit ffa3be72b850aecbfbd636de815967c06a8f643f-dirty
author | proteore |
---|---|
date | Thu, 01 Mar 2018 10:05:18 -0500 |
parents | |
children | 710414ebb6db |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bd052861852b |
---|---|
1 library(clusterProfiler) | |
2 | |
3 #library(org.Sc.sgd.db) | |
4 library(org.Hs.eg.db) | |
5 library(org.Mm.eg.db) | |
6 | |
7 # Read file and return file content as data.frame? | |
8 readfile = function(filename, header) { | |
9 if (header == "true") { | |
10 # Read only the first line of the files as data (without headers): | |
11 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | |
12 #Read the data of the files (skipping the first row): | |
13 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | |
14 # Remove empty rows | |
15 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | |
16 #And assign the headers of step two to the data: | |
17 names(file) <- headers | |
18 } | |
19 else { | |
20 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | |
21 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | |
22 } | |
23 return(file) | |
24 } | |
25 | |
26 repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { | |
27 ggo<-groupGO(gene=geneid, | |
28 OrgDb = orgdb, | |
29 ont=ontology, | |
30 level=level, | |
31 readable=TRUE) | |
32 name <- paste("GGO.", ontology, ".png", sep = "") | |
33 png(name) | |
34 p <- barplot(ggo) | |
35 print(p) | |
36 dev.off() | |
37 return(ggo) | |
38 } | |
39 | |
40 # GO over-representation test | |
41 enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) { | |
42 ego<-enrichGO(gene=geneid, | |
43 OrgDb=orgdb, | |
44 keytype="ENTREZID", | |
45 ont=ontology, | |
46 pAdjustMethod="BH", | |
47 pvalueCutoff=pval_cutoff, | |
48 qvalueCutoff=qval_cutoff, | |
49 readable=TRUE) | |
50 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") | |
51 png(bar_name) | |
52 p <- barplot(ego) | |
53 print(p) | |
54 dev.off() | |
55 dot_name <- paste("EGO.", ontology, ".dot.png", sep = "") | |
56 png(dot_name) | |
57 p <- dotplot(ego) | |
58 print(p) | |
59 dev.off() | |
60 return(ego) | |
61 } | |
62 | |
63 clusterProfiler = function() { | |
64 args <- commandArgs(TRUE) | |
65 if(length(args)<1) { | |
66 args <- c("--help") | |
67 } | |
68 | |
69 # Help section | |
70 if("--help" %in% args) { | |
71 cat("clusterProfiler Enrichment Analysis | |
72 Arguments: | |
73 --input_type: type of input (list of id or filename) | |
74 --input: input | |
75 --ncol: the column number which you would like to apply... | |
76 --header: true/false if your file contains a header | |
77 --id_type: the type of input IDs (UniProt/EntrezID) | |
78 --species | |
79 --onto_opt: ontology options | |
80 --go_function: groupGO/enrichGO | |
81 --level: 1-3 | |
82 --pval_cutoff | |
83 --qval_cutoff | |
84 --text_output: text output filename \n") | |
85 q(save="no") | |
86 } | |
87 # Parse arguments | |
88 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
89 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
90 args <- as.list(as.character(argsDF$V2)) | |
91 names(args) <- argsDF$V1 | |
92 | |
93 input_type = args$input_type | |
94 if (input_type == "text") { | |
95 input = args$input | |
96 } | |
97 else if (input_type == "file") { | |
98 filename = args$input | |
99 ncol = args$ncol | |
100 # Check ncol | |
101 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | |
102 stop("Please enter an integer for level") | |
103 } | |
104 else { | |
105 ncol = as.numeric(gsub("c", "", ncol)) | |
106 } | |
107 header = args$header | |
108 # Get file content | |
109 file = readfile(filename, header) | |
110 # Extract Protein IDs list | |
111 input = c() | |
112 for (row in as.character(file[,ncol])) { | |
113 input = c(input, strsplit(row, ";")[[1]][1]) | |
114 } | |
115 } | |
116 id_type = args$id_type | |
117 | |
118 | |
119 #ID format Conversion | |
120 #This case : from UNIPROT (protein id) to ENTREZ (gene id) | |
121 #bitr = conversion function from clusterProfiler | |
122 | |
123 if (args$species=="human") { | |
124 orgdb<-org.Hs.eg.db | |
125 } | |
126 else if (args$species=="mouse") { | |
127 orgdb<-org.Mm.eg.db | |
128 } | |
129 else if (args$species=="rat") { | |
130 orgdb<-org.Rn.eg.db | |
131 } | |
132 | |
133 ##to initialize | |
134 if (id_type=="Uniprot") { | |
135 idFrom<-"UNIPROT" | |
136 idTo<-"ENTREZID" | |
137 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb) | |
138 } | |
139 else if (id_type=="Entrez") { | |
140 gene<-input | |
141 } | |
142 | |
143 ontology <- strsplit(args$onto_opt, ",")[[1]] | |
144 if (args$go_represent == "true") { | |
145 go_represent <- args$go_represent | |
146 level <- as.numeric(args$level) | |
147 } | |
148 if (args$go_enrich == "true") { | |
149 go_enrich <- args$go_enrich | |
150 pval_cutoff <- as.numeric(args$pval_cutoff) | |
151 qval_cutoff <- as.numeric(args$qval_cutoff) | |
152 } | |
153 | |
154 ##enrichGO : GO over-representation test | |
155 for (onto in ontology) { | |
156 if (args$go_represent == "true") { | |
157 ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE) | |
158 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
159 } | |
160 if (args$go_enrich == "true") { | |
161 ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff) | |
162 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
163 } | |
164 } | |
165 } | |
166 | |
167 clusterProfiler() |