Mercurial > repos > proteore > proteore_clusterprofiler
comparison GO-enrich.R @ 6:5e16cec55146 draft
planemo upload commit 2da0aec067fd35a8ec102ce27ec4bac8f54b1c30-dirty
author | proteore |
---|---|
date | Thu, 29 Mar 2018 11:43:28 -0400 |
parents | 8a91f58782df |
children | 4609346d8108 |
comparison
equal
deleted
inserted
replaced
5:8a91f58782df | 6:5e16cec55146 |
---|---|
37 dev.off() | 37 dev.off() |
38 return(ggo) | 38 return(ggo) |
39 } | 39 } |
40 | 40 |
41 # GO over-representation test | 41 # GO over-representation test |
42 enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) { | 42 enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) { |
43 ego<-enrichGO(gene=geneid, | 43 ego<-enrichGO(gene=geneid, |
44 universe=universe, | |
44 OrgDb=orgdb, | 45 OrgDb=orgdb, |
45 keytype="ENTREZID", | 46 keytype="ENTREZID", |
46 ont=ontology, | 47 ont=ontology, |
47 pAdjustMethod="BH", | 48 pAdjustMethod="BH", |
48 pvalueCutoff=pval_cutoff, | 49 pvalueCutoff=pval_cutoff, |
49 qvalueCutoff=qval_cutoff, | 50 qvalueCutoff=qval_cutoff, |
50 readable=TRUE) | 51 readable=TRUE) |
52 # Plot bar & dot plots | |
51 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") | 53 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") |
52 png(bar_name) | 54 png(bar_name) |
53 p <- barplot(ego) | 55 p <- barplot(ego) |
54 print(p) | 56 print(p) |
55 dev.off() | 57 dev.off() |
71 if("--help" %in% args) { | 73 if("--help" %in% args) { |
72 cat("clusterProfiler Enrichment Analysis | 74 cat("clusterProfiler Enrichment Analysis |
73 Arguments: | 75 Arguments: |
74 --input_type: type of input (list of id or filename) | 76 --input_type: type of input (list of id or filename) |
75 --input: input | 77 --input: input |
76 --ncol: the column number which you would like to apply... | 78 --ncol: the column number which contains list of input IDs |
77 --header: true/false if your file contains a header | 79 --header: true/false if your file contains a header |
78 --id_type: the type of input IDs (UniProt/EntrezID) | 80 --id_type: the type of input IDs (UniProt/EntrezID) |
81 --universe_type: list or filename | |
82 --universe: background IDs list | |
83 --uncol: the column number which contains background IDs list | |
84 --uheader: true/false if the background IDs file contains header | |
85 --universe_id_type: the type of universe IDs (UniProt/EntrezID) | |
79 --species | 86 --species |
80 --onto_opt: ontology options | 87 --onto_opt: ontology options |
81 --go_function: groupGO/enrichGO | 88 --go_function: groupGO/enrichGO |
82 --level: 1-3 | 89 --level: 1-3 |
83 --pval_cutoff | 90 --pval_cutoff |
88 # Parse arguments | 95 # Parse arguments |
89 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 96 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
90 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 97 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
91 args <- as.list(as.character(argsDF$V2)) | 98 args <- as.list(as.character(argsDF$V2)) |
92 names(args) <- argsDF$V1 | 99 names(args) <- argsDF$V1 |
93 | 100 #print(args) |
101 | |
102 # Extract OrgDb | |
103 if (args$species=="human") { | |
104 orgdb<-org.Hs.eg.db | |
105 } | |
106 else if (args$species=="mouse") { | |
107 orgdb<-org.Mm.eg.db | |
108 } | |
109 else if (args$species=="rat") { | |
110 orgdb<-org.Rn.eg.db | |
111 } | |
112 | |
113 # Extract input IDs | |
94 input_type = args$input_type | 114 input_type = args$input_type |
95 if (input_type == "text") { | 115 if (input_type == "text") { |
96 input = strsplit(args$input, "[ \t\n]+")[[1]] | 116 input = strsplit(args$input, "[ \t\n]+")[[1]] |
97 } | 117 } |
98 else if (input_type == "file") { | 118 else if (input_type == "file") { |
99 filename = args$input | 119 filename = args$input |
100 ncol = args$ncol | 120 ncol = args$ncol |
101 # Check ncol | 121 # Check ncol |
102 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 122 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
103 stop("Please enter an integer for level") | 123 stop("Please enter the right format for column number: c[number]") |
104 } | 124 } |
105 else { | 125 else { |
106 ncol = as.numeric(gsub("c", "", ncol)) | 126 ncol = as.numeric(gsub("c", "", ncol)) |
107 } | 127 } |
108 header = args$header | 128 header = args$header |
113 for (row in as.character(file[,ncol])) { | 133 for (row in as.character(file[,ncol])) { |
114 input = c(input, strsplit(row, ";")[[1]][1]) | 134 input = c(input, strsplit(row, ";")[[1]][1]) |
115 } | 135 } |
116 } | 136 } |
117 id_type = args$id_type | 137 id_type = args$id_type |
118 | 138 ## Get input gene list from input IDs |
119 | |
120 #ID format Conversion | 139 #ID format Conversion |
121 #This case : from UNIPROT (protein id) to ENTREZ (gene id) | 140 #This case : from UNIPROT (protein id) to ENTREZ (gene id) |
122 #bitr = conversion function from clusterProfiler | 141 #bitr = conversion function from clusterProfiler |
123 | |
124 if (args$species=="human") { | |
125 orgdb<-org.Hs.eg.db | |
126 } | |
127 else if (args$species=="mouse") { | |
128 orgdb<-org.Mm.eg.db | |
129 } | |
130 else if (args$species=="rat") { | |
131 orgdb<-org.Rn.eg.db | |
132 } | |
133 | |
134 ##to initialize | |
135 if (id_type=="Uniprot") { | 142 if (id_type=="Uniprot") { |
136 idFrom<-"UNIPROT" | 143 idFrom<-"UNIPROT" |
137 idTo<-"ENTREZID" | 144 idTo<-"ENTREZID" |
138 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb) | 145 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb) |
146 gene<-unique(gene$ENTREZID) | |
139 } | 147 } |
140 else if (id_type=="Entrez") { | 148 else if (id_type=="Entrez") { |
141 gene<-input | 149 gene<-unique(input) |
142 } | 150 } |
143 | 151 |
144 ontology <- strsplit(args$onto_opt, ",")[[1]] | 152 ontology <- strsplit(args$onto_opt, ",")[[1]] |
153 ## Extract GGO/EGO arguments | |
145 if (args$go_represent == "true") { | 154 if (args$go_represent == "true") { |
146 go_represent <- args$go_represent | 155 go_represent <- args$go_represent |
147 level <- as.numeric(args$level) | 156 level <- as.numeric(args$level) |
148 } | 157 } |
149 if (args$go_enrich == "true") { | 158 if (args$go_enrich == "true") { |
150 go_enrich <- args$go_enrich | 159 go_enrich <- args$go_enrich |
151 pval_cutoff <- as.numeric(args$pval_cutoff) | 160 pval_cutoff <- as.numeric(args$pval_cutoff) |
152 qval_cutoff <- as.numeric(args$qval_cutoff) | 161 qval_cutoff <- as.numeric(args$qval_cutoff) |
162 # Extract universe background genes (same as input file) | |
163 if (!is.null(args$universe_type)) { | |
164 universe_type = args$universe_type | |
165 if (universe_type == "text") { | |
166 universe = strsplit(args$universe, "[ \t\n]+")[[1]] | |
167 } | |
168 else if (universe_type == "file") { | |
169 universe_filename = args$universe | |
170 universe_ncol = args$uncol | |
171 # Check ncol | |
172 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) { | |
173 stop("Please enter the right format for column number: c[number]") | |
174 } | |
175 else { | |
176 universe_ncol = as.numeric(gsub("c", "", universe_ncol)) | |
177 } | |
178 universe_header = args$uheader | |
179 # Get file content | |
180 universe_file = readfile(universe_filename, universe_header) | |
181 # Extract Protein IDs list | |
182 universe = c() | |
183 for (row in as.character(universe_file[,universe_ncol])) { | |
184 universe = c(universe, strsplit(row, ";")[[1]][1]) | |
185 } | |
186 } | |
187 universe_id_type = args$universe_id_type | |
188 ##to initialize | |
189 if (universe_id_type=="Uniprot") { | |
190 idFrom<-"UNIPROT" | |
191 idTo<-"ENTREZID" | |
192 universe_gene<-bitr(universe, fromType=idFrom, toType=idTo, OrgDb=orgdb) | |
193 universe_gene<-unique(universe_gene$ENTREZID) | |
194 } | |
195 else if (universe_id_type=="Entrez") { | |
196 universe_gene<-unique(universe) | |
197 } | |
198 } | |
199 else { | |
200 universe_gene = NULL | |
201 } | |
153 } | 202 } |
154 | 203 |
155 ##enrichGO : GO over-representation test | 204 ##enrichGO : GO over-representation test |
156 for (onto in ontology) { | 205 for (onto in ontology) { |
157 if (args$go_represent == "true") { | 206 if (args$go_represent == "true") { |
158 ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE) | 207 ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) |
159 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | 208 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) |
160 } | 209 } |
161 if (args$go_enrich == "true") { | 210 if (args$go_enrich == "true") { |
162 ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff) | 211 ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff) |
163 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | 212 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) |
164 } | 213 } |
165 } | 214 } |
166 } | 215 } |
167 | 216 |