Mercurial > repos > lnguyen > proteore_goprofiles
comparison goprofiles.R @ 0:6eeb2fb0c4bd draft default tip
planemo upload
author | lnguyen |
---|---|
date | Sat, 16 Sep 2017 09:17:07 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6eeb2fb0c4bd |
---|---|
1 # Load necessary libraries | |
2 library("org.Hs.eg.db", quietly=TRUE) | |
3 library("goProfiles", quietly=TRUE) | |
4 | |
5 # Read file and return file content as data.frame? | |
6 readfile = function(filename, header) { | |
7 if (header == "true") { | |
8 # Read only the first two lines of the files as data (without headers): | |
9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
10 #print("header") | |
11 #print(headers) | |
12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here : | |
13 #headers_names <- sapply(headers, paste, collapse = "_") | |
14 #print(headers_names) | |
15 #Read the data of the files (skipping the first 2 rows): | |
16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
17 #print(file[1,]) | |
18 #And assign the headers of step two to the data: | |
19 names(file) <- headers | |
20 } | |
21 else { | |
22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
23 } | |
24 return(file) | |
25 } | |
26 | |
27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt" | |
28 #test = readfile(filename) | |
29 #str(test) | |
30 #str(test$Gene.names) | |
31 getprofile = function(ids, id_type, level) { | |
32 | |
33 # Check if level is number | |
34 if (! as.numeric(level) %% 1 == 0) { | |
35 stop("Please enter an integer for level") | |
36 } | |
37 else { | |
38 level = as.numeric(level) | |
39 } | |
40 #genes = as.vector(file[,ncol]) | |
41 | |
42 # Extract Gene Entrez ID | |
43 if (id_type == "Entrez") { | |
44 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first") | |
45 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | |
46 } | |
47 else { | |
48 genes_ids = c() | |
49 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first") | |
50 #print(id[[1]][1]) | |
51 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | |
52 # IDs that have NA ENTREZID | |
53 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] | |
54 print("IDs unable to convert to ENTREZID: ") | |
55 print(NAs) | |
56 } | |
57 #print(genes_ids) | |
58 # Convert Protein IDs into entrez ids | |
59 | |
60 # for (i in 1:length(id$UNIPROT)) { | |
61 # print(i) | |
62 # if (is.na(id[[2]][i])) { | |
63 # print(id[[2]][i]) | |
64 # } | |
65 # } | |
66 # a = id[which(id$ENTREZID == "NA"),] | |
67 # print(a) | |
68 # print(a$UNIPROT) | |
69 #print(id[[1]][which(is.na(id$ENTREZID))]) | |
70 #print(genes_ids) | |
71 # for (gene in genes) { | |
72 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA)) | |
73 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT") | |
74 # print(id) | |
75 # genes_ids = append(genes_ids, id$ENTREZID) | |
76 # } | |
77 #print(genes_ids) | |
78 | |
79 # Create basic profiles | |
80 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
81 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
82 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
83 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
84 | |
85 # Print profile | |
86 # printProfiles(profile) | |
87 | |
88 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) | |
89 } | |
90 | |
91 # Plot profiles to PNG | |
92 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
93 if (!is.null(profile.CC)) { | |
94 png("profile.CC.png") | |
95 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
96 dev.off() | |
97 } | |
98 if (!is.null(profile.BP)) { | |
99 png("profile.BP.png") | |
100 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
101 dev.off() | |
102 } | |
103 if (!is.null(profile.MF)) { | |
104 png("profile.MF.png") | |
105 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
106 dev.off() | |
107 } | |
108 if (!is.null(profile.ALL)) { | |
109 png("profile.ALL.png") | |
110 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title) | |
111 dev.off() | |
112 } | |
113 } | |
114 | |
115 # Plot profiles to JPEG | |
116 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
117 if (!is.null(profile.CC)) { | |
118 jpeg("profile.CC.jpeg") | |
119 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
120 dev.off() | |
121 } | |
122 if (!is.null(profile.BP)) { | |
123 jpeg("profile.BP.jpeg") | |
124 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
125 dev.off() | |
126 } | |
127 if (!is.null(profile.MF)) { | |
128 jpeg("profile.MF.jpeg") | |
129 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
130 dev.off() | |
131 } | |
132 if (!is.null(profile.ALL)) { | |
133 jpeg("profile.ALL.jpeg") | |
134 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
135 dev.off() | |
136 } | |
137 } | |
138 | |
139 # Plot profiles to PDF | |
140 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
141 if (!is.null(profile.CC)) { | |
142 pdf("profile.CC.pdf") | |
143 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
144 dev.off() | |
145 } | |
146 if (!is.null(profile.BP)) { | |
147 pdf("profile.BP.pdf") | |
148 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
149 dev.off() | |
150 } | |
151 if (!is.null(profile.MF)) { | |
152 pdf("profile.MF.pdf") | |
153 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
154 dev.off() | |
155 } | |
156 if (!is.null(profile.ALL)) { | |
157 #print("all") | |
158 pdf("profile.ALL.pdf") | |
159 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
160 dev.off() | |
161 } | |
162 } | |
163 | |
164 goprofiles = function() { | |
165 args = commandArgs(trailingOnly = TRUE) | |
166 #print(args) | |
167 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title" | |
168 if (length(args) != 8) { | |
169 stop("Not enough/Too many arguments", call. = FALSE) | |
170 } | |
171 else { | |
172 input_type = args[2] | |
173 if (input_type == "text") { | |
174 input = strsplit(args[1], "\\s+")[[1]] | |
175 } | |
176 else if (input_type == "file") { | |
177 filename = strsplit(args[1], ",")[[1]][1] | |
178 ncol = strsplit(args[1], ",")[[1]][2] | |
179 # Check ncol | |
180 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | |
181 stop("Please enter an integer for level") | |
182 } | |
183 else { | |
184 ncol = as.numeric(gsub("c", "", ncol)) | |
185 } | |
186 header = strsplit(args[1], ",")[[1]][3] | |
187 # Get file content | |
188 file = readfile(filename, header) | |
189 # Extract Protein IDs list | |
190 input = c() | |
191 for (row in as.character(file[,ncol])) { | |
192 input = c(input, strsplit(row, ";")[[1]][1]) | |
193 } | |
194 } | |
195 id_type = args[3] | |
196 ontoopt = strsplit(args[4], ",")[[1]] | |
197 #print(ontoopt) | |
198 #plotopt = strsplit(args[3], ",") | |
199 plotopt = args[5] | |
200 level = args[6] | |
201 per = as.logical(args[7]) | |
202 title = args[8] | |
203 | |
204 profiles = getprofile(input, id_type, level) | |
205 profile.CC = profiles[1] | |
206 #print(profile.CC) | |
207 profile.MF = profiles[2] | |
208 #print(profile.MF) | |
209 profile.BP = profiles[3] | |
210 #print(profile.BP) | |
211 profile.ALL = profiles[-3:-1] | |
212 #print(profile.ALL) | |
213 #c(profile.ALL, profile.CC, profile.MF, profile.BP) | |
214 if ("CC" %in% ontoopt) { | |
215 if (grepl("PNG", plotopt)) { | |
216 plotPNG(profile.CC=profile.CC, per=per, title=title) | |
217 } | |
218 if (grepl("JPEG", plotopt)) { | |
219 plotJPEG(profile.CC = profile.CC, per=per, title=title) | |
220 } | |
221 if (grepl("PDF", plotopt)) { | |
222 plotPDF(profile.CC = profile.CC, per=per, title=title) | |
223 } | |
224 } | |
225 if ("MF" %in% ontoopt) { | |
226 if (grepl("PNG", plotopt)) { | |
227 plotPNG(profile.MF = profile.MF, per=per, title=title) | |
228 } | |
229 if (grepl("JPEG", plotopt)) { | |
230 plotJPEG(profile.MF = profile.MF, per=per, title=title) | |
231 } | |
232 if (grepl("PDF", plotopt)) { | |
233 plotPDF(profile.MF = profile.MF, per=per, title=title) | |
234 } | |
235 } | |
236 if ("BP" %in% ontoopt) { | |
237 if (grepl("PNG", plotopt)) { | |
238 plotPNG(profile.BP = profile.BP, per=per, title=title) | |
239 } | |
240 if (grepl("JPEG", plotopt)) { | |
241 plotJPEG(profile.BP = profile.BP, per=per, title=title) | |
242 } | |
243 if (grepl("PDF", plotopt)) { | |
244 plotPDF(profile.BP = profile.BP, per=per, title=title) | |
245 } | |
246 } | |
247 | |
248 #if (grepl("PNG", plotopt)) { | |
249 # plotPNG(profile.ALL = profile.ALL, per=per, title=title) | |
250 #} | |
251 #if (grepl("JPEG", plotopt)) { | |
252 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title) | |
253 #} | |
254 #if (grepl("PDF", plotopt)) { | |
255 # plotPDF(profile.ALL = profile.ALL, per=per, title=title) | |
256 #} | |
257 } | |
258 | |
259 } | |
260 | |
261 goprofiles() | |
262 | |
263 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title" |