0
|
1 # Load necessary libraries
|
|
2 library("org.Hs.eg.db", quietly=TRUE)
|
|
3 library("goProfiles", quietly=TRUE)
|
|
4
|
|
5 # Read file and return file content as data.frame?
|
|
6 readfile = function(filename, header) {
|
|
7 if (header == "true") {
|
|
8 # Read only the first two lines of the files as data (without headers):
|
|
9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
|
|
10 #print("header")
|
|
11 #print(headers)
|
|
12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here :
|
|
13 #headers_names <- sapply(headers, paste, collapse = "_")
|
|
14 #print(headers_names)
|
|
15 #Read the data of the files (skipping the first 2 rows):
|
|
16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
|
|
17 #print(file[1,])
|
|
18 #And assign the headers of step two to the data:
|
|
19 names(file) <- headers
|
|
20 }
|
|
21 else {
|
|
22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
|
|
23 }
|
|
24 return(file)
|
|
25 }
|
|
26
|
|
27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
|
|
28 #test = readfile(filename)
|
|
29 #str(test)
|
|
30 #str(test$Gene.names)
|
|
31 getprofile = function(ids, id_type, level) {
|
|
32
|
|
33 # Check if level is number
|
|
34 if (! as.numeric(level) %% 1 == 0) {
|
|
35 stop("Please enter an integer for level")
|
|
36 }
|
|
37 else {
|
|
38 level = as.numeric(level)
|
|
39 }
|
|
40 #genes = as.vector(file[,ncol])
|
|
41
|
|
42 # Extract Gene Entrez ID
|
|
43 if (id_type == "Entrez") {
|
|
44 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
|
|
45 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
|
|
46 }
|
|
47 else {
|
|
48 genes_ids = c()
|
|
49 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
|
|
50 #print(id[[1]][1])
|
|
51 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
|
|
52 # IDs that have NA ENTREZID
|
|
53 NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
|
|
54 print("IDs unable to convert to ENTREZID: ")
|
|
55 print(NAs)
|
|
56 }
|
|
57 #print(genes_ids)
|
|
58 # Convert Protein IDs into entrez ids
|
|
59
|
|
60 # for (i in 1:length(id$UNIPROT)) {
|
|
61 # print(i)
|
|
62 # if (is.na(id[[2]][i])) {
|
|
63 # print(id[[2]][i])
|
|
64 # }
|
|
65 # }
|
|
66 # a = id[which(id$ENTREZID == "NA"),]
|
|
67 # print(a)
|
|
68 # print(a$UNIPROT)
|
|
69 #print(id[[1]][which(is.na(id$ENTREZID))])
|
|
70 #print(genes_ids)
|
|
71 # for (gene in genes) {
|
|
72 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
|
|
73 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
|
|
74 # print(id)
|
|
75 # genes_ids = append(genes_ids, id$ENTREZID)
|
|
76 # }
|
|
77 #print(genes_ids)
|
|
78
|
|
79 # Create basic profiles
|
|
80 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
|
|
81 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
|
|
82 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
|
|
83 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
|
|
84
|
|
85 # Print profile
|
|
86 # printProfiles(profile)
|
|
87
|
|
88 return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
|
|
89 }
|
|
90
|
|
91 # Plot profiles to PNG
|
|
92 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
|
|
93 if (!is.null(profile.CC)) {
|
|
94 png("profile.CC.png")
|
|
95 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
96 dev.off()
|
|
97 }
|
|
98 if (!is.null(profile.BP)) {
|
|
99 png("profile.BP.png")
|
|
100 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
101 dev.off()
|
|
102 }
|
|
103 if (!is.null(profile.MF)) {
|
|
104 png("profile.MF.png")
|
|
105 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
106 dev.off()
|
|
107 }
|
|
108 if (!is.null(profile.ALL)) {
|
|
109 png("profile.ALL.png")
|
|
110 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
|
|
111 dev.off()
|
|
112 }
|
|
113 }
|
|
114
|
|
115 # Plot profiles to JPEG
|
|
116 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
|
|
117 if (!is.null(profile.CC)) {
|
|
118 jpeg("profile.CC.jpeg")
|
|
119 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
120 dev.off()
|
|
121 }
|
|
122 if (!is.null(profile.BP)) {
|
|
123 jpeg("profile.BP.jpeg")
|
|
124 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
125 dev.off()
|
|
126 }
|
|
127 if (!is.null(profile.MF)) {
|
|
128 jpeg("profile.MF.jpeg")
|
|
129 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
130 dev.off()
|
|
131 }
|
|
132 if (!is.null(profile.ALL)) {
|
|
133 jpeg("profile.ALL.jpeg")
|
|
134 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
135 dev.off()
|
|
136 }
|
|
137 }
|
|
138
|
|
139 # Plot profiles to PDF
|
|
140 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
|
|
141 if (!is.null(profile.CC)) {
|
|
142 pdf("profile.CC.pdf")
|
|
143 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
144 dev.off()
|
|
145 }
|
|
146 if (!is.null(profile.BP)) {
|
|
147 pdf("profile.BP.pdf")
|
|
148 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
149 dev.off()
|
|
150 }
|
|
151 if (!is.null(profile.MF)) {
|
|
152 pdf("profile.MF.pdf")
|
|
153 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
154 dev.off()
|
|
155 }
|
|
156 if (!is.null(profile.ALL)) {
|
|
157 #print("all")
|
|
158 pdf("profile.ALL.pdf")
|
|
159 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
|
|
160 dev.off()
|
|
161 }
|
|
162 }
|
|
163
|
|
164 goprofiles = function() {
|
|
165 args = commandArgs(trailingOnly = TRUE)
|
|
166 #print(args)
|
|
167 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
|
|
168 if (length(args) != 8) {
|
|
169 stop("Not enough/Too many arguments", call. = FALSE)
|
|
170 }
|
|
171 else {
|
|
172 input_type = args[2]
|
|
173 if (input_type == "text") {
|
|
174 input = strsplit(args[1], "\\s+")[[1]]
|
|
175 }
|
|
176 else if (input_type == "file") {
|
|
177 filename = strsplit(args[1], ",")[[1]][1]
|
|
178 ncol = strsplit(args[1], ",")[[1]][2]
|
|
179 # Check ncol
|
|
180 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
|
|
181 stop("Please enter an integer for level")
|
|
182 }
|
|
183 else {
|
|
184 ncol = as.numeric(gsub("c", "", ncol))
|
|
185 }
|
|
186 header = strsplit(args[1], ",")[[1]][3]
|
|
187 # Get file content
|
|
188 file = readfile(filename, header)
|
|
189 # Extract Protein IDs list
|
|
190 input = c()
|
|
191 for (row in as.character(file[,ncol])) {
|
|
192 input = c(input, strsplit(row, ";")[[1]][1])
|
|
193 }
|
|
194 }
|
|
195 id_type = args[3]
|
|
196 ontoopt = strsplit(args[4], ",")[[1]]
|
|
197 #print(ontoopt)
|
|
198 #plotopt = strsplit(args[3], ",")
|
|
199 plotopt = args[5]
|
|
200 level = args[6]
|
|
201 per = as.logical(args[7])
|
|
202 title = args[8]
|
|
203
|
|
204 profiles = getprofile(input, id_type, level)
|
|
205 profile.CC = profiles[1]
|
|
206 #print(profile.CC)
|
|
207 profile.MF = profiles[2]
|
|
208 #print(profile.MF)
|
|
209 profile.BP = profiles[3]
|
|
210 #print(profile.BP)
|
|
211 profile.ALL = profiles[-3:-1]
|
|
212 #print(profile.ALL)
|
|
213 #c(profile.ALL, profile.CC, profile.MF, profile.BP)
|
|
214 if ("CC" %in% ontoopt) {
|
|
215 if (grepl("PNG", plotopt)) {
|
|
216 plotPNG(profile.CC=profile.CC, per=per, title=title)
|
|
217 }
|
|
218 if (grepl("JPEG", plotopt)) {
|
|
219 plotJPEG(profile.CC = profile.CC, per=per, title=title)
|
|
220 }
|
|
221 if (grepl("PDF", plotopt)) {
|
|
222 plotPDF(profile.CC = profile.CC, per=per, title=title)
|
|
223 }
|
|
224 }
|
|
225 if ("MF" %in% ontoopt) {
|
|
226 if (grepl("PNG", plotopt)) {
|
|
227 plotPNG(profile.MF = profile.MF, per=per, title=title)
|
|
228 }
|
|
229 if (grepl("JPEG", plotopt)) {
|
|
230 plotJPEG(profile.MF = profile.MF, per=per, title=title)
|
|
231 }
|
|
232 if (grepl("PDF", plotopt)) {
|
|
233 plotPDF(profile.MF = profile.MF, per=per, title=title)
|
|
234 }
|
|
235 }
|
|
236 if ("BP" %in% ontoopt) {
|
|
237 if (grepl("PNG", plotopt)) {
|
|
238 plotPNG(profile.BP = profile.BP, per=per, title=title)
|
|
239 }
|
|
240 if (grepl("JPEG", plotopt)) {
|
|
241 plotJPEG(profile.BP = profile.BP, per=per, title=title)
|
|
242 }
|
|
243 if (grepl("PDF", plotopt)) {
|
|
244 plotPDF(profile.BP = profile.BP, per=per, title=title)
|
|
245 }
|
|
246 }
|
|
247
|
|
248 #if (grepl("PNG", plotopt)) {
|
|
249 # plotPNG(profile.ALL = profile.ALL, per=per, title=title)
|
|
250 #}
|
|
251 #if (grepl("JPEG", plotopt)) {
|
|
252 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
|
|
253 #}
|
|
254 #if (grepl("PDF", plotopt)) {
|
|
255 # plotPDF(profile.ALL = profile.ALL, per=per, title=title)
|
|
256 #}
|
|
257 }
|
|
258
|
|
259 }
|
|
260
|
|
261 goprofiles()
|
|
262
|
|
263 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"
|