comparison goprofiles.R @ 0:6eeb2fb0c4bd draft default tip

planemo upload
author lnguyen
date Sat, 16 Sep 2017 09:17:07 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6eeb2fb0c4bd
1 # Load necessary libraries
2 library("org.Hs.eg.db", quietly=TRUE)
3 library("goProfiles", quietly=TRUE)
4
5 # Read file and return file content as data.frame?
6 readfile = function(filename, header) {
7 if (header == "true") {
8 # Read only the first two lines of the files as data (without headers):
9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
10 #print("header")
11 #print(headers)
12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here :
13 #headers_names <- sapply(headers, paste, collapse = "_")
14 #print(headers_names)
15 #Read the data of the files (skipping the first 2 rows):
16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
17 #print(file[1,])
18 #And assign the headers of step two to the data:
19 names(file) <- headers
20 }
21 else {
22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
23 }
24 return(file)
25 }
26
27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
28 #test = readfile(filename)
29 #str(test)
30 #str(test$Gene.names)
31 getprofile = function(ids, id_type, level) {
32
33 # Check if level is number
34 if (! as.numeric(level) %% 1 == 0) {
35 stop("Please enter an integer for level")
36 }
37 else {
38 level = as.numeric(level)
39 }
40 #genes = as.vector(file[,ncol])
41
42 # Extract Gene Entrez ID
43 if (id_type == "Entrez") {
44 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
45 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
46 }
47 else {
48 genes_ids = c()
49 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
50 #print(id[[1]][1])
51 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
52 # IDs that have NA ENTREZID
53 NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
54 print("IDs unable to convert to ENTREZID: ")
55 print(NAs)
56 }
57 #print(genes_ids)
58 # Convert Protein IDs into entrez ids
59
60 # for (i in 1:length(id$UNIPROT)) {
61 # print(i)
62 # if (is.na(id[[2]][i])) {
63 # print(id[[2]][i])
64 # }
65 # }
66 # a = id[which(id$ENTREZID == "NA"),]
67 # print(a)
68 # print(a$UNIPROT)
69 #print(id[[1]][which(is.na(id$ENTREZID))])
70 #print(genes_ids)
71 # for (gene in genes) {
72 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
73 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
74 # print(id)
75 # genes_ids = append(genes_ids, id$ENTREZID)
76 # }
77 #print(genes_ids)
78
79 # Create basic profiles
80 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
81 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
82 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
83 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
84
85 # Print profile
86 # printProfiles(profile)
87
88 return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
89 }
90
91 # Plot profiles to PNG
92 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
93 if (!is.null(profile.CC)) {
94 png("profile.CC.png")
95 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
96 dev.off()
97 }
98 if (!is.null(profile.BP)) {
99 png("profile.BP.png")
100 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
101 dev.off()
102 }
103 if (!is.null(profile.MF)) {
104 png("profile.MF.png")
105 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
106 dev.off()
107 }
108 if (!is.null(profile.ALL)) {
109 png("profile.ALL.png")
110 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
111 dev.off()
112 }
113 }
114
115 # Plot profiles to JPEG
116 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
117 if (!is.null(profile.CC)) {
118 jpeg("profile.CC.jpeg")
119 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
120 dev.off()
121 }
122 if (!is.null(profile.BP)) {
123 jpeg("profile.BP.jpeg")
124 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
125 dev.off()
126 }
127 if (!is.null(profile.MF)) {
128 jpeg("profile.MF.jpeg")
129 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
130 dev.off()
131 }
132 if (!is.null(profile.ALL)) {
133 jpeg("profile.ALL.jpeg")
134 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
135 dev.off()
136 }
137 }
138
139 # Plot profiles to PDF
140 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
141 if (!is.null(profile.CC)) {
142 pdf("profile.CC.pdf")
143 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
144 dev.off()
145 }
146 if (!is.null(profile.BP)) {
147 pdf("profile.BP.pdf")
148 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
149 dev.off()
150 }
151 if (!is.null(profile.MF)) {
152 pdf("profile.MF.pdf")
153 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
154 dev.off()
155 }
156 if (!is.null(profile.ALL)) {
157 #print("all")
158 pdf("profile.ALL.pdf")
159 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
160 dev.off()
161 }
162 }
163
164 goprofiles = function() {
165 args = commandArgs(trailingOnly = TRUE)
166 #print(args)
167 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
168 if (length(args) != 8) {
169 stop("Not enough/Too many arguments", call. = FALSE)
170 }
171 else {
172 input_type = args[2]
173 if (input_type == "text") {
174 input = strsplit(args[1], "\\s+")[[1]]
175 }
176 else if (input_type == "file") {
177 filename = strsplit(args[1], ",")[[1]][1]
178 ncol = strsplit(args[1], ",")[[1]][2]
179 # Check ncol
180 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
181 stop("Please enter an integer for level")
182 }
183 else {
184 ncol = as.numeric(gsub("c", "", ncol))
185 }
186 header = strsplit(args[1], ",")[[1]][3]
187 # Get file content
188 file = readfile(filename, header)
189 # Extract Protein IDs list
190 input = c()
191 for (row in as.character(file[,ncol])) {
192 input = c(input, strsplit(row, ";")[[1]][1])
193 }
194 }
195 id_type = args[3]
196 ontoopt = strsplit(args[4], ",")[[1]]
197 #print(ontoopt)
198 #plotopt = strsplit(args[3], ",")
199 plotopt = args[5]
200 level = args[6]
201 per = as.logical(args[7])
202 title = args[8]
203
204 profiles = getprofile(input, id_type, level)
205 profile.CC = profiles[1]
206 #print(profile.CC)
207 profile.MF = profiles[2]
208 #print(profile.MF)
209 profile.BP = profiles[3]
210 #print(profile.BP)
211 profile.ALL = profiles[-3:-1]
212 #print(profile.ALL)
213 #c(profile.ALL, profile.CC, profile.MF, profile.BP)
214 if ("CC" %in% ontoopt) {
215 if (grepl("PNG", plotopt)) {
216 plotPNG(profile.CC=profile.CC, per=per, title=title)
217 }
218 if (grepl("JPEG", plotopt)) {
219 plotJPEG(profile.CC = profile.CC, per=per, title=title)
220 }
221 if (grepl("PDF", plotopt)) {
222 plotPDF(profile.CC = profile.CC, per=per, title=title)
223 }
224 }
225 if ("MF" %in% ontoopt) {
226 if (grepl("PNG", plotopt)) {
227 plotPNG(profile.MF = profile.MF, per=per, title=title)
228 }
229 if (grepl("JPEG", plotopt)) {
230 plotJPEG(profile.MF = profile.MF, per=per, title=title)
231 }
232 if (grepl("PDF", plotopt)) {
233 plotPDF(profile.MF = profile.MF, per=per, title=title)
234 }
235 }
236 if ("BP" %in% ontoopt) {
237 if (grepl("PNG", plotopt)) {
238 plotPNG(profile.BP = profile.BP, per=per, title=title)
239 }
240 if (grepl("JPEG", plotopt)) {
241 plotJPEG(profile.BP = profile.BP, per=per, title=title)
242 }
243 if (grepl("PDF", plotopt)) {
244 plotPDF(profile.BP = profile.BP, per=per, title=title)
245 }
246 }
247
248 #if (grepl("PNG", plotopt)) {
249 # plotPNG(profile.ALL = profile.ALL, per=per, title=title)
250 #}
251 #if (grepl("JPEG", plotopt)) {
252 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
253 #}
254 #if (grepl("PDF", plotopt)) {
255 # plotPDF(profile.ALL = profile.ALL, per=per, title=title)
256 #}
257 }
258
259 }
260
261 goprofiles()
262
263 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"