Mercurial > repos > proteore > proteore_goprofiles
comparison goprofiles.R @ 8:386145573c19 draft
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author | proteore |
---|---|
date | Tue, 18 Dec 2018 09:54:57 -0500 |
parents | 3e138d54c105 |
children | 948fecb6a40b |
comparison
equal
deleted
inserted
replaced
7:3e138d54c105 | 8:386145573c19 |
---|---|
2 | 2 |
3 # Load necessary libraries | 3 # Load necessary libraries |
4 suppressMessages(library(goProfiles,quietly = TRUE)) | 4 suppressMessages(library(goProfiles,quietly = TRUE)) |
5 | 5 |
6 # Read file and return file content as data.frame | 6 # Read file and return file content as data.frame |
7 readfile = function(filename, header) { | 7 read_file <- function(path,header){ |
8 if (header == "true") { | 8 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) |
9 # Read only first line of the file as header: | 9 if (inherits(file,"try-error")){ |
10 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") | 10 stop("File not found !") |
11 #Read the data of the files (skipping the first row) | 11 }else{ |
12 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") | 12 return(file) |
13 # Remove empty rows | |
14 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | |
15 #And assign the header to the data | |
16 names(file) <- headers | |
17 } | 13 } |
18 else { | 14 } |
19 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") | 15 |
20 # Remove empty rows | 16 #convert a string to boolean |
21 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 17 str2bool <- function(x){ |
18 if (any(is.element(c("t","true"),tolower(x)))){ | |
19 return (TRUE) | |
20 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
21 return (FALSE) | |
22 }else{ | |
23 return(NULL) | |
22 } | 24 } |
23 return(file) | |
24 } | 25 } |
25 | 26 |
26 check_ids <- function(vector,type) { | 27 check_ids <- function(vector,type) { |
27 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" | 28 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" |
28 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" | 29 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" |
47 | 48 |
48 if (species=="org.Hs.eg.db"){ | 49 if (species=="org.Hs.eg.db"){ |
49 package=org.Hs.eg.db | 50 package=org.Hs.eg.db |
50 } else if (species=="org.Mm.eg.db"){ | 51 } else if (species=="org.Mm.eg.db"){ |
51 package=org.Mm.eg.db | 52 package=org.Mm.eg.db |
53 } else if (species=="org.Rn.eg.db"){ | |
54 package=org.Rn.eg.db | |
52 } | 55 } |
53 | |
54 | |
55 | 56 |
56 # Check if level is number | 57 # Check if level is number |
57 if (! as.numeric(level) %% 1 == 0) { | 58 if (! as.numeric(level) %% 1 == 0) { |
58 stop("Please enter an integer for level") | 59 stop("Please enter an integer for level") |
59 } else { | 60 } else { |
73 } | 74 } |
74 print(id[[1]]) | 75 print(id[[1]]) |
75 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | 76 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] |
76 # IDs that have NA ENTREZID | 77 # IDs that have NA ENTREZID |
77 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] | 78 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] |
78 print("IDs unable to convert to ENTREZID: ") | 79 #print("IDs unable to convert to ENTREZID: ") |
79 print(NAs) | 80 #print(NAs) |
80 } | 81 } |
81 | 82 |
82 # Create basic profiles | 83 # Create basic profiles |
83 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) | 84 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) |
84 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) | 85 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) |
89 # printProfiles(profile) | 90 # printProfiles(profile) |
90 | 91 |
91 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) | 92 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) |
92 } | 93 } |
93 | 94 |
94 # Plot profiles to PNG | 95 make_plot <- function(profile,percent,title,onto,plot_opt){ |
95 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | 96 |
96 if (!is.null(profile.CC)) { | 97 if (plot_opt == "PDF") { |
97 png("profile.CC.png") | 98 file_name=paste("profile_",onto,".pdf",collapse="",sep="") |
98 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | 99 pdf(file_name) |
99 dev.off() | 100 } else if (plot_opt == "JPEG"){ |
101 file_name=paste("profile_",onto,".jpeg",collapse="",sep="") | |
102 jpeg(file_name) | |
103 } else if (plot_opt == "PNG"){ | |
104 file_name=paste("profile_",onto,".png",collapse="",sep="") | |
105 png(file_name) | |
100 } | 106 } |
101 if (!is.null(profile.BP)) { | 107 plotProfiles(profile, percentage=percent, multiplePlots=FALSE, aTitle=title) |
102 png("profile.BP.png") | 108 dev.off() |
103 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
104 dev.off() | |
105 } | |
106 if (!is.null(profile.MF)) { | |
107 png("profile.MF.png") | |
108 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
109 dev.off() | |
110 } | |
111 if (!is.null(profile.ALL)) { | |
112 png("profile.ALL.png") | |
113 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title) | |
114 dev.off() | |
115 } | |
116 } | |
117 | |
118 # Plot profiles to JPEG | |
119 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
120 if (!is.null(profile.CC)) { | |
121 jpeg("profile.CC.jpeg") | |
122 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
123 dev.off() | |
124 } | |
125 if (!is.null(profile.BP)) { | |
126 jpeg("profile.BP.jpeg") | |
127 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
128 dev.off() | |
129 } | |
130 if (!is.null(profile.MF)) { | |
131 jpeg("profile.MF.jpeg") | |
132 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
133 dev.off() | |
134 } | |
135 if (!is.null(profile.ALL)) { | |
136 jpeg("profile.ALL.jpeg") | |
137 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
138 dev.off() | |
139 } | |
140 } | |
141 | |
142 # Plot profiles to PDF | |
143 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
144 if (!is.null(profile.CC)) { | |
145 pdf("profile.CC.pdf") | |
146 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
147 dev.off() | |
148 } | |
149 if (!is.null(profile.BP)) { | |
150 pdf("profile.BP.pdf") | |
151 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
152 dev.off() | |
153 } | |
154 if (!is.null(profile.MF)) { | |
155 pdf("profile.MF.pdf") | |
156 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
157 dev.off() | |
158 } | |
159 if (!is.null(profile.ALL)) { | |
160 #print("all") | |
161 pdf("profile.ALL.pdf") | |
162 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
163 dev.off() | |
164 } | |
165 } | 109 } |
166 | 110 |
167 goprofiles = function() { | 111 goprofiles = function() { |
168 args <- commandArgs(TRUE) | 112 args <- commandArgs(TRUE) |
169 if(length(args)<1) { | 113 if(length(args)<1) { |
210 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 154 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
211 stop("Please enter an integer for level") | 155 stop("Please enter an integer for level") |
212 } else { | 156 } else { |
213 ncol = as.numeric(gsub("c", "", ncol)) | 157 ncol = as.numeric(gsub("c", "", ncol)) |
214 } | 158 } |
215 header = args$header | 159 header = str2bool(args$header) |
216 # Get file content | 160 # Get file content |
217 file = readfile(filename, header) | 161 file = read_file(filename, header) |
218 # Extract Protein IDs list | 162 # Extract Protein IDs list |
219 input = unlist(strsplit(as.character(file[,ncol]),";")) | 163 input = unlist(strsplit(as.character(file[,ncol]),";")) |
220 input = input [which(!is.na(input))] | 164 input = input [which(!is.na(input))] |
221 } | 165 } |
222 | 166 |
223 if (! any(check_ids(input,id_type))){ | 167 if (! any(check_ids(input,id_type))){ |
224 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) | 168 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) |
225 } | 169 } |
226 | 170 |
227 ontoopt = strsplit(args$onto_opt, ",")[[1]] | 171 ontoopt = strsplit(args$onto_opt, ",")[[1]] |
228 #print(ontoopt) | 172 onto_pos = as.integer(gsub("BP",3,gsub("MF",2,gsub("CC",1,ontoopt)))) |
229 #plotopt = strsplit(args[3], ",") | |
230 plotopt = args$plot_opt | 173 plotopt = args$plot_opt |
231 level = args$level | 174 level = args$level |
232 per = as.logical(args$per) | 175 per = as.logical(args$per) |
233 title = args$title | 176 title = args$title |
234 duplicate = args$duplicate | 177 duplicate = args$duplicate |
235 text_output = args$text_output | 178 text_output = args$text_output |
236 species=args$species | 179 species=args$species |
237 | 180 |
238 profiles = getprofile(input, id_type, level, duplicate,species) | 181 profiles = getprofile(input, id_type, level, duplicate,species) |
239 profile.CC = profiles[1] | 182 |
240 #print(profile.CC) | 183 for (index in onto_pos) { |
241 profile.MF = profiles[2] | 184 onto = names(profiles[index]) |
242 #print(profile.MF) | 185 profile=profiles[index] |
243 profile.BP = profiles[3] | 186 make_plot(profile,per,title,onto,plotopt) |
244 #print(profile.BP) | 187 text_output=paste("goProfiles_",onto,"_",title,".tsv",sep="",collapse="") |
245 profile.ALL = profiles[-3:-1] | 188 profile = as.data.frame(profile) |
246 #print(profile.ALL) | 189 profile <- as.data.frame(apply(profile, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA |
247 #c(profile.ALL, profile.CC, profile.MF, profile.BP) | 190 write.table(profile, text_output, sep="\t", row.names = FALSE, quote=FALSE, col.names = T) |
248 | |
249 if ("CC" %in% ontoopt) { | |
250 write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
251 if (grepl("PNG", plotopt)) { | |
252 plotPNG(profile.CC=profile.CC, per=per, title=title) | |
253 } | |
254 if (grepl("JPEG", plotopt)) { | |
255 plotJPEG(profile.CC = profile.CC, per=per, title=title) | |
256 } | |
257 if (grepl("PDF", plotopt)) { | |
258 plotPDF(profile.CC = profile.CC, per=per, title=title) | |
259 } | |
260 } | |
261 if ("MF" %in% ontoopt) { | |
262 write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
263 if (grepl("PNG", plotopt)) { | |
264 plotPNG(profile.MF = profile.MF, per=per, title=title) | |
265 } | |
266 if (grepl("JPEG", plotopt)) { | |
267 plotJPEG(profile.MF = profile.MF, per=per, title=title) | |
268 } | |
269 if (grepl("PDF", plotopt)) { | |
270 plotPDF(profile.MF = profile.MF, per=per, title=title) | |
271 } | |
272 } | |
273 if ("BP" %in% ontoopt) { | |
274 write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
275 if (grepl("PNG", plotopt)) { | |
276 plotPNG(profile.BP = profile.BP, per=per, title=title) | |
277 } | |
278 if (grepl("JPEG", plotopt)) { | |
279 plotJPEG(profile.BP = profile.BP, per=per, title=title) | |
280 } | |
281 if (grepl("PDF", plotopt)) { | |
282 plotPDF(profile.BP = profile.BP, per=per, title=title) | |
283 } | |
284 } | 191 } |
285 } | 192 } |
286 | 193 |
287 goprofiles() | 194 goprofiles() |