Mercurial > repos > proteore > proteore_goprofiles
comparison goprofiles.R @ 5:781072a65600 draft
planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
author | proteore |
---|---|
date | Wed, 19 Sep 2018 05:49:06 -0400 |
parents | 715002a394ec |
children | 6afe8166a9a4 |
comparison
equal
deleted
inserted
replaced
4:715002a394ec | 5:781072a65600 |
---|---|
1 # Load necessary libraries | 1 # Load necessary libraries |
2 library(org.Hs.eg.db) | 2 library(goProfiles,quietly = TRUE) |
3 library(goProfiles) | |
4 | 3 |
5 # Read file and return file content as data.frame | 4 # Read file and return file content as data.frame |
6 readfile = function(filename, header) { | 5 readfile = function(filename, header) { |
7 if (header == "true") { | 6 if (header == "true") { |
8 # Read only first line of the file as header: | 7 # Read only first line of the file as header: |
20 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 19 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] |
21 } | 20 } |
22 return(file) | 21 return(file) |
23 } | 22 } |
24 | 23 |
25 getprofile = function(ids, id_type, level, duplicate) { | 24 check_ids <- function(vector,type) { |
25 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" | |
26 entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" | |
27 if (type == "Entrez"){ | |
28 return(grepl(entrez_id,vector)) | |
29 } else if (type == "UniProt") { | |
30 return(grepl(uniprot_pattern,vector)) | |
31 } | |
32 } | |
33 | |
34 getprofile = function(ids, id_type, level, duplicate,species) { | |
26 #################################################################### | 35 #################################################################### |
27 # Arguments | 36 # Arguments |
28 # - ids: list of input IDs | 37 # - ids: list of input IDs |
29 # - id_type: type of input IDs (UniProt/ENTREZID) | 38 # - id_type: type of input IDs (UniProt/ENTREZID) |
30 # - level | 39 # - level |
31 # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE) | 40 # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE) |
41 # - species | |
32 #################################################################### | 42 #################################################################### |
43 | |
44 library(species, character.only = TRUE, quietly = TRUE) | |
45 | |
46 if (species=="org.Hs.eg.db"){ | |
47 package=org.Hs.eg.db | |
48 } else if (species=="org.Mm.eg.db"){ | |
49 package=org.Mm.eg.db | |
50 } | |
51 | |
52 | |
33 | 53 |
34 # Check if level is number | 54 # Check if level is number |
35 if (! as.numeric(level) %% 1 == 0) { | 55 if (! as.numeric(level) %% 1 == 0) { |
36 stop("Please enter an integer for level") | 56 stop("Please enter an integer for level") |
37 } | 57 } else { |
38 else { | |
39 level = as.numeric(level) | 58 level = as.numeric(level) |
40 } | 59 } |
41 #genes = as.vector(file[,ncol]) | 60 #genes = as.vector(file[,ncol]) |
42 | 61 |
43 # Extract Gene Entrez ID | 62 # Extract Gene Entrez ID |
44 if (id_type == "Entrez") { | 63 if (id_type == "Entrez") { |
45 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first") | 64 id = select(package, ids, "ENTREZID", multiVals = "first") |
46 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | 65 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] |
47 } | 66 } else { |
48 else { | |
49 genes_ids = c() | 67 genes_ids = c() |
50 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first") | 68 id = select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first") |
51 if (duplicate == "TRUE") { | 69 if (duplicate == "TRUE") { |
52 id = unique(id) | 70 id = unique(id) |
53 } | 71 } |
54 print(id[[1]]) | 72 print(id[[1]]) |
55 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | 73 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] |
58 print("IDs unable to convert to ENTREZID: ") | 76 print("IDs unable to convert to ENTREZID: ") |
59 print(NAs) | 77 print(NAs) |
60 } | 78 } |
61 | 79 |
62 # Create basic profiles | 80 # Create basic profiles |
63 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 81 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) |
64 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 82 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) |
65 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 83 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) |
66 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 84 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) |
67 | 85 |
68 # Print profile | 86 # Print profile |
69 # printProfiles(profile) | 87 # printProfiles(profile) |
70 | 88 |
71 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) | 89 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) |
163 --plot_opt: plot extension options (PDF/JPEG/PNG) | 181 --plot_opt: plot extension options (PDF/JPEG/PNG) |
164 --level: 1-3 | 182 --level: 1-3 |
165 --per | 183 --per |
166 --title: title of the plot | 184 --title: title of the plot |
167 --duplicate: remove dupliate input IDs (true/false) | 185 --duplicate: remove dupliate input IDs (true/false) |
168 --text_output: text output filename \n") | 186 --text_output: text output filename \n |
187 --species") | |
169 q(save="no") | 188 q(save="no") |
170 } | 189 } |
171 | 190 |
172 # Parse arguments | 191 # Parse arguments |
173 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 192 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
174 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 193 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
175 args <- as.list(as.character(argsDF$V2)) | 194 args <- as.list(as.character(argsDF$V2)) |
176 names(args) <- argsDF$V1 | 195 names(args) <- argsDF$V1 |
177 | 196 |
197 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda") | |
198 #load("/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda") | |
199 | |
200 id_type = args$id_type | |
178 input_type = args$input_type | 201 input_type = args$input_type |
179 if (input_type == "text") { | 202 if (input_type == "text") { |
180 input = strsplit(args$input, "[ \t\n]+")[[1]] | 203 input = strsplit(args$input, "[ \t\n]+")[[1]] |
181 } | 204 } else if (input_type == "file") { |
182 else if (input_type == "file") { | |
183 filename = args$input | 205 filename = args$input |
184 ncol = args$ncol | 206 ncol = args$ncol |
185 # Check ncol | 207 # Check ncol |
186 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 208 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
187 stop("Please enter an integer for level") | 209 stop("Please enter an integer for level") |
188 } | 210 } else { |
189 else { | |
190 ncol = as.numeric(gsub("c", "", ncol)) | 211 ncol = as.numeric(gsub("c", "", ncol)) |
191 } | 212 } |
192 header = args$header | 213 header = args$header |
193 # Get file content | 214 # Get file content |
194 file = readfile(filename, header) | 215 file = readfile(filename, header) |
196 input = c() | 217 input = c() |
197 for (row in as.character(file[,ncol])) { | 218 for (row in as.character(file[,ncol])) { |
198 input = c(input, strsplit(row, ";")[[1]][1]) | 219 input = c(input, strsplit(row, ";")[[1]][1]) |
199 } | 220 } |
200 } | 221 } |
201 id_type = args$id_type | 222 |
223 if (! any(check_ids(input,id_type))){ | |
224 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) | |
225 } | |
226 | |
202 ontoopt = strsplit(args$onto_opt, ",")[[1]] | 227 ontoopt = strsplit(args$onto_opt, ",")[[1]] |
203 #print(ontoopt) | 228 #print(ontoopt) |
204 #plotopt = strsplit(args[3], ",") | 229 #plotopt = strsplit(args[3], ",") |
205 plotopt = args$plot_opt | 230 plotopt = args$plot_opt |
206 level = args$level | 231 level = args$level |
207 per = as.logical(args$per) | 232 per = as.logical(args$per) |
208 title = args$title | 233 title = args$title |
209 duplicate = args$duplicate | 234 duplicate = args$duplicate |
210 text_output = args$text_output | 235 text_output = args$text_output |
211 | 236 species=args$species |
212 profiles = getprofile(input, id_type, level, duplicate) | 237 |
238 profiles = getprofile(input, id_type, level, duplicate,species) | |
213 profile.CC = profiles[1] | 239 profile.CC = profiles[1] |
214 #print(profile.CC) | 240 #print(profile.CC) |
215 profile.MF = profiles[2] | 241 profile.MF = profiles[2] |
216 #print(profile.MF) | 242 #print(profile.MF) |
217 profile.BP = profiles[3] | 243 profile.BP = profiles[3] |