comparison BiodbFactory.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 # vi: fdm=marker
2
3 ##########################
4 # CLASS DECLARATION {{{1 #
5 ##########################
6
7 BiodbFactory <- methods::setRefClass("BiodbFactory", contains = 'BiodbObject', fields = list(.useragent = "character",
8 .conn = "list",
9 .cache.dir = "character",
10 .cache.mode = "character",
11 .debug = "logical",
12 .chunk.size = "integer",
13 .use.env.var = "logical"))
14
15 ###############
16 # CONSTRUCTOR #
17 ###############
18
19 BiodbFactory$methods( initialize = function(useragent = NA_character_, cache.dir = NA_character_, cache.mode = BIODB.CACHE.READ.WRITE, debug = FALSE, chunk.size = NA_integer_, use.env.var = FALSE, ...) {
20
21 .useragent <<- useragent
22 .conn <<- list()
23 .cache.dir <<- cache.dir
24 .cache.mode <<- cache.mode
25 .debug <<- debug
26 .chunk.size <<- as.integer(chunk.size)
27 .use.env.var <<- use.env.var
28
29 callSuper(...) # calls super-class initializer with remaining parameters
30 })
31
32 #######################
33 # PRINT DEBUG MESSAGE #
34 #######################
35
36 BiodbFactory$methods( .print.debug.msg = function(msg) {
37 if (.self$.debug)
38 .print.msg(msg = msg, class = class(.self))
39 })
40
41 ##################
42 # GET USER AGENT #
43 ##################
44
45 BiodbFactory$methods( getUserAgent = function() {
46 return(.self$.useragent)
47 })
48
49 ##################
50 # SET USER AGENT #
51 ##################
52
53 BiodbFactory$methods( setUserAgent = function(useragent) {
54 "Set useragent of BiodbFactory."
55 .useragent <<- useragent
56 })
57
58 ###############
59 # CREATE CONN #
60 ###############
61
62 BiodbFactory$methods( createConn = function(class, url = NA_character_, token = NA_character_) {
63 " Create connection to databases useful for metabolomics."
64 if (class %in% names(.self$.conn))
65 stop(paste0('A connection of type ', class, ' already exists. Please use method getConn() to access it.'))
66
67 # Use environment variables
68 if (.self$.use.env.var) {
69 if (is.na(url))
70 url <- .biodb.get.env.var(c(class, 'URL'))
71 if (is.na(token))
72 token <- .biodb.get.env.var(c(class, 'TOKEN'))
73 }
74
75 # Create connection instance
76 conn <- switch(class,
77 chebi = ChebiConn$new(useragent = .self$.useragent, debug = .self$.debug),
78 kegg = KeggConn$new(useragent = .self$.useragent, debug = .self$.debug),
79 pubchemcomp = PubchemConn$new(useragent = .self$.useragent, db = BIODB.PUBCHEMCOMP, debug = .self$.debug),
80 pubchemsub = PubchemConn$new(useragent = .self$.useragent, db = BIODB.PUBCHEMSUB, debug = .self$.debug),
81 hmdb = HmdbConn$new(useragent = .self$.useragent, debug = .self$.debug),
82 chemspider = ChemspiderConn$new(useragent = .self$.useragent, debug = .self$.debug, token = token),
83 enzyme = EnzymeConn$new(useragent = .self$.useragent, debug = .self$.debug),
84 lipidmaps = LipidmapsConn$new(useragent = .self$.useragent, debug = .self$.debug),
85 mirbase = MirbaseConn$new(useragent = .self$.useragent, debug = .self$.debug),
86 ncbigene = NcbigeneConn$new(useragent = .self$.useragent, debug = .self$.debug),
87 ncbiccds = NcbiccdsConn$new(useragent = .self$.useragent, debug = .self$.debug),
88 uniprot = UniprotConn$new(useragent = .self$.useragent, debug = .self$.debug),
89 massbank = MassbankConn$new(useragent = .self$.useragent, url = url, debug = .self$.debug),
90 massfiledb = MassFiledbConn$new(file = url, debug = .self$.debug),
91 peakforest = PeakforestConn$new(useragent = .self$.useragent, debug = .self$.debug),
92 NULL)
93
94 # Unknown class
95 if (is.null(conn))
96 stop(paste0("Unknown r-biodb class \"", class,"\"."))
97
98 # Register new class
99 .self$.conn[[class]] <- conn
100
101 return (.self$.conn[[class]])
102 })
103
104 ############
105 # GET CONN #
106 ############
107
108 BiodbFactory$methods( getConn = function(class) {
109 "Get connection to a database."
110
111 if ( ! class %in% names(.self$.conn))
112 .self$createConn(class)
113
114 return (.self$.conn[[class]])
115 })
116
117 ################
118 # CREATE ENTRY #
119 ################
120
121 BiodbFactory$methods( createEntry = function(class, id = NULL, content = NULL, drop = TRUE) {
122 "Create Entry from a database by id."
123
124 is.null(id) && is.null(content) && stop("One of id or content must be set.")
125 ! is.null(id) && ! is.null(content) && stop("id and content cannot be both set.")
126
127 # Debug
128 .self$.print.debug.msg(paste0("Creating ", if (is.null(id)) length(content) else length(id), " entries from ", if (is.null(id)) "contents" else paste("ids", paste(if (length(id) > 10) id[1:10] else id, collapse = ", ")), "..."))
129
130 # Get content
131 if ( ! is.null(id))
132 content <- .self$getEntryContent(class, id)
133 conn <- .self$getConn(class)
134 entry <- conn$createEntry(content = content, drop = drop)
135
136 # Set factory
137 .self$.print.debug.msg(paste0("Setting factory reference into entries..."))
138 for (e in c(entry))
139 if ( ! is.null(e))
140 e$setFactory(.self)
141
142 return(entry)
143 })
144
145 ########################
146 # GET CACHE FILE PATHS #
147 ########################
148
149 BiodbFactory$methods( .get.cache.file.paths = function(class, id) {
150
151 # Get extension
152 ext <- .self$getConn(class)$getEntryContentType()
153
154 # Set filenames
155 filenames <- vapply(id, function(x) { if (is.na(x)) NA_character_ else paste0(class, '-', x, '.', ext) }, FUN.VALUE = '')
156
157 # set file paths
158 file.paths <- vapply(filenames, function(x) { if (is.na(x)) NA_character_ else file.path(.self$.cache.dir, x) }, FUN.VALUE = '')
159
160 # Create cache dir if needed
161 if ( ! is.na(.self$.cache.dir) && ! file.exists(.self$.cache.dir))
162 dir.create(.self$.cache.dir)
163
164 return(file.paths)
165 })
166
167 ###########################
168 # LOAD CONTENT FROM CACHE #
169 ###########################
170
171 BiodbFactory$methods( .load.content.from.cache = function(class, id) {
172
173 content <- NULL
174
175 # Read contents from files
176 file.paths <- .self$.get.cache.file.paths(class, id)
177 content <- lapply(file.paths, function(x) { if (is.na(x)) NA_character_ else ( if (file.exists(x)) paste(readLines(x), collapse = "\n") else NULL )} )
178
179 return(content)
180 })
181
182 ############################
183 # IS CACHE READING ENABLED #
184 ############################
185
186 BiodbFactory$methods( .is.cache.reading.enabled = function() {
187 return( ! is.na(.self$.cache.dir) && .self$.cache.mode %in% c(BIODB.CACHE.READ.ONLY, BIODB.CACHE.READ.WRITE))
188 })
189
190 ############################
191 # IS CACHE WRITING ENABLED #
192 ############################
193
194 BiodbFactory$methods( .is.cache.writing.enabled = function() {
195 return( ! is.na(.self$.cache.dir) && .self$.cache.mode %in% c(BIODB.CACHE.WRITE.ONLY, BIODB.CACHE.READ.WRITE))
196 })
197
198 #########################
199 # SAVE CONTENT TO CACHE #
200 #########################
201
202 BiodbFactory$methods( .save.content.to.cache = function(class, id, content) {
203
204 # Write contents into files
205 file.paths <- .self$.get.cache.file.paths(class, id)
206 mapply(function(c, f) { if ( ! is.null(c)) writeLines(c, f) }, content, file.paths)
207 })
208
209 #####################
210 # GET ENTRY CONTENT #
211 #####################
212
213 BiodbFactory$methods( getEntryContent = function(class, id) {
214
215 # Debug
216 .self$.print.debug.msg(paste0("Get entry content(s) for ", length(id)," id(s)..."))
217
218 # Initialize content
219 if (.self$.is.cache.reading.enabled()) {
220 content <- .self$.load.content.from.cache(class, id)
221 missing.ids <- id[vapply(content, is.null, FUN.VALUE = TRUE)]
222 }
223 else {
224 content <- lapply(id, as.null)
225 missing.ids <- id
226 }
227
228 # Remove duplicates
229 n.duplicates <- sum(duplicated(missing.ids))
230 missing.ids <- missing.ids[ ! duplicated(missing.ids)]
231
232 # Debug
233 if (any(is.na(id)))
234 .self$.print.debug.msg(paste0(sum(is.na(id)), " entry ids are NA."))
235 if (.self$.is.cache.reading.enabled()) {
236 .self$.print.debug.msg(paste0(sum( ! is.na(id)) - length(missing.ids), " entry content(s) loaded from cache."))
237 if (n.duplicates > 0)
238 .self$.print.debug.msg(paste0(n.duplicates, " entry ids, whose content needs to be fetched, are duplicates."))
239 .self$.print.debug.msg(paste0(length(missing.ids), " entry content(s) need to be fetched."))
240 }
241
242 # Get contents
243 if (length(missing.ids) > 0) {
244
245 # Use connector to get missing contents
246 conn <- .self$getConn(class)
247
248 # Divide list of missing ids in chunks (in order to save in cache regularly)
249 chunks.of.missing.ids = if (is.na(.self$.chunk.size)) list(missing.ids) else split(missing.ids, ceiling(seq_along(missing.ids) / .self$.chunk.size))
250
251 # Loop on chunks
252 missing.contents <- NULL
253 for (ch.missing.ids in chunks.of.missing.ids) {
254
255 ch.missing.contents <- conn$getEntryContent(ch.missing.ids)
256
257 # Save to cache
258 if ( ! is.null(ch.missing.contents) && .self$.is.cache.writing.enabled())
259 .self$.save.content.to.cache(class, ch.missing.ids, ch.missing.contents)
260
261 # Append
262 missing.contents <- c(missing.contents, ch.missing.contents)
263
264 # Debug
265 if (.self$.is.cache.reading.enabled())
266 .self$.print.debug.msg(paste0("Now ", length(missing.ids) - length(missing.contents)," id(s) left to be retrieved..."))
267 }
268
269 # Merge content and missing.contents
270 content[id %in% missing.ids] <- vapply(id[id %in% missing.ids], function(x) missing.contents[missing.ids %in% x], FUN.VALUE = '')
271 }
272
273 return(content)
274 })