Mercurial > repos > prog > lcmsmatching

diff BiodbFactory.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author: prog
date: Fri, 22 Feb 2019 16:04:22 -0500
parents: fb9c0409d85c
--- a/BiodbFactory.R	Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,274 +0,0 @@
-# vi: fdm=marker
-
-##########################
-# CLASS DECLARATION {{{1 #
-##########################
-
-BiodbFactory <- methods::setRefClass("BiodbFactory", contains = 'BiodbObject', fields = list(.useragent = "character",
-														  .conn = "list",
-														  .cache.dir = "character",
-														  .cache.mode = "character",
-														  .debug = "logical",
-														  .chunk.size = "integer",
-														  .use.env.var = "logical"))
-
-###############
-# CONSTRUCTOR #
-###############
-
-BiodbFactory$methods( initialize = function(useragent = NA_character_, cache.dir = NA_character_, cache.mode = BIODB.CACHE.READ.WRITE, debug = FALSE, chunk.size = NA_integer_, use.env.var = FALSE, ...) {
-
-	.useragent <<- useragent
-	.conn <<- list()
-	.cache.dir <<- cache.dir
-	.cache.mode <<- cache.mode
-	.debug <<- debug
-	.chunk.size <<- as.integer(chunk.size)
-	.use.env.var <<- use.env.var
-
-	callSuper(...) # calls super-class initializer with remaining parameters
-})
-
-#######################
-# PRINT DEBUG MESSAGE #
-#######################
-
-BiodbFactory$methods( .print.debug.msg = function(msg) {
-	if (.self$.debug)
-		.print.msg(msg = msg, class = class(.self))
-})
-
-##################
-# GET USER AGENT #
-##################
-
-BiodbFactory$methods( getUserAgent = function() {
-	return(.self$.useragent)
-})
-
-##################
-# SET USER AGENT #
-##################
-
-	BiodbFactory$methods( setUserAgent = function(useragent) {
-	"Set useragent of BiodbFactory."
-	.useragent <<- useragent
-})
-
-###############
-# CREATE CONN #
-###############
-
-BiodbFactory$methods( createConn = function(class, url = NA_character_, token = NA_character_) {
-    " Create connection to databases useful for metabolomics."
-	if (class %in% names(.self$.conn))
-		stop(paste0('A connection of type ', class, ' already exists. Please use method getConn() to access it.'))
-
-	# Use environment variables
-	if (.self$.use.env.var) {
-		if (is.na(url))
-			url <- .biodb.get.env.var(c(class, 'URL'))
-		if (is.na(token))
-			token <- .biodb.get.env.var(c(class, 'TOKEN'))
-	}
-
-	# Create connection instance
-	conn <- switch(class,
-		            chebi       = ChebiConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            kegg        = KeggConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            pubchemcomp = PubchemConn$new(useragent = .self$.useragent, db = BIODB.PUBCHEMCOMP, debug = .self$.debug),
-		            pubchemsub  = PubchemConn$new(useragent = .self$.useragent, db = BIODB.PUBCHEMSUB, debug = .self$.debug),
-		            hmdb        = HmdbConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            chemspider  = ChemspiderConn$new(useragent = .self$.useragent, debug = .self$.debug, token = token),
-		            enzyme      = EnzymeConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            lipidmaps   = LipidmapsConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            mirbase     = MirbaseConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            ncbigene    = NcbigeneConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            ncbiccds    = NcbiccdsConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            uniprot     = UniprotConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		            massbank    = MassbankConn$new(useragent = .self$.useragent, url = url, debug = .self$.debug),
-					massfiledb  = MassFiledbConn$new(file = url, debug = .self$.debug),
-					peakforest  = PeakforestConn$new(useragent = .self$.useragent, debug = .self$.debug),
-		      	    NULL)
-
-	# Unknown class
-	if (is.null(conn))
-		stop(paste0("Unknown r-biodb class \"", class,"\"."))
-
-	# Register new class
-	.self$.conn[[class]] <- conn
-
-	return (.self$.conn[[class]])
-})
-
-############
-# GET CONN #
-############
-
-BiodbFactory$methods( getConn = function(class) {
-	"Get connection to a database."
-
-	if ( ! class %in% names(.self$.conn))
-		.self$createConn(class)
-
-	return (.self$.conn[[class]])
-})
-
-################
-# CREATE ENTRY #
-################
-
-BiodbFactory$methods( createEntry = function(class, id = NULL, content = NULL, drop = TRUE) {
-	"Create Entry from a database by id."
-
-	is.null(id) && is.null(content) && stop("One of id or content must be set.")
-	! is.null(id) && ! is.null(content) && stop("id and content cannot be both set.")
-
-	# Debug
-	.self$.print.debug.msg(paste0("Creating ", if (is.null(id)) length(content) else length(id), " entries from ", if (is.null(id)) "contents" else paste("ids", paste(if (length(id) > 10) id[1:10] else id, collapse = ", ")), "..."))
-
-	# Get content
-	if ( ! is.null(id))
-		content <- .self$getEntryContent(class, id)
-	conn <- .self$getConn(class)
-	entry <- conn$createEntry(content = content, drop = drop)
-
-	# Set factory
-	.self$.print.debug.msg(paste0("Setting factory reference into entries..."))
-	for (e in c(entry))
-		if ( ! is.null(e))
-			e$setFactory(.self)
-
-	return(entry)
-})
-
-########################
-# GET CACHE FILE PATHS #
-########################
-
-BiodbFactory$methods( .get.cache.file.paths = function(class, id) {
-
-	# Get extension
-	ext <- .self$getConn(class)$getEntryContentType()
-
-	# Set filenames
-	filenames <- vapply(id, function(x) { if (is.na(x)) NA_character_ else paste0(class, '-', x, '.', ext) }, FUN.VALUE = '')
-
-	# set file paths
-	file.paths <- vapply(filenames, function(x) { if (is.na(x)) NA_character_ else file.path(.self$.cache.dir, x) }, FUN.VALUE = '')
-
-	# Create cache dir if needed
-	if ( ! is.na(.self$.cache.dir) && ! file.exists(.self$.cache.dir))
-		dir.create(.self$.cache.dir)
-
-	return(file.paths)
-})
-
-###########################
-# LOAD CONTENT FROM CACHE #
-###########################
-
-BiodbFactory$methods( .load.content.from.cache = function(class, id) {
-
-	content <- NULL
-
-	# Read contents from files
-	file.paths <- .self$.get.cache.file.paths(class, id)
-	content <- lapply(file.paths, function(x) { if (is.na(x)) NA_character_ else ( if (file.exists(x)) paste(readLines(x), collapse = "\n") else NULL )} )
-
-	return(content)
-})
-
-############################
-# IS CACHE READING ENABLED #
-############################
-
-BiodbFactory$methods( .is.cache.reading.enabled = function() {
-	return( ! is.na(.self$.cache.dir) && .self$.cache.mode %in% c(BIODB.CACHE.READ.ONLY, BIODB.CACHE.READ.WRITE))
-})
-
-############################
-# IS CACHE WRITING ENABLED #
-############################
-
-BiodbFactory$methods( .is.cache.writing.enabled = function() {
-	return( ! is.na(.self$.cache.dir) && .self$.cache.mode %in% c(BIODB.CACHE.WRITE.ONLY, BIODB.CACHE.READ.WRITE))
-})
-
-#########################
-# SAVE CONTENT TO CACHE #
-#########################
-
-BiodbFactory$methods( .save.content.to.cache = function(class, id, content) {
-
-	# Write contents into files
-	file.paths <- .self$.get.cache.file.paths(class, id)
-	mapply(function(c, f) { if ( ! is.null(c)) writeLines(c, f) }, content, file.paths)
-})
-
-#####################
-# GET ENTRY CONTENT #
-#####################
-
-BiodbFactory$methods( getEntryContent = function(class, id) {
-
-	# Debug
-	.self$.print.debug.msg(paste0("Get entry content(s) for ", length(id)," id(s)..."))
-
-	# Initialize content
-	if (.self$.is.cache.reading.enabled()) {
-		content <- .self$.load.content.from.cache(class, id)	
-		missing.ids <- id[vapply(content, is.null, FUN.VALUE = TRUE)]
-	}
-	else {
-		content <- lapply(id, as.null)
-		missing.ids <- id
-	}
-
-	# Remove duplicates
-	n.duplicates <- sum(duplicated(missing.ids))
-	missing.ids <- missing.ids[ ! duplicated(missing.ids)]
-
-	# Debug
-	if (any(is.na(id)))
-		.self$.print.debug.msg(paste0(sum(is.na(id)), " entry ids are NA."))
-	if (.self$.is.cache.reading.enabled()) {
-		.self$.print.debug.msg(paste0(sum( ! is.na(id)) - length(missing.ids), " entry content(s) loaded from cache."))
-		if (n.duplicates > 0)
-			.self$.print.debug.msg(paste0(n.duplicates, " entry ids, whose content needs to be fetched, are duplicates."))
-		.self$.print.debug.msg(paste0(length(missing.ids), " entry content(s) need to be fetched."))
-	}
-
-	# Get contents
-	if (length(missing.ids) > 0) {
-
-		# Use connector to get missing contents
-		conn <- .self$getConn(class)
-
-		# Divide list of missing ids in chunks (in order to save in cache regularly)
-		chunks.of.missing.ids = if (is.na(.self$.chunk.size)) list(missing.ids) else split(missing.ids, ceiling(seq_along(missing.ids) / .self$.chunk.size))
-
-		# Loop on chunks
-		missing.contents <- NULL
-		for (ch.missing.ids in chunks.of.missing.ids) {
-
-			ch.missing.contents <- conn$getEntryContent(ch.missing.ids)
-
-			# Save to cache
-			if ( ! is.null(ch.missing.contents) && .self$.is.cache.writing.enabled())
-				.self$.save.content.to.cache(class, ch.missing.ids, ch.missing.contents)
-
-			# Append
-			missing.contents <- c(missing.contents, ch.missing.contents)
-
-			# Debug
-			if (.self$.is.cache.reading.enabled())
-				.self$.print.debug.msg(paste0("Now ", length(missing.ids) - length(missing.contents)," id(s) left to be retrieved..."))
-		}
-
-		# Merge content and missing.contents
-		content[id %in% missing.ids] <- vapply(id[id %in% missing.ids], function(x) missing.contents[missing.ids %in% x], FUN.VALUE = '')
-	}
-
-	return(content)
-})
author	prog
date	Fri, 22 Feb 2019 16:04:22 -0500
parents	fb9c0409d85c
children