view BiodbFactory.R @ 1:253d531a0193 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
author prog
date Sat, 03 Sep 2016 17:02:01 -0400
parents e66bb061af06
children 20d69a062da3
line wrap: on
line source

if ( ! exists('BiodbFactory')) { # Do not load again if already loaded
	
	library(methods)
	source('biodb-common.R')
	source('ChebiConn.R')
	source('KeggConn.R')
	source('PubchemConn.R')
	source('HmdbConn.R')
	source('ChemspiderConn.R')
	source('EnzymeConn.R')
	source('LipidmapsConn.R')
	source('MirbaseConn.R')
	source('NcbigeneConn.R')
	source('NcbiccdsConn.R')
	source('UniprotConn.R')
	source('MassbankConn.R')
	source('MassFiledbConn.R')

	#####################
	# CLASS DECLARATION #
	#####################
	
	BiodbFactory <- setRefClass("BiodbFactory", fields = list(.useragent = "character", .conn = "list", .cache.dir = "character", .debug = "logical"))
	
	###############
	# CONSTRUCTOR #
	###############
	
	BiodbFactory$methods( initialize = function(useragent = NA_character_, cache.dir = NA_character_, debug = FALSE, ...) {
	
		.useragent <<- useragent
		.conn <<- list()
		.cache.dir <<- cache.dir
		.debug <<- debug

		callSuper(...) # calls super-class initializer with remaining parameters
	})

	#######################
	# PRINT DEBUG MESSAGE #
	#######################

	BiodbFactory$methods( .print.debug.msg = function(msg) {
		if (.self$.debug)
			.print.msg(msg = msg, class = class(.self))
	})

	##################
	# GET USER AGENT #
	##################

	BiodbFactory$methods( getUserAgent = function() {
		return(.self$.useragent)
	})

	##################
	# SET USER AGENT #
	##################

	BiodbFactory$methods( setUserAgent = function(useragent) {
		.useragent <<- useragent
	})

	############
	# GET CONN #
	############

	BiodbFactory$methods( getConn = function(class, url = NA_character_) {

		if ( ! class %in% names(.self$.conn)) {

			# Create connection instance
			conn <- switch(class,
		                	chebi       = ChebiConn$new(useragent = .self$.useragent),
		                	kegg        = KeggConn$new(useragent = .self$.useragent),
		                	pubchem     = PubchemConn$new(useragent = .self$.useragent),
		                	hmdb        = HmdbConn$new(useragent = .self$.useragent),
		                	chemspider  = ChemspiderConn$new(useragent = .self$.useragent),
		                	enzyme      = EnzymeConn$new(useragent = .self$.useragent),
		                	lipidmaps   = LipidmapsConn$new(useragent = .self$.useragent),
		                	mirbase     = MirbaseConn$new(useragent = .self$.useragent),
		                	ncbigene    = NcbigeneConn$new(useragent = .self$.useragent),
		                	ncbiccds    = NcbiccdsConn$new(useragent = .self$.useragent),
		                	uniprot     = UniprotConn$new(useragent = .self$.useragent),
		                	massbank    = MassbankConn$new(useragent = .self$.useragent, debug = .self$.debug),
							massfiledb  = MassFiledbConn$new(file = url),
		      	          	NULL)

			# Unknown class
			if (is.null(conn))
				stop(paste0("Unknown r-biodb class \"", class,"\"."))

			.self$.conn[[class]] <- conn
		}

		return (.self$.conn[[class]])
	})

	################
	# CREATE ENTRY #
	################

	BiodbFactory$methods( createEntry = function(class, type, id = NULL, content = NULL, drop = TRUE) {

		is.null(id) && is.null(content) && stop("One of id or content must be set.")
		! is.null(id) && ! is.null(content) && stop("id and content cannot be both set.")

		# Debug
		.self$.print.debug.msg(paste0("Creating entry from ", if (is.null(id)) "content" else paste("id", id), "..."))

		# Get content
		if ( ! is.null(id))
			content <- .self$getEntryContent(class, type, id)

		conn <- .self$getConn(class)
		entry <- conn$createEntry(type = type, content = content, drop = drop)

		# Set factory
		for (e in c(entry))
			e$setFactory(.self)

		return(entry)
	})

	########################
	# GET CACHE FILE PATHS #
	########################

	BiodbFactory$methods( .get.cache.file.paths = function(class, type, id) {

		# Get extension
		ext <- .self$getConn(class)$getEntryContentType(type)

		# Set filenames
		filenames <- vapply(id, function(x) paste0(class, '-', type, '-', x, '.', ext), FUN.VALUE = '')

		# set file paths
		file.paths <- vapply(filenames, function(x) file.path(.self$.cache.dir, x), FUN.VALUE = '')
	
		# Create cache dir if needed
		if ( ! is.na(.self$.cache.dir) && ! file.exists(.self$.cache.dir))
			dir.create(.self$.cache.dir)

		return(file.paths)
	})

	###########################
	# LOAD CONTENT FROM CACHE #
	###########################

	BiodbFactory$methods( .load.content.from.cache = function(class, type, id) {

		content <- NULL

		# Read contents from files
		file.paths <- .self$.get.cache.file.paths(class, type, id)
		content <- lapply(file.paths, function(x) { if (file.exists(x)) paste(readLines(x), collapse = "\n") else NULL })

		return(content)
	})

	#########################
	# SAVE CONTENT TO CACHE #
	#########################

	BiodbFactory$methods( .save.content.to.cache = function(class, type, id, content) {

		# Write contents into files
		file.paths <- .self$.get.cache.file.paths(class, type, id)
		mapply(function(c, f) { if ( ! is.null(c)) writeLines(c, f) }, content, file.paths)
	})

	#####################
	# GET ENTRY CONTENT #
	#####################

	BiodbFactory$methods( getEntryContent = function(class, type, id, chunk.size = NA_integer_) {

		# Debug
		.self$.print.debug.msg(paste0("Get entry content(s) for ", length(id)," id(s)..."))

		content <- NULL
		# Load from cache
		if ( ! is.na(.self$.cache.dir))
			content <- .self$.load.content.from.cache(class, type, id)	

		# Get list of missing contents
		missing.content.indexes <- vapply(content, is.null, FUN.VALUE = TRUE)
		missing.ids <- if (is.null(content)) id else id[missing.content.indexes]

		# Debug
		if ( ! is.na(.self$.cache.dir)) {
			.self$.print.debug.msg(paste0(length(id) - length(missing.ids), " entry content(s) loaded from cache."))
			.self$.print.debug.msg(paste0(length(missing.ids), " entry content(s) need to be fetched."))
		}

		# Get contents
		if (length(missing.ids) > 0) {

			# Use connector to get missing contents
			conn <- .self$getConn(class)

			# Divide list of missing ids in chunks (in order to save in cache regularly)
			chunks.of.missing.ids = if (is.na(chunk.size)) list(missing.ids) else split(missing.ids, ceiling(seq_along(missing.ids) / chunk.size))

			# Loop on chunks
			missing.contents <- NULL
			for (ch.missing.ids in chunks.of.missing.ids) {

				ch.missing.contents <- conn$getEntryContent(type, ch.missing.ids)

				# Save to cache
				if ( ! is.null(ch.missing.contents) && ! is.na(.self$.cache.dir))
					.self$.save.content.to.cache(class, type, ch.missing.ids, ch.missing.contents)

				# Append
				missing.contents <- c(missing.contents, ch.missing.contents)

				# Debug
				if ( ! is.na(.self$.cache.dir))
					.self$.print.debug.msg(paste0("Now ", length(missing.ids) - length(missing.contents)," id(s) left to be retrieved..."))
			}

			# Merge content and missing.contents
			if (is.null(content))
				content <- missing.contents
			else
				content[missing.content.indexes] <- missing.contents
		}

		return(content)
	})
}