view Ms4TabSqlDb.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents e66bb061af06
children
line wrap: on
line source

if ( ! exists('Ms4TabSqlDb')) { # Do not load again if already loaded

	library('methods')
	source('msdb-common.R')
	source('MsDb.R')
	
	#####################
	# CLASS DECLARATION #
	#####################
	
	Ms4TabSqlDb <- setRefClass("Ms4TabSqlDb", contains = "MsDb", fields = list(.host = "character", .port = "integer", .dbname = "character", .user = "character", .password = "character", .drv = "ANY", .conn = "ANY"))
	
	###############
	# CONSTRUCTOR #
	###############
	
	Ms4TabSqlDb$methods( initialize = function(host = NA_character_, port = NA_integer_, dbname = NA_character_, user = NA_character_, password = NA_character_, ...) {

		# Initialize members
		.host <<- if ( ! is.null(host)) host else NA_character_
		.port <<- if ( ! is.null(port)) port else NA_integer_
		.dbname <<- if ( ! is.null(dbname)) dbname else NA_character_
		.user <<- if ( ! is.null(user)) user else NA_character_
		.password <<- if ( ! is.null(password)) password else NA_character_
		.drv <<- NULL
		.conn <<- NULL

		callSuper(...)
	})
	
	##################
	# GET CONNECTION #
	##################
	
	Ms4TabSqlDb$methods( .get.connection = function() {

		# Initialize connection
		if (is.null(.self$.conn)) {
			library('RPostgreSQL')
			.drv <<- dbDriver("PostgreSQL")
			.conn <<- dbConnect(.self$.drv, host = .self$.host, port = .self$.port, dbname = .self$.dbname, user = .self$.user, password = .self$.password)
		}

		return(.self$.conn)
	})
	
	##############
	# SEND QUERY #
	##############

	Ms4TabSqlDb$methods( .send.query = function(query) {
		conn <- .self$.get.connection() # Call it first separately, so library RPostgreSQL is loaded.
		rs <- try(dbSendQuery(conn, query))
		return(rs)
	})

	####################
	# GET MOLECULE IDS #
	####################
	
	Ms4TabSqlDb$methods( getMoleculeIds = function() {

		rs <- .self$.send.query('select pkmol.molecule_id as id from peaklist_name as pkmol;')
		ids <- fetch(rs,n=-1)
		ids <- ids[['id']] # Get 'id' column
		ids <- vapply(ids, function(x) { if (substring(x, 1, 1) == 'N') as.integer(substring(x, 2)) else as.integer(x) } , FUN.VALUE = 1, USE.NAMES = FALSE)
		ids <- (sort(ids))

		return(ids)
	})

	####################
	# GET NB MOLECULES #
	####################
	
	Ms4TabSqlDb$methods( getNbMolecules = function() {

		rs <- .self$.send.query('select count(*) from peaklist_name;')
		df <- fetch(rs,n=-1)
		n <- df[[1]]

		return(n)
	})
	
	#####################
	# GET MOLECULE NAME #
	#####################
	
	Ms4TabSqlDb$methods( getMoleculeName = function(molid) {

		# Build request
		where <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
		request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';')

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		# Get IDs
		ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE)

		# Get names in the same order as the input vector
		names <- df[['name']][order(ids)[order(molid)]]

		return(if (is.null(names)) NA_character_ else names)
	})

	
	###############################
	# GET CHROMATOGRAPHIC COLUMNS #
	###############################
	
	Ms4TabSqlDb$methods( getChromCol = function(molid = NULL) {

		# Get all columns
		if (is.null(molid)) {
			request <- 'select name from method;'

		# Get columns of the specified molecules
		} else {
			where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
			where <- paste0('pk.name_id = pkmol.id and pk.id = pkret.id_peak and pkret.id_method = method.id and (', where_molids, ')')
			request <- paste0('select distinct method.name from method, peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret where ', where, ';')
		}

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		# Gets column list
		cols <- df[['name']]

		# Remove FIA
		cols <- cols[ cols != 'FIA']

		# Normalize names
		cols <- vapply(cols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE)

		# Remove duplicates
		cols <- cols[ ! duplicated(cols)]

		# Make data frame
		cols <- data.frame(id = cols, title = cols, stringsAsFactors = FALSE)

		return(cols)
	})

	################
	# FIND BY NAME #
	################

	Ms4TabSqlDb$methods( findByName = function(name) {

		if (is.null(name)) return(NA_integer_)

		# Put names in uppercase
		uname <- toupper(name)

		# Build request
		where <- paste0(vapply(uname, function(n) paste0("upper(pkmol.name) = '", gsub("'", "''", n, perl = TRUE), "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = ' or ')
		request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';')

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		# Adds missing names/IDs
		missing_names <- uname[ ! uname %in% toupper(df[['name']])]
		df <- rbind(df, data.frame(id = rep(NA_integer_, length(missing_names)), name = missing_names))

		# Get IDs and names
		ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE)
		names <- toupper(as.character(df[['name']]))

		# Get IDs in the same order as the input vector
		ids[order(uname)] <- ids[order(names)]

		return(if (is.null(ids)) NA_integer_ else ids)
	})
	
	#######################
	# GET RETENTION TIMES #
	#######################
	
	Ms4TabSqlDb$methods( getRetentionTimes = function(molid, col = NA_character_) {

		if (is.null(molid) || is.na(molid) || length(molid)  != 1)
			stop("The parameter molid must consist only in a single integer.")
			
		# Build request
		request <- paste0("select distinct method.name as col, (pkret.retention * 60) as ret from peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret, method where pkret.id_peak = pk.id and pkmol.id = pk.name_id and pkret.id_method = method.id and pkmol.molecule_id = 'N", molid, "'")
		if ( ! is.na(col)) {
			where_cols <- paste0(vapply(col, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = ''), collapse = ' or ')
			request <- paste0(request, ' and (', where_cols, ')')
		}
		request <- paste0(request, ';')

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		# Remove FIA
		df <- df[df[['col']] != 'FIA', ]

		# Normalize names
		df[['col']] <- vapply(df[['col']], .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE)
 
		# Build output list
		lst <- list()
		if (nrow(df) > 0)
			for (i in 1:nrow(df)) {
				c <- df[i, 'col']
				lst[[c]] <- c(lst[[c]], df[i, 'ret'])
			}

		return(lst)
	})
	
	################
	# GET NB PEAKS #
	################
	
	Ms4TabSqlDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {

		# Build request
		request <- paste0("select count(*) from peaklist as pk, peaklist_name as pkmol where pkmol.id = pk.name_id")
		if ( length(molid) > 1 || ! is.na(molid)) {
			where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
			request <- paste0(request, ' and (', where_molids, ')')
		}
		if ( ! is.na(type)) {
			request <- paste0(request, ' and ', if (type == MSDB.TAG.POS) '' else 'not ', 'ion_pos')
		}
		request <- paste0(request, ';')

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		return(df[1,1])
	})
	
	###############################
	# GET CHROMATOGRAPHIC COLUMNS #
	###############################
	
	Ms4TabSqlDb$methods( .to.dbcols = function(col) {

		# Get all column names
		request <- 'select name from method;'
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		# Get database column names
		dbcols <- df[['name']]
		dbcols <- dbcols[ dbcols != 'FIA']

		# Get normalize names
		normcols <- vapply(dbcols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE)

		return(dbcols[normcols == tolower(col)])
	})
	
	#################
	# GET MZ VALUES #
	#################
	
	# Returns a numeric vector of all masses stored inside the database.
	Ms4TabSqlDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {

		# Build request
		select <- paste0("select distinct pk.mass as ", MSDB.TAG.MZTHEO)
		from <- " from peaklist as pk"
		where <- ""
		if ( ! is.null(mode))
			where <- paste0(" where ", if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos')
		limit <- ""
		if ( ! is.na(NA_integer_))
			limit <- paste(" limit", max.results)

		# Assemble request
		request <- paste0(select, from, where, ';')

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs, n=-1)

		return(df[[MSDB.TAG.MZTHEO]])
	})

	##########
	# SEARCH #
	##########

	Ms4TabSqlDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {

		# Build request
		select <- paste0("select pkmol.molecule_id as ", MSDB.TAG.MOLID, ", pkmol.name as ", MSDB.TAG.MOLNAMES,", pk.mass as ", MSDB.TAG.MZTHEO, ", pk.composition as ", MSDB.TAG.COMP,", pk.attribution as ", MSDB.TAG.ATTR)
		from <- " from peaklist as pk, peaklist_name as pkmol"
		where <- paste0(" where pkmol.id = pk.name_id and pk.mass >= ", mz.low, " and pk.mass <= ", mz.high)
		where <- paste0(where, ' and ', if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos')

		# Insert where clause on attribs
		if ( ! is.null(attribs)) {
			where.attribs <- paste0(vapply(attribs, function(a) paste0("pk.attribution = '", a, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ")
			where <- paste0(where, ' and (', where.attribs, ')')
		}

		# Insert where clause on molids
		if ( ! is.null(molids)) {
			where.molids <- paste0(vapply(molids, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
			where <- paste0(where, ' and (', where.molids, ')')
		}

		# Insert where clause on columns
		if ( ! is.null(col)) {
			dbcols <- .self$.to.dbcols(col)
			if ( ! is.null(dbcols)) {

				# Can't find specified columns
				if (length(dbcols) == 0 && length(col) > 0)
					return(.get.empty.result.df(rt = TRUE))

				select <- paste0(select, ", (60 * pkret.retention) as ", MSDB.TAG.COLRT, ", method.name as ", MSDB.TAG.COL)
				from <- paste0(from, ", method, peaklist_ret as pkret")
				where.cols <- if (length(dbcols) == 0) 'TRUE' else paste0(vapply(dbcols, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ")
				where <- paste0(where, " and pk.id = pkret.id_peak and pkret.id_method = method.id and (", where.cols, ")")
				if (! is.null(rt.low) && ! is.null(rt.high))
					where <- paste0(where, " and pkret.retention * 60 >= ", rt.low, " and pkret.retention * 60 <= ", rt.high)
			}
		}

		# Assemble request
		request <- paste0(select, from, where, ';')

		# Run request
		rs <- .self$.send.query(request)
		df <- fetch(rs,n=-1)

		# No results

		# Remove N prefix from IDs
		if (nrow(df) > 0)
			df[[MSDB.TAG.MOLID]] <- vapply(df[[MSDB.TAG.MOLID]], function(x) substring(x, 2), FUN.VALUE = '', USE.NAMES = FALSE)
		else if (nrow(df) == 0)
			df <- .get.empty.result.df(rt = ! is.null(col))

		return(df)
	})
	
} # end of load safe guard