Mercurial > repos > prog > lcmsmatching
diff Ms4TabSqlDb.R @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
| author | prog | 
|---|---|
| date | Tue, 12 Jul 2016 12:02:37 -0400 | 
| parents | |
| children | 20d69a062da3 | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Ms4TabSqlDb.R Tue Jul 12 12:02:37 2016 -0400 @@ -0,0 +1,348 @@ +if ( ! exists('Ms4TabSqlDb')) { # Do not load again if already loaded + + library('methods') + source('msdb-common.R') + source('MsDb.R') + + ##################### + # CLASS DECLARATION # + ##################### + + Ms4TabSqlDb <- setRefClass("Ms4TabSqlDb", contains = "MsDb", fields = list(.host = "character", .port = "integer", .dbname = "character", .user = "character", .password = "character", .drv = "ANY", .conn = "ANY")) + + ############### + # CONSTRUCTOR # + ############### + + Ms4TabSqlDb$methods( initialize = function(host = NA_character_, port = NA_integer_, dbname = NA_character_, user = NA_character_, password = NA_character_, ...) { + + # Initialize members + .host <<- if ( ! is.null(host)) host else NA_character_ + .port <<- if ( ! is.null(port)) port else NA_integer_ + .dbname <<- if ( ! is.null(dbname)) dbname else NA_character_ + .user <<- if ( ! is.null(user)) user else NA_character_ + .password <<- if ( ! is.null(password)) password else NA_character_ + .drv <<- NULL + .conn <<- NULL + + callSuper(...) + }) + + ################## + # GET CONNECTION # + ################## + + Ms4TabSqlDb$methods( .get.connection = function() { + + # Initialize connection + if (is.null(.self$.conn)) { + library('RPostgreSQL') + .drv <<- dbDriver("PostgreSQL") + .conn <<- dbConnect(.self$.drv, host = .self$.host, port = .self$.port, dbname = .self$.dbname, user = .self$.user, password = .self$.password) + } + + return(.self$.conn) + }) + + ############## + # SEND QUERY # + ############## + + Ms4TabSqlDb$methods( .send.query = function(query) { + conn <- .self$.get.connection() # Call it first separately, so library RPostgreSQL is loaded. + rs <- try(dbSendQuery(conn, query)) + return(rs) + }) + + #################### + # GET MOLECULE IDS # + #################### + + Ms4TabSqlDb$methods( getMoleculeIds = function() { + + rs <- .self$.send.query('select pkmol.molecule_id as id from peaklist_name as pkmol;') + ids <- fetch(rs,n=-1) + ids <- ids[['id']] # Get 'id' column + ids <- vapply(ids, function(x) { if (substring(x, 1, 1) == 'N') as.integer(substring(x, 2)) else as.integer(x) } , FUN.VALUE = 1, USE.NAMES = FALSE) + ids <- (sort(ids)) + + return(ids) + }) + + #################### + # GET NB MOLECULES # + #################### + + Ms4TabSqlDb$methods( getNbMolecules = function() { + + rs <- .self$.send.query('select count(*) from peaklist_name;') + df <- fetch(rs,n=-1) + n <- df[[1]] + + return(n) + }) + + ##################### + # GET MOLECULE NAME # + ##################### + + Ms4TabSqlDb$methods( getMoleculeName = function(molid) { + + # Build request + where <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') + request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';') + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + # Get IDs + ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE) + + # Get names in the same order as the input vector + names <- df[['name']][order(ids)[order(molid)]] + + return(if (is.null(names)) NA_character_ else names) + }) + + + ############################### + # GET CHROMATOGRAPHIC COLUMNS # + ############################### + + Ms4TabSqlDb$methods( getChromCol = function(molid = NULL) { + + # Get all columns + if (is.null(molid)) { + request <- 'select name from method;' + + # Get columns of the specified molecules + } else { + where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') + where <- paste0('pk.name_id = pkmol.id and pk.id = pkret.id_peak and pkret.id_method = method.id and (', where_molids, ')') + request <- paste0('select distinct method.name from method, peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret where ', where, ';') + } + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + # Gets column list + cols <- df[['name']] + + # Remove FIA + cols <- cols[ cols != 'FIA'] + + # Normalize names + cols <- vapply(cols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) + + # Remove duplicates + cols <- cols[ ! duplicated(cols)] + + # Make data frame + cols <- data.frame(id = cols, title = cols, stringsAsFactors = FALSE) + + return(cols) + }) + + ################ + # FIND BY NAME # + ################ + + Ms4TabSqlDb$methods( findByName = function(name) { + + if (is.null(name)) return(NA_integer_) + + # Put names in uppercase + uname <- toupper(name) + + # Build request + where <- paste0(vapply(uname, function(n) paste0("upper(pkmol.name) = '", gsub("'", "''", n, perl = TRUE), "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = ' or ') + request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';') + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + # Adds missing names/IDs + missing_names <- uname[ ! uname %in% toupper(df[['name']])] + df <- rbind(df, data.frame(id = rep(NA_integer_, length(missing_names)), name = missing_names)) + + # Get IDs and names + ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE) + names <- toupper(as.character(df[['name']])) + + # Get IDs in the same order as the input vector + ids[order(uname)] <- ids[order(names)] + + return(if (is.null(ids)) NA_integer_ else ids) + }) + + ####################### + # GET RETENTION TIMES # + ####################### + + Ms4TabSqlDb$methods( getRetentionTimes = function(molid, col = NA_character_) { + + if (is.null(molid) || is.na(molid) || length(molid) != 1) + stop("The parameter molid must consist only in a single integer.") + + # Build request + request <- paste0("select distinct method.name as col, (pkret.retention * 60) as ret from peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret, method where pkret.id_peak = pk.id and pkmol.id = pk.name_id and pkret.id_method = method.id and pkmol.molecule_id = 'N", molid, "'") + if ( ! is.na(col)) { + where_cols <- paste0(vapply(col, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = ''), collapse = ' or ') + request <- paste0(request, ' and (', where_cols, ')') + } + request <- paste0(request, ';') + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + # Remove FIA + df <- df[df[['col']] != 'FIA', ] + + # Normalize names + df[['col']] <- vapply(df[['col']], .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) + + # Build output list + lst <- list() + if (nrow(df) > 0) + for (i in 1:nrow(df)) { + c <- df[i, 'col'] + lst[[c]] <- c(lst[[c]], df[i, 'ret']) + } + + return(lst) + }) + + ################ + # GET NB PEAKS # + ################ + + Ms4TabSqlDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) { + + # Build request + request <- paste0("select count(*) from peaklist as pk, peaklist_name as pkmol where pkmol.id = pk.name_id") + if ( length(molid) > 1 || ! is.na(molid)) { + where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') + request <- paste0(request, ' and (', where_molids, ')') + } + if ( ! is.na(type)) { + request <- paste0(request, ' and ', if (type == MSDB.TAG.POS) '' else 'not ', 'ion_pos') + } + request <- paste0(request, ';') + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + return(df[1,1]) + }) + + ############################### + # GET CHROMATOGRAPHIC COLUMNS # + ############################### + + Ms4TabSqlDb$methods( .to.dbcols = function(col) { + + # Get all column names + request <- 'select name from method;' + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + # Get database column names + dbcols <- df[['name']] + dbcols <- dbcols[ dbcols != 'FIA'] + + # Get normalize names + normcols <- vapply(dbcols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) + + return(dbcols[normcols == tolower(col)]) + }) + + ################# + # GET MZ VALUES # + ################# + + # Returns a numeric vector of all masses stored inside the database. + Ms4TabSqlDb$methods( getMzValues = function(mode = NULL) { + + # Build request + select <- paste0("select distinct pk.mass as ", MSDB.TAG.MZTHEO) + from <- " from peaklist as pk" + where <- "" + if ( ! is.null(mode)) + where <- paste0(" where ", if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos') + + # Assemble request + request <- paste0(select, from, where, ';') + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs, n=-1) + + return(df[[MSDB.TAG.MZTHEO]]) + }) + + ########## + # SEARCH # + ########## + + Ms4TabSqlDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) { + + # Build request + select <- paste0("select pkmol.molecule_id as ", MSDB.TAG.MOLID, ", pkmol.name as ", MSDB.TAG.MOLNAMES,", pk.mass as ", MSDB.TAG.MZTHEO, ", pk.composition as ", MSDB.TAG.COMP,", pk.attribution as ", MSDB.TAG.ATTR) + from <- " from peaklist as pk, peaklist_name as pkmol" + where <- paste0(" where pkmol.id = pk.name_id and pk.mass >= ", mz.low, " and pk.mass <= ", mz.high) + where <- paste0(where, ' and ', if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos') + + # Insert where clause on attribs + if ( ! is.null(attribs)) { + where.attribs <- paste0(vapply(attribs, function(a) paste0("pk.attribution = '", a, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ") + where <- paste0(where, ' and (', where.attribs, ')') + } + + # Insert where clause on molids + if ( ! is.null(molids)) { + where.molids <- paste0(vapply(molids, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') + where <- paste0(where, ' and (', where.molids, ')') + } + + # Insert where clause on columns + if ( ! is.null(col)) { + dbcols <- .self$.to.dbcols(col) + if ( ! is.null(dbcols)) { + + # Can't find specified columns + if (length(dbcols) == 0 && length(col) > 0) + return(.get.empty.result.df(rt = TRUE)) + + select <- paste0(select, ", (60 * pkret.retention) as ", MSDB.TAG.COLRT, ", method.name as ", MSDB.TAG.COL) + from <- paste0(from, ", method, peaklist_ret as pkret") + where.cols <- if (length(dbcols) == 0) 'TRUE' else paste0(vapply(dbcols, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ") + where <- paste0(where, " and pk.id = pkret.id_peak and pkret.id_method = method.id and (", where.cols, ")") + if (! is.null(rt.low) && ! is.null(rt.high)) + where <- paste0(where, " and pkret.retention * 60 >= ", rt.low, " and pkret.retention * 60 <= ", rt.high) + } + } + + # Assemble request + request <- paste0(select, from, where, ';') + + # Run request + rs <- .self$.send.query(request) + df <- fetch(rs,n=-1) + + # No results + + # Remove N prefix from IDs + if (nrow(df) > 0) + df[[MSDB.TAG.MOLID]] <- vapply(df[[MSDB.TAG.MOLID]], function(x) substring(x, 2), FUN.VALUE = '', USE.NAMES = FALSE) + else if (nrow(df) == 0) + df <- .get.empty.result.df(rt = ! is.null(col)) + + return(df) + }) + +} # end of load safe guard
