Mercurial > repos > prog > lcmsmatching
diff MassFiledbConn.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
line wrap: on
line diff
# --- a/MassFiledbConn.R Wed Apr 19 10:00:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,275 +0,0 @@

# LCMS File db.
# In this type of database, a single file is provided in CSV format. Default separator is tabulation.
# Each line is a MS peak measure.
# The file contains molecule and spectrum information. Each spectrum has an accession id.

# TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue

#############
# CONSTANTS #
#############

# Default database fields: each known field tag initially maps to a file
# column bearing the same name as the tag.
.BIODB.DFT.DB.FIELDS <- list()
for (f in c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZEXP, BIODB.PEAK.MZTHEO, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS)) {
    .BIODB.DFT.DB.FIELDS[[f]] <- f
}

#####################
# CLASS DECLARATION #
#####################

# Fields:
#   .file              Path of the database file.
#   .file.sep          Column separator used when reading the file.
#   .file.quote        Quoting characters used when reading the file.
#   .field.multval.sep Separator for fields holding multiple values.
#   .db                Loaded data frame, NULL until first use (lazy loading).
#   .db.orig.colnames  Column names as read from the file (plus any merged
#                      column created by setField()).
#   .fields            List mapping field tags to file column names.
#   .ms.modes          Named character vector mapping MS modes to the values
#                      used in the file.
MassFiledbConn <- methods::setRefClass("MassFiledbConn",
    contains = "MassdbConn",
    fields = list(
        .file = "character",
        .file.sep = "character",
        .file.quote = "character",
        .field.multval.sep = 'character',
        .db = "ANY",
        .db.orig.colnames = "character",
        .fields = "list",
        .ms.modes = "character"
    ))

###############
# CONSTRUCTOR #
###############

# file          Path to the database file. Mandatory, must exist.
# file.sep      Column separator of the file.
# file.quote    Quoting characters of the file.
# ...           Forwarded to the parent class constructor.
MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) {

    # Check file
    (! is.null(file) && ! is.na(file)) || stop("You must specify a file database to load.")
    file.exists(file) || stop(paste0("Cannot locate the file database \"", file ,"\"."))

    # Set fields. The file itself is only read on first access (see .init.db).
    .db <<- NULL
    .db.orig.colnames <<- NA_character_
    .file <<- file
    .file.sep <<- file.sep
    .file.quote <<- file.quote
    .fields <<- .BIODB.DFT.DB.FIELDS
    .field.multval.sep <<- ';'
    .ms.modes <<- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
    names(.self$.ms.modes) <- .self$.ms.modes

    callSuper(...)
})

######################
# Is valid field tag #
######################

# Returns TRUE if `tag` is one of the field tags known by this connector.
MassFiledbConn$methods( isValidFieldTag = function(tag) {
    return (tag %in% names(.self$.fields))
})

###########
# INIT DB #
###########

# Loads the database file into memory on first call; does nothing afterwards.
MassFiledbConn$methods( .init.db = function() {

    if (is.null(.self$.db)) {

        # Load database. The quote characters are passed by name: relying on
        # positional matching here only worked by accident of argument order.
        .db <<- read.table(.self$.file, sep = .self$.file.sep, quote = .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL, comment.char = '')

        # Save column names
        .db.orig.colnames <<- colnames(.self$.db)
    }
})

#############
# Set field #
#############

# Associates a field tag with one or several columns of the database file.
#
# tag       A valid field tag (see isValidFieldTag()).
# colname   One or more column names, as written in the file. When several
#           names are given, a new column is created whose values are the
#           values of these columns pasted together with a '.' separator.
#
# After the call, the columns of the in-memory table that are associated with
# a tag are named after that tag.
MassFiledbConn$methods( setField = function(tag, colname) {

    ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.")
    # `colname` may hold several names, so the NA test must be vector-safe.
    (length(colname) > 0 && ! anyNA(colname)) || stop("No column name specified.")

    # Load database file
    .self$.init.db()

    # Check that this field tag is defined in the fields list
    .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))

    # Check that columns are defined in database file. The lookup is made on
    # the names read from the file: the live table may already have had some
    # of its columns renamed to tags by a previous call.
    col.idx <- match(colname, .self$.db.orig.colnames)
    ( ! anyNA(col.idx)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))

    # Set new definition
    if (length(colname) == 1) {
        .fields[[tag]] <<- colname
    } else {
        new.col <- paste(colname, collapse = ".")

        # Merge the columns row by row (vectorized paste over the columns).
        merged <- do.call(paste, c(unname(as.list(.self$.db[col.idx])), sep = "."))

        # Register the merged column among the original names, so that the
        # renaming step below stays aligned with the columns of the table.
        new.idx <- match(new.col, .self$.db.orig.colnames)
        if (is.na(new.idx)) {
            new.idx <- length(.self$.db.orig.colnames) + 1L
            .db.orig.colnames <<- c(.self$.db.orig.colnames, new.col)
        }
        .self$.db[[new.idx]] <- merged
        .fields[[tag]] <<- new.col
    }

    # Update data frame column names: a column associated with a tag is named
    # after the tag. If several tags point to the same column, the tag being
    # set by this call wins, otherwise the first declared tag is used.
    field.cols <- unlist(.self$.fields)
    colnames(.self$.db) <- vapply(.self$.db.orig.colnames, function(col) {
        tags <- names(field.cols)[field.cols %in% col]
        if (length(tags) == 0)
            col
        else if (tag %in% tags)
            tag
        else
            tags[[1]]
    }, FUN.VALUE = '', USE.NAMES = FALSE)
})

######################################
# SET FIELD MULTIPLE VALUE SEPARATOR #
######################################

# Sets the separator used for fields holding multiple values.
MassFiledbConn$methods( setFieldMultValSep = function(sep) {
    .field.multval.sep <<- sep
})

################
# SET MS MODES #
################

# Sets the value used inside the database file to denote the MS mode `mode`.
MassFiledbConn$methods( setMsMode = function(mode, value) {
    .self$.ms.modes[[mode]] <- value
})

##########################
# GET ENTRY CONTENT TYPE #
##########################

# Entries of this database are always provided as data frames.
MassFiledbConn$methods( getEntryContentType = function(type) {
    return(BIODB.DATAFRAME)
})

################
# CHECK FIELDS #
################

# Checks that all field tags in `fields` are known and available as columns of
# the loaded table. Stops with an error listing the offending tags otherwise.
# An empty vector or a single NA means "nothing to check".
MassFiledbConn$methods( .check.fields = function(fields) {

    if (length(fields) == 0 || (length(fields) == 1 && is.na(fields)))
        return(invisible(NULL))

    # Check if fields are known
    unknown.fields <- fields[ ! fields %in% names(.self$.fields)]
    if (length(unknown.fields) > 0)
        stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))

    # Init db
    .self$.init.db()

    # Check if fields are defined in file database
    undefined.fields <- fields[ ! fields %in% colnames(.self$.db)]
    if (length(undefined.fields) > 0)
        stop(paste0("Column(s) ", paste(undefined.fields, collapse = ", "), " is/are undefined in file database."))

    invisible(NULL)
})

##########
# SELECT #
##########

# Select data from database.
#
# cols          Field tags of the columns to return. NULL or NA for all columns.
# mode          MS mode to filter on. NULL or NA for no filtering.
# compound.ids  Compound IDs to filter on. NULL for no filtering.
# drop          If TRUE and a single column is selected, return a vector.
# uniq          If TRUE, remove duplicates (only applied to a vector result).
# sort          If TRUE, sort values (only applied to a vector result).
# max.rows      Maximum number of rows/values to return. NA for no limit.
#
# Columns are addressed by field tag: setField() renames the columns of the
# loaded table to their tags, and .check.fields() validates tags against the
# column names of that table.
MassFiledbConn$methods( .select = function(cols = NULL, mode = NULL, compound.ids = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) {

    # Init db
    .self$.init.db()

    # Get db
    db <- .self$.db

    # Filter db on mode
    if ( ! is.null(mode) && ! is.na(mode)) {

        # Check mode value
        mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
        .self$.check.fields(BIODB.MSMODE)

        # Filter on mode
        db <- db[db[[BIODB.MSMODE]] %in% .self$.ms.modes[[mode]], , drop = FALSE]
    }

    # Filter db on compound ids
    if ( ! is.null(compound.ids)) {
        .self$.check.fields(BIODB.COMPOUND.ID)
        db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, , drop = FALSE]
    }

    # Get subset. `cols` may hold several tags, hence the vector-safe test.
    select.all <- length(cols) == 0 || (length(cols) == 1 && is.na(cols))
    if (select.all) {
        x <- db
    } else {
        .self$.check.fields(cols)
        x <- db[, cols, drop = drop]
    }

    # Rearrange
    if (drop && is.vector(x)) {
        if (uniq)
            x <- x[ ! duplicated(x)]
        if (sort)
            x <- base::sort(x)
    }

    # Cut. head() never pads with NA when fewer rows than requested are
    # available, and keeps a data frame a data frame.
    if ( ! is.na(max.rows))
        x <- utils::head(x, max.rows)

    return(x)
})

#################
# GET ENTRY IDS #
#################

# Returns the sorted unique IDs of entries of the given type (spectrum
# accessions or compound IDs), or NA for any other type.
MassFiledbConn$methods( getEntryIds = function(type) {

    ids <- NA_character_

    if (type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND)) {
        id.field <- if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID
        ids <- as.character(.self$.select(cols = id.field, drop = TRUE, uniq = TRUE, sort = TRUE))
    }

    return(ids)
})

##################
# GET NB ENTRIES #
##################

# Returns the number of entries of the given type.
MassFiledbConn$methods( getNbEntries = function(type) {
    return(length(.self$getEntryIds(type)))
})

###############################
# GET CHROMATOGRAPHIC COLUMNS #
###############################

# Inherited from MassdbConn.
# Returns a data frame of the distinct chromatographic columns, optionally
# restricted to the given compound IDs. The ID and the title columns both
# contain the column name.
MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) {

    # Extract needed columns
    db <- .self$.select(cols = c(BIODB.COMPOUND.ID, BIODB.CHROM.COL))

    # Filter on molecule IDs
    if ( ! is.null(compound.ids))
        db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, , drop = FALSE]

    # Get column names, without duplicates
    cols <- unique(db[[BIODB.CHROM.COL]])

    # Make data frame
    chrom.cols <- data.frame(cols, cols, stringsAsFactors = FALSE)
    colnames(chrom.cols) <- c(BIODB.ID, BIODB.TITLE)

    return(chrom.cols)
})

#################
# GET MZ VALUES #
#################

# Inherited from MassdbConn.
# Returns the sorted unique M/Z values, optionally restricted to an MS mode
# and to a maximum number of values.
MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {

    # Get mz values
    # NOTE(review): BIODB.PEAK.MZ is not among the default field tags declared
    # in this file (only BIODB.PEAK.MZEXP and BIODB.PEAK.MZTHEO are) -- confirm
    # which tag is intended here.
    mz <- .self$.select(cols = BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results)

    return(mz)
})

################
# GET NB PEAKS #
################

# Inherited from MassdbConn.
MassFiledbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) {

    # Returns the number of peaks stored in the database, optionally
    # restricted to an MS mode and/or a set of compound IDs.

    # Get peaks
    peaks <- .self$.select(cols = BIODB.PEAK.MZTHEO, mode = mode, compound.ids = compound.ids)

    # .select() returns a data frame here (drop = FALSE by default), on which
    # length() would count columns. NROW() counts rows of a data frame and
    # elements of a plain vector alike.
    return(NROW(peaks))
})