Mercurial > repos > prog > lcmsmatching
view MassFiledbConn.R @ 1:253d531a0193 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
author | prog |
---|---|
date | Sat, 03 Sep 2016 17:02:01 -0400 |
parents | |
children | 20d69a062da3 |
line wrap: on
line source
if ( ! exists('MassFiledbConn')) {

	source('MassdbConn.R')

	# LCMS File db.
	# In this type of database, a single file is provided in CSV format.
	# Default separator is tabulation.
	# Each line is an MS peak measure.
	# The file contains molecule and spectrum information. Each spectrum has an accession id.

	# TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue

	#############
	# CONSTANTS #
	#############

	# Default database fields: each field tag maps to a column of the same name.
	.BIODB.DFT.DB.FIELDS <- list()
	for (f in c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZ, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS))
		.BIODB.DFT.DB.FIELDS[[f]] <- f

	#####################
	# CLASS DECLARATION #
	#####################

	# Fields:
	#   .file              Path of the database file.
	#   .file.sep          Column separator used when reading the file.
	#   .file.quote        Quoting characters used when reading the file.
	#   .field.multval.sep Separator stored for multi-valued fields (set by setFieldMultValSep()).
	#   .db                Loaded data frame, or NULL while the file has not been read yet.
	#   .fields            Named list mapping field tags to column names of the file.
	#   .ms.modes          Named character vector mapping MS mode tags to the values used in the file.
	MassFiledbConn <- setRefClass("MassFiledbConn", contains = "MassdbConn", fields = list(.file = "character", .file.sep = "character", .file.quote = "character", .field.multval.sep = 'character', .db = "ANY", .fields = "list", .ms.modes = "character"))

	###############
	# CONSTRUCTOR #
	###############

	# file        Path to the database file. Mandatory, and the file must exist.
	# file.sep    Column separator (tabulation by default).
	# file.quote  Quoting characters (double quote by default).
	# ...         Forwarded to the parent class constructor.
	MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) {

		# Check file
		( ! is.null(file) && ! is.na(file)) || stop("You must specify a file database to load.")
		file.exists(file) || stop(paste0("Cannot locate the file database \"", file ,"\"."))

		# Set fields. The file itself is loaded lazily by .init.db().
		.db <<- NULL
		.file <<- file
		.file.sep <<- file.sep
		.file.quote <<- file.quote
		.fields <<- .BIODB.DFT.DB.FIELDS
		.field.multval.sep <<- ';'
		.ms.modes <<- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
		names(.self$.ms.modes) <- .self$.ms.modes

		callSuper(...)
	})

	######################
	# Is valid field tag #
	######################

	# Returns TRUE if `tag` is one of the declared field tags.
	MassFiledbConn$methods( isValidFieldTag = function(tag) {
		return (tag %in% names(.self$.fields))
	})

	#############
	# Set field #
	#############

	# Associates a field tag with one or more columns of the database file.
	# tag      A declared field tag.
	# colname  One column name, or several; in the latter case a new column is
	#          created by joining the values of these columns with '.'.
	MassFiledbConn$methods( setField = function(tag, colname) {

		( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.")
		# colname may hold several names: test it as a whole, `&&` needs a scalar.
		( ! is.null(colname) && length(colname) > 0 && ! anyNA(colname)) || stop("No column name specified.")

		# Load database file
		.self$.init.db()

		# Check that this field tag is defined in the fields list
		.self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))

		# Check that columns are defined in database file
		all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))

		# Set new definition
		if (length(colname) == 1)
			.fields[[tag]] <<- colname
		else {
			new.col <- paste(colname, collapse = ".")
			# seq_len() so that an empty table yields an empty column instead of iterating over c(1, 0).
			.self$.db[[new.col]] <- vapply(seq_len(nrow(.self$.db)), function(i) { paste(.self$.db[i, colname], collapse = '.') }, FUN.VALUE = '')
			.fields[[tag]] <<- new.col
		}
	})

	######################################
	# SET FIELD MULTIPLE VALUE SEPARATOR #
	######################################

	# Stores the separator used for fields holding several values.
	MassFiledbConn$methods( setFieldMultValSep = function(sep) {
		.field.multval.sep <<- sep
	})

	################
	# SET MS MODES #
	################

	# Sets the value used inside the file for the MS mode tag `mode`.
	MassFiledbConn$methods( setMsMode = function(mode, value) {
		.self$.ms.modes[[mode]] <- value
	})

	##########################
	# GET ENTRY CONTENT TYPE #
	##########################

	# Always returns BIODB.DATAFRAME, whatever `type` is.
	MassFiledbConn$methods( getEntryContentType = function(type) {
		return(BIODB.DATAFRAME)
	})

	###########
	# INIT DB #
	###########

	# Loads the database file into .db on first call; later calls do nothing.
	MassFiledbConn$methods( .init.db = function() {

		if (is.null(.self$.db)) {

			# Load database. `quote` is passed by name instead of relying on positional matching.
			.db <<- read.table(.self$.file, sep = .self$.file.sep, quote = .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL)

			# Rename columns: a column registered as the value of a field is renamed to that field's tag.
			colnames(.self$.db) <- vapply(colnames(.self$.db), function(col) if (col %in% .self$.fields) names(.self$.fields)[.self$.fields %in% col] else col, FUN.VALUE = '')
		}
	})

	################
	# CHECK FIELDS #
	################

	# Stops with an error if one of the field tags in `fields` is unknown, or if
	# the column associated with it is missing from the loaded database.
	MassFiledbConn$methods( .check.fields = function(fields) {

		# Check if fields are known
		unknown.fields <- fields[ ! fields %in% names(.self$.fields)]
		if (length(unknown.fields) > 0)
			stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))

		# Init db
		.self$.init.db()

		# Check if fields are defined in file database
		field.cols <- unlist(.self$.fields[fields])
		undefined.cols <- field.cols[ ! field.cols %in% colnames(.self$.db)]
		if (length(undefined.cols) > 0)
			stop(paste0("Column(s) ", paste(undefined.cols, collapse = ", "), " is/are undefined in file database."))
	})

	################
	# EXTRACT COLS #
	################

	# Extracts the columns associated with the field tags `cols`.
	# cols      Field tags to extract. NULL, empty or NA-containing input returns NULL.
	# mode      MS mode tag on which to filter rows, or NULL for no filtering.
	# drop      Passed to `[`: if TRUE a single column is returned as a vector.
	# uniq      Remove duplicates (only applied to a vector result).
	# sort      Sort values (only applied to a vector result).
	# max.rows  Maximum number of rows/values returned, NA for no limit.
	# Returns a data frame whose columns are named after `cols`, or a vector.
	MassFiledbConn$methods( .extract.cols = function(cols, mode = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) {

		x <- NULL

		# cols may contain several tags: test it as a whole, `&&` needs a scalar.
		if ( ! is.null(cols) && length(cols) > 0 && ! anyNA(cols)) {

			# Init db
			.self$.init.db()

			# TODO check existence of cols/fields

			# Get db, eventually filtering it.
			if (is.null(mode))
				db <- .self$.db
			else {
				# Check mode value
				mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
				.self$.check.fields(BIODB.MSMODE)

				# Filter on mode
				db <- .self$.db[.self$.db[[unlist(.self$.fields[BIODB.MSMODE])]] %in% .self$.ms.modes[[mode]], ]
			}

			# Get subset
			x <- db[, unlist(.self$.fields[cols]), drop = drop]

			# Rename columns
			if (is.data.frame(x))
				colnames(x) <- cols

			# Rearrange
			if (drop && is.vector(x)) {
				if (uniq)
					x <- x[ ! duplicated(x)]
				if (sort)
					x <- base::sort(x)
			}

			# Cut. Never index past the end (it would pad with NA), and keep a
			# one-column data frame as a data frame.
			if ( ! is.na(max.rows)) {
				if (is.vector(x))
					x <- x[seq_len(min(max.rows, length(x)))]
				else
					x <- x[seq_len(min(max.rows, nrow(x))), , drop = FALSE]
			}
		}

		return(x)
	})

	#################
	# GET ENTRY IDS #
	#################

	# Returns the sorted, unique IDs of the entries of the given type:
	# accession IDs for BIODB.SPECTRUM, compound IDs for BIODB.COMPOUND.
	# Any other type yields NA_character_.
	MassFiledbConn$methods( getEntryIds = function(type) {

		ids <- NA_character_

		if (type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND))
			ids <- as.character(.self$.extract.cols(if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID, drop = TRUE, uniq = TRUE, sort = TRUE))

		return(ids)
	})

	##################
	# GET NB ENTRIES #
	##################

	# Returns the length of the vector returned by getEntryIds(type).
	MassFiledbConn$methods( getNbEntries = function(type) {
		return(length(.self$getEntryIds(type)))
	})

	###############################
	# GET CHROMATOGRAPHIC COLUMNS #
	###############################

	# Inherited from MassdbConn.
	# Returns a data frame with columns BIODB.ID and BIODB.TITLE listing the
	# distinct chromatographic columns, optionally restricted to `compound.ids`.
	MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) {

		# Extract needed columns
		db <- .self$.extract.cols(c(BIODB.COMPOUND.ID, BIODB.CHROM.COL))

		# Filter on molecule IDs
		if ( ! is.null(compound.ids))
			db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, ]

		# Get column names
		cols <- db[[BIODB.CHROM.COL]]

		# Remove duplicates
		cols <- cols[ ! duplicated(cols)]

		# Make data frame: the column name is used both as ID and as title.
		chrom.cols <- data.frame(cols, cols, stringsAsFactors = FALSE)
		colnames(chrom.cols) <- c(BIODB.ID, BIODB.TITLE)

		return(chrom.cols)
	})

	#################
	# GET MZ VALUES #
	#################

	# Inherited from MassdbConn.
	# Returns the sorted, unique M/Z values, optionally filtered on MS mode
	# `mode` and limited to `max.results` values.
	MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {

		# Get mz values
		mz <- .self$.extract.cols(BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results)

		return(mz)
	})
}