Mercurial > repos > prog > lcmsmatching
comparison MassFiledbConn.R @ 1:253d531a0193 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
| author | prog |
|---|---|
| date | Sat, 03 Sep 2016 17:02:01 -0400 |
| parents | |
| children | 20d69a062da3 |
comparison
equal
deleted
inserted
replaced
| 0:e66bb061af06 | 1:253d531a0193 |
|---|---|
| 1 if ( ! exists('MassFiledbConn')) { | |
| 2 | |
| 3 source('MassdbConn.R') | |
| 4 | |
| 5 # LCMS File db. | |
| 6 # In this type of database, a single file is provided in CSV format. The default separator is the tab character. | |
| 7 # Each line is an MS peak measure. | |
| 8 # The file contains molecule and spectrum information. Each spectrum has an accession id. | |
| 9 | |
| 10 # TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue | |
| 11 | |
| 12 ############# | |
| 13 # CONSTANTS # | |
| 14 ############# | |
| 15 | |
| 16 # Default database fields | |
# Default mapping from field tag to file column: every standard tag initially
# maps onto a column carrying the same name as the tag.
# The list is built inside local() so that no helper variable is left behind
# in the environment this file is sourced into.
.BIODB.DFT.DB.FIELDS <- local({
  tags <- c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID,
            BIODB.MSMODE, BIODB.PEAK.MZ, BIODB.PEAK.COMP, BIODB.PEAK.ATTR,
            BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS)
  structure(as.list(tags), names = tags)
})
| 20 | |
| 21 ##################### | |
| 22 # CLASS DECLARATION # | |
| 23 ##################### | |
| 24 | |
# Reference class for a mass database stored in a single delimited text file.
# It extends MassdbConn.
MassFiledbConn <- setRefClass(
  "MassFiledbConn",
  contains = "MassdbConn",
  fields = list(
    .file              = "character", # Path of the database file.
    .file.sep          = "character", # Column separator given to read.table().
    .file.quote        = "character", # Quoting characters given to read.table().
    .field.multval.sep = "character", # Separator for multiple values in a field.
    .db                = "ANY",       # Loaded table, or NULL until first use.
    .fields            = "list",      # Field tag -> column name.
    .ms.modes          = "character"  # MS mode -> value used in the file.
  )
)
| 26 | |
| 27 ############### | |
| 28 # CONSTRUCTOR # | |
| 29 ############### | |
| 30 | |
MassFiledbConn$methods(
  initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) {

    # Constructor.
    #   file       Path of the database file; it is mandatory and must exist.
    #   file.sep   Column separator of the file.
    #   file.quote Quoting characters of the file.
    #   ...        Forwarded to the parent class constructor.

    # Refuse to go further without a usable file path.
    has.file <- ! is.null(file) && ! is.na(file)
    if ( ! isTRUE(has.file))
      stop("You must specify a file database to load.")
    if ( ! file.exists(file))
      stop(paste0("Cannot locate the file database \"", file ,"\"."))

    # File description. The table itself is only loaded on first use.
    .file <<- file
    .file.sep <<- file.sep
    .file.quote <<- file.quote
    .db <<- NULL

    # Default field mapping and multiple-value separator.
    .fields <<- .BIODB.DFT.DB.FIELDS
    .field.multval.sep <<- ';'

    # By default each MS mode is represented in the file by its own name.
    modes <- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
    .ms.modes <<- structure(modes, names = modes)

    callSuper(...)
  }
)
| 49 | |
| 50 ###################### | |
| 51 # Is valid field tag # | |
| 52 ###################### | |
| 53 | |
MassFiledbConn$methods(
  isValidFieldTag = function(tag) {
    # TRUE when `tag` is one of the field tags declared in the field mapping.
    known.tags <- names(.self$.fields)
    is.element(tag, known.tags)
  }
)
| 57 | |
| 58 ############# | |
| 59 # Set field # | |
| 60 ############# | |
| 61 | |
MassFiledbConn$methods(
  setField = function(tag, colname) {

    # Map the field tag `tag` onto one or several columns of the database file.
    #   tag     A single field tag; it must be a valid tag (see isValidFieldTag).
    #   colname One or more column names present in the database file. When
    #           several are given, a new column is added to the loaded table,
    #           holding their values joined by "." and named after the joined
    #           column names; the tag is mapped onto that new column.
    # Stops with an error if an argument is missing, the tag is not valid, or
    # a column is absent from the file.

    # `colname` may hold several names, so it is tested as a whole with
    # anyNA(): `&&` requires length-one operands.
    (length(tag) == 1 && ! is.na(tag)) || stop("No tag specified.")
    (length(colname) > 0 && ! anyNA(colname)) || stop("No column name specified.")

    # Load database file
    .self$.init.db()

    # Check that this field tag is defined in the fields list
    .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))

    # Check that columns are defined in database file
    all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))

    # Set new definition
    if (length(colname) == 1) {
      .fields[[tag]] <<- colname
    } else {
      new.col <- paste(colname, collapse = ".")
      # Join the columns element-wise in one vectorised call; this also
      # behaves correctly when the table has no rows.
      parts <- unname(as.list(.self$.db[colname]))
      .self$.db[[new.col]] <- do.call(paste, c(parts, list(sep = ".")))
      .fields[[tag]] <<- new.col
    }
  }
)
| 85 | |
| 86 ###################################### | |
| 87 # SET FIELD MULTIPLE VALUE SEPARATOR # | |
| 88 ###################################### | |
| 89 | |
MassFiledbConn$methods(
  setFieldMultValSep = function(sep) {
    # Replace the separator used between multiple values of a field.
    .self$.field.multval.sep <- sep
  }
)
| 93 | |
| 94 ################ | |
| 95 # SET MS MODES # | |
| 96 ################ | |
| 97 | |
MassFiledbConn$methods(
  setMsMode = function(mode, value) {
    # Declare `value` as the representation of MS mode `mode` in the
    # database file.
    .ms.modes[[mode]] <<- value
  }
)
| 101 | |
| 102 ########################## | |
| 103 # GET ENTRY CONTENT TYPE # | |
| 104 ########################## | |
| 105 | |
MassFiledbConn$methods(
  getEntryContentType = function(type) {
    # The content type is the same whatever the entry type: `type` is not
    # used.
    BIODB.DATAFRAME
  }
)
| 109 | |
| 110 ########### | |
| 111 # INIT DB # | |
| 112 ########### | |
| 113 | |
MassFiledbConn$methods(
  .init.db = function() {

    # Load the database file into the .db field on first call; later calls
    # do nothing. After loading, every column whose name appears as a value
    # in the field mapping is renamed to the corresponding field tag.

    if (is.null(.self$.db)) {

      # Load database. Every option is passed by name so that the quoting
      # characters cannot end up in another parameter of read.table().
      .db <<- read.table(.self$.file, sep = .self$.file.sep, quote = .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL)

      # Rename columns. match() keeps the first tag when several tags map
      # onto the same column, so exactly one name is produced per column.
      cols <- colnames(.self$.db)
      idx <- match(cols, .self$.fields)
      mapped <- ! is.na(idx)
      cols[mapped] <- names(.self$.fields)[idx[mapped]]
      colnames(.self$.db) <- cols
    }
  }
)
| 125 | |
| 126 ################ | |
| 127 # CHECK FIELDS # | |
| 128 ################ | |
| 129 | |
MassFiledbConn$methods(
  .check.fields = function(fields) {

    # Validate a set of field tags.
    #   fields Character vector of field tags.
    # Stops with an error naming the offending tags if some are not declared
    # in the field mapping, or naming the offending columns if the columns
    # they map onto are absent from the loaded database table.

    # Check if fields are known
    unknown.fields <- fields[ ! fields %in% names(.self$.fields)]
    if (length(unknown.fields) > 0)
      stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))

    # Init db
    .self$.init.db()

    # Check if fields are defined in file database. The lookup is made in the
    # loaded table (.db), not in the loading method.
    columns <- unlist(.self$.fields[fields])
    undefined.columns <- columns[ ! columns %in% colnames(.self$.db)]
    if (length(undefined.columns) > 0)
      stop(paste0("Column(s) ", paste(undefined.columns, collapse = ", "), " is/are undefined in file database."))
  }
)
| 145 | |
| 146 ################ | |
| 147 # EXTRACT COLS # | |
| 148 ################ | |
| 149 | |
MassFiledbConn$methods(
  .extract.cols = function(cols, mode = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) {

    # Extract the columns associated with field tags from the database table.
    #   cols     Field tags to extract. NULL, empty or NA yields NULL.
    #   mode     Optional MS mode used to filter rows; must be a known mode.
    #   drop     Passed to `[`: when TRUE a single column comes back as a vector.
    #   uniq     Remove duplicates (only applied to a vector result).
    #   sort     Sort values (only applied to a vector result).
    #   max.rows Maximum number of rows/values to return; NA means no limit.
    # Returns a data frame whose columns are named after `cols`, or a vector.

    # `cols` may hold several tags, hence anyNA() rather than `&& ! is.na()`.
    if (is.null(cols) || length(cols) == 0 || anyNA(cols))
      return(NULL)

    # Init db
    .self$.init.db()

    # TODO check existence of cols/fields

    # Get db, possibly filtered on MS mode.
    if (is.null(mode)) {
      db <- .self$.db
    } else {
      # Check mode value
      mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
      .self$.check.fields(BIODB.MSMODE)

      # Filter on mode
      db <- .self$.db[.self$.db[[unlist(.self$.fields[BIODB.MSMODE])]] %in% .self$.ms.modes[[mode]], ]
    }

    # Get subset
    x <- db[, unlist(.self$.fields[cols]), drop = drop]

    # Rename columns
    if (is.data.frame(x))
      colnames(x) <- cols

    # Rearrange
    if (drop && is.vector(x)) {
      if (uniq)
        x <- x[ ! duplicated(x)]
      if (sort)
        x <- base::sort(x)
    }

    # Cut. head() never pads with NA when fewer elements are available, copes
    # with a limit of 0, and keeps a one-column data frame a data frame.
    if ( ! is.na(max.rows))
      x <- utils::head(x, max.rows)

    x
  }
)
| 195 | |
| 196 ################# | |
| 197 # GET ENTRY IDS # | |
| 198 ################# | |
| 199 | |
MassFiledbConn$methods(
  getEntryIds = function(type) {

    # Return the sorted, duplicate-free identifiers of all entries of the
    # given type, as a character vector. Spectra are identified by accession
    # and compounds by compound ID. Any other type yields NA_character_.

    if ( ! type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND))
      return(NA_character_)

    id.field <- if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID
    ids <- .self$.extract.cols(id.field, drop = TRUE, uniq = TRUE, sort = TRUE)

    as.character(ids)
  }
)
| 209 | |
| 210 ################## | |
| 211 # GET NB ENTRIES # | |
| 212 ################## | |
| 213 | |
MassFiledbConn$methods(
  getNbEntries = function(type) {
    # Number of identifiers returned by getEntryIds() for this entry type.
    ids <- .self$getEntryIds(type)
    length(ids)
  }
)
| 217 | |
| 218 ############################### | |
| 219 # GET CHROMATOGRAPHIC COLUMNS # | |
| 220 ############################### | |
| 221 | |
| 222 # Inherited from MassdbConn. | |
MassFiledbConn$methods(
  getChromCol = function(compound.ids = NULL) {

    # List the chromatographic columns found in the database.
    #   compound.ids Optional compound IDs; when given, only the rows of these
    #                compounds are considered.
    # Returns a data frame with one row per distinct chromatographic column
    # and two columns, BIODB.ID and BIODB.TITLE, both holding the column name.

    # Compound ID / chromatographic column pairs
    pairs <- .self$.extract.cols(c(BIODB.COMPOUND.ID, BIODB.CHROM.COL))

    # Restrict to the requested compounds
    if ( ! is.null(compound.ids)) {
      wanted <- pairs[[BIODB.COMPOUND.ID]] %in% compound.ids
      pairs <- pairs[wanted, ]
    }

    # Distinct chromatographic column names, in order of first appearance
    col.names <- unique(pairs[[BIODB.CHROM.COL]])

    # The name is used both as identifier and as title
    chrom.cols <- data.frame(col.names, col.names, stringsAsFactors = FALSE)
    names(chrom.cols) <- c(BIODB.ID, BIODB.TITLE)

    chrom.cols
  }
)
| 244 | |
| 245 ################# | |
| 246 # GET MZ VALUES # | |
| 247 ################# | |
| 248 | |
| 249 # Inherited from MassdbConn. | |
MassFiledbConn$methods(
  getMzValues = function(mode = NULL, max.results = NA_integer_) {
    # Return the sorted, duplicate-free M/Z values of the database peaks.
    #   mode        Optional MS mode used to filter the peaks.
    #   max.results Maximum number of values to return; NA means no limit.
    .self$.extract.cols(
      BIODB.PEAK.MZ,
      mode = mode,
      drop = TRUE,
      uniq = TRUE,
      sort = TRUE,
      max.rows = max.results
    )
  }
)
| 257 | |
| 258 } |
