Mercurial > repos > prog > lcmsmatching
comparison MsDb.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
| author | prog |
|---|---|
| date | Thu, 02 Mar 2017 08:55:00 -0500 |
| parents | e66bb061af06 |
| children | fb9c0409d85c |
comparison
equal
deleted
inserted
replaced
| 1:253d531a0193 | 2:20d69a062da3 |
|---|---|
| 133 | 133 |
| 134 .mz.tol.unit <<- mztolunit | 134 .mz.tol.unit <<- mztolunit |
| 135 }) | 135 }) |
| 136 | 136 |
| 137 #################### | 137 #################### |
| | 138 # HANDLE COMPOUNDS # |
| | 139 #################### |
| | 140 |
| | 141 # Returns TRUE if this database handles compounds directly (by IDs) |
| | 142 MsDb$methods( handleCompounds = function() { |
| | 143 return(TRUE) |
| | 144 }) |
| | 145 |
| | 146 #################### |
| 138 # GET MOLECULE IDS # | 147 # GET MOLECULE IDS # |
| 139 #################### | 148 #################### |
| 140 | 149 |
| 141 # Returns an integer vector of all molecule IDs stored inside the database. | 150 # Returns an integer vector of all molecule IDs stored inside the database. |
| 142 MsDb$methods( getMoleculeIds = function() { | 151 MsDb$methods( getMoleculeIds = function(max.results = NA_integer_) { |
| 143 stop("Method getMoleculeIds() not implemented in concrete class.") | 152 stop("Method getMoleculeIds() not implemented in concrete class.") |
| 144 }) | 153 }) |
| 145 | 154 |
| 146 #################### | 155 #################### |
| 147 # GET NB MOLECULES # | 156 # GET NB MOLECULES # |
| 155 ################# | 164 ################# |
| 156 # GET MZ VALUES # | 165 # GET MZ VALUES # |
| 157 ################# | 166 ################# |
| 158 | 167 |
| 159 # Returns a numeric vector of all masses stored inside the database. | 168 # Returns a numeric vector of all masses stored inside the database. |
| 160 MsDb$methods( getMzValues = function(mode = NULL) { | 169 MsDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { |
| 161 stop("Method getMzValues() not implemented in concrete class.") | 170 stop("Method getMzValues() not implemented in concrete class.") |
| 162 }) | 171 }) |
| 163 | 172 |
| 164 ##################### | 173 ##################### |
| 165 # GET MOLECULE NAME # | 174 # GET MOLECULE NAME # |
| 216 | 225 |
| 217 ################## | 226 ################## |
| 218 # GET PEAK TABLE # | 227 # GET PEAK TABLE # |
| 219 ################## | 228 ################## |
| 220 | 229 |
| 221 MsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_){ | 230 MsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_) { |
| 222 stop("Method getPeakTable() not implemented in concrete class.") | 231 stop("Method getPeakTable() not implemented in concrete class.") |
| 223 }) | 232 }) |
| 224 | 233 |
| 225 ########## | 234 ########## |
| 226 # SEARCH # | 235 # SEARCH # |
| 233 # col The chromatographic column used. | 242 # col The chromatographic column used. |
| 234 # rt.tol Simple retention tolerance parameter: rtinf = rt - rt.tol and rtsup = rt + rt.tol | 243 # rt.tol Simple retention tolerance parameter: rtinf = rt - rt.tol and rtsup = rt + rt.tol |
| 235 # rt.tol.x Tolerance parameter for the equations : rtinf = rt - rt.tol.x - rt ^ rt.tol.y and rtsup = rt + rt.tol.x + rt ^ rt.tol.y | 244 # rt.tol.x Tolerance parameter for the equations : rtinf = rt - rt.tol.x - rt ^ rt.tol.y and rtsup = rt + rt.tol.x + rt ^ rt.tol.y |
| 236 # rt.tol.y Tolerance parameter. See rt.tol.x parameter. | 245 # rt.tol.y Tolerance parameter. See rt.tol.x parameter. |
| 237 # attribs Only search for peaks whose attribution is among this set of attributions. | 246 # attribs Only search for peaks whose attribution is among this set of attributions. |
| 238 # molids Only search for peaks whose molecule ID is among this vector of integer molecule IDs. Can also be a data frame with a retention time column x.colnames$rt and a molecule ID column MSDB.TAG.molid. | 247 # molids Only search for peaks whose molecule ID is among this vector of integer molecule IDs. Can also be a data frame with a retention time column x.colnames$rt and a molecule ID column MSDB.TAG.MOLID. |
| 239 # molids.rt.tol Retention time tolerance used when molids parameter is a data frame (rt, id) | 248 # molids.rt.tol Retention time tolerance used when molids parameter is a data frame (rt, id) |
| 240 # precursor.match Remove peaks whose molecule precursor peak has not also been matched. | 249 # precursor.match Remove peaks whose molecule precursor peak has not also been matched. |
| 241 # precursor.rt.tol | 250 # precursor.rt.tol |
| 242 # Returns a data frame, listing m/z values provided in input. Several matches can be found for an m/z value, in which case several lines (the same number as the number of matches found) with the same m/z value repeated will be inserted. The m/z values will be listed in the same order as in the input. The columns of the data.frame are: mz, rt (only if present in the input), id, mztheo, col, colrt, composition, attribution. | 251 # Returns a data frame, listing m/z values provided in input. Several matches can be found for an m/z value, in which case several lines (the same number as the number of matches found) with the same m/z value repeated will be inserted. The m/z values will be listed in the same order as in the input. The columns of the data.frame are: mz, rt (only if present in the input), id, mztheo, col, colrt, composition, attribution. |
| 243 MsDb$methods( searchForMzRtList = function(x = NULL, mode, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = NULL, molids.rt.tol = NULL, attribs = NULL, precursor.match = FALSE, precursor.rt.tol = NULL, same.cols = FALSE, same.rows = FALSE, peak.table = FALSE) { | 252 MsDb$methods( searchForMzRtList = function(x = NULL, mode, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = NULL, molids.rt.tol = NULL, attribs = NULL, precursor.match = FALSE, precursor.rt.tol = NULL, same.cols = FALSE, same.rows = FALSE, peak.table = FALSE) { |
| 259 precursors.ids <- precursors.df[, cols.to.keep, drop = FALSE] | 268 precursors.ids <- precursors.df[, cols.to.keep, drop = FALSE] |
| 260 precursors.ids <- precursors.ids[ ! is.na(precursors.ids[[MSDB.TAG.MOLID]]), , drop = FALSE] | 269 precursors.ids <- precursors.ids[ ! is.na(precursors.ids[[MSDB.TAG.MOLID]]), , drop = FALSE] |
| 261 precursors.ids <- precursors.ids[ ! duplicated(precursors.ids), ] | 270 precursors.ids <- precursors.ids[ ! duplicated(precursors.ids), ] |
| 262 | 271 |
| 263 # Get all matching peaks whose molecule is inside the previously obtained list of molecules | 272 # Get all matching peaks whose molecule is inside the previously obtained list of molecules |
| 264 .self$.doSearchForMzRtList(mode = mode, shift = shift, prec = prec, col = col, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = precursors.ids, molids.rt.tol = precursor.rt.tol, same.cols = same.cols, same.rows = same.rows, peak.table = peak.table) | 273 df <- .self$.doSearchForMzRtList(mode = mode, shift = shift, prec = prec, col = col, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = precursors.ids, molids.rt.tol = precursor.rt.tol, same.cols = same.cols, same.rows = same.rows, peak.table = peak.table) |
| 265 # TODO | 274 # TODO |
| 266 # | 275 # |
| 267 # peaks <- if (peak.table) results[['peaks']] else results | 276 # peaks <- if (peak.table) results[['peaks']] else results |
| 268 # | 277 # |
| 269 # # Merge results with the column/rt found for precursors. | 278 # # Merge results with the column/rt found for precursors. |
| 342 # z <- cbind(z, x[NULL, ! x.cols %in% colnames(z), drop = FALSE]) | 351 # z <- cbind(z, x[NULL, ! x.cols %in% colnames(z), drop = FALSE]) |
| 343 # } | 352 # } |
| 344 | 353 |
| 345 # Loop on all lines of input | 354 # Loop on all lines of input |
| 346 peaks <- NULL | 355 peaks <- NULL |
| | 356 .self$.input.stream$reset() |
| 347 while (.self$.input.stream$hasNextValues()) { | 357 while (.self$.input.stream$hasNextValues()) { |
| 348 | 358 |
| 349 .self$.input.stream$nextValues() | 359 .self$.input.stream$nextValues() |
| 350 | 360 |
| 351 # Search for m/z | 361 # Search for m/z |
| 367 # y[r, colnames(x.lines)] <- x.lines | 377 # y[r, colnames(x.lines)] <- x.lines |
| 368 # } | 378 # } |
| 369 # else { | 379 # else { |
| 370 # if (same.rows) { | 380 # if (same.rows) { |
| 371 # y[r, colnames(x.lines)] <- x.lines | 381 # y[r, colnames(x.lines)] <- x.lines |
| 372 # ids <- results[[MSDB.TAG.molid]] | 382 # ids <- results[[MSDB.TAG.MOLID]] |
| 373 # ids <- ids[ ! duplicated(ids)] # Remove duplicated values | 383 # ids <- ids[ ! duplicated(ids)] # Remove duplicated values |
| 374 # y[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep) | 384 # y[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep) |
| 375 # } | 385 # } |
| 376 # if ( ! same.rows || peak.table) { | 386 # if ( ! same.rows || peak.table) { |
| 377 # new.rows <- cbind(x.lines, results, row.names = NULL) | 387 # new.rows <- cbind(x.lines, results, row.names = NULL) |
| 424 rt.high <- if (is.null(rt.high)) high else min(high, rt.high) | 434 rt.high <- if (is.null(rt.high)) high else min(high, rt.high) |
| 425 } | 435 } |
| 426 | 436 |
| 427 # List molecule IDs | 437 # List molecule IDs |
| 428 if ( ! is.null(molids.rt.tol) && is.data.frame(molids)) { | 438 if ( ! is.null(molids.rt.tol) && is.data.frame(molids)) { |
| 429 ids <- molids[(rt >= molids[[MSDB.TAG.colrt]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.colrt]] + molids.rt.tol), MSDB.TAG.molid] | 439 ids <- molids[(rt >= molids[[MSDB.TAG.COLRT]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.COLRT]] + molids.rt.tol), MSDB.TAG.MOLID] |
| 430 if (length(ids) == 0) | 440 if (length(ids) == 0) |
| 431 # No molecule ID match for this retention time | 441 # No molecule ID match for this retention time |
| 432 return(data.frame()) # return empty result set | 442 return(data.frame()) # return empty result set |
| 433 } else { | 443 } else { |
| 434 ids <- molids | 444 ids <- molids |
