Mercurial > repos > prog > lcmsmatching
comparison MsDb.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | e66bb061af06 |
children | fb9c0409d85c |
comparison
equal
deleted
inserted
replaced
1:253d531a0193 | 2:20d69a062da3 |
---|---|
133 | 133 |
134 .mz.tol.unit <<- mztolunit | 134 .mz.tol.unit <<- mztolunit |
135 }) | 135 }) |
136 | 136 |
137 #################### | 137 #################### |
| 138 # HANDLE COMPOUNDS # |
| 139 #################### |
| 140 |
| 141 # Returns TRUE if this database handles compounds directly (by IDs) |
| 142 MsDb$methods( handleCompounds = function() { |
| 143 return(TRUE) |
| 144 }) |
| 145 |
| 146 #################### |
138 # GET MOLECULE IDS # | 147 # GET MOLECULE IDS # |
139 #################### | 148 #################### |
140 | 149 |
141 # Returns an integer vector of all molecule IDs stored inside the database. | 150 # Returns an integer vector of all molecule IDs stored inside the database. |
142 MsDb$methods( getMoleculeIds = function() { | 151 MsDb$methods( getMoleculeIds = function(max.results = NA_integer_) { |
143 stop("Method getMoleculeIds() not implemented in concrete class.") | 152 stop("Method getMoleculeIds() not implemented in concrete class.") |
144 }) | 153 }) |
145 | 154 |
146 #################### | 155 #################### |
147 # GET NB MOLECULES # | 156 # GET NB MOLECULES # |
155 ################# | 164 ################# |
156 # GET MZ VALUES # | 165 # GET MZ VALUES # |
157 ################# | 166 ################# |
158 | 167 |
159 # Returns a numeric vector of all masses stored inside the database. | 168 # Returns a numeric vector of all masses stored inside the database. |
160 MsDb$methods( getMzValues = function(mode = NULL) { | 169 MsDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { |
161 stop("Method getMzValues() not implemented in concrete class.") | 170 stop("Method getMzValues() not implemented in concrete class.") |
162 }) | 171 }) |
163 | 172 |
164 ##################### | 173 ##################### |
165 # GET MOLECULE NAME # | 174 # GET MOLECULE NAME # |
216 | 225 |
217 ################## | 226 ################## |
218 # GET PEAK TABLE # | 227 # GET PEAK TABLE # |
219 ################## | 228 ################## |
220 | 229 |
221 MsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_){ | 230 MsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_) { |
222 stop("Method getPeakTable() not implemented in concrete class.") | 231 stop("Method getPeakTable() not implemented in concrete class.") |
223 }) | 232 }) |
224 | 233 |
225 ########## | 234 ########## |
226 # SEARCH # | 235 # SEARCH # |
233 # col The chromatographic column used. | 242 # col The chromatographic column used. |
234 # rt.tol Simple retention tolerance parameter: rtinf = rt - rt.tol and rtsup = rt + rt.tol | 243 # rt.tol Simple retention tolerance parameter: rtinf = rt - rt.tol and rtsup = rt + rt.tol |
235 # rt.tol.x Tolerance parameter for the equations : rtinf = rt - rt.tol.x - rt ^ rt.tol.y and rtsup = rt + rt.tol.x + rt ^ rt.tol.y | 244 # rt.tol.x Tolerance parameter for the equations : rtinf = rt - rt.tol.x - rt ^ rt.tol.y and rtsup = rt + rt.tol.x + rt ^ rt.tol.y |
236 # rt.tol.y Tolerance parameter. See rt.tol.x parameter. | 245 # rt.tol.y Tolerance parameter. See rt.tol.x parameter. |
237 # attribs Only search for peaks whose attribution is among this set of attributions. | 246 # attribs Only search for peaks whose attribution is among this set of attributions. |
238 # molids Only search for peaks whose molecule ID is among this vector of integer molecule IDs. Can also be a data frame with a retention time column x.colnames$rt and a molecule ID column MSDB.TAG.molid. | 247 # molids Only search for peaks whose molecule ID is among this vector of integer molecule IDs. Can also be a data frame with a retention time column x.colnames$rt and a molecule ID column MSDB.TAG.MOLID. |
239 # molids.rt.tol Retention time tolerance used when molids parameter is a data frame (rt, id) | 248 # molids.rt.tol Retention time tolerance used when molids parameter is a data frame (rt, id) |
240 # precursor.match Remove peaks whose molecule precursor peak has not also been matched. | 249 # precursor.match Remove peaks whose molecule precursor peak has not also been matched. |
241 # precursor.rt.tol | 250 # precursor.rt.tol |
242 # Returns a data frame, listing m/z values provided in input. Several matches can be found for an m/z value, in which case several lines (the same number as the number of matches found) with the same m/z value repeated will be inserted. The m/z values will be listed in the same order as in the input. The columns of the data.frame are: mz, rt (only if present in the input), id, mztheo, col, colrt, composition, attribution. | 251 # Returns a data frame, listing m/z values provided in input. Several matches can be found for an m/z value, in which case several lines (the same number as the number of matches found) with the same m/z value repeated will be inserted. The m/z values will be listed in the same order as in the input. The columns of the data.frame are: mz, rt (only if present in the input), id, mztheo, col, colrt, composition, attribution. |
243 MsDb$methods( searchForMzRtList = function(x = NULL, mode, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = NULL, molids.rt.tol = NULL, attribs = NULL, precursor.match = FALSE, precursor.rt.tol = NULL, same.cols = FALSE, same.rows = FALSE, peak.table = FALSE) { | 252 MsDb$methods( searchForMzRtList = function(x = NULL, mode, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = NULL, molids.rt.tol = NULL, attribs = NULL, precursor.match = FALSE, precursor.rt.tol = NULL, same.cols = FALSE, same.rows = FALSE, peak.table = FALSE) { |
259 precursors.ids <- precursors.df[, cols.to.keep, drop = FALSE] | 268 precursors.ids <- precursors.df[, cols.to.keep, drop = FALSE] |
260 precursors.ids <- precursors.ids[ ! is.na(precursors.ids[[MSDB.TAG.MOLID]]), , drop = FALSE] | 269 precursors.ids <- precursors.ids[ ! is.na(precursors.ids[[MSDB.TAG.MOLID]]), , drop = FALSE] |
261 precursors.ids <- precursors.ids[ ! duplicated(precursors.ids), ] | 270 precursors.ids <- precursors.ids[ ! duplicated(precursors.ids), ] |
262 | 271 |
263 # Get all matching peaks whose molecule is inside the previously obtained list of molecules | 272 # Get all matching peaks whose molecule is inside the previously obtained list of molecules |
264 .self$.doSearchForMzRtList(mode = mode, shift = shift, prec = prec, col = col, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = precursors.ids, molids.rt.tol = precursor.rt.tol, same.cols = same.cols, same.rows = same.rows, peak.table = peak.table) | 273 df <- .self$.doSearchForMzRtList(mode = mode, shift = shift, prec = prec, col = col, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = precursors.ids, molids.rt.tol = precursor.rt.tol, same.cols = same.cols, same.rows = same.rows, peak.table = peak.table) |
265 # TODO | 274 # TODO |
266 # | 275 # |
267 # peaks <- if (peak.table) results[['peaks']] else results | 276 # peaks <- if (peak.table) results[['peaks']] else results |
268 # | 277 # |
269 # # Merge results with the column/rt found for precursors. | 278 # # Merge results with the column/rt found for precursors. |
342 # z <- cbind(z, x[NULL, ! x.cols %in% colnames(z), drop = FALSE]) | 351 # z <- cbind(z, x[NULL, ! x.cols %in% colnames(z), drop = FALSE]) |
343 # } | 352 # } |
344 | 353 |
345 # Loop on all lines of input | 354 # Loop on all lines of input |
346 peaks <- NULL | 355 peaks <- NULL |
| 356 .self$.input.stream$reset() |
347 while (.self$.input.stream$hasNextValues()) { | 357 while (.self$.input.stream$hasNextValues()) { |
348 | 358 |
349 .self$.input.stream$nextValues() | 359 .self$.input.stream$nextValues() |
350 | 360 |
351 # Search for m/z | 361 # Search for m/z |
367 # y[r, colnames(x.lines)] <- x.lines | 377 # y[r, colnames(x.lines)] <- x.lines |
368 # } | 378 # } |
369 # else { | 379 # else { |
370 # if (same.rows) { | 380 # if (same.rows) { |
371 # y[r, colnames(x.lines)] <- x.lines | 381 # y[r, colnames(x.lines)] <- x.lines |
372 # ids <- results[[MSDB.TAG.molid]] | 382 # ids <- results[[MSDB.TAG.MOLID]] |
373 # ids <- ids[ ! duplicated(ids)] # Remove duplicated values | 383 # ids <- ids[ ! duplicated(ids)] # Remove duplicated values |
374 # y[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep) | 384 # y[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep) |
375 # } | 385 # } |
376 # if ( ! same.rows || peak.table) { | 386 # if ( ! same.rows || peak.table) { |
377 # new.rows <- cbind(x.lines, results, row.names = NULL) | 387 # new.rows <- cbind(x.lines, results, row.names = NULL) |
424 rt.high <- if (is.null(rt.high)) high else min(high, rt.high) | 434 rt.high <- if (is.null(rt.high)) high else min(high, rt.high) |
425 } | 435 } |
426 | 436 |
427 # List molecule IDs | 437 # List molecule IDs |
428 if ( ! is.null(molids.rt.tol) && is.data.frame(molids)) { | 438 if ( ! is.null(molids.rt.tol) && is.data.frame(molids)) { |
429 ids <- molids[(rt >= molids[[MSDB.TAG.colrt]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.colrt]] + molids.rt.tol), MSDB.TAG.molid] | 439 ids <- molids[(rt >= molids[[MSDB.TAG.COLRT]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.COLRT]] + molids.rt.tol), MSDB.TAG.MOLID] |
430 if (length(ids) == 0) | 440 if (length(ids) == 0) |
431 # No molecule ID match for this retention time | 441 # No molecule ID match for this retention time |
432 return(data.frame()) # return empty result set | 442 return(data.frame()) # return empty result set |
433 } else { | 443 } else { |
434 ids <- molids | 444 ids <- molids |