comparison MsDb.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents e66bb061af06
children fb9c0409d85c
comparison
equal deleted inserted replaced
1:253d531a0193 2:20d69a062da3
133 133
134 .mz.tol.unit <<- mztolunit 134 .mz.tol.unit <<- mztolunit
135 }) 135 })
136 136
137 #################### 137 ####################
138 # HANDLE COMPOUNDS #
139 ####################
140
141 # Returns TRUE if this database handles compounds directly (by IDs)
142 MsDb$methods( handleCompounds = function() {
143 return(TRUE)
144 })
145
146 ####################
138 # GET MOLECULE IDS # 147 # GET MOLECULE IDS #
139 #################### 148 ####################
140 149
141 # Returns an integer vector of all molecule IDs stored inside the database. 150 # Returns an integer vector of all molecule IDs stored inside the database.
142 MsDb$methods( getMoleculeIds = function() { 151 MsDb$methods( getMoleculeIds = function(max.results = NA_integer_) {
143 stop("Method getMoleculeIds() not implemented in concrete class.") 152 stop("Method getMoleculeIds() not implemented in concrete class.")
144 }) 153 })
145 154
146 #################### 155 ####################
147 # GET NB MOLECULES # 156 # GET NB MOLECULES #
155 ################# 164 #################
156 # GET MZ VALUES # 165 # GET MZ VALUES #
157 ################# 166 #################
158 167
159 # Returns a numeric vector of all masses stored inside the database. 168 # Returns a numeric vector of all masses stored inside the database.
160 MsDb$methods( getMzValues = function(mode = NULL) { 169 MsDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
161 stop("Method getMzValues() not implemented in concrete class.") 170 stop("Method getMzValues() not implemented in concrete class.")
162 }) 171 })
163 172
164 ##################### 173 #####################
165 # GET MOLECULE NAME # 174 # GET MOLECULE NAME #
216 225
217 ################## 226 ##################
218 # GET PEAK TABLE # 227 # GET PEAK TABLE #
219 ################## 228 ##################
220 229
221 MsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_){ 230 MsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_) {
222 stop("Method getPeakTable() not implemented in concrete class.") 231 stop("Method getPeakTable() not implemented in concrete class.")
223 }) 232 })
224 233
225 ########## 234 ##########
226 # SEARCH # 235 # SEARCH #
233 # col The chromatographic column used. 242 # col The chromatographic column used.
234 # rt.tol Simple retention time tolerance parameter: rtinf = rt - rt.tol and rtsup = rt + rt.tol 243 # rt.tol Simple retention time tolerance parameter: rtinf = rt - rt.tol and rtsup = rt + rt.tol
235 # rt.tol.x Tolerance parameter for the equations: rtinf = rt - rt.tol.x - rt ^ rt.tol.y and rtsup = rt + rt.tol.x + rt ^ rt.tol.y 244 # rt.tol.x Tolerance parameter for the equations: rtinf = rt - rt.tol.x - rt ^ rt.tol.y and rtsup = rt + rt.tol.x + rt ^ rt.tol.y
236 # rt.tol.y Tolerance parameter. See rt.tol.x parameter. 245 # rt.tol.y Tolerance parameter. See rt.tol.x parameter.
237 # attribs Only search for peaks whose attribution is among this set of attributions. 246 # attribs Only search for peaks whose attribution is among this set of attributions.
238 # molids Only search for peaks whose molecule ID is among this vector of integer molecule IDs. Can also be a data frame with a retention time column x.colnames$rt and a molecule ID column MSDB.TAG.molid. 247 # molids Only search for peaks whose molecule ID is among this vector of integer molecule IDs. Can also be a data frame with a retention time column x.colnames$rt and a molecule ID column MSDB.TAG.MOLID.
239 # molids.rt.tol Retention time tolerance used when molids parameter is a data frame (rt, id) 248 # molids.rt.tol Retention time tolerance used when molids parameter is a data frame (rt, id)
240 # precursor.match Remove peaks whose molecule precursor peak has not also been matched. 249 # precursor.match Remove peaks whose molecule precursor peak has not also been matched.
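241 # precursor.rt.tol Retention time tolerance used when searching for peaks of the molecules whose precursor peak has been matched (passed as molids.rt.tol for that search). 250 # precursor.rt.tol Retention time tolerance used when searching for peaks of the molecules whose precursor peak has been matched (passed as molids.rt.tol for that search).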
242 # Returns a data frame, listing m/z values provided in input. Several matches can be found for an m/z value, in which case several lines (the same number as the number of matches found) with the same m/z value repeated will be inserted. The m/z values will be listed in the same order as in the input. The columns of the data.frame are: mz, rt (only if present in the input), id, mztheo, col, colrt, composition, attribution. 251 # Returns a data frame, listing m/z values provided in input. Several matches can be found for an m/z value, in which case several lines (the same number as the number of matches found) with the same m/z value repeated will be inserted. The m/z values will be listed in the same order as in the input. The columns of the data.frame are: mz, rt (only if present in the input), id, mztheo, col, colrt, composition, attribution.
243 MsDb$methods( searchForMzRtList = function(x = NULL, mode, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = NULL, molids.rt.tol = NULL, attribs = NULL, precursor.match = FALSE, precursor.rt.tol = NULL, same.cols = FALSE, same.rows = FALSE, peak.table = FALSE) { 252 MsDb$methods( searchForMzRtList = function(x = NULL, mode, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = NULL, molids.rt.tol = NULL, attribs = NULL, precursor.match = FALSE, precursor.rt.tol = NULL, same.cols = FALSE, same.rows = FALSE, peak.table = FALSE) {
259 precursors.ids <- precursors.df[, cols.to.keep, drop = FALSE] 268 precursors.ids <- precursors.df[, cols.to.keep, drop = FALSE]
260 precursors.ids <- precursors.ids[ ! is.na(precursors.ids[[MSDB.TAG.MOLID]]), , drop = FALSE] 269 precursors.ids <- precursors.ids[ ! is.na(precursors.ids[[MSDB.TAG.MOLID]]), , drop = FALSE]
261 precursors.ids <- precursors.ids[ ! duplicated(precursors.ids), ] 270 precursors.ids <- precursors.ids[ ! duplicated(precursors.ids), ]
262 271
263 # Get all matching peaks whose molecule is inside the previously obtained list of molecules 272 # Get all matching peaks whose molecule is inside the previously obtained list of molecules
264 .self$.doSearchForMzRtList(mode = mode, shift = shift, prec = prec, col = col, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = precursors.ids, molids.rt.tol = precursor.rt.tol, same.cols = same.cols, same.rows = same.rows, peak.table = peak.table) 273 df <- .self$.doSearchForMzRtList(mode = mode, shift = shift, prec = prec, col = col, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, molids = precursors.ids, molids.rt.tol = precursor.rt.tol, same.cols = same.cols, same.rows = same.rows, peak.table = peak.table)
265 # TODO 274 # TODO
266 # 275 #
267 # peaks <- if (peak.table) results[['peaks']] else results 276 # peaks <- if (peak.table) results[['peaks']] else results
268 # 277 #
269 # # Merge results with the column/rt found for precursors. 278 # # Merge results with the column/rt found for precursors.
342 # z <- cbind(z, x[NULL, ! x.cols %in% colnames(z), drop = FALSE]) 351 # z <- cbind(z, x[NULL, ! x.cols %in% colnames(z), drop = FALSE])
343 # } 352 # }
344 353
345 # Loop on all lines of input 354 # Loop on all lines of input
346 peaks <- NULL 355 peaks <- NULL
356 .self$.input.stream$reset()
347 while (.self$.input.stream$hasNextValues()) { 357 while (.self$.input.stream$hasNextValues()) {
348 358
349 .self$.input.stream$nextValues() 359 .self$.input.stream$nextValues()
350 360
351 # Search for m/z 361 # Search for m/z
367 # y[r, colnames(x.lines)] <- x.lines 377 # y[r, colnames(x.lines)] <- x.lines
368 # } 378 # }
369 # else { 379 # else {
370 # if (same.rows) { 380 # if (same.rows) {
371 # y[r, colnames(x.lines)] <- x.lines 381 # y[r, colnames(x.lines)] <- x.lines
372 # ids <- results[[MSDB.TAG.molid]] 382 # ids <- results[[MSDB.TAG.MOLID]]
373 # ids <- ids[ ! duplicated(ids)] # Remove duplicated values 383 # ids <- ids[ ! duplicated(ids)] # Remove duplicated values
374 # y[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep) 384 # y[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep)
375 # } 385 # }
376 # if ( ! same.rows || peak.table) { 386 # if ( ! same.rows || peak.table) {
377 # new.rows <- cbind(x.lines, results, row.names = NULL) 387 # new.rows <- cbind(x.lines, results, row.names = NULL)
424 rt.high <- if (is.null(rt.high)) high else min(high, rt.high) 434 rt.high <- if (is.null(rt.high)) high else min(high, rt.high)
425 } 435 }
426 436
427 # List molecule IDs 437 # List molecule IDs
428 if ( ! is.null(molids.rt.tol) && is.data.frame(molids)) { 438 if ( ! is.null(molids.rt.tol) && is.data.frame(molids)) {
429 ids <- molids[(rt >= molids[[MSDB.TAG.colrt]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.colrt]] + molids.rt.tol), MSDB.TAG.molid] 439 ids <- molids[(rt >= molids[[MSDB.TAG.COLRT]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.COLRT]] + molids.rt.tol), MSDB.TAG.MOLID]
430 if (length(ids) == 0) 440 if (length(ids) == 0)
431 # No molecule ID match for this retention time 441 # No molecule ID match for this retention time
432 return(data.frame()) # return empty result set 442 return(data.frame()) # return empty result set
433 } else { 443 } else {
434 ids <- molids 444 ids <- molids