comparison MsXlsDb.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children 20d69a062da3
comparison
equal deleted inserted replaced
-1:000000000000 0:e66bb061af06
1 if ( ! exists('MsXlsDb')) { # Do not load again if already loaded
2
3 library('methods')
4 library('stringr')
5 source('msdb-common.R')
6 source('MsDb.R')
7 source('strhlp.R', chdir = TRUE)
8 source('dfhlp.R', chdir = TRUE)
9 source('search.R', chdir = TRUE)
10 source('excelhlp.R', chdir = TRUE)
11
12 #############
13 # CONSTANTS #
14 #############
15
16 .THIS.FILE.PATH <- getwd() # We suppose that the file has been sourced with the option chdir = TRUE
17
18 .XLS_PEAKS_ROW_OFFSET <- 8
19 .XLS_PEAKS_RT_COL_START <- 11
20 .XLS_MSPOS_TAB <- 'MS_POS'
21 .XLS_MSNEG_TAB <- 'MS_NEG'
22 .XLS_MZ_COL <- 1
23 .XLS_INTENSITY_COL <- 2
24 .XLS_RELATIVE_COL <- 3
25 .XLS_THEORETICAL_MZ_COL <- 5
26 .XLS_COMPOSITION_COL <- 8
27 .XLS_ATTRIBUTION_COL <- 9
28
29 #####################
30 # CLASS DECLARATION #
31 #####################
32
33 MsXlsDb <- setRefClass("MsXlsDb", contains = "MsDb", fields = list(.mz.index = "ANY", .name_index = "ANY", .db_dir = "character", .limit = "numeric", .files = "ANY", .cache_dir = "character", .db = "ANY"))
34
35 ###############
36 # CONSTRUCTOR #
37 ###############
38
39 MsXlsDb$methods( initialize = function(db_dir = NA_character_, limit = NA_integer_, cache_dir = NA_character_, cache = FALSE, ...) {
40
41 # Initialize members
42 .db_dir <<- if ( ! is.null(db_dir)) db_dir else NA_character_
43 .limit <<- if ( ! is.null(limit) && ! is.na(limit) && limit > 0) limit else NA_integer_
44 cache_dir <- if (cache && is.na(cache_dir) && ! is.na(db_dir)) file.path(db_dir, 'cache') else cache_dir
45 .cache_dir <<- if ( cache || ! is.null(cache_dir)) cache_dir else NA_character_
46 .files <<- NULL
47 .db <<- NULL
48 .mz.index <<- NULL
49 .name_index <<- NULL
50
51 callSuper(...)
52 })
53
54 ####################
55 # GET MOLECULE IDS #
56 ####################
57
58 MsXlsDb$methods( getMoleculeIds = function() {
59
60 # Init file list
61 .self$.init.file.list()
62
63 # Get IDs
64 mol.ids <- as.integer(which( ! is.na(.self$.files)))
65
66 return(mol.ids)
67 })
68
69 ####################
70 # GET NB MOLECULES #
71 ####################
72
73 # Returns a list of all molecule names
74 MsXlsDb$methods( getNbMolecules = function() {
75 return(length(.self$getMoleculeIds()))
76 })
77
78 #####################
79 # GET MOLECULE NAME #
80 #####################
81
82 MsXlsDb$methods( getMoleculeName = function(molid) {
83 return(vapply(molid, function(m) .self$.get.mol.name(m), FUN.VALUE = ""))
84 })
85
86 ###############################
87 # GET CHROMATOGRAPHIC COLUMNS #
88 ###############################
89
90 # Returns a list of all chromatographic columns used
91 MsXlsDb$methods( getChromCol = function(molid = NULL) {
92
93 cn <- character()
94
95 # If no molecule IDs provided, then look at all molecules
96 if (is.null(molid))
97 molid <- .self$getMoleculeIds()
98
99 # Loop on molecules
100 for (mid in molid) {
101
102 rt <- .self$getRetentionTimes(mid)
103
104 if ( ! is.null(rt))
105 cn <- c(cn, names(rt))
106 }
107
108 # Remove duplicates
109 cn <- cn[ ! duplicated(cn)]
110
111 # Make data frame
112 cn <- data.frame(id = cn, title = cn, stringsAsFactors = FALSE)
113
114 return(cn)
115 })
116
117 ################
118 # FIND BY NAME #
119 ################
120
121 MsXlsDb$methods( findByName = function(name) {
122
123 # NULL entry
124 if (is.null(name))
125 return(NA_integer_)
126
127 # Initialize output list
128 ids <- NULL
129
130 for (n in name) {
131
132 id <- NA_integer_
133
134 if ( ! is.na(n)) {
135
136 # Get index
137 index <- .self$.get.name.index()
138
139 # Search for name in index
140 i <- binary.search(toupper(n), index[['name']])
141
142 id <- if (is.na(i)) NA_integer_ else index[i, 'id']
143 }
144
145 ids <- c(ids, id)
146 }
147
148 return(ids)
149 })
150
151 #######################
152 # GET RETENTION TIMES #
153 #######################
154
155 MsXlsDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
156
157 if (is.null(molid) || is.na(molid))
158 return(NULL)
159
160 # Find it in memory
161 rt <- .self$.mem.get(molid, 'rt')
162
163 if (is.null(rt)) {
164
165 # Call observers
166 if ( ! is.null(.self$.observers))
167 for (obs in .self$.observers)
168 obs$progress(paste0("Loading retention times of file", .self$.get.file(molid), "."), level = 2)
169
170 rt <- NULL
171
172 # Load from cache file
173 cache_file <- NA_character_
174 if ( ! is.na(.self$.get.cache.dir())) {
175 cache_file <- file.path(.self$.get.cache.dir(), paste0('rt-', molid, '.bin'))
176 if (file.exists(cache_file))
177 load(file = cache_file) # load rt
178 }
179
180 if (is.null(rt)) {
181
182 # Get retention times of both positive and negative mode tabs
183 mspos_rt <- .self$.parse_retention_times(molid, .XLS_MSPOS_TAB)
184 msneg_rt <- .self$.parse_retention_times(molid, .XLS_MSNEG_TAB)
185
186 # Retention times stored in negative and positive modes
187 if ( ! is.null(mspos_rt) && ! is.null(msneg_rt)) {
188
189 # Warn observers when both retention time lists are not identical
190 if ( ! identical(mspos_rt, msneg_rt))
191 for (obs in .self$.observers)
192 obs$warning(paste0("Retention times in negative and positive modes are different in file ", .self$.get.file(molid), "."))
193
194 # Merge both lists
195 rt <- mspos_rt
196 for (c in names(msneg_rt))
197 if (c %in% names(rt)) {
198 v <- c(rt[[c]], msneg_rt[[c]])
199 rt[[c]] <- v[ ! duplicated(v)]
200 }
201 else
202 rt[[c]] <- msneg_rt[[c]]
203 }
204 else
205 # Set retention times
206 rt <- if (is.null(mspos_rt)) msneg_rt else mspos_rt
207
208 if (is.null(rt)) rt <- list()
209
210 # Write in cache
211 if ( ! is.na(cache_file)) {
212
213 # Call observers
214 if ( ! is.null(.self$.observers))
215 for (obs in .self$.observers)
216 obs$progress(paste0("Caching retention times of file ", .self$.get.file(molid), "."))
217
218 save(rt, file = cache_file)
219 }
220 }
221
222 # Store in memory
223 .self$.mem.set(rt, molid, 'rt')
224 }
225
226 # Select only one column if asked
227 if ( ! is.na(col)) rt <- rt[[col]]
228
229 return(rt)
230 })
231
232 #################
233 # GET NB PEAKS #
234 #################
235
236 MsXlsDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {
237
238 # Initialize parameters
239 if (is.null(molid) || (length(molid) == 1 && is.na(molid)))
240 molid <- .self$getMoleculeIds()
241 if (is.na(type))
242 type <- c(MSDB.TAG.POS, MSDB.TAG.NEG)
243
244 return(sum(vapply(molid, function(m) { if (is.na(m)) 0 else sum(vapply(type, function(t) { peaks <- .self$.get.peaks(m, t) ; if (is.null(peaks)) 0 else nrow(peaks) }, FUN.VALUE = 1)) }, FUN.VALUE = 1)))
245 })
246
247 ##################
248 # GET PEAK TABLE #
249 ##################
250
251 MsXlsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_) {
252
253 peaks <- NULL
254
255 # Set default molecule IDs
256 if (is.null(molid) || (length(molid) == 1 && is.na(molid)))
257 molid <- .self$getMoleculeIds()
258
259 # Set default modes
260 if (is.null(mode) || (length(mode) == 1 && is.na(mode)))
261 mode <- c(MSDB.TAG.POS, MSDB.TAG.NEG)
262
263 # Loop on all molecules
264 for (mol in molid) {
265
266 # Loop on all modes
267 for (mod in mode) {
268 m.peaks <- .self$.get.peaks(mol, mod)
269 if ( ! is.null(m.peaks) && nrow(m.peaks) > 0) {
270 m.peaks[[MSDB.TAG.MOLID]] <- mol
271 m.peaks[[MSDB.TAG.MODE]] <- mod
272 peaks <- if (is.null(peaks)) m.peaks else rbind(peaks, m.peaks)
273 peaks <- df.move.col.first(peaks, c(MSDB.TAG.MOLID, MSDB.TAG.MODE))
274 }
275 }
276 }
277
278 return(peaks)
279 })
280
281 #################
282 # GET MZ VALUES #
283 #################
284
285 # Returns a numeric vector of all masses stored inside the database.
286 MsXlsDb$methods( getMzValues = function(mode = NULL) {
287
288 mz <- numeric()
289
290 # Get all mz values of all molecules
291 for(molid in .self$getMoleculeIds())
292 for (m in (if (is.null(mode) || is.na(mode)) c(MSDB.TAG.POS, MSDB.TAG.NEG) else mode))
293 mz <- c(mz, .self$.get.peaks(molid, m)[[MSDB.TAG.MZTHEO]])
294
295 # Remove duplicated
296 mz <- mz[ ! duplicated(mz)]
297
298 return(mz)
299 })
300
301 #############
302 # GET PEAKS #
303 #############
304
305 MsXlsDb$methods( .get.peaks = function(molid, mode) {
306
307 tab <- if (mode == MSDB.TAG.POS) .XLS_MSPOS_TAB else .XLS_MSNEG_TAB
308
309 # Find it in memory
310 peak_df <- .self$.mem.get(molid, 'peaks', mode)
311
312 if (is.null(peak_df)) {
313 # Call observers
314 if ( ! is.null(.self$.observers))
315 for (obs in .self$.observers)
316 obs$progress(paste0("Loading peaks of tab ", tab, " of file ", .self$.get.file(molid), "."), level = 2)
317
318 peak_df <- NULL
319
320 # Load from cache file
321 cache_file <- NA_character_
322 if ( ! is.na(.self$.get.cache.dir())) {
323 cache_file <- file.path(.self$.get.cache.dir(), paste0('peaks-', molid, '-', tab, '.csv'))
324 if (file.exists(cache_file))
325 peak_df <- read.csv(cache_file, header = TRUE, stringsAsFactors = FALSE)
326 }
327
328 # Read from XLS file, if not in cache
329 if (is.null(peak_df)) {
330
331 # Load tab (peaks start at row 8)
332 if (.self$.tab.exists(.self$.get.file(molid), tab)) {
333
334 peaks <- read.excel(.self$.get.file(molid), tab, start.row = .XLS_PEAKS_ROW_OFFSET, stringsAsFactors = FALSE)
335 if ( ! is.null(peaks))
336 peaks <- peaks[ ! is.na(peaks[.XLS_MZ_COL]), , drop = FALSE] # Remove rows where m/z is not defined. TODO maybe call observer for notify a line with non NA values but without m/z value.
337
338 # Instantiate peaks
339 if ( ! is.null(peaks) && nrow(peaks) > 0) {
340 peak_df <- peaks[1:length(peaks[[.XLS_MZ_COL]]), c(.XLS_MZ_COL, .XLS_THEORETICAL_MZ_COL, .XLS_INTENSITY_COL, .XLS_RELATIVE_COL, .XLS_COMPOSITION_COL, .XLS_ATTRIBUTION_COL), drop = FALSE]
341 colnames(peak_df) <- c(MSDB.TAG.MZEXP, MSDB.TAG.MZTHEO, MSDB.TAG.INT, MSDB.TAG.REL, MSDB.TAG.COMP, MSDB.TAG.ATTR)
342 }
343
344 # Set default data frame (important for cache file writing, because we need a correct header to be written in order for loading)
345 else {
346 peak_df <- data.frame(stringsAsFactors = FALSE)
347 peak_df[MSDB.TAG.MZEXP] <- numeric()
348 peak_df[MSDB.TAG.MZTHEO] <- numeric()
349 peak_df[MSDB.TAG.INT] <- numeric()
350 peak_df[MSDB.TAG.REL] <- numeric()
351 peak_df[MSDB.TAG.COMP] <- character()
352 peak_df[MSDB.TAG.ATTR] <- character()
353 }
354
355 if (is.null(peak_df)) peak_df <- data.frame()
356
357 # Write in cache
358 if ( ! is.na(cache_file)) {
359
360 # Call observers
361 if ( ! is.null(.self$.observers))
362 for (obs in .self$.observers)
363 obs$progress(paste0("Caching peaks of tab ", tab, " of file ", .self$.get.file(molid), "."))
364
365 write.csv(peak_df, cache_file, row.names = FALSE)
366 }
367 }
368 }
369
370 # Store in memory
371 .self$.mem.set(peak_df, molid, 'peaks', mode)
372 }
373
374 return(peak_df)
375 })
376
377 ##############################
378 # GET FULL MS PEAK M/Z INDEX #
379 ##############################
380
381 # Get mz index for full ions, creating it if necessary.
382 MsXlsDb$methods( .get.mz.index = function(mode) {
383
384 if (is.null(.self$.mz.index[[mode]])) {
385
386 # Initialize data frame
387 mzi <- data.frame(stringsAsFactors = FALSE)
388 mzi[MSDB.TAG.MZTHEO] <- numeric()
389 mzi[MSDB.TAG.MOLID] <- character()
390 mzi[MSDB.TAG.COMP] <- character()
391 mzi[MSDB.TAG.ATTR] <- character()
392
393 # Loop on all molecules
394 for(molid in .self$getMoleculeIds()) {
395
396 # Get all peaks of this molecule
397 peaks <- .self$.get.peaks(molid, mode)
398
399 # Remove rows whose mz is NA.
400 peaks <- peaks[ ! is.na(peaks[[MSDB.TAG.MZTHEO]]), ]
401
402 if (nrow(peaks) > 0) {
403
404 # Add id column
405 peaks[MSDB.TAG.MOLID] <- molid
406
407 # Append peaks
408 r <- nrow(mzi) + 1
409 rows <- r:(r+nrow(peaks)-1)
410 mzi[rows, ] <- peaks[colnames(mzi)]
411 }
412 }
413
414 # Sort by M/Z
415 sorted_indices <- order(mzi[[MSDB.TAG.MZTHEO]])
416
417 # Group in a data frame
418 .self$.mz.index[[mode]] <- mzi[sorted_indices, ]
419 }
420
421 return(.self$.mz.index[[mode]])
422 })
423
424 ######################
425 # SEARCH FOR MZ & RT #
426 ######################
427
428 MsXlsDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {
429
430 # Search for m/z
431 results <- .self$.do.search.for.mz(mode, mz.low, mz.high)
432
433 # Filter on attributions
434 if ( ! is.null(attribs)) {
435 results <- results[results[[MSDB.TAG.ATTR]] %in% attribs, ]
436 }
437
438 # Filer on molecule IDs
439 if ( ! is.null(molids)) {
440 results <- results[results[[MSDB.TAG.MOLID]] %in% molids, ]
441 }
442
443 # Use retention time
444 if ( ! is.null(col) && ! is.null(rt.low) && ! is.null(rt.high)) {
445
446 # Get list of unique IDs
447 ids <- results[[MSDB.TAG.MOLID]]
448 ids <- ids[ ! duplicated(ids)]
449 rt <- .self$.search.for.rt(mols = ids, rt.low = rt.low, rt.high = rt.high, col = col)
450 results <- results[results[[MSDB.TAG.MOLID]] %in% rt[[MSDB.TAG.MOLID]], ]
451 results <- merge(results, rt)
452 }
453
454 return(results)
455 })
456
457 ##############################
458 # SEARCH FOR M/Z IN MS PEAKS #
459 ##############################
460
461 MsXlsDb$methods( .do.search.for.mz = function(mode, mz.low, mz.high) {
462
463 results <- data.frame(stringsAsFactors = FALSE)
464 results[MSDB.TAG.MZTHEO] <- numeric()
465 results[MSDB.TAG.MOLID] <- character()
466 results[MSDB.TAG.MOLNAMES] <- character()
467 results[MSDB.TAG.COMP] <- character()
468 results[MSDB.TAG.ATTR] <- character()
469
470 # Create m/z index
471 mz_index <- .self$.get.mz.index(mode)
472
473 # Find molecules
474 low_bound <- binary.search(mz.low, mz_index[[MSDB.TAG.MZTHEO]], lower = FALSE)
475 high_bound <- binary.search(mz.high, mz_index[[MSDB.TAG.MZTHEO]], lower = TRUE)
476
477 # Get results
478 if ( ! is.na(high_bound) && ! is.na(low_bound) && low_bound <= high_bound)
479 results <- mz_index[low_bound:high_bound,]
480
481 # Remove row names
482 rownames(results) <- NULL
483
484 return(results)
485 })
486
487 ################
488 # GET MOL NAME #
489 ################
490
491 MsXlsDb$methods( .get.mol.name = function(molid) {
492
493 if (is.na(molid))
494 return(NA_character_)
495
496 # Find it in memory
497 name <- .self$.mem.get(molid, 'name')
498
499 if (is.null(name)) {
500
501 # Load molecule
502 mol <- .self$.load.molecule(molid)
503
504 # Look for name in tabs
505 for (tab in c(.XLS_MSPOS_TAB, .XLS_MSNEG_TAB)) {
506 hdr <- mol[[tab]][['header']]
507 if ( ! is.null(hdr))
508 name <- hdr[[1]]
509 if ( ! is.null(name) && ! is.na(name)) break
510 }
511
512 # Store in memory
513 if (is.null(name)) name <- NA_character_
514 .self$.mem.set(name, molid, 'name')
515 }
516
517 return(name)
518 })
519
520 ##################
521 # GET NAME INDEX #
522 ##################
523
524 # Get name index.
525 MsXlsDb$methods( .get.name.index = function() {
526
527 if (is.null(.self$.name_index)) {
528
529 # Get names
530 names <- vapply(.self$getMoleculeIds(), function(id) toupper(.self$getMoleculeName(id)), FUN.VALUE = "")
531
532 # Get molecule IDs
533 id <- .self$getMoleculeIds()
534
535 # Sort by names
536 sorted_indices <- order(names)
537
538 # Group in a data frame
539 .self$.name_index <- data.frame(name = rbind(names)[, sorted_indices],
540 id = rbind(id)[, sorted_indices],
541 stringsAsFactors = FALSE)
542 }
543
544 return(.self$.name_index)
545 })
546
547 ##################
548 # INIT FILE LIST #
549 ##################
550
551 MsXlsDb$methods( .init.file.list = function() {
552
553 if (is.null(.self$.files)) {
554
555 # List all files
556 files <- Sys.glob(file.path(.self$.db_dir, '*.xls'))
557
558 # Limit the size of the database
559 if ( ! is.na(.self$.limit))
560 files <- head(files, .self$.limit)
561
562 # Get IDs
563 ids <- vapply(files, function(f) .extract_molecule_id_from_filename(f), FUN.VALUE = 1)
564
565 # Use ids as indices to build the vector of files
566 .files <<- rep(NA_character_, max(ids))
567 .files[ids] <<- files
568 }
569 })
570
571 #################
572 # GET CACHE DIR #
573 #################
574
575 MsXlsDb$methods( .get.cache.dir = function() {
576
577 if ( ! is.na(.self$.cache_dir) && ! file.exists(.self$.cache_dir))
578 dir.create(.self$.cache_dir)
579
580 return(.self$.cache_dir)
581 })
582
583 #################
584 # LOAD MOLECULE #
585 #################
586
587 MsXlsDb$methods( .load.molecule = function(molid) {
588
589 # Init local variables
590 mol <- NULL
591 cache_file <- NA_character_
592 excel_file <- .self$.get.file(molid)
593
594 # Call observers
595 if ( ! is.null(.self$.observers))
596 for (obs in .self$.observers)
597 obs$progress(paste0("Loading molecule ", molid, "."), level = 2)
598
599 # Load from cache
600 if ( ! is.na(.self$.get.cache.dir())) {
601 cache_file <- file.path(.self$.get.cache.dir(), paste0(molid, '.bin'))
602 if (file.exists(cache_file))
603 load(file = cache_file) # load mol variable
604 }
605
606 # Load from Excel file & write to cache
607 if (is.null(mol) && ! is.na(excel_file)) {
608
609 source(file.path(.THIS.FILE.PATH, 'excelhlp.R'), chdir = TRUE) # we use the path set when sourcing the file, since when calling this method, the current path could be different.
610
611 # Load from Excel file
612 for(tab in c(.XLS_MSPOS_TAB, .XLS_MSNEG_TAB)) {
613
614 # Test that tab exists
615 if (.self$.tab.exists(excel_file, tab)) {
616 header <- read.excel(excel_file, tab, start.row = 1, end.row = .XLS_PEAKS_ROW_OFFSET - 1, header = FALSE, stringsAsFactors = FALSE, trim.values = TRUE, col.index = c(1))[[1]]
617 peaks <- read.excel(excel_file, tab, start.row = .XLS_PEAKS_ROW_OFFSET)
618 mol[[tab]] <- list(header = header, peaks = peaks)
619 }
620
621 # Missing tab
622 else {
623 for (obs in .self$.observers)
624 obs$warning(paste0("No excel tab ", tab, " in file ", excel_file, "."))
625 }
626 }
627
628 # Write in cache
629 if ( ! is.na(cache_file)) {
630
631 # Call observers
632 if ( ! is.null(.self$.observers))
633 for (obs in .self$.observers)
634 obs$progress(paste0("Caching file ", excel_file, "."))
635
636 save(mol, file = cache_file)
637 }
638 }
639
640 return(mol)
641 })
642
643 ########################
644 # DOES EXCEL TAB EXIST #
645 ########################
646
647 MsXlsDb$methods( .tab.exists = function(file, tab) {
648
649 source(file.path(.THIS.FILE.PATH, 'excelhlp.R'), chdir = TRUE) # we use the path set when sourcing the file, since when calling this method, the current path could be different.
650
651 if ( ! tab.exists(file, tab)) {
652
653 # Warn observers
654 for (obs in .self$.observers)
655 obs$warning(paste0("No excel tab ", tab, " in file ", file, "."))
656
657 return(FALSE)
658 }
659
660 return(TRUE)
661 })
662
663 #########################
664 # PARSE RETENTION TIMES #
665 #########################
666
667 MsXlsDb$methods( .parse_retention_times = function(id, tab) {
668
669 rt <- NULL
670
671 if (.self$.tab.exists(.self$.get.file(id), tab)) {
672 peaks <- read.excel(.self$.get.file(id), tab, start.row = .XLS_PEAKS_ROW_OFFSET)
673
674 # Get retention times
675 if ( ! is.null(peaks) && length(peaks) > 0 && ! is.na(peaks[[1]][[1]]))
676 for (c in .XLS_PEAKS_RT_COL_START:length(names(peaks)))
677 if ( ! is.na(peaks[[c]][[1]])) {
678
679 # Check retention times of all different m/z peaks for the same column.
680 .self$.check_retention_times(id, tab, names(peaks)[[c]], peaks[[c]], sum( ! is.na(peaks[[1]])))
681
682 # Add retention time
683 # TODO The column names are transformed through the read.xlsx call. For instance:
684 # HPLC (C18) 25mn QTOF (Bis) --> HPLC..C18..25mn.QTOF..Bis.
685 # ZICpHILIC 150*5*2.1 Shimadzu-Exactive-42mn --> ZICpHILIC.150.5.2.1.Shimadzu.Exactive.42mn
686 # This can be an issue, since we loose the formating.
687 col_id <- names(peaks)[[c]]
688 time <- peaks[[c]][[1]] * 60 # Read and convert retention time in seconds.
689 if (is.null(rt) || ! col_id %in% names(rt))
690 rt[[col_id]] <- list(time)
691 else
692 rt[[col_id]] <- c(rt[[col_id]], time)
693 }
694 }
695
696 return(rt)
697 })
698
699 #########################
700 # CHECK RETENTION TIMES #
701 #########################
702
703 MsXlsDb$methods( .check_retention_times = function(id, tab_name, column_name, rt, n) {
704
705 if (n >= 1 && ! is.null(.self$.observers) && length(.self$.observers) > 0)
706
707 # Check column only if there is at least one value inside
708 if (sum( ! is.na(rt)) > 0)
709
710 # Loop on all values
711 for(i in 1:n) {
712
713 # Check that it's defined
714 if (i > 1 && is.na(rt[[i]]))
715 for (obs in .self$.observers)
716 obs$warning(paste0("Retention times undefined for column ", column_name, " at row ", i + .XLS_PEAKS_ROW_OFFSET, " of tab ", tab_name, " in file ", .self$.get.file(id), "."))
717
718 else if (i > 1)
719 # Check the value (it must be constant)
720 if (rt[[i-1]] != rt[[i]])
721 for (obs in .self$.observers)
722 obs$error(paste0("Retention times not constant for column ", column_name, " between row ", i - 1 + .XLS_PEAKS_ROW_OFFSET, " and row ", i + .XLS_PEAKS_ROW_OFFSET, "o tab", tab_name, "in file", .self$.get.file(id)))
723 }
724 })
725
726 ####################
727 # GET FILE FROM ID #
728 ####################
729
730 MsXlsDb$methods( .get.file = function(id) {
731
732 # List files
733 .self$.init.file.list()
734
735 return( if (id > 0 && id <= length(.self$.files)) .self$.files[id] else NA_character_)
736 })
737
738 ###########
739 # MEM GET #
740 ###########
741
742 # Get database data from memory
743 MsXlsDb$methods( .mem.get = function(molid, field, second.field = NA_character_) {
744
745 data <- .self$.db[[as.character(molid)]][[field]]
746
747 if ( ! is.na(second.field))
748 data <- data[[second.field]]
749
750 return(data)
751 })
752
753 ###########
754 # MEM SET #
755 ###########
756
757 # Set database data into memory
758 MsXlsDb$methods( .mem.set = function(data, molid, field, second.field = NA_character_) {
759
760 id <- as.character(molid)
761
762 # Create db
763 if (is.null(.self$.db))
764 .db <<- list()
765
766 # Create first level
767 if (is.null(.self$.db[[id]]))
768 .self$.db[[id]] <- list()
769
770 # Create second level
771 if ( ! is.na(second.field) && is.null(.self$.db[[id]][[field]]))
772 .self$.db[[id]][[field]] <- list()
773
774 # Store data
775 if (is.na(second.field)) {
776 .self$.db[[id]][[field]] <- data
777 } else {
778 .self$.db[[id]][[field]][[second.field]] <- data
779 }
780 })
781
782 #################
783 # SEARCH FOR RT #
784 #################
785
786 # Find molecules matching a certain retention time.
787 # col A list of chromatographic columns to use.
788 # rt.low The lower bound of the rt value.
789 # rt.high The higher bound of the rt value.
790 # mols A list of molecule IDs to process. If unset, then take all molecules.
791 # Return a data frame with the following columns: id, col, colrt.
792 MsXlsDb$methods( .search.for.rt = function(col, rt.low, rt.high, mols = NULL) {
793
794 # Use all molecules if no list is provided
795 if (is.null(mols))
796 mols <- .self$getMoleculeIds()
797
798 results <- data.frame(id = integer(), col = character(), colrt = double(), stringsAsFactors = FALSE)
799
800 # Loop on all molecules
801 for (molid in mols) {
802 no.col <- TRUE
803 for (c in col) {
804 molrts <- .self$getRetentionTimes(molid, c)
805 if ( ! is.null(molrts)) {
806 no.col <- FALSE
807 for (molrt in molrts) {
808 if (molrt >= rt.low && molrt <= rt.high) {
809 r <- nrow(results) + 1
810 results[r, ] <- c(id = molid, col = c, colrt = molrt)
811 }
812 }
813 }
814 }
815
816 if (no.col) {
817 r <- nrow(results) + 1
818 results[r, c(MSDB.TAG.MOLID)] <- c(id = molid)
819 }
820 }
821
822 return(results)
823 })
824
825 ############################
826 # EXTRACT ID FROM FILENAME #
827 ############################
828
829 .extract_molecule_id_from_filename <- function(filename) {
830
831 id <- NA_integer_
832
833 if ( ! is.na(filename)) {
834 g <- str_match(filename, "N(\\d+)[._-]")
835 if ( ! is.na(g[1,1]))
836 id <- as.numeric(g[1,2])
837 }
838
839 return(id)
840 }
841
842 } # end of load safe guard