comparison MsXlsDb.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 if ( ! exists('MsXlsDb')) { # Do not load again if already loaded
2
3 library('methods')
4 library('stringr')
5 source('msdb-common.R')
6 source('MsDb.R')
7 source('strhlp.R', chdir = TRUE)
8 source('dfhlp.R', chdir = TRUE)
9 source('search.R', chdir = TRUE)
10 source('excelhlp.R', chdir = TRUE)
11
12 #############
13 # CONSTANTS #
14 #############
15
16 .THIS.FILE.PATH <- getwd() # We suppose that the file has been sourced with the option chdir = TRUE
17
18 .XLS_PEAKS_ROW_OFFSET <- 8
19 .XLS_PEAKS_RT_COL_START <- 11
20 .XLS_MSPOS_TAB <- 'MS_POS'
21 .XLS_MSNEG_TAB <- 'MS_NEG'
22 .XLS_MZ_COL <- 1
23 .XLS_INTENSITY_COL <- 2
24 .XLS_RELATIVE_COL <- 3
25 .XLS_THEORETICAL_MZ_COL <- 5
26 .XLS_COMPOSITION_COL <- 8
27 .XLS_ATTRIBUTION_COL <- 9
28
29 #####################
30 # CLASS DECLARATION #
31 #####################
32
33 MsXlsDb <- setRefClass("MsXlsDb", contains = "MsDb", fields = list(.mz.index = "ANY", .name_index = "ANY", .db_dir = "character", .limit = "numeric", .files = "ANY", .cache_dir = "character", .db = "ANY"))
34
35 ###############
36 # CONSTRUCTOR #
37 ###############
38
39 MsXlsDb$methods( initialize = function(db_dir = NA_character_, limit = NA_integer_, cache_dir = NA_character_, cache = FALSE, ...) {
40
41 # Initialize members
42 # TODO check that db_dir is not null neither na, and tests that it exists and is a directory.
43 .db_dir <<- if ( ! is.null(db_dir)) db_dir else NA_character_
44 .limit <<- if ( ! is.null(limit) && ! is.na(limit) && limit > 0) limit else NA_integer_
45 cache_dir <- if (cache && is.na(cache_dir) && ! is.na(db_dir)) file.path(db_dir, 'cache') else cache_dir
46 .cache_dir <<- if ( cache || ! is.null(cache_dir)) cache_dir else NA_character_
47 .files <<- NULL
48 .db <<- NULL
49 .mz.index <<- NULL
50 .name_index <<- NULL
51
52 callSuper(...)
53 })
54
55 ####################
56 # GET MOLECULE IDS #
57 ####################
58
59 MsXlsDb$methods( getMoleculeIds = function(max.results = NA_integer_) {
60
61 # Init file list
62 .self$.init.file.list()
63
64 # Get IDs
65 mol.ids <- as.integer(which( ! is.na(.self$.files)))
66
67 # Cut
68 if ( ! is.na(max.results) && length(mol.ids) > max.results)
69 mol.ids <- mol.ids[max.results, ]
70
71 return(mol.ids)
72 })
73
74 ####################
75 # GET NB MOLECULES #
76 ####################
77
78 # Returns a list of all molecule names
79 MsXlsDb$methods( getNbMolecules = function() {
80 return(length(.self$getMoleculeIds()))
81 })
82
83 #####################
84 # GET MOLECULE NAME #
85 #####################
86
87 MsXlsDb$methods( getMoleculeName = function(molid) {
88 return(vapply(molid, function(m) .self$.get.mol.name(m), FUN.VALUE = ""))
89 })
90
91 ###############################
92 # GET CHROMATOGRAPHIC COLUMNS #
93 ###############################
94
95 # Returns a list of all chromatographic columns used
96 MsXlsDb$methods( getChromCol = function(molid = NULL) {
97
98 cn <- character()
99
100 # If no molecule IDs provided, then look at all molecules
101 if (is.null(molid))
102 molid <- .self$getMoleculeIds()
103
104 # Loop on molecules
105 for (mid in molid) {
106
107 rt <- .self$getRetentionTimes(mid)
108
109 if ( ! is.null(rt))
110 cn <- c(cn, names(rt))
111 }
112
113 # Remove duplicates
114 cn <- cn[ ! duplicated(cn)]
115
116 # Make data frame
117 cn <- data.frame(id = cn, title = cn, stringsAsFactors = FALSE)
118
119 return(cn)
120 })
121
122 ################
123 # FIND BY NAME #
124 ################
125
126 MsXlsDb$methods( findByName = function(name) {
127
128 # NULL entry
129 if (is.null(name))
130 return(NA_integer_)
131
132 # Initialize output list
133 ids <- NULL
134
135 for (n in name) {
136
137 id <- NA_integer_
138
139 if ( ! is.na(n)) {
140
141 # Get index
142 index <- .self$.get.name.index()
143
144 # Search for name in index
145 i <- binary.search(toupper(n), index[['name']])
146
147 id <- if (is.na(i)) NA_integer_ else index[i, 'id']
148 }
149
150 ids <- c(ids, id)
151 }
152
153 return(ids)
154 })
155
156 #######################
157 # GET RETENTION TIMES #
158 #######################
159
160 MsXlsDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
161
162 if (is.null(molid) || is.na(molid))
163 return(NULL)
164
165 # Find it in memory
166 rt <- .self$.mem.get(molid, 'rt')
167
168 if (is.null(rt)) {
169
170 # Call observers
171 if ( ! is.null(.self$.observers))
172 for (obs in .self$.observers)
173 obs$progress(paste0("Loading retention times of file", .self$.get.file(molid), "."), level = 2)
174
175 rt <- NULL
176
177 # Load from cache file
178 cache_file <- NA_character_
179 if ( ! is.na(.self$.get.cache.dir())) {
180 cache_file <- file.path(.self$.get.cache.dir(), paste0('rt-', molid, '.bin'))
181 if (file.exists(cache_file))
182 load(file = cache_file) # load rt
183 }
184
185 if (is.null(rt)) {
186
187 # Get retention times of both positive and negative mode tabs
188 mspos_rt <- .self$.parse_retention_times(molid, .XLS_MSPOS_TAB)
189 msneg_rt <- .self$.parse_retention_times(molid, .XLS_MSNEG_TAB)
190
191 # Retention times stored in negative and positive modes
192 if ( ! is.null(mspos_rt) && ! is.null(msneg_rt)) {
193
194 # Warn observers when both retention time lists are not identical
195 if ( ! identical(mspos_rt, msneg_rt))
196 for (obs in .self$.observers)
197 obs$warning(paste0("Retention times in negative and positive modes are different in file ", .self$.get.file(molid), "."))
198
199 # Merge both lists
200 rt <- mspos_rt
201 for (c in names(msneg_rt))
202 if (c %in% names(rt)) {
203 v <- c(rt[[c]], msneg_rt[[c]])
204 rt[[c]] <- v[ ! duplicated(v)]
205 }
206 else
207 rt[[c]] <- msneg_rt[[c]]
208 }
209 else
210 # Set retention times
211 rt <- if (is.null(mspos_rt)) msneg_rt else mspos_rt
212
213 if (is.null(rt)) rt <- list()
214
215 # Write in cache
216 if ( ! is.na(cache_file)) {
217
218 # Call observers
219 if ( ! is.null(.self$.observers))
220 for (obs in .self$.observers)
221 obs$progress(paste0("Caching retention times of file ", .self$.get.file(molid), "."))
222
223 save(rt, file = cache_file)
224 }
225 }
226
227 # Store in memory
228 .self$.mem.set(rt, molid, 'rt')
229 }
230
231 # Select only one column if asked
232 if ( ! is.na(col)) rt <- rt[[col]]
233
234 return(rt)
235 })
236
237 #################
238 # GET NB PEAKS #
239 #################
240
241 MsXlsDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {
242
243 # Initialize parameters
244 if (is.null(molid) || (length(molid) == 1 && is.na(molid)))
245 molid <- .self$getMoleculeIds()
246 if (is.na(type))
247 type <- c(MSDB.TAG.POS, MSDB.TAG.NEG)
248
249 return(sum(vapply(molid, function(m) { if (is.na(m)) 0 else sum(vapply(type, function(t) { peaks <- .self$.get.peaks(m, t) ; if (is.null(peaks)) 0 else nrow(peaks) }, FUN.VALUE = 1)) }, FUN.VALUE = 1)))
250 })
251
252 ##################
253 # GET PEAK TABLE #
254 ##################
255
256 MsXlsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_) {
257
258 peaks <- NULL
259
260 # Set default molecule IDs
261 if (is.null(molid) || (length(molid) == 1 && is.na(molid)))
262 molid <- .self$getMoleculeIds()
263
264 # Set default modes
265 if (is.null(mode) || (length(mode) == 1 && is.na(mode)))
266 mode <- c(MSDB.TAG.POS, MSDB.TAG.NEG)
267
268 # Loop on all molecules
269 for (mol in molid) {
270
271 # Loop on all modes
272 for (mod in mode) {
273 m.peaks <- .self$.get.peaks(mol, mod)
274 if ( ! is.null(m.peaks) && nrow(m.peaks) > 0) {
275 m.peaks[[MSDB.TAG.MOLID]] <- mol
276 m.peaks[[MSDB.TAG.MODE]] <- mod
277 peaks <- if (is.null(peaks)) m.peaks else rbind(peaks, m.peaks)
278 peaks <- df.move.col.first(peaks, c(MSDB.TAG.MOLID, MSDB.TAG.MODE))
279 }
280 }
281 }
282
283 return(peaks)
284 })
285
286 #################
287 # GET MZ VALUES #
288 #################
289
290 # Returns a numeric vector of all masses stored inside the database.
291 MsXlsDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
292
293 mz <- numeric()
294
295 # Get all mz values of all molecules
296 for(molid in .self$getMoleculeIds())
297 for (m in (if (is.null(mode) || is.na(mode)) c(MSDB.TAG.POS, MSDB.TAG.NEG) else mode))
298 mz <- c(mz, .self$.get.peaks(molid, m)[[MSDB.TAG.MZTHEO]])
299
300 # Remove duplicated
301 mz <- mz[ ! duplicated(mz)]
302
303 # Apply cut-off
304 if ( ! is.na(max.results))
305 mz <- mz[1:max.results]
306
307 return(mz)
308 })
309
310 #############
311 # GET PEAKS #
312 #############
313
314 MsXlsDb$methods( .get.peaks = function(molid, mode) {
315
316 tab <- if (mode == MSDB.TAG.POS) .XLS_MSPOS_TAB else .XLS_MSNEG_TAB
317
318 # Find it in memory
319 peak_df <- .self$.mem.get(molid, 'peaks', mode)
320
321 if (is.null(peak_df)) {
322 # Call observers
323 if ( ! is.null(.self$.observers))
324 for (obs in .self$.observers)
325 obs$progress(paste0("Loading peaks of tab ", tab, " of file ", .self$.get.file(molid), "."), level = 2)
326
327 peak_df <- NULL
328
329 # Load from cache file
330 cache_file <- NA_character_
331 if ( ! is.na(.self$.get.cache.dir())) {
332 cache_file <- file.path(.self$.get.cache.dir(), paste0('peaks-', molid, '-', tab, '.csv'))
333 if (file.exists(cache_file))
334 peak_df <- read.csv(cache_file, header = TRUE, stringsAsFactors = FALSE)
335 }
336
337 # Read from XLS file, if not in cache
338 if (is.null(peak_df)) {
339
340 # Load tab (peaks start at row 8)
341 if (.self$.tab.exists(.self$.get.file(molid), tab)) {
342
343 peaks <- read.excel(.self$.get.file(molid), tab, start.row = .XLS_PEAKS_ROW_OFFSET, stringsAsFactors = FALSE)
344 if ( ! is.null(peaks))
345 peaks <- peaks[ ! is.na(peaks[.XLS_MZ_COL]), , drop = FALSE] # Remove rows where m/z is not defined. TODO maybe call observer for notify a line with non NA values but without m/z value.
346
347 # Instantiate peaks
348 if ( ! is.null(peaks) && nrow(peaks) > 0) {
349 peak_df <- peaks[1:length(peaks[[.XLS_MZ_COL]]), c(.XLS_MZ_COL, .XLS_THEORETICAL_MZ_COL, .XLS_INTENSITY_COL, .XLS_RELATIVE_COL, .XLS_COMPOSITION_COL, .XLS_ATTRIBUTION_COL), drop = FALSE]
350 colnames(peak_df) <- c(MSDB.TAG.MZEXP, MSDB.TAG.MZTHEO, MSDB.TAG.INT, MSDB.TAG.REL, MSDB.TAG.COMP, MSDB.TAG.ATTR)
351 }
352
353 # Set default data frame (important for cache file writing, because we need a correct header to be written in order for loading)
354 else {
355 peak_df <- data.frame(stringsAsFactors = FALSE)
356 peak_df[MSDB.TAG.MZEXP] <- numeric()
357 peak_df[MSDB.TAG.MZTHEO] <- numeric()
358 peak_df[MSDB.TAG.INT] <- numeric()
359 peak_df[MSDB.TAG.REL] <- numeric()
360 peak_df[MSDB.TAG.COMP] <- character()
361 peak_df[MSDB.TAG.ATTR] <- character()
362 }
363
364 if (is.null(peak_df)) peak_df <- data.frame()
365
366 # Write in cache
367 if ( ! is.na(cache_file)) {
368
369 # Call observers
370 if ( ! is.null(.self$.observers))
371 for (obs in .self$.observers)
372 obs$progress(paste0("Caching peaks of tab ", tab, " of file ", .self$.get.file(molid), "."))
373
374 write.csv(peak_df, cache_file, row.names = FALSE)
375 }
376 }
377 }
378
379 # Store in memory
380 .self$.mem.set(peak_df, molid, 'peaks', mode)
381 }
382
383 return(peak_df)
384 })
385
386 ##############################
387 # GET FULL MS PEAK M/Z INDEX #
388 ##############################
389
390 # Get mz index for full ions, creating it if necessary.
391 MsXlsDb$methods( .get.mz.index = function(mode) {
392
393 if (is.null(.self$.mz.index[[mode]])) {
394
395 # Initialize data frame
396 mzi <- data.frame(stringsAsFactors = FALSE)
397 mzi[MSDB.TAG.MZTHEO] <- numeric()
398 mzi[MSDB.TAG.MOLID] <- character()
399 mzi[MSDB.TAG.COMP] <- character()
400 mzi[MSDB.TAG.ATTR] <- character()
401
402 # Loop on all molecules
403 for(molid in .self$getMoleculeIds()) {
404
405 # Get all peaks of this molecule
406 peaks <- .self$.get.peaks(molid, mode)
407
408 # Remove rows whose mz is NA.
409 peaks <- peaks[ ! is.na(peaks[[MSDB.TAG.MZTHEO]]), ]
410
411 if (nrow(peaks) > 0) {
412
413 # Add id column
414 peaks[MSDB.TAG.MOLID] <- molid
415
416 # Append peaks
417 r <- nrow(mzi) + 1
418 rows <- r:(r+nrow(peaks)-1)
419 mzi[rows, ] <- peaks[colnames(mzi)]
420 }
421 }
422
423 # Sort by M/Z
424 sorted_indices <- order(mzi[[MSDB.TAG.MZTHEO]])
425
426 # Group in a data frame
427 .self$.mz.index[[mode]] <- mzi[sorted_indices, ]
428 }
429
430 return(.self$.mz.index[[mode]])
431 })
432
433 ######################
434 # SEARCH FOR MZ & RT #
435 ######################
436
437 MsXlsDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {
438
439 # Search for m/z
440 results <- .self$.do.search.for.mz(mode, mz.low, mz.high)
441
442 # Filter on attributions
443 if ( ! is.null(attribs)) {
444 results <- results[results[[MSDB.TAG.ATTR]] %in% attribs, ]
445 }
446
447 # Filer on molecule IDs
448 if ( ! is.null(molids)) {
449 results <- results[results[[MSDB.TAG.MOLID]] %in% molids, ]
450 }
451
452 # Use retention time
453 if ( ! is.null(col) && ! is.null(rt.low) && ! is.null(rt.high)) {
454
455 # Get list of unique IDs
456 ids <- results[[MSDB.TAG.MOLID]]
457 ids <- ids[ ! duplicated(ids)]
458 rt <- .self$.search.for.rt(mols = ids, rt.low = rt.low, rt.high = rt.high, col = col)
459 results <- results[results[[MSDB.TAG.MOLID]] %in% rt[[MSDB.TAG.MOLID]], ]
460 results <- merge(results, rt)
461 }
462
463 return(results)
464 })
465
466 ##############################
467 # SEARCH FOR M/Z IN MS PEAKS #
468 ##############################
469
470 MsXlsDb$methods( .do.search.for.mz = function(mode, mz.low, mz.high) {
471
472 results <- data.frame(stringsAsFactors = FALSE)
473 results[MSDB.TAG.MZTHEO] <- numeric()
474 results[MSDB.TAG.MOLID] <- character()
475 results[MSDB.TAG.MOLNAMES] <- character()
476 results[MSDB.TAG.COMP] <- character()
477 results[MSDB.TAG.ATTR] <- character()
478
479 # Create m/z index
480 mz_index <- .self$.get.mz.index(mode)
481
482 # Find molecules
483 low_bound <- binary.search(mz.low, mz_index[[MSDB.TAG.MZTHEO]], lower = FALSE)
484 high_bound <- binary.search(mz.high, mz_index[[MSDB.TAG.MZTHEO]], lower = TRUE)
485
486 # Get results
487 if ( ! is.na(high_bound) && ! is.na(low_bound) && low_bound <= high_bound)
488 results <- mz_index[low_bound:high_bound,]
489
490 # Remove row names
491 rownames(results) <- NULL
492
493 return(results)
494 })
495
496 ################
497 # GET MOL NAME #
498 ################
499
500 MsXlsDb$methods( .get.mol.name = function(molid) {
501
502 if (is.na(molid))
503 return(NA_character_)
504
505 # Find it in memory
506 name <- .self$.mem.get(molid, 'name')
507
508 if (is.null(name)) {
509
510 # Load molecule
511 mol <- .self$.load.molecule(molid)
512
513 # Look for name in tabs
514 for (tab in c(.XLS_MSPOS_TAB, .XLS_MSNEG_TAB)) {
515 hdr <- mol[[tab]][['header']]
516 if ( ! is.null(hdr))
517 name <- hdr[[1]]
518 if ( ! is.null(name) && ! is.na(name)) break
519 }
520
521 # Store in memory
522 if (is.null(name)) name <- NA_character_
523 .self$.mem.set(name, molid, 'name')
524 }
525
526 return(name)
527 })
528
529 ##################
530 # GET NAME INDEX #
531 ##################
532
533 # Get name index.
534 MsXlsDb$methods( .get.name.index = function() {
535
536 if (is.null(.self$.name_index)) {
537
538 # Get names
539 names <- vapply(.self$getMoleculeIds(), function(id) toupper(.self$getMoleculeName(id)), FUN.VALUE = "")
540
541 # Get molecule IDs
542 id <- .self$getMoleculeIds()
543
544 # Sort by names
545 sorted_indices <- order(names)
546
547 # Group in a data frame
548 .self$.name_index <- data.frame(name = rbind(names)[, sorted_indices],
549 id = rbind(id)[, sorted_indices],
550 stringsAsFactors = FALSE)
551 }
552
553 return(.self$.name_index)
554 })
555
556 ##################
557 # INIT FILE LIST #
558 ##################
559
560 MsXlsDb$methods( .init.file.list = function() {
561
562 if (is.null(.self$.files)) {
563
564 # List all files
565 files <- Sys.glob(file.path(.self$.db_dir, '*.xls'))
566
567 # Limit the size of the database
568 if ( ! is.na(.self$.limit))
569 files <- head(files, .self$.limit)
570
571 # Get IDs
572 ids <- vapply(files, function(f) .extract_molecule_id_from_filename(f), FUN.VALUE = 1)
573
574 # Use ids as indices to build the vector of files
575 .files <<- rep(NA_character_, max(ids))
576 .files[ids] <<- files
577 }
578 })
579
580 #################
581 # GET CACHE DIR #
582 #################
583
584 MsXlsDb$methods( .get.cache.dir = function() {
585
586 if ( ! is.na(.self$.cache_dir) && ! file.exists(.self$.cache_dir))
587 dir.create(.self$.cache_dir)
588
589 return(.self$.cache_dir)
590 })
591
592 #################
593 # LOAD MOLECULE #
594 #################
595
596 MsXlsDb$methods( .load.molecule = function(molid) {
597
598 # Init local variables
599 mol <- NULL
600 cache_file <- NA_character_
601 excel_file <- .self$.get.file(molid)
602
603 # Call observers
604 if ( ! is.null(.self$.observers))
605 for (obs in .self$.observers)
606 obs$progress(paste0("Loading molecule ", molid, "."), level = 2)
607
608 # Load from cache
609 if ( ! is.na(.self$.get.cache.dir())) {
610 cache_file <- file.path(.self$.get.cache.dir(), paste0(molid, '.bin'))
611 if (file.exists(cache_file))
612 load(file = cache_file) # load mol variable
613 }
614
615 # Load from Excel file & write to cache
616 if (is.null(mol) && ! is.na(excel_file)) {
617
618 source(file.path(.THIS.FILE.PATH, 'excelhlp.R'), chdir = TRUE) # we use the path set when sourcing the file, since when calling this method, the current path could be different.
619
620 # Load from Excel file
621 for(tab in c(.XLS_MSPOS_TAB, .XLS_MSNEG_TAB)) {
622
623 # Test that tab exists
624 if (.self$.tab.exists(excel_file, tab)) {
625 header <- read.excel(excel_file, tab, start.row = 1, end.row = .XLS_PEAKS_ROW_OFFSET - 1, header = FALSE, stringsAsFactors = FALSE, trim.values = TRUE, col.index = c(1))[[1]]
626 peaks <- read.excel(excel_file, tab, start.row = .XLS_PEAKS_ROW_OFFSET)
627 mol[[tab]] <- list(header = header, peaks = peaks)
628 }
629
630 # Missing tab
631 else {
632 for (obs in .self$.observers)
633 obs$warning(paste0("No excel tab ", tab, " in file ", excel_file, "."))
634 }
635 }
636
637 # Write in cache
638 if ( ! is.na(cache_file)) {
639
640 # Call observers
641 if ( ! is.null(.self$.observers))
642 for (obs in .self$.observers)
643 obs$progress(paste0("Caching file ", excel_file, "."))
644
645 save(mol, file = cache_file)
646 }
647 }
648
649 return(mol)
650 })
651
652 ########################
653 # DOES EXCEL TAB EXIST #
654 ########################
655
656 MsXlsDb$methods( .tab.exists = function(file, tab) {
657
658 source(file.path(.THIS.FILE.PATH, 'excelhlp.R'), chdir = TRUE) # we use the path set when sourcing the file, since when calling this method, the current path could be different.
659
660 if ( ! tab.exists(file, tab)) {
661
662 # Warn observers
663 for (obs in .self$.observers)
664 obs$warning(paste0("No excel tab ", tab, " in file ", file, "."))
665
666 return(FALSE)
667 }
668
669 return(TRUE)
670 })
671
672 #########################
673 # PARSE RETENTION TIMES #
674 #########################
675
676 MsXlsDb$methods( .parse_retention_times = function(id, tab) {
677
678 rt <- NULL
679
680 if (.self$.tab.exists(.self$.get.file(id), tab)) {
681 peaks <- read.excel(.self$.get.file(id), tab, start.row = .XLS_PEAKS_ROW_OFFSET)
682
683 # Get retention times
684 if ( ! is.null(peaks) && length(peaks) > 0 && ! is.na(peaks[[1]][[1]]))
685 for (c in .XLS_PEAKS_RT_COL_START:length(names(peaks)))
686 if ( ! is.na(peaks[[c]][[1]])) {
687
688 # Check retention times of all different m/z peaks for the same column.
689 .self$.check_retention_times(id, tab, names(peaks)[[c]], peaks[[c]], sum( ! is.na(peaks[[1]])))
690
691 # Add retention time
692 # TODO The column names are transformed through the read.xlsx call. For instance:
693 # HPLC (C18) 25mn QTOF (Bis) --> HPLC..C18..25mn.QTOF..Bis.
694 # ZICpHILIC 150*5*2.1 Shimadzu-Exactive-42mn --> ZICpHILIC.150.5.2.1.Shimadzu.Exactive.42mn
695 # This can be an issue, since we loose the formating.
696 col_id <- names(peaks)[[c]]
697 time <- peaks[[c]][[1]] * 60 # Read and convert retention time in seconds.
698 if (is.null(rt) || ! col_id %in% names(rt))
699 rt[[col_id]] <- list(time)
700 else
701 rt[[col_id]] <- c(rt[[col_id]], time)
702 }
703 }
704
705 return(rt)
706 })
707
708 #########################
709 # CHECK RETENTION TIMES #
710 #########################
711
712 MsXlsDb$methods( .check_retention_times = function(id, tab_name, column_name, rt, n) {
713
714 if (n >= 1 && ! is.null(.self$.observers) && length(.self$.observers) > 0)
715
716 # Check column only if there is at least one value inside
717 if (sum( ! is.na(rt)) > 0)
718
719 # Loop on all values
720 for(i in 1:n) {
721
722 # Check that it's defined
723 if (i > 1 && is.na(rt[[i]]))
724 for (obs in .self$.observers)
725 obs$warning(paste0("Retention times undefined for column ", column_name, " at row ", i + .XLS_PEAKS_ROW_OFFSET, " of tab ", tab_name, " in file ", .self$.get.file(id), "."))
726
727 else if (i > 1)
728 # Check the value (it must be constant)
729 if (rt[[i-1]] != rt[[i]])
730 for (obs in .self$.observers)
731 obs$error(paste0("Retention times not constant for column ", column_name, " between row ", i - 1 + .XLS_PEAKS_ROW_OFFSET, " and row ", i + .XLS_PEAKS_ROW_OFFSET, "o tab", tab_name, "in file", .self$.get.file(id)))
732 }
733 })
734
735 ####################
736 # GET FILE FROM ID #
737 ####################
738
739 MsXlsDb$methods( .get.file = function(id) {
740
741 # List files
742 .self$.init.file.list()
743
744 return( if (id > 0 && id <= length(.self$.files)) .self$.files[id] else NA_character_)
745 })
746
747 ###########
748 # MEM GET #
749 ###########
750
751 # Get database data from memory
752 MsXlsDb$methods( .mem.get = function(molid, field, second.field = NA_character_) {
753
754 data <- .self$.db[[as.character(molid)]][[field]]
755
756 if ( ! is.na(second.field))
757 data <- data[[second.field]]
758
759 return(data)
760 })
761
762 ###########
763 # MEM SET #
764 ###########
765
766 # Set database data into memory
767 MsXlsDb$methods( .mem.set = function(data, molid, field, second.field = NA_character_) {
768
769 id <- as.character(molid)
770
771 # Create db
772 if (is.null(.self$.db))
773 .db <<- list()
774
775 # Create first level
776 if (is.null(.self$.db[[id]]))
777 .self$.db[[id]] <- list()
778
779 # Create second level
780 if ( ! is.na(second.field) && is.null(.self$.db[[id]][[field]]))
781 .self$.db[[id]][[field]] <- list()
782
783 # Store data
784 if (is.na(second.field)) {
785 .self$.db[[id]][[field]] <- data
786 } else {
787 .self$.db[[id]][[field]][[second.field]] <- data
788 }
789 })
790
791 #################
792 # SEARCH FOR RT #
793 #################
794
795 # Find molecules matching a certain retention time.
796 # col A list of chromatographic columns to use.
797 # rt.low The lower bound of the rt value.
798 # rt.high The higher bound of the rt value.
799 # mols A list of molecule IDs to process. If unset, then take all molecules.
800 # Return a data frame with the following columns: id, col, colrt.
801 MsXlsDb$methods( .search.for.rt = function(col, rt.low, rt.high, mols = NULL) {
802
803 # Use all molecules if no list is provided
804 if (is.null(mols))
805 mols <- .self$getMoleculeIds()
806
807 results <- data.frame(id = integer(), col = character(), colrt = double(), stringsAsFactors = FALSE)
808 colnames(results) <- c(MSDB.TAG.MOLID, MSDB.TAG.COL, MSDB.TAG.COLRT)
809
810 # Loop on all molecules
811 for (molid in mols) {
812 no.col <- TRUE
813 for (c in col) {
814 molrts <- .self$getRetentionTimes(molid, c)
815 if ( ! is.null(molrts)) {
816 no.col <- FALSE
817 for (molrt in molrts) {
818 if (molrt >= rt.low && molrt <= rt.high) {
819 r <- nrow(results) + 1
820 results[r, ] <- c(id = molid, col = c, colrt = molrt)
821 }
822 }
823 }
824 }
825
826 if (no.col) {
827 r <- nrow(results) + 1
828 results[r, c(MSDB.TAG.MOLID)] <- c(id = molid)
829 }
830 }
831
832 return(results)
833 })
834
835 ############################
836 # EXTRACT ID FROM FILENAME #
837 ############################
838
839 .extract_molecule_id_from_filename <- function(filename) {
840
841 id <- NA_integer_
842
843 if ( ! is.na(filename)) {
844 g <- str_match(filename, "N(\\d+)[._-]")
845 if ( ! is.na(g[1,1]))
846 id <- as.numeric(g[1,2])
847 }
848
849 return(id)
850 }
851
852 } # end of load safe guard