lcmsmatching: MsXlsDb.R comparison

comparison MsXlsDb.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce

author	prog
date	Fri, 22 Feb 2019 16:04:22 -0500
parents	fb9c0409d85c
children

comparison

equal deleted inserted replaced

-:fb9c0409d85c
+:f86fec07f392
-if ( ! exists('MsXlsDb')) { # Do not load again if already loaded
-	library('methods')
-	library('stringr')
-	source('msdb-common.R')
-	source('MsDb.R')
-	source('strhlp.R', chdir = TRUE)
-	source('dfhlp.R', chdir = TRUE)
-	source('search.R', chdir = TRUE)
-	source('excelhlp.R', chdir = TRUE)
-	#############
-	# CONSTANTS #
-	#############
-	.THIS.FILE.PATH <- getwd() # We suppose that the file has been sourced with the option chdir = TRUE
-	.XLS_PEAKS_ROW_OFFSET <- 8
-	.XLS_PEAKS_RT_COL_START <- 11
-	.XLS_MSPOS_TAB <- 'MS_POS'
-	.XLS_MSNEG_TAB <- 'MS_NEG'
-	.XLS_MZ_COL <- 1
-	.XLS_INTENSITY_COL <- 2
-	.XLS_RELATIVE_COL <- 3
-	.XLS_THEORETICAL_MZ_COL <- 5
-	.XLS_COMPOSITION_COL <- 8
-	.XLS_ATTRIBUTION_COL <- 9
-	#####################
-	# CLASS DECLARATION #
-	#####################
-	MsXlsDb <- setRefClass("MsXlsDb", contains = "MsDb", fields = list(.mz.index = "ANY", .name_index = "ANY", .db_dir = "character", .limit = "numeric", .files = "ANY", .cache_dir = "character", .db = "ANY"))
-	###############
-	# CONSTRUCTOR #
-	###############
-	MsXlsDb$methods( initialize = function(db_dir = NA_character_, limit = NA_integer_, cache_dir = NA_character_, cache = FALSE, ...) {
-		# Initialize members
-		                # TODO check that db_dir is not null neither na, and tests that it exists and is a directory.
-		.db_dir <<- if ( ! is.null(db_dir)) db_dir else NA_character_
-		.limit <<- if ( ! is.null(limit) && ! is.na(limit) && limit > 0) limit else NA_integer_
-		cache_dir <- if (cache && is.na(cache_dir) && ! is.na(db_dir)) file.path(db_dir, 'cache') else cache_dir
-		.cache_dir <<- if ( cache || ! is.null(cache_dir)) cache_dir else NA_character_
-		.files <<- NULL
-		.db <<- NULL
-		.mz.index <<- NULL
-		.name_index <<- NULL
-		callSuper(...)
-	})
-	####################
-	# GET MOLECULE IDS #
-	####################
-	MsXlsDb$methods( getMoleculeIds = function(max.results = NA_integer_) {
-		# Init file list
-		.self$.init.file.list()
-		# Get IDs
-		mol.ids <- as.integer(which( ! is.na(.self$.files)))
-		# Cut
-		if ( ! is.na(max.results) && length(mol.ids) > max.results)
-			mol.ids <- mol.ids[max.results, ]
-		return(mol.ids)
-	})
-	####################
-	# GET NB MOLECULES #
-	####################
-	# Returns a list of all molecule names
-	MsXlsDb$methods( getNbMolecules = function() {
-		return(length(.self$getMoleculeIds()))
-	})
-	#####################
-	# GET MOLECULE NAME #
-	#####################
-	MsXlsDb$methods( getMoleculeName = function(molid) {
-		return(vapply(molid, function(m) .self$.get.mol.name(m), FUN.VALUE = ""))
-	})
-	###############################
-	# GET CHROMATOGRAPHIC COLUMNS #
-	###############################
-	# Returns a list of all chromatographic columns used
-	MsXlsDb$methods( getChromCol = function(molid = NULL) {
-	    cn <- character()
-		# If no molecule IDs provided, then look at all molecules
-	    if	(is.null(molid))
-	    	molid <- .self$getMoleculeIds()
-		# Loop on molecules
-		for (mid in molid) {
-	    	rt <- .self$getRetentionTimes(mid)
-			if ( ! is.null(rt))
-				cn <- c(cn, names(rt))
-		}
-		# Remove duplicates
-		cn <- cn[ ! duplicated(cn)]
-		# Make data frame
-		cn <- data.frame(id = cn, title = cn, stringsAsFactors = FALSE)
-		return(cn)
-	})
-	################
-	# FIND BY NAME #
-	################
-	MsXlsDb$methods( findByName = function(name) {
-		# NULL entry
-		if (is.null(name))
-			return(NA_integer_)
-		# Initialize output list
-		ids <- NULL
-		for (n in name) {
-			id <- NA_integer_
-			if ( ! is.na(n)) {
-				# Get index
-				index <- .self$.get.name.index()
-				# Search for name in index
-				i <- binary.search(toupper(n), index[['name']])
-				id <- if (is.na(i)) NA_integer_ else index[i, 'id']
-			}
-			ids <- c(ids, id)
-		}
-		return(ids)
-	})
-	#######################
-	# GET RETENTION TIMES #
-	#######################
-	MsXlsDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
-		if (is.null(molid) || is.na(molid))
-			return(NULL)
-		# Find it in memory
-		rt <- .self$.mem.get(molid, 'rt')
-		if (is.null(rt)) {
-			# Call observers
-			if ( ! is.null(.self$.observers))
-				for (obs in .self$.observers)
-					obs$progress(paste0("Loading retention times of file", .self$.get.file(molid), "."), level = 2)
-			rt <- NULL
-			# Load from cache file
-			cache_file <- NA_character_
-			if ( ! is.na(.self$.get.cache.dir())) {
-				cache_file <- file.path(.self$.get.cache.dir(), paste0('rt-', molid, '.bin'))
-				if (file.exists(cache_file))
-					load(file = cache_file) # load rt
-			}
-			if (is.null(rt)) {
-				# Get retention times of both positive and negative mode tabs
-				mspos_rt <- .self$.parse_retention_times(molid, .XLS_MSPOS_TAB)
-				msneg_rt <- .self$.parse_retention_times(molid, .XLS_MSNEG_TAB)
-				# Retention times stored in negative and positive modes
-				if ( ! is.null(mspos_rt) && ! is.null(msneg_rt)) {
-					# Warn observers when both retention time lists are not identical
-					if ( ! identical(mspos_rt, msneg_rt))
-						for (obs in .self$.observers)
-							obs$warning(paste0("Retention times in negative and positive modes are different in file ", .self$.get.file(molid), "."))
-					# Merge both lists
-					rt <- mspos_rt
-					for (c in names(msneg_rt))
-						if (c %in% names(rt)) {
-							v <- c(rt[[c]], msneg_rt[[c]])
-							rt[[c]] <- v[ ! duplicated(v)]
-						}
-						else
-							rt[[c]] <- msneg_rt[[c]]
-				}
-				else
-					# Set retention times
-					rt <- if (is.null(mspos_rt)) msneg_rt else mspos_rt
-				if (is.null(rt)) rt <- list()
-				# Write in cache
-				if ( ! is.na(cache_file)) {
-					# Call observers
-					if ( ! is.null(.self$.observers))
-						for (obs in .self$.observers)
-							obs$progress(paste0("Caching retention times of file ", .self$.get.file(molid), "."))
-					save(rt, file = cache_file)
-				}
-			}
-			# Store in memory
-			.self$.mem.set(rt, molid, 'rt')
-		}
-		# Select only one column if asked
-		if ( ! is.na(col)) rt <- rt[[col]]
-		return(rt)
-	})
-	#################
-	# GET NB PEAKS #
-	#################
-	MsXlsDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {
-		# Initialize parameters
-		if (is.null(molid) || (length(molid) == 1 && is.na(molid)))
-			molid <- .self$getMoleculeIds()
-		if (is.na(type))
-			type <- c(MSDB.TAG.POS, MSDB.TAG.NEG)
-		return(sum(vapply(molid, function(m) { if (is.na(m)) 0 else sum(vapply(type, function(t) { peaks <- .self$.get.peaks(m, t) ; if (is.null(peaks)) 0 else nrow(peaks) }, FUN.VALUE = 1)) }, FUN.VALUE = 1)))
-	})
-	##################
-	# GET PEAK TABLE #
-	##################
-	MsXlsDb$methods( getPeakTable = function(molid = NA_integer_, mode = NA_character_) {
-		peaks <- NULL
-		# Set default molecule IDs
-		if (is.null(molid) || (length(molid) == 1 && is.na(molid)))
-			molid <- .self$getMoleculeIds()
-		# Set default modes
-		if (is.null(mode) || (length(mode) == 1 && is.na(mode)))
-			mode <- c(MSDB.TAG.POS, MSDB.TAG.NEG)
-		# Loop on all molecules
-		for (mol in molid) {
-			# Loop on all modes
-			for (mod in mode) {
-				m.peaks <- .self$.get.peaks(mol, mod)
-				if ( ! is.null(m.peaks) && nrow(m.peaks) > 0) {
-					m.peaks[[MSDB.TAG.MOLID]] <- mol
-					m.peaks[[MSDB.TAG.MODE]] <- mod
-					peaks <- if (is.null(peaks)) m.peaks else rbind(peaks, m.peaks)
-					peaks <- df.move.col.first(peaks, c(MSDB.TAG.MOLID, MSDB.TAG.MODE))
-				}
-			}
-		}
-		return(peaks)
-	})
-	#################
-	# GET MZ VALUES #
-	#################
-	# Returns a numeric vector of all masses stored inside the database.
-	MsXlsDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
-		mz <- numeric()
-		# Get all mz values of all molecules
-		for(molid in .self$getMoleculeIds())
-			for (m in (if (is.null(mode) || is.na(mode)) c(MSDB.TAG.POS, MSDB.TAG.NEG) else mode))
-				mz <- c(mz, .self$.get.peaks(molid, m)[[MSDB.TAG.MZTHEO]])
-		# Remove duplicated
-		mz <- mz[ ! duplicated(mz)]
-		# Apply cut-off
-		if ( ! is.na(max.results))
-			mz <- mz[1:max.results]
-		return(mz)
-	})
-	#############
-	# GET PEAKS #
-	#############
-	MsXlsDb$methods( .get.peaks = function(molid, mode) {
-		tab <- if (mode == MSDB.TAG.POS) .XLS_MSPOS_TAB else .XLS_MSNEG_TAB
-		# Find it in memory
-		peak_df <- .self$.mem.get(molid, 'peaks', mode)
-		if (is.null(peak_df)) {
-			# Call observers
-			if ( ! is.null(.self$.observers))
-				for (obs in .self$.observers)
-					obs$progress(paste0("Loading peaks of tab ", tab, " of file ", .self$.get.file(molid), "."), level = 2)
-			peak_df <- NULL
-			# Load from cache file
-			cache_file <- NA_character_
-			if ( ! is.na(.self$.get.cache.dir())) {
-				cache_file <- file.path(.self$.get.cache.dir(), paste0('peaks-', molid, '-', tab, '.csv'))
-				if (file.exists(cache_file))
-					peak_df <- read.csv(cache_file, header = TRUE, stringsAsFactors = FALSE)
-			}
-			# Read from XLS file, if not in cache
-			if (is.null(peak_df)) {
-				# Load tab (peaks start at row 8)
-				if (.self$.tab.exists(.self$.get.file(molid), tab)) {
-					peaks <- read.excel(.self$.get.file(molid), tab, start.row = .XLS_PEAKS_ROW_OFFSET, stringsAsFactors = FALSE)
-					if ( ! is.null(peaks))
-						peaks <- peaks[ ! is.na(peaks[.XLS_MZ_COL]), , drop = FALSE] # Remove rows where m/z is not defined. TODO maybe call observer for notify a line with non NA values but without m/z value.
-					# Instantiate peaks
-					if ( ! is.null(peaks) && nrow(peaks) > 0) {
-						peak_df <- peaks[1:length(peaks[[.XLS_MZ_COL]]), c(.XLS_MZ_COL, .XLS_THEORETICAL_MZ_COL, .XLS_INTENSITY_COL, .XLS_RELATIVE_COL, .XLS_COMPOSITION_COL, .XLS_ATTRIBUTION_COL), drop = FALSE]
-						colnames(peak_df) <- c(MSDB.TAG.MZEXP, MSDB.TAG.MZTHEO, MSDB.TAG.INT, MSDB.TAG.REL, MSDB.TAG.COMP, MSDB.TAG.ATTR)
-					}
-					# Set default data frame (important for cache file writing, because we need a correct header to be written in order for loading)
-					else {
-						peak_df <- data.frame(stringsAsFactors = FALSE)
-						peak_df[MSDB.TAG.MZEXP] <- numeric()
-				   		peak_df[MSDB.TAG.MZTHEO] <- numeric()
-				   		peak_df[MSDB.TAG.INT] <- numeric()
-				   		peak_df[MSDB.TAG.REL] <- numeric()
-				   		peak_df[MSDB.TAG.COMP] <- character()
-				   		peak_df[MSDB.TAG.ATTR] <- character()
-					}
-					if (is.null(peak_df)) peak_df <- data.frame()
-					# Write in cache
-					if ( ! is.na(cache_file)) {
-						# Call observers
-						if ( ! is.null(.self$.observers))
-							for (obs in .self$.observers)
-								obs$progress(paste0("Caching peaks of tab ", tab, " of file ", .self$.get.file(molid), "."))
-						write.csv(peak_df, cache_file, row.names = FALSE)
-					}
-				}
-			}
-			# Store in memory
-			.self$.mem.set(peak_df, molid, 'peaks', mode)
-		}
-		return(peak_df)
-	})
-	##############################
-	# GET FULL MS PEAK M/Z INDEX #
-	##############################
-	# Get mz index for full ions, creating it if necessary.
-	MsXlsDb$methods( .get.mz.index = function(mode) {
-		if (is.null(.self$.mz.index[[mode]])) {
-			# Initialize data frame
-			mzi <- data.frame(stringsAsFactors = FALSE)
-			mzi[MSDB.TAG.MZTHEO]    <- numeric()
-			mzi[MSDB.TAG.MOLID]     <- character()
-			mzi[MSDB.TAG.COMP]      <- character()
-			mzi[MSDB.TAG.ATTR]      <- character()
-			# Loop on all molecules
-			for(molid in .self$getMoleculeIds()) {
-				# Get all peaks of this molecule
-				peaks <- .self$.get.peaks(molid, mode)
-				# Remove rows whose mz is NA.
-				peaks <- peaks[ ! is.na(peaks[[MSDB.TAG.MZTHEO]]), ]
-				if (nrow(peaks) > 0) {
-					# Add id column
-					peaks[MSDB.TAG.MOLID] <- molid
-					# Append peaks
-					r <- nrow(mzi) + 1
-					rows <- r:(r+nrow(peaks)-1)
-					mzi[rows, ] <- peaks[colnames(mzi)]
-				}
-			}
-			# Sort by M/Z
-			sorted_indices <- order(mzi[[MSDB.TAG.MZTHEO]])
-			# Group in a data frame
-			.self$.mz.index[[mode]] <- mzi[sorted_indices, ]
-		}
-		return(.self$.mz.index[[mode]])
-	})
-	######################
-	# SEARCH FOR MZ & RT #
-	######################
-	MsXlsDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {
-		# Search for m/z
-		results <- .self$.do.search.for.mz(mode, mz.low, mz.high)
-		# Filter on attributions
-		if ( ! is.null(attribs)) {
-			results <- results[results[[MSDB.TAG.ATTR]] %in% attribs, ]
-		}
-		# Filer on molecule IDs
-		if ( ! is.null(molids)) {
-			results <- results[results[[MSDB.TAG.MOLID]] %in% molids, ]
-		}
-		# Use retention time
-		if ( ! is.null(col) && ! is.null(rt.low) && ! is.null(rt.high)) {
-			# Get list of unique IDs
-			ids <- results[[MSDB.TAG.MOLID]]
-			ids <- ids[ ! duplicated(ids)]
-			rt <- .self$.search.for.rt(mols = ids, rt.low = rt.low, rt.high = rt.high, col = col)
-			results <- results[results[[MSDB.TAG.MOLID]] %in% rt[[MSDB.TAG.MOLID]], ]
-			results <- merge(results, rt)
-		}
-		return(results)
-	})
-	##############################
-	# SEARCH FOR M/Z IN MS PEAKS #
-	##############################
-	MsXlsDb$methods( .do.search.for.mz = function(mode, mz.low, mz.high) {
-		results <- data.frame(stringsAsFactors = FALSE)
-		results[MSDB.TAG.MZTHEO]    <- numeric()
-		results[MSDB.TAG.MOLID]     <- character()
-		results[MSDB.TAG.MOLNAMES]  <- character()
-		results[MSDB.TAG.COMP]      <- character()
-		results[MSDB.TAG.ATTR]      <- character()
-		# Create m/z index
-		mz_index <- .self$.get.mz.index(mode)
-		# Find molecules
-		low_bound <- binary.search(mz.low, mz_index[[MSDB.TAG.MZTHEO]], lower = FALSE)
-		high_bound <- binary.search(mz.high, mz_index[[MSDB.TAG.MZTHEO]], lower = TRUE)
-		# Get results
-		if ( ! is.na(high_bound) && ! is.na(low_bound) && low_bound <= high_bound)
-			results <- mz_index[low_bound:high_bound,]
-		# Remove row names
-		rownames(results) <- NULL
-		return(results)
-	})
-	################
-	# GET MOL NAME #
-	################
-	MsXlsDb$methods( .get.mol.name = function(molid) {
-		if (is.na(molid))
-			return(NA_character_)
-		# Find it in memory
-		name <- .self$.mem.get(molid, 'name')
-		if (is.null(name)) {
-			# Load molecule
-			mol <- .self$.load.molecule(molid)
-			# Look for name in tabs
-			for (tab in c(.XLS_MSPOS_TAB, .XLS_MSNEG_TAB)) {
-				hdr <- mol[[tab]][['header']]
-				if ( ! is.null(hdr))
-					name <- hdr[[1]]
-				if ( ! is.null(name) && ! is.na(name)) break
-			}
-			# Store in memory
-			if (is.null(name)) name <-  NA_character_
-			.self$.mem.set(name, molid, 'name')
-		}
-		return(name)
-	})
-	##################
-	# GET NAME INDEX #
-	##################
-	# Get name index.
-	MsXlsDb$methods( .get.name.index = function() {
-		if (is.null(.self$.name_index)) {
-			# Get names
-			names <- vapply(.self$getMoleculeIds(), function(id) toupper(.self$getMoleculeName(id)), FUN.VALUE = "")
-			# Get molecule IDs
-			id <- .self$getMoleculeIds()
-			# Sort by names
-			sorted_indices <- order(names)
-			# Group in a data frame
-			.self$.name_index <- data.frame(name = rbind(names)[, sorted_indices],
-			                                id = rbind(id)[, sorted_indices],
-			                                stringsAsFactors = FALSE)
-		}
-		return(.self$.name_index)
-	})
-	##################
-	# INIT FILE LIST #
-	##################
-	MsXlsDb$methods( .init.file.list = function() {
-		if (is.null(.self$.files)) {
-			# List all files
-			files <- Sys.glob(file.path(.self$.db_dir, '*.xls'))
-			# Limit the size of the database
-			if ( ! is.na(.self$.limit))
-				files <- head(files, .self$.limit)
-			# Get IDs
-			ids <- vapply(files, function(f) .extract_molecule_id_from_filename(f), FUN.VALUE = 1)
-			# Use ids as indices to build the vector of files
-			.files <<- rep(NA_character_, max(ids))
-			.files[ids] <<- files
-		}
-	})
-	#################
-	# GET CACHE DIR #
-	#################
-	MsXlsDb$methods( .get.cache.dir = function() {
-		if ( ! is.na(.self$.cache_dir) && ! file.exists(.self$.cache_dir))
-			dir.create(.self$.cache_dir)
-		return(.self$.cache_dir)
-	})
-	#################
-	# LOAD MOLECULE #
-	#################
-	MsXlsDb$methods( .load.molecule = function(molid) {
-		# Init local variables
-		mol <- NULL
-		cache_file <- NA_character_
-		excel_file <- .self$.get.file(molid)
-		# Call observers
-		if ( ! is.null(.self$.observers))
-			for (obs in .self$.observers)
-				obs$progress(paste0("Loading molecule ", molid, "."), level = 2)
-		# Load from cache
-		if ( ! is.na(.self$.get.cache.dir())) {
-			cache_file <- file.path(.self$.get.cache.dir(), paste0(molid, '.bin'))
-			if (file.exists(cache_file))
-				load(file = cache_file) # load mol variable
-		}
-		# Load from Excel file & write to cache
-		if (is.null(mol) && ! is.na(excel_file)) {
-			source(file.path(.THIS.FILE.PATH, 'excelhlp.R'), chdir = TRUE) # we use the path set when sourcing the file, since when calling this method, the current path could be different.
-			# Load from Excel file
-			for(tab in c(.XLS_MSPOS_TAB, .XLS_MSNEG_TAB)) {
-				# Test that tab exists
-				if (.self$.tab.exists(excel_file, tab)) {
-					header <- read.excel(excel_file, tab, start.row = 1, end.row = .XLS_PEAKS_ROW_OFFSET - 1, header = FALSE, stringsAsFactors = FALSE, trim.values = TRUE, col.index = c(1))[[1]]
-					peaks <- read.excel(excel_file, tab, start.row = .XLS_PEAKS_ROW_OFFSET)
-					mol[[tab]] <- list(header = header, peaks = peaks)
-				}
-				# Missing tab
-				else {
-					for (obs in .self$.observers)
-						obs$warning(paste0("No excel tab ", tab, " in file ", excel_file, "."))
-				}
-			}
-			# Write in cache
-			if ( ! is.na(cache_file)) {
-				# Call observers
-				if ( ! is.null(.self$.observers))
-					for (obs in .self$.observers)
-						obs$progress(paste0("Caching file ", excel_file, "."))
-				save(mol, file = cache_file)
-			}
-		}
-		return(mol)
-	})
-	########################
-	# DOES EXCEL TAB EXIST #
-	########################
-	MsXlsDb$methods( .tab.exists = function(file, tab) {
-		source(file.path(.THIS.FILE.PATH, 'excelhlp.R'), chdir = TRUE) # we use the path set when sourcing the file, since when calling this method, the current path could be different.
-		if ( ! tab.exists(file, tab)) {
-			# Warn observers
-			for (obs in .self$.observers)
-				obs$warning(paste0("No excel tab ", tab, " in file ", file, "."))
-			return(FALSE)
-		}
-		return(TRUE)
-	})
-	#########################
-	# PARSE RETENTION TIMES #
-	#########################
-	MsXlsDb$methods( .parse_retention_times = function(id, tab) {
-	    rt <- NULL
-		if (.self$.tab.exists(.self$.get.file(id), tab)) {
-			peaks <- read.excel(.self$.get.file(id), tab, start.row = .XLS_PEAKS_ROW_OFFSET)
-			# Get retention times
-			if ( ! is.null(peaks) && length(peaks) > 0 && ! is.na(peaks[[1]][[1]]))
-				for (c in .XLS_PEAKS_RT_COL_START:length(names(peaks)))
-					if ( ! is.na(peaks[[c]][[1]])) {
-						# Check retention times of all different m/z peaks for the same column.
-						.self$.check_retention_times(id, tab, names(peaks)[[c]], peaks[[c]], sum( ! is.na(peaks[[1]])))
-						# Add retention time
-						# TODO The column names are transformed through the read.xlsx call. For instance:
-						#           HPLC (C18) 25mn QTOF (Bis)                  --> HPLC..C18..25mn.QTOF..Bis.
-	  					#           ZICpHILIC 150*5*2.1 Shimadzu-Exactive-42mn  --> ZICpHILIC.150.5.2.1.Shimadzu.Exactive.42mn
-						# This can be an issue, since we loose the formating.
-						col_id <- names(peaks)[[c]]
-						time <- peaks[[c]][[1]] * 60 # Read and convert retention time in seconds.
-						if (is.null(rt) || ! col_id %in% names(rt))
-							rt[[col_id]] <- list(time)
-						else
-							rt[[col_id]] <- c(rt[[col_id]], time)
-					}
-		}
-		return(rt)
-	})
-	#########################
-	# CHECK RETENTION TIMES #
-	#########################
-	MsXlsDb$methods( .check_retention_times = function(id, tab_name, column_name, rt, n) {
-		if (n >= 1  && ! is.null(.self$.observers) && length(.self$.observers) > 0)
-			# Check column only if there is at least one value inside
-			if (sum( ! is.na(rt)) > 0)
-				# Loop on all values
-				for(i in 1:n) {
-					# Check that it's defined
-					if (i > 1 && is.na(rt[[i]]))
-						for (obs in .self$.observers)
-							obs$warning(paste0("Retention times undefined for column ", column_name, " at row ", i + .XLS_PEAKS_ROW_OFFSET, " of tab ", tab_name, " in file ", .self$.get.file(id), "."))
-					else if (i > 1)
-						# Check the value (it must be constant)
-						if (rt[[i-1]] != rt[[i]])
-							for (obs in .self$.observers)
-								obs$error(paste0("Retention times not constant for column ", column_name, " between row ", i - 1 + .XLS_PEAKS_ROW_OFFSET, " and row ", i + .XLS_PEAKS_ROW_OFFSET, "o tab", tab_name, "in file", .self$.get.file(id)))
-				}
-	})
-	####################
-	# GET FILE FROM ID #
-	####################
-	MsXlsDb$methods( .get.file = function(id) {
-		# List files
-		.self$.init.file.list()
-		return( if (id > 0 && id <= length(.self$.files)) .self$.files[id] else NA_character_)
-	})
-	###########
-	# MEM GET #
-	###########
-	# Get database data from memory
-	MsXlsDb$methods( .mem.get = function(molid, field, second.field = NA_character_) {
-		data <- .self$.db[[as.character(molid)]][[field]]
-		if ( ! is.na(second.field))
-			data <- data[[second.field]]
-		return(data)
-	})
-	###########
-	# MEM SET #
-	###########
-	# Set database data into memory
-	MsXlsDb$methods( .mem.set = function(data, molid, field, second.field = NA_character_) {
-		id <- as.character(molid)
-		# Create db
-		if (is.null(.self$.db))
-		    .db <<- list()
-		# Create first level
-		if (is.null(.self$.db[[id]]))
-			.self$.db[[id]] <- list()
-		# Create second level
-		if ( ! is.na(second.field) && is.null(.self$.db[[id]][[field]]))
-			.self$.db[[id]][[field]] <- list()
-		# Store data
-		if (is.na(second.field)) {
-			.self$.db[[id]][[field]] <- data
-		} else {
-			.self$.db[[id]][[field]][[second.field]] <- data
-		}
-	})
-	#################
-	# SEARCH FOR RT #
-	#################
-	# Find molecules matching a certain retention time.
-	# col           A list of chromatographic columns to use.
-	# rt.low        The lower bound of the rt value.
-	# rt.high       The higher bound of the rt value.
-	# mols          A list of molecule IDs to process. If unset, then take all molecules.
-	# Return a data frame with the following columns: id, col, colrt.
-	MsXlsDb$methods( .search.for.rt = function(col, rt.low, rt.high, mols = NULL) {
-		# Use all molecules if no list is provided
-		if (is.null(mols))
-			mols <- .self$getMoleculeIds()
-		results <- data.frame(id = integer(), col = character(), colrt = double(), stringsAsFactors = FALSE)
-		colnames(results) <- c(MSDB.TAG.MOLID, MSDB.TAG.COL, MSDB.TAG.COLRT)
-		# Loop on all molecules
-		for (molid in mols) {
-			no.col <- TRUE
-			for (c in col) {
-				molrts <- .self$getRetentionTimes(molid, c)
-				if ( ! is.null(molrts)) {
-					no.col <- FALSE
-					for (molrt in molrts) {
-						if (molrt >= rt.low && molrt <= rt.high) {
-							r <- nrow(results) + 1
-							results[r, ] <- c(id = molid, col = c, colrt = molrt)
-						}
-					}
-				}
-			}
-			if (no.col) {
-				r <- nrow(results) + 1
-				results[r, c(MSDB.TAG.MOLID)] <- c(id = molid)
-			}
-		}
-		return(results)
-	})
-	############################
-	# EXTRACT ID FROM FILENAME #
-	############################
-	.extract_molecule_id_from_filename <- function(filename) {
-		id <- NA_integer_
-		if ( ! is.na(filename)) {
-			g <- str_match(filename, "N(\\d+)[._-]")
-			if ( ! is.na(g[1,1]))
-				id <- as.numeric(g[1,2])
-		}
-		return(id)
-	}
-} # end of load safe guard

Mercurial > repos > prog > lcmsmatching

comparison MsXlsDb.R @ 6:f86fec07f392 draft default tip