diff MsPeakForestDb.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children 253d531a0193
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MsPeakForestDb.R	Tue Jul 12 12:02:37 2016 -0400
@@ -0,0 +1,293 @@
+if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded
+
+	library(methods)
+	source('MsDb.R')
+	source(file.path('UrlRequestScheduler.R'))
+
+	#####################
+	# CLASS DECLARATION #
+	#####################
+	
+	MsPeakForestDb <- setRefClass("MsPeakForestDb", contains = "MsDb", fields = list(.url = "character", .url.scheduler = "ANY"))
+	
+	###############
+	# CONSTRUCTOR #
+	###############
+	
+	MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, ...) {
+
+		# Check URL
+		if (is.null(url) || is.na(url))
+		    stop("No URL defined for new MsPeakForestDb instance.")
+
+		.url <<- url
+		.url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent)
+		.self$.url.scheduler$setVerbose(1L)
+
+		callSuper(...)
+	})
+
+	###########
+	# GET URL #
+	###########
+
+	MsPeakForestDb$methods( .get.url = function(url, params = NULL, ret.type = 'json') {
+
+		res <- NULL
+
+		content <- .self$.url.scheduler$getUrl(url = url, params = params)
+
+		if (ret.type == 'json') {
+
+			library(RJSONIO)
+
+			res <- fromJSON(content, nullValue = NA)
+
+			if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) {
+				param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '')
+				stop(paste0("Failed to run web service. URL was \"", url, param.str, "\"."))
+			}
+		} else {
+			if (ret.type == 'integer') {
+				if (grepl('^[0-9]+$', content, perl = TRUE))
+					res <- as.integer(content)
+				else {
+					library(RJSONIO)
+					res <- fromJSON(content, nullValue = NA)
+				}
+			}
+		}
+
+		return(res)
+	})
+
+	####################
+	# GET MOLECULE IDS #
+	####################
+	
+	MsPeakForestDb$methods( getMoleculeIds = function() {
+
+		ids <- as.character(.self$.get.url(url = paste0(.self$.url, 'compounds/all/ids')))
+
+		return(ids)
+	})
+
+	####################
+	# GET NB MOLECULES #
+	####################
+	
+	MsPeakForestDb$methods( getNbMolecules = function() {
+
+		n <- .self$.get.url(url = paste0(.self$.url, 'compounds/all/count'), ret.type = 'integer')
+
+		return(n)
+	})
+	
+	###############################
+	# GET CHROMATOGRAPHIC COLUMNS #
+	###############################
+	
+	MsPeakForestDb$methods( getChromCol = function(molid = NULL) {
+
+		# Set URL
+		url <- paste0(.self$.url, 'metadata/lc/list-code-columns')
+		params <- NULL
+		if ( ! is.null(molid))
+			params <- list(molids = paste(molid, collapse = ','))
+
+		# Call webservice
+		wscols <- .self$.get.url(url = url, params = params)
+
+		# Build data frame
+		cols <- data.frame(id = character(), title = character())
+		for(id in names(wscols))
+			cols <- rbind(cols, data.frame(id = id, title = wscols[[id]]$name, stringsAsFactors = FALSE))
+
+		return(cols)
+	})
+	
+	#######################
+	# GET RETENTION TIMES #
+	#######################
+	
+	MsPeakForestDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
+
+		if (is.null(molid) || is.na(molid) || length(molid)  != 1)
+			stop("The parameter molid must consist only in a single value.")
+
+		rt <- list()
+
+		# Set URL
+		url <- paste0(.self$.url, 'spectra/lcms/search')
+		params <- NULL
+		if ( ! is.null(molid))
+			params <- list(molids = paste(molid, collapse = ','))
+
+		# Call webservice
+		spectra <- .self$.get.url(url = url, params = params)
+		if (class(spectra) == 'list' && length(spectra) > 0) {
+			for (s in spectra)
+				if (is.na(col) || s$liquidChromatography$columnCode %in% col) {
+					ret.time <- (s$RTmin + s$RTmax) / 2
+					c <- s$liquidChromatography$columnCode
+					if (c %in% names(rt)) {
+						if ( ! ret.time %in% rt[[c]])
+							rt[[c]] <- c(rt[[c]], ret.time)
+					} else
+						rt[[c]] <- ret.time
+				}
+		}
+
+		return(rt)
+	})
+	
+	#####################
+	# GET MOLECULE NAME #
+	#####################
+
+	MsPeakForestDb$methods( getMoleculeName = function(molid) {
+
+		library(RJSONIO)
+
+		if (is.null(molid))
+			return(NA_character_)
+
+		# Initialize names
+		names <- as.character(molid)
+
+		# Get non NA values
+		non.na.molid <- molid[ ! is.na(molid)]
+
+		if (length(non.na.molid) > 0) {
+			# Set URL
+			url <- paste0(.self$.url, 'compounds/all/names')
+			params <- c(molids = paste(non.na.molid, collapse = ','))
+
+			# Call webservice
+			names[ ! is.na(molid)] <- .self$.get.url(url = url, params = params)
+		}
+
+		return(names)
+	})
+
+	################
+	# FIND BY NAME #
+	################
+
+	MsPeakForestDb$methods( findByName = function(name) {
+
+		if (is.null(name))
+			return(NA_character_)
+
+		ids <- list()
+
+		for (n in name) {
+
+			if (is.na(n))
+				ids <- c(ids, NA_character_)
+
+			else {
+				url <- paste0(.self$.url, 'search/compounds/name/', curlEscape(n))
+				compounds <- .self$.get.url(url = url)$compoundNames
+				ids <- c(ids, list(vapply(compounds, function(c) as.character(c$compound$id), FUN.VALUE = '')))
+			}
+		}
+
+		return(ids)
+	})
+
+	#################
+	# GET NB PEAKS #
+	#################
+	
+	MsPeakForestDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {
+
+		# Build URL
+		url <- paste0(.self$.url, 'spectra/lcms/count-peaks')
+		params <- NULL
+		if ( ! is.na(type))
+			params <- c(params, mode = if (type == MSDB.TAG.POS) 'pos' else 'neg')
+		if ( ! is.null(molid) && (length(molid) > 1 || ! is.na(molid)))
+			params <- c(params, molids = paste(molid, collapse = ','))
+
+		# Run request
+		n <- .self$.get.url(url = url, params = params, ret.type = 'integer')
+
+		return(sum(n))
+	})
+	
+	#################
+	# GET MZ VALUES #
+	#################
+	
+	MsPeakForestDb$methods( getMzValues = function(mode = NULL) {
+
+		# Build URL
+		url <- paste0(.self$.url, 'spectra/lcms/peaks/list-mz')
+
+		# Query params
+		params <- NULL
+		if ( ! is.null(mode))
+			params <- c(params, mode = if (mode == MSDB.TAG.POS) 'positive' else 'negative')
+
+		# Get MZ valuels
+		mz <- .self$.get.url(url = url, params = params)
+
+		return(mz)
+	})
+
+	##############################
+	# DO SEARCH FOR MZ RT BOUNDS #
+	##############################
+
+	MsPeakForestDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {
+
+		# Build URL for mz search
+		url <- paste0(.self$.url, 'spectra/lcms/peaks/get-range/', mz.low, '/', mz.high)
+
+		# Get spectra
+		spectra <- .self$.get.url(url = url)
+
+		# Build result data frame
+		results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character())
+		for (x in spectra)
+			results <- rbind(results, data.frame(MSDB.TAG.MOLID = vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''),
+			                                     MSDB.TAG.MOLNAMES = vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = ''),
+												 MSDB.TAG.MZTHEO = as.numeric(x$theoricalMass),
+												 MSDB.TAG.COMP = as.character(x$composition),
+												 MSDB.TAG.ATTR = as.character(x$attribution),
+												 stringsAsFactors = FALSE))
+
+		# RT search
+		if ( ! is.null(rt.low) && ! is.null(rt.high)) {
+
+			rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric())
+
+			if (nrow(results) > 0) {
+				# Build URL for rt search
+				url <- paste0(.self$.url, 'spectra/lcms/range-rt-min/', rt.low, '/', rt.high)
+				params <- NULL
+				if ( ! is.null(col))
+					params <- c(columns = paste(col, collapse = ','))
+
+				# Run query
+				rtspectra <- .self$.get.url(url = url, params = params)
+
+				# Get compound/molecule IDs
+				for (x in spectra)
+					rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''),
+				                                   	   MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode),
+				                                   	   MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2,
+					                                   	   stringsAsFactors = FALSE))
+			}	
+
+			# Add retention times and column info
+			results <- merge(results, rt.res)
+		}
+		
+		# Rename columns with proper names
+		colnames(results) <- vapply(colnames(results), function(s) eval(parse(text=s)), FUN.VALUE = '')
+
+		return(results)
+	})
+}