Mercurial > repos > prog > lcmsmatching
diff MsPeakForestDb.R @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author | prog |
---|---|
date | Wed, 19 Apr 2017 10:00:05 -0400 |
parents | b34c14151f25 |
children |
line wrap: on
line diff
--- a/MsPeakForestDb.R Tue Mar 14 12:40:22 2017 -0400 +++ b/MsPeakForestDb.R Wed Apr 19 10:00:05 2017 -0400 @@ -2,7 +2,7 @@ library(methods) source('MsDb.R') - source(file.path('UrlRequestScheduler.R')) + source('UrlRequestScheduler.R') ##################### # CLASS DECLARATION # @@ -16,6 +16,8 @@ MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { + callSuper(...) + # Check URL if (is.null(url) || is.na(url)) stop("No URL defined for new MsPeakForestDb instance.") @@ -26,8 +28,7 @@ .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) .self$.url.scheduler$setVerbose(1L) .token <<- token - - callSuper(...) + .rt.unit <<- MSDB.RTUNIT.MIN }) ########### @@ -46,18 +47,15 @@ # Add token if ( ! is.na(.self$.token)) params <- c(params, token = .self$.token) - param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') # Get URL content <- .self$.url.scheduler$getUrl(url = url, params = params) if (ret.type == 'json') { - library(RJSONIO) + res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) - res <- fromJSON(content, nullValue = NA) - - if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) { + if (is.null(res)) { param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) } @@ -66,8 +64,7 @@ if (grepl('^[0-9]+$', content, perl = TRUE)) res <- as.integer(content) else { - library(RJSONIO) - res <- fromJSON(content, nullValue = NA) + res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) } } } @@ -141,6 +138,7 @@ for (s in spectra) if (is.na(col) || s$liquidChromatography$columnCode %in% col) { ret.time <- (s$RTmin + s$RTmax) / 2 + ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds c <- s$liquidChromatography$columnCode if (c %in% names(rt)) { if ( ! ret.time %in% rt[[c]]) @@ -262,21 +260,21 @@ results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) for (x in spectra) { if ('source' %in% names(x) && is.list(x$source)) - mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_ - comp <- if ('composition' %in% names(x)) x$composition else NA_character_ - attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_ + mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_ + comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_ + attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_ if ('listOfCompounds' %in% names(x$source)) { - molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '') - molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '') - mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '') - inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '') - inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '') - chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '') + molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') + molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '') + mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0) + inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '') + inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '') + chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '') chebi[chebi == 'CHEBI:null'] <- NA_character_ - hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '') + hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '') hmdb[hmdb == 'HMDBnull'] <- NA_character_ - kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '') - pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '') + kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '') + pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '') if (length(molids) > 0 && length(molids) == length(molnames)) results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) } @@ -288,8 +286,9 @@ rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) if (nrow(results) > 0) { + # Build URL for rt search - url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) + url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60) params <- NULL if ( ! is.null(col)) params <- c(columns = paste(col, collapse = ',')) @@ -298,11 +297,20 @@ rtspectra <- .self$.get.url(url = url, params = params) # Get compound/molecule IDs - for (x in spectra) - rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''), - MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode), - MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2, - stringsAsFactors = FALSE)) + for (x in rtspectra) + if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) { + molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') + if (length(molids) > 0) { + col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_ + rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_ + rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_ + colrt <- (rtmin + rtmax) / 2 + rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids, + MSDB.TAG.COL = col, + MSDB.TAG.COLRT = colrt * 60, + stringsAsFactors = FALSE)) + } + } } # Add retention times and column info