comparison MsPeakForestDb.R @ 5:fb9c0409d85c draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author prog
date Wed, 19 Apr 2017 10:00:05 -0400
parents b34c14151f25
children
comparison
equal deleted inserted replaced
4:b34c14151f25 5:fb9c0409d85c
1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded 1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded
2 2
3 library(methods) 3 library(methods)
4 source('MsDb.R') 4 source('MsDb.R')
5 source(file.path('UrlRequestScheduler.R')) 5 source('UrlRequestScheduler.R')
6 6
7 ##################### 7 #####################
8 # CLASS DECLARATION # 8 # CLASS DECLARATION #
9 ##################### 9 #####################
10 10
13 ############### 13 ###############
14 # CONSTRUCTOR # 14 # CONSTRUCTOR #
15 ############### 15 ###############
16 16
17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) {
18
19 callSuper(...)
18 20
19 # Check URL 21 # Check URL
20 if (is.null(url) || is.na(url)) 22 if (is.null(url) || is.na(url))
21 stop("No URL defined for new MsPeakForestDb instance.") 23 stop("No URL defined for new MsPeakForestDb instance.")
22 24
24 url <- substring(url, nchar(url) - 1) 26 url <- substring(url, nchar(url) - 1)
25 .url <<- url 27 .url <<- url
26 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) 28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent)
27 .self$.url.scheduler$setVerbose(1L) 29 .self$.url.scheduler$setVerbose(1L)
28 .token <<- token 30 .token <<- token
29 31 .rt.unit <<- MSDB.RTUNIT.MIN
30 callSuper(...)
31 }) 32 })
32 33
33 ########### 34 ###########
34 # GET URL # 35 # GET URL #
35 ########### 36 ###########
44 url <- paste(.self$.url, url, sep = '/') 45 url <- paste(.self$.url, url, sep = '/')
45 46
46 # Add token 47 # Add token
47 if ( ! is.na(.self$.token)) 48 if ( ! is.na(.self$.token))
48 params <- c(params, token = .self$.token) 49 params <- c(params, token = .self$.token)
49 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '')
50 50
51 # Get URL 51 # Get URL
52 content <- .self$.url.scheduler$getUrl(url = url, params = params) 52 content <- .self$.url.scheduler$getUrl(url = url, params = params)
53 53
54 if (ret.type == 'json') { 54 if (ret.type == 'json') {
55 55
56 library(RJSONIO) 56 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
57 57
58 res <- fromJSON(content, nullValue = NA) 58 if (is.null(res)) {
59
60 if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) {
61 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') 59 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '')
62 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) 60 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\"."))
63 } 61 }
64 } else { 62 } else {
65 if (ret.type == 'integer') { 63 if (ret.type == 'integer') {
66 if (grepl('^[0-9]+$', content, perl = TRUE)) 64 if (grepl('^[0-9]+$', content, perl = TRUE))
67 res <- as.integer(content) 65 res <- as.integer(content)
68 else { 66 else {
69 library(RJSONIO) 67 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
70 res <- fromJSON(content, nullValue = NA)
71 } 68 }
72 } 69 }
73 } 70 }
74 71
75 return(res) 72 return(res)
139 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) 136 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params)
140 if (class(spectra) == 'list' && length(spectra) > 0) { 137 if (class(spectra) == 'list' && length(spectra) > 0) {
141 for (s in spectra) 138 for (s in spectra)
142 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { 139 if (is.na(col) || s$liquidChromatography$columnCode %in% col) {
143 ret.time <- (s$RTmin + s$RTmax) / 2 140 ret.time <- (s$RTmin + s$RTmax) / 2
141 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds
144 c <- s$liquidChromatography$columnCode 142 c <- s$liquidChromatography$columnCode
145 if (c %in% names(rt)) { 143 if (c %in% names(rt)) {
146 if ( ! ret.time %in% rt[[c]]) 144 if ( ! ret.time %in% rt[[c]])
147 rt[[c]] <- c(rt[[c]], ret.time) 145 rt[[c]] <- c(rt[[c]], ret.time)
148 } else 146 } else
260 258
261 # Build result data frame 259 # Build result data frame
262 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) 260 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character())
263 for (x in spectra) { 261 for (x in spectra) {
264 if ('source' %in% names(x) && is.list(x$source)) 262 if ('source' %in% names(x) && is.list(x$source))
265 mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_ 263 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_
266 comp <- if ('composition' %in% names(x)) x$composition else NA_character_ 264 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_
267 attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_ 265 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_
268 if ('listOfCompounds' %in% names(x$source)) { 266 if ('listOfCompounds' %in% names(x$source)) {
269 molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '') 267 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
270 molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '') 268 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '')
271 mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '') 269 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0)
272 inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '') 270 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '')
273 inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '') 271 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '')
274 chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '') 272 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '')
275 chebi[chebi == 'CHEBI:null'] <- NA_character_ 273 chebi[chebi == 'CHEBI:null'] <- NA_character_
276 hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '') 274 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '')
277 hmdb[hmdb == 'HMDBnull'] <- NA_character_ 275 hmdb[hmdb == 'HMDBnull'] <- NA_character_
278 kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '') 276 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '')
279 pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '') 277 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '')
280 if (length(molids) > 0 && length(molids) == length(molnames)) 278 if (length(molids) > 0 && length(molids) == length(molnames))
281 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) 279 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE))
282 } 280 }
283 } 281 }
284 282
286 if ( ! is.null(rt.low) && ! is.null(rt.high)) { 284 if ( ! is.null(rt.low) && ! is.null(rt.high)) {
287 285
288 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) 286 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric())
289 287
290 if (nrow(results) > 0) { 288 if (nrow(results) > 0) {
289
291 # Build URL for rt search 290 # Build URL for rt search
292 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) 291 url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60)
293 params <- NULL 292 params <- NULL
294 if ( ! is.null(col)) 293 if ( ! is.null(col))
295 params <- c(columns = paste(col, collapse = ',')) 294 params <- c(columns = paste(col, collapse = ','))
296 295
297 # Run query 296 # Run query
298 rtspectra <- .self$.get.url(url = url, params = params) 297 rtspectra <- .self$.get.url(url = url, params = params)
299 298
300 # Get compound/molecule IDs 299 # Get compound/molecule IDs
301 for (x in spectra) 300 for (x in rtspectra)
302 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''), 301 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) {
303 MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode), 302 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
304 MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2, 303 if (length(molids) > 0) {
305 stringsAsFactors = FALSE)) 304 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_
305 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_
306 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_
307 colrt <- (rtmin + rtmax) / 2
308 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids,
309 MSDB.TAG.COL = col,
310 MSDB.TAG.COLRT = colrt * 60,
311 stringsAsFactors = FALSE))
312 }
313 }
306 } 314 }
307 315
308 # Add retention times and column info 316 # Add retention times and column info
309 results <- merge(results, rt.res) 317 results <- merge(results, rt.res)
310 } 318 }