Mercurial > repos > prog > lcmsmatching
comparison MsPeakForestDb.R @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author | prog |
---|---|
date | Wed, 19 Apr 2017 10:00:05 -0400 |
parents | b34c14151f25 |
children |
comparison
equal
deleted
inserted
replaced
4:b34c14151f25 | 5:fb9c0409d85c |
---|---|
1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded | 1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded |
2 | 2 |
3 library(methods) | 3 library(methods) |
4 source('MsDb.R') | 4 source('MsDb.R') |
5 source(file.path('UrlRequestScheduler.R')) | 5 source('UrlRequestScheduler.R') |
6 | 6 |
7 ##################### | 7 ##################### |
8 # CLASS DECLARATION # | 8 # CLASS DECLARATION # |
9 ##################### | 9 ##################### |
10 | 10 |
13 ############### | 13 ############### |
14 # CONSTRUCTOR # | 14 # CONSTRUCTOR # |
15 ############### | 15 ############### |
16 | 16 |
17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { | 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { |
18 | |
19 callSuper(...) | |
18 | 20 |
19 # Check URL | 21 # Check URL |
20 if (is.null(url) || is.na(url)) | 22 if (is.null(url) || is.na(url)) |
21 stop("No URL defined for new MsPeakForestDb instance.") | 23 stop("No URL defined for new MsPeakForestDb instance.") |
22 | 24 |
24 url <- substring(url, nchar(url) - 1) | 26 url <- substring(url, nchar(url) - 1) |
25 .url <<- url | 27 .url <<- url |
26 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) | 28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) |
27 .self$.url.scheduler$setVerbose(1L) | 29 .self$.url.scheduler$setVerbose(1L) |
28 .token <<- token | 30 .token <<- token |
29 | 31 .rt.unit <<- MSDB.RTUNIT.MIN |
30 callSuper(...) | |
31 }) | 32 }) |
32 | 33 |
33 ########### | 34 ########### |
34 # GET URL # | 35 # GET URL # |
35 ########### | 36 ########### |
44 url <- paste(.self$.url, url, sep = '/') | 45 url <- paste(.self$.url, url, sep = '/') |
45 | 46 |
46 # Add token | 47 # Add token |
47 if ( ! is.na(.self$.token)) | 48 if ( ! is.na(.self$.token)) |
48 params <- c(params, token = .self$.token) | 49 params <- c(params, token = .self$.token) |
49 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | |
50 | 50 |
51 # Get URL | 51 # Get URL |
52 content <- .self$.url.scheduler$getUrl(url = url, params = params) | 52 content <- .self$.url.scheduler$getUrl(url = url, params = params) |
53 | 53 |
54 if (ret.type == 'json') { | 54 if (ret.type == 'json') { |
55 | 55 |
56 library(RJSONIO) | 56 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) |
57 | 57 |
58 res <- fromJSON(content, nullValue = NA) | 58 if (is.null(res)) { |
59 | |
60 if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) { | |
61 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | 59 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') |
62 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) | 60 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) |
63 } | 61 } |
64 } else { | 62 } else { |
65 if (ret.type == 'integer') { | 63 if (ret.type == 'integer') { |
66 if (grepl('^[0-9]+$', content, perl = TRUE)) | 64 if (grepl('^[0-9]+$', content, perl = TRUE)) |
67 res <- as.integer(content) | 65 res <- as.integer(content) |
68 else { | 66 else { |
69 library(RJSONIO) | 67 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) |
70 res <- fromJSON(content, nullValue = NA) | |
71 } | 68 } |
72 } | 69 } |
73 } | 70 } |
74 | 71 |
75 return(res) | 72 return(res) |
139 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) | 136 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) |
140 if (class(spectra) == 'list' && length(spectra) > 0) { | 137 if (class(spectra) == 'list' && length(spectra) > 0) { |
141 for (s in spectra) | 138 for (s in spectra) |
142 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { | 139 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { |
143 ret.time <- (s$RTmin + s$RTmax) / 2 | 140 ret.time <- (s$RTmin + s$RTmax) / 2 |
141 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds | |
144 c <- s$liquidChromatography$columnCode | 142 c <- s$liquidChromatography$columnCode |
145 if (c %in% names(rt)) { | 143 if (c %in% names(rt)) { |
146 if ( ! ret.time %in% rt[[c]]) | 144 if ( ! ret.time %in% rt[[c]]) |
147 rt[[c]] <- c(rt[[c]], ret.time) | 145 rt[[c]] <- c(rt[[c]], ret.time) |
148 } else | 146 } else |
260 | 258 |
261 # Build result data frame | 259 # Build result data frame |
262 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) | 260 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) |
263 for (x in spectra) { | 261 for (x in spectra) { |
264 if ('source' %in% names(x) && is.list(x$source)) | 262 if ('source' %in% names(x) && is.list(x$source)) |
265 mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_ | 263 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_ |
266 comp <- if ('composition' %in% names(x)) x$composition else NA_character_ | 264 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_ |
267 attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_ | 265 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_ |
268 if ('listOfCompounds' %in% names(x$source)) { | 266 if ('listOfCompounds' %in% names(x$source)) { |
269 molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '') | 267 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') |
270 molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '') | 268 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '') |
271 mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '') | 269 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0) |
272 inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '') | 270 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '') |
273 inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '') | 271 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '') |
274 chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '') | 272 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '') |
275 chebi[chebi == 'CHEBI:null'] <- NA_character_ | 273 chebi[chebi == 'CHEBI:null'] <- NA_character_ |
276 hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '') | 274 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '') |
277 hmdb[hmdb == 'HMDBnull'] <- NA_character_ | 275 hmdb[hmdb == 'HMDBnull'] <- NA_character_ |
278 kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '') | 276 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '') |
279 pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '') | 277 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '') |
280 if (length(molids) > 0 && length(molids) == length(molnames)) | 278 if (length(molids) > 0 && length(molids) == length(molnames)) |
281 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) | 279 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) |
282 } | 280 } |
283 } | 281 } |
284 | 282 |
286 if ( ! is.null(rt.low) && ! is.null(rt.high)) { | 284 if ( ! is.null(rt.low) && ! is.null(rt.high)) { |
287 | 285 |
288 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) | 286 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) |
289 | 287 |
290 if (nrow(results) > 0) { | 288 if (nrow(results) > 0) { |
289 | |
291 # Build URL for rt search | 290 # Build URL for rt search |
292 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) | 291 url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60) |
293 params <- NULL | 292 params <- NULL |
294 if ( ! is.null(col)) | 293 if ( ! is.null(col)) |
295 params <- c(columns = paste(col, collapse = ',')) | 294 params <- c(columns = paste(col, collapse = ',')) |
296 | 295 |
297 # Run query | 296 # Run query |
298 rtspectra <- .self$.get.url(url = url, params = params) | 297 rtspectra <- .self$.get.url(url = url, params = params) |
299 | 298 |
300 # Get compound/molecule IDs | 299 # Get compound/molecule IDs |
301 for (x in spectra) | 300 for (x in rtspectra) |
302 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''), | 301 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) { |
303 MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode), | 302 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') |
304 MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2, | 303 if (length(molids) > 0) { |
305 stringsAsFactors = FALSE)) | 304 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_ |
305 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_ | |
306 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_ | |
307 colrt <- (rtmin + rtmax) / 2 | |
308 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids, | |
309 MSDB.TAG.COL = col, | |
310 MSDB.TAG.COLRT = colrt * 60, | |
311 stringsAsFactors = FALSE)) | |
312 } | |
313 } | |
306 } | 314 } |
307 | 315 |
308 # Add retention times and column info | 316 # Add retention times and column info |
309 results <- merge(results, rt.res) | 317 results <- merge(results, rt.res) |
310 } | 318 } |