Mercurial > repos > prog > lcmsmatching
comparison MsPeakForestDb.R @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
| author | prog |
|---|---|
| date | Wed, 19 Apr 2017 10:00:05 -0400 |
| parents | b34c14151f25 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:b34c14151f25 | 5:fb9c0409d85c |
|---|---|
| 1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded | 1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded |
| 2 | 2 |
| 3 library(methods) | 3 library(methods) |
| 4 source('MsDb.R') | 4 source('MsDb.R') |
| 5 source(file.path('UrlRequestScheduler.R')) | 5 source('UrlRequestScheduler.R') |
| 6 | 6 |
| 7 ##################### | 7 ##################### |
| 8 # CLASS DECLARATION # | 8 # CLASS DECLARATION # |
| 9 ##################### | 9 ##################### |
| 10 | 10 |
| 13 ############### | 13 ############### |
| 14 # CONSTRUCTOR # | 14 # CONSTRUCTOR # |
| 15 ############### | 15 ############### |
| 16 | 16 |
| 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { | 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { |
| 18 | |
| 19 callSuper(...) | |
| 18 | 20 |
| 19 # Check URL | 21 # Check URL |
| 20 if (is.null(url) || is.na(url)) | 22 if (is.null(url) || is.na(url)) |
| 21 stop("No URL defined for new MsPeakForestDb instance.") | 23 stop("No URL defined for new MsPeakForestDb instance.") |
| 22 | 24 |
| 24 url <- substring(url, nchar(url) - 1) | 26 url <- substring(url, nchar(url) - 1) |
| 25 .url <<- url | 27 .url <<- url |
| 26 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) | 28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) |
| 27 .self$.url.scheduler$setVerbose(1L) | 29 .self$.url.scheduler$setVerbose(1L) |
| 28 .token <<- token | 30 .token <<- token |
| 29 | 31 .rt.unit <<- MSDB.RTUNIT.MIN |
| 30 callSuper(...) | |
| 31 }) | 32 }) |
| 32 | 33 |
| 33 ########### | 34 ########### |
| 34 # GET URL # | 35 # GET URL # |
| 35 ########### | 36 ########### |
| 44 url <- paste(.self$.url, url, sep = '/') | 45 url <- paste(.self$.url, url, sep = '/') |
| 45 | 46 |
| 46 # Add token | 47 # Add token |
| 47 if ( ! is.na(.self$.token)) | 48 if ( ! is.na(.self$.token)) |
| 48 params <- c(params, token = .self$.token) | 49 params <- c(params, token = .self$.token) |
| 49 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | |
| 50 | 50 |
| 51 # Get URL | 51 # Get URL |
| 52 content <- .self$.url.scheduler$getUrl(url = url, params = params) | 52 content <- .self$.url.scheduler$getUrl(url = url, params = params) |
| 53 | 53 |
| 54 if (ret.type == 'json') { | 54 if (ret.type == 'json') { |
| 55 | 55 |
| 56 library(RJSONIO) | 56 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) |
| 57 | 57 |
| 58 res <- fromJSON(content, nullValue = NA) | 58 if (is.null(res)) { |
| 59 | |
| 60 if (class(res) == 'list' && 'success' %in% names(res) && res$success == FALSE) { | |
| 61 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | 59 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') |
| 62 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) | 60 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) |
| 63 } | 61 } |
| 64 } else { | 62 } else { |
| 65 if (ret.type == 'integer') { | 63 if (ret.type == 'integer') { |
| 66 if (grepl('^[0-9]+$', content, perl = TRUE)) | 64 if (grepl('^[0-9]+$', content, perl = TRUE)) |
| 67 res <- as.integer(content) | 65 res <- as.integer(content) |
| 68 else { | 66 else { |
| 69 library(RJSONIO) | 67 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) |
| 70 res <- fromJSON(content, nullValue = NA) | |
| 71 } | 68 } |
| 72 } | 69 } |
| 73 } | 70 } |
| 74 | 71 |
| 75 return(res) | 72 return(res) |
| 139 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) | 136 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) |
| 140 if (class(spectra) == 'list' && length(spectra) > 0) { | 137 if (class(spectra) == 'list' && length(spectra) > 0) { |
| 141 for (s in spectra) | 138 for (s in spectra) |
| 142 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { | 139 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { |
| 143 ret.time <- (s$RTmin + s$RTmax) / 2 | 140 ret.time <- (s$RTmin + s$RTmax) / 2 |
| 141 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds | |
| 144 c <- s$liquidChromatography$columnCode | 142 c <- s$liquidChromatography$columnCode |
| 145 if (c %in% names(rt)) { | 143 if (c %in% names(rt)) { |
| 146 if ( ! ret.time %in% rt[[c]]) | 144 if ( ! ret.time %in% rt[[c]]) |
| 147 rt[[c]] <- c(rt[[c]], ret.time) | 145 rt[[c]] <- c(rt[[c]], ret.time) |
| 148 } else | 146 } else |
| 260 | 258 |
| 261 # Build result data frame | 259 # Build result data frame |
| 262 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) | 260 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) |
| 263 for (x in spectra) { | 261 for (x in spectra) { |
| 264 if ('source' %in% names(x) && is.list(x$source)) | 262 if ('source' %in% names(x) && is.list(x$source)) |
| 265 mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_ | 263 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_ |
| 266 comp <- if ('composition' %in% names(x)) x$composition else NA_character_ | 264 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_ |
| 267 attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_ | 265 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_ |
| 268 if ('listOfCompounds' %in% names(x$source)) { | 266 if ('listOfCompounds' %in% names(x$source)) { |
| 269 molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '') | 267 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') |
| 270 molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '') | 268 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '') |
| 271 mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '') | 269 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0) |
| 272 inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '') | 270 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '') |
| 273 inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '') | 271 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '') |
| 274 chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '') | 272 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '') |
| 275 chebi[chebi == 'CHEBI:null'] <- NA_character_ | 273 chebi[chebi == 'CHEBI:null'] <- NA_character_ |
| 276 hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '') | 274 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '') |
| 277 hmdb[hmdb == 'HMDBnull'] <- NA_character_ | 275 hmdb[hmdb == 'HMDBnull'] <- NA_character_ |
| 278 kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '') | 276 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '') |
| 279 pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '') | 277 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '') |
| 280 if (length(molids) > 0 && length(molids) == length(molnames)) | 278 if (length(molids) > 0 && length(molids) == length(molnames)) |
| 281 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) | 279 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) |
| 282 } | 280 } |
| 283 } | 281 } |
| 284 | 282 |
| 286 if ( ! is.null(rt.low) && ! is.null(rt.high)) { | 284 if ( ! is.null(rt.low) && ! is.null(rt.high)) { |
| 287 | 285 |
| 288 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) | 286 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) |
| 289 | 287 |
| 290 if (nrow(results) > 0) { | 288 if (nrow(results) > 0) { |
| 289 | |
| 291 # Build URL for rt search | 290 # Build URL for rt search |
| 292 url <- paste0('spectra/lcms/range-rt-min/', rt.low, '/', rt.high) | 291 url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60) |
| 293 params <- NULL | 292 params <- NULL |
| 294 if ( ! is.null(col)) | 293 if ( ! is.null(col)) |
| 295 params <- c(columns = paste(col, collapse = ',')) | 294 params <- c(columns = paste(col, collapse = ',')) |
| 296 | 295 |
| 297 # Run query | 296 # Run query |
| 298 rtspectra <- .self$.get.url(url = url, params = params) | 297 rtspectra <- .self$.get.url(url = url, params = params) |
| 299 | 298 |
| 300 # Get compound/molecule IDs | 299 # Get compound/molecule IDs |
| 301 for (x in spectra) | 300 for (x in rtspectra) |
| 302 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = vapply(x$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''), | 301 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) { |
| 303 MSDB.TAG.COL = as.character(x$liquidChromatography$columnCode), | 302 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') |
| 304 MSDB.TAG.COLRT = (as.numeric(x$RTmin) + as.numeric(x$RTmax)) / 2, | 303 if (length(molids) > 0) { |
| 305 stringsAsFactors = FALSE)) | 304 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_ |
| 305 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_ | |
| 306 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_ | |
| 307 colrt <- (rtmin + rtmax) / 2 | |
| 308 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids, | |
| 309 MSDB.TAG.COL = col, | |
| 310 MSDB.TAG.COLRT = colrt * 60, | |
| 311 stringsAsFactors = FALSE)) | |
| 312 } | |
| 313 } | |
| 306 } | 314 } |
| 307 | 315 |
| 308 # Add retention times and column info | 316 # Add retention times and column info |
| 309 results <- merge(results, rt.res) | 317 results <- merge(results, rt.res) |
| 310 } | 318 } |
