Mercurial > repos > prog > lcmsmatching
comparison MsPeakForestDb.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
| author | prog |
|---|---|
| date | Fri, 22 Feb 2019 16:04:22 -0500 |
| parents | fb9c0409d85c |
| children |
comparison
equal
deleted
inserted
replaced
| 5:fb9c0409d85c | 6:f86fec07f392 |
|---|---|
| 1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded | |
| 2 | |
| 3 library(methods) | |
| 4 source('MsDb.R') | |
| 5 source('UrlRequestScheduler.R') | |
| 6 | |
| 7 ##################### | |
| 8 # CLASS DECLARATION # | |
| 9 ##################### | |
| 10 | |
| 11 MsPeakForestDb <- setRefClass("MsPeakForestDb", contains = "MsDb", fields = list(.url = "character", .url.scheduler = "ANY", .token = "character")) | |
| 12 | |
| 13 ############### | |
| 14 # CONSTRUCTOR # | |
| 15 ############### | |
| 16 | |
| 17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { | |
| 18 | |
| 19 callSuper(...) | |
| 20 | |
| 21 # Check URL | |
| 22 if (is.null(url) || is.na(url)) | |
| 23 stop("No URL defined for new MsPeakForestDb instance.") | |
| 24 | |
| 25 if (substring(url, nchar(url) - 1, 1) == '/') | |
| 26 url <- substring(url, nchar(url) - 1) | |
| 27 .url <<- url | |
| 28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) | |
| 29 .self$.url.scheduler$setVerbose(1L) | |
| 30 .token <<- token | |
| 31 .rt.unit <<- MSDB.RTUNIT.MIN | |
| 32 }) | |
| 33 | |
| 34 ########### | |
| 35 # GET URL # | |
| 36 ########### | |
| 37 | |
| 38 MsPeakForestDb$methods( .get.url = function(url, params = NULL, ret.type = 'json') { | |
| 39 | |
| 40 res <- NULL | |
| 41 | |
| 42 # Add url prefix | |
| 43 if (substring(url, 1, 1) == '/') | |
| 44 url <- substring(url, 2) | |
| 45 url <- paste(.self$.url, url, sep = '/') | |
| 46 | |
| 47 # Add token | |
| 48 if ( ! is.na(.self$.token)) | |
| 49 params <- c(params, token = .self$.token) | |
| 50 | |
| 51 # Get URL | |
| 52 content <- .self$.url.scheduler$getUrl(url = url, params = params) | |
| 53 | |
| 54 if (ret.type == 'json') { | |
| 55 | |
| 56 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) | |
| 57 | |
| 58 if (is.null(res)) { | |
| 59 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | |
| 60 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) | |
| 61 } | |
| 62 } else { | |
| 63 if (ret.type == 'integer') { | |
| 64 if (grepl('^[0-9]+$', content, perl = TRUE)) | |
| 65 res <- as.integer(content) | |
| 66 else { | |
| 67 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) | |
| 68 } | |
| 69 } | |
| 70 } | |
| 71 | |
| 72 return(res) | |
| 73 }) | |
| 74 | |
| 75 #################### | |
| 76 # GET MOLECULE IDS # | |
| 77 #################### | |
| 78 | |
| 79 MsPeakForestDb$methods( getMoleculeIds = function() { | |
| 80 | |
| 81 ids <- as.character(.self$.get.url(url = 'compounds/all/ids')) | |
| 82 | |
| 83 return(ids) | |
| 84 }) | |
| 85 | |
| 86 #################### | |
| 87 # GET NB MOLECULES # | |
| 88 #################### | |
| 89 | |
| 90 MsPeakForestDb$methods( getNbMolecules = function() { | |
| 91 | |
| 92 n <- .self$.get.url(url = 'compounds/all/count', ret.type = 'integer') | |
| 93 | |
| 94 return(n) | |
| 95 }) | |
| 96 | |
| 97 ############################### | |
| 98 # GET CHROMATOGRAPHIC COLUMNS # | |
| 99 ############################### | |
| 100 | |
| 101 MsPeakForestDb$methods( getChromCol = function(molid = NULL) { | |
| 102 | |
| 103 # Set URL | |
| 104 params <- NULL | |
| 105 if ( ! is.null(molid)) | |
| 106 params <- list(molids = paste(molid, collapse = ',')) | |
| 107 | |
| 108 # Call webservice | |
| 109 wscols <- .self$.get.url(url = 'metadata/lc/list-code-columns', params = params) | |
| 110 | |
| 111 # Build data frame | |
| 112 cols <- data.frame(id = character(), title = character()) | |
| 113 for(id in names(wscols)) | |
| 114 cols <- rbind(cols, data.frame(id = id, title = wscols[[id]]$name, stringsAsFactors = FALSE)) | |
| 115 | |
| 116 return(cols) | |
| 117 }) | |
| 118 | |
| 119 ####################### | |
| 120 # GET RETENTION TIMES # | |
| 121 ####################### | |
| 122 | |
| 123 MsPeakForestDb$methods( getRetentionTimes = function(molid, col = NA_character_) { | |
| 124 | |
| 125 if (is.null(molid) || is.na(molid) || length(molid) != 1) | |
| 126 stop("The parameter molid must consist only in a single value.") | |
| 127 | |
| 128 rt <- list() | |
| 129 | |
| 130 # Set URL | |
| 131 params <- NULL | |
| 132 if ( ! is.null(molid)) | |
| 133 params <- list(molids = paste(molid, collapse = ',')) | |
| 134 | |
| 135 # Call webservice | |
| 136 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) | |
| 137 if (class(spectra) == 'list' && length(spectra) > 0) { | |
| 138 for (s in spectra) | |
| 139 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { | |
| 140 ret.time <- (s$RTmin + s$RTmax) / 2 | |
| 141 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds | |
| 142 c <- s$liquidChromatography$columnCode | |
| 143 if (c %in% names(rt)) { | |
| 144 if ( ! ret.time %in% rt[[c]]) | |
| 145 rt[[c]] <- c(rt[[c]], ret.time) | |
| 146 } else | |
| 147 rt[[c]] <- ret.time | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 return(rt) | |
| 152 }) | |
| 153 | |
| 154 ##################### | |
| 155 # GET MOLECULE NAME # | |
| 156 ##################### | |
| 157 | |
| 158 MsPeakForestDb$methods( getMoleculeName = function(molid) { | |
| 159 | |
| 160 library(RJSONIO) | |
| 161 | |
| 162 if (is.null(molid)) | |
| 163 return(NA_character_) | |
| 164 | |
| 165 # Initialize names | |
| 166 names <- as.character(molid) | |
| 167 | |
| 168 # Get non NA values | |
| 169 non.na.molid <- molid[ ! is.na(molid)] | |
| 170 | |
| 171 if (length(non.na.molid) > 0) { | |
| 172 # Set URL | |
| 173 params <- c(molids = paste(non.na.molid, collapse = ',')) | |
| 174 | |
| 175 # Call webservice | |
| 176 names[ ! is.na(molid)] <- .self$.get.url(url = 'compounds/all/names', params = params) | |
| 177 } | |
| 178 | |
| 179 return(names) | |
| 180 }) | |
| 181 | |
| 182 ################ | |
| 183 # FIND BY NAME # | |
| 184 ################ | |
| 185 | |
| 186 MsPeakForestDb$methods( findByName = function(name) { | |
| 187 | |
| 188 if (is.null(name)) | |
| 189 return(NA_character_) | |
| 190 | |
| 191 ids <- list() | |
| 192 | |
| 193 for (n in name) { | |
| 194 | |
| 195 if (is.na(n)) | |
| 196 ids <- c(ids, NA_character_) | |
| 197 | |
| 198 else { | |
| 199 compounds <- .self$.get.url(url = paste0('search/compounds/name/', curlEscape(n)))$compoundNames | |
| 200 ids <- c(ids, list(vapply(compounds, function(c) as.character(c$compound$id), FUN.VALUE = ''))) | |
| 201 } | |
| 202 } | |
| 203 | |
| 204 return(ids) | |
| 205 }) | |
| 206 | |
| 207 ################# | |
| 208 # GET NB PEAKS # | |
| 209 ################# | |
| 210 | |
| 211 MsPeakForestDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) { | |
| 212 | |
| 213 # Build URL | |
| 214 params <- NULL | |
| 215 if ( ! is.na(type)) | |
| 216 params <- c(params, mode = if (type == MSDB.TAG.POS) 'pos' else 'neg') | |
| 217 if ( ! is.null(molid) && (length(molid) > 1 || ! is.na(molid))) | |
| 218 params <- c(params, molids = paste(molid, collapse = ',')) | |
| 219 | |
| 220 # Run request | |
| 221 n <- .self$.get.url(url = 'spectra/lcms/count-peaks', params = params, ret.type = 'integer') | |
| 222 | |
| 223 return(sum(n)) | |
| 224 }) | |
| 225 | |
| 226 ################# | |
| 227 # GET MZ VALUES # | |
| 228 ################# | |
| 229 | |
| 230 MsPeakForestDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { | |
| 231 | |
| 232 # Query params | |
| 233 params <- NULL | |
| 234 if ( ! is.null(mode)) | |
| 235 params <- c(params, mode = if (mode == MSDB.TAG.POS) 'positive' else 'negative') | |
| 236 | |
| 237 # Get MZ valuels | |
| 238 mz <- .self$.get.url(url = 'spectra/lcms/peaks/list-mz', params = params) | |
| 239 | |
| 240 # Apply cut-off | |
| 241 if ( ! is.na(max.results)) | |
| 242 mz <- mz[1:max.results] | |
| 243 | |
| 244 return(mz) | |
| 245 }) | |
| 246 | |
| 247 ############################## | |
| 248 # DO SEARCH FOR MZ RT BOUNDS # | |
| 249 ############################## | |
| 250 | |
| 251 MsPeakForestDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) { | |
| 252 | |
| 253 # Build URL for mz search | |
| 254 url <- paste0('spectra/lcms/peaks/get-range/', mz.low, '/', mz.high) | |
| 255 | |
| 256 # Get spectra | |
| 257 spectra <- .self$.get.url(url = url) | |
| 258 | |
| 259 # Build result data frame | |
| 260 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) | |
| 261 for (x in spectra) { | |
| 262 if ('source' %in% names(x) && is.list(x$source)) | |
| 263 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_ | |
| 264 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_ | |
| 265 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_ | |
| 266 if ('listOfCompounds' %in% names(x$source)) { | |
| 267 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') | |
| 268 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '') | |
| 269 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0) | |
| 270 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '') | |
| 271 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '') | |
| 272 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '') | |
| 273 chebi[chebi == 'CHEBI:null'] <- NA_character_ | |
| 274 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '') | |
| 275 hmdb[hmdb == 'HMDBnull'] <- NA_character_ | |
| 276 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '') | |
| 277 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '') | |
| 278 if (length(molids) > 0 && length(molids) == length(molnames)) | |
| 279 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) | |
| 280 } | |
| 281 } | |
| 282 | |
| 283 # RT search | |
| 284 if ( ! is.null(rt.low) && ! is.null(rt.high)) { | |
| 285 | |
| 286 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) | |
| 287 | |
| 288 if (nrow(results) > 0) { | |
| 289 | |
| 290 # Build URL for rt search | |
| 291 url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60) | |
| 292 params <- NULL | |
| 293 if ( ! is.null(col)) | |
| 294 params <- c(columns = paste(col, collapse = ',')) | |
| 295 | |
| 296 # Run query | |
| 297 rtspectra <- .self$.get.url(url = url, params = params) | |
| 298 | |
| 299 # Get compound/molecule IDs | |
| 300 for (x in rtspectra) | |
| 301 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) { | |
| 302 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') | |
| 303 if (length(molids) > 0) { | |
| 304 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_ | |
| 305 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_ | |
| 306 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_ | |
| 307 colrt <- (rtmin + rtmax) / 2 | |
| 308 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids, | |
| 309 MSDB.TAG.COL = col, | |
| 310 MSDB.TAG.COLRT = colrt * 60, | |
| 311 stringsAsFactors = FALSE)) | |
| 312 } | |
| 313 } | |
| 314 } | |
| 315 | |
| 316 # Add retention times and column info | |
| 317 results <- merge(results, rt.res) | |
| 318 } | |
| 319 | |
| 320 # Rename columns with proper names | |
| 321 colnames(results) <- vapply(colnames(results), function(s) eval(parse(text=s)), FUN.VALUE = '') | |
| 322 | |
| 323 return(results) | |
| 324 }) | |
| 325 } |
