comparison MsPeakForestDb.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded
2
3 library(methods)
4 source('MsDb.R')
5 source('UrlRequestScheduler.R')
6
7 #####################
8 # CLASS DECLARATION #
9 #####################
10
11 MsPeakForestDb <- setRefClass("MsPeakForestDb", contains = "MsDb", fields = list(.url = "character", .url.scheduler = "ANY", .token = "character"))
12
13 ###############
14 # CONSTRUCTOR #
15 ###############
16
17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) {
18
19 callSuper(...)
20
21 # Check URL
22 if (is.null(url) || is.na(url))
23 stop("No URL defined for new MsPeakForestDb instance.")
24
25 if (substring(url, nchar(url) - 1, 1) == '/')
26 url <- substring(url, nchar(url) - 1)
27 .url <<- url
28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent)
29 .self$.url.scheduler$setVerbose(1L)
30 .token <<- token
31 .rt.unit <<- MSDB.RTUNIT.MIN
32 })
33
34 ###########
35 # GET URL #
36 ###########
37
38 MsPeakForestDb$methods( .get.url = function(url, params = NULL, ret.type = 'json') {
39
40 res <- NULL
41
42 # Add url prefix
43 if (substring(url, 1, 1) == '/')
44 url <- substring(url, 2)
45 url <- paste(.self$.url, url, sep = '/')
46
47 # Add token
48 if ( ! is.na(.self$.token))
49 params <- c(params, token = .self$.token)
50
51 # Get URL
52 content <- .self$.url.scheduler$getUrl(url = url, params = params)
53
54 if (ret.type == 'json') {
55
56 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
57
58 if (is.null(res)) {
59 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '')
60 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\"."))
61 }
62 } else {
63 if (ret.type == 'integer') {
64 if (grepl('^[0-9]+$', content, perl = TRUE))
65 res <- as.integer(content)
66 else {
67 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
68 }
69 }
70 }
71
72 return(res)
73 })
74
75 ####################
76 # GET MOLECULE IDS #
77 ####################
78
79 MsPeakForestDb$methods( getMoleculeIds = function() {
80
81 ids <- as.character(.self$.get.url(url = 'compounds/all/ids'))
82
83 return(ids)
84 })
85
86 ####################
87 # GET NB MOLECULES #
88 ####################
89
90 MsPeakForestDb$methods( getNbMolecules = function() {
91
92 n <- .self$.get.url(url = 'compounds/all/count', ret.type = 'integer')
93
94 return(n)
95 })
96
97 ###############################
98 # GET CHROMATOGRAPHIC COLUMNS #
99 ###############################
100
101 MsPeakForestDb$methods( getChromCol = function(molid = NULL) {
102
103 # Set URL
104 params <- NULL
105 if ( ! is.null(molid))
106 params <- list(molids = paste(molid, collapse = ','))
107
108 # Call webservice
109 wscols <- .self$.get.url(url = 'metadata/lc/list-code-columns', params = params)
110
111 # Build data frame
112 cols <- data.frame(id = character(), title = character())
113 for(id in names(wscols))
114 cols <- rbind(cols, data.frame(id = id, title = wscols[[id]]$name, stringsAsFactors = FALSE))
115
116 return(cols)
117 })
118
119 #######################
120 # GET RETENTION TIMES #
121 #######################
122
123 MsPeakForestDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
124
125 if (is.null(molid) || is.na(molid) || length(molid) != 1)
126 stop("The parameter molid must consist only in a single value.")
127
128 rt <- list()
129
130 # Set URL
131 params <- NULL
132 if ( ! is.null(molid))
133 params <- list(molids = paste(molid, collapse = ','))
134
135 # Call webservice
136 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params)
137 if (class(spectra) == 'list' && length(spectra) > 0) {
138 for (s in spectra)
139 if (is.na(col) || s$liquidChromatography$columnCode %in% col) {
140 ret.time <- (s$RTmin + s$RTmax) / 2
141 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds
142 c <- s$liquidChromatography$columnCode
143 if (c %in% names(rt)) {
144 if ( ! ret.time %in% rt[[c]])
145 rt[[c]] <- c(rt[[c]], ret.time)
146 } else
147 rt[[c]] <- ret.time
148 }
149 }
150
151 return(rt)
152 })
153
154 #####################
155 # GET MOLECULE NAME #
156 #####################
157
158 MsPeakForestDb$methods( getMoleculeName = function(molid) {
159
160 library(RJSONIO)
161
162 if (is.null(molid))
163 return(NA_character_)
164
165 # Initialize names
166 names <- as.character(molid)
167
168 # Get non NA values
169 non.na.molid <- molid[ ! is.na(molid)]
170
171 if (length(non.na.molid) > 0) {
172 # Set URL
173 params <- c(molids = paste(non.na.molid, collapse = ','))
174
175 # Call webservice
176 names[ ! is.na(molid)] <- .self$.get.url(url = 'compounds/all/names', params = params)
177 }
178
179 return(names)
180 })
181
182 ################
183 # FIND BY NAME #
184 ################
185
186 MsPeakForestDb$methods( findByName = function(name) {
187
188 if (is.null(name))
189 return(NA_character_)
190
191 ids <- list()
192
193 for (n in name) {
194
195 if (is.na(n))
196 ids <- c(ids, NA_character_)
197
198 else {
199 compounds <- .self$.get.url(url = paste0('search/compounds/name/', curlEscape(n)))$compoundNames
200 ids <- c(ids, list(vapply(compounds, function(c) as.character(c$compound$id), FUN.VALUE = '')))
201 }
202 }
203
204 return(ids)
205 })
206
207 #################
208 # GET NB PEAKS #
209 #################
210
211 MsPeakForestDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {
212
213 # Build URL
214 params <- NULL
215 if ( ! is.na(type))
216 params <- c(params, mode = if (type == MSDB.TAG.POS) 'pos' else 'neg')
217 if ( ! is.null(molid) && (length(molid) > 1 || ! is.na(molid)))
218 params <- c(params, molids = paste(molid, collapse = ','))
219
220 # Run request
221 n <- .self$.get.url(url = 'spectra/lcms/count-peaks', params = params, ret.type = 'integer')
222
223 return(sum(n))
224 })
225
226 #################
227 # GET MZ VALUES #
228 #################
229
230 MsPeakForestDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
231
232 # Query params
233 params <- NULL
234 if ( ! is.null(mode))
235 params <- c(params, mode = if (mode == MSDB.TAG.POS) 'positive' else 'negative')
236
237 # Get MZ valuels
238 mz <- .self$.get.url(url = 'spectra/lcms/peaks/list-mz', params = params)
239
240 # Apply cut-off
241 if ( ! is.na(max.results))
242 mz <- mz[1:max.results]
243
244 return(mz)
245 })
246
247 ##############################
248 # DO SEARCH FOR MZ RT BOUNDS #
249 ##############################
250
251 MsPeakForestDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {
252
253 # Build URL for mz search
254 url <- paste0('spectra/lcms/peaks/get-range/', mz.low, '/', mz.high)
255
256 # Get spectra
257 spectra <- .self$.get.url(url = url)
258
259 # Build result data frame
260 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character())
261 for (x in spectra) {
262 if ('source' %in% names(x) && is.list(x$source))
263 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_
264 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_
265 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_
266 if ('listOfCompounds' %in% names(x$source)) {
267 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
268 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '')
269 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0)
270 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '')
271 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '')
272 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '')
273 chebi[chebi == 'CHEBI:null'] <- NA_character_
274 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '')
275 hmdb[hmdb == 'HMDBnull'] <- NA_character_
276 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '')
277 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '')
278 if (length(molids) > 0 && length(molids) == length(molnames))
279 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE))
280 }
281 }
282
283 # RT search
284 if ( ! is.null(rt.low) && ! is.null(rt.high)) {
285
286 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric())
287
288 if (nrow(results) > 0) {
289
290 # Build URL for rt search
291 url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60)
292 params <- NULL
293 if ( ! is.null(col))
294 params <- c(columns = paste(col, collapse = ','))
295
296 # Run query
297 rtspectra <- .self$.get.url(url = url, params = params)
298
299 # Get compound/molecule IDs
300 for (x in rtspectra)
301 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) {
302 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')
303 if (length(molids) > 0) {
304 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_
305 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_
306 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_
307 colrt <- (rtmin + rtmax) / 2
308 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids,
309 MSDB.TAG.COL = col,
310 MSDB.TAG.COLRT = colrt * 60,
311 stringsAsFactors = FALSE))
312 }
313 }
314 }
315
316 # Add retention times and column info
317 results <- merge(results, rt.res)
318 }
319
320 # Rename columns with proper names
321 colnames(results) <- vapply(colnames(results), function(s) eval(parse(text=s)), FUN.VALUE = '')
322
323 return(results)
324 })
325 }