Mercurial > repos > prog > lcmsmatching
comparison MsPeakForestDb.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
comparison
equal
deleted
inserted
replaced
5:fb9c0409d85c | 6:f86fec07f392 |
---|---|
1 if ( ! exists('MsPeakForestDb')) { # Do not load again if already loaded | |
2 | |
3 library(methods) | |
4 source('MsDb.R') | |
5 source('UrlRequestScheduler.R') | |
6 | |
7 ##################### | |
8 # CLASS DECLARATION # | |
9 ##################### | |
10 | |
11 MsPeakForestDb <- setRefClass("MsPeakForestDb", contains = "MsDb", fields = list(.url = "character", .url.scheduler = "ANY", .token = "character")) | |
12 | |
13 ############### | |
14 # CONSTRUCTOR # | |
15 ############### | |
16 | |
17 MsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) { | |
18 | |
19 callSuper(...) | |
20 | |
21 # Check URL | |
22 if (is.null(url) || is.na(url)) | |
23 stop("No URL defined for new MsPeakForestDb instance.") | |
24 | |
25 if (substring(url, nchar(url) - 1, 1) == '/') | |
26 url <- substring(url, nchar(url) - 1) | |
27 .url <<- url | |
28 .url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent) | |
29 .self$.url.scheduler$setVerbose(1L) | |
30 .token <<- token | |
31 .rt.unit <<- MSDB.RTUNIT.MIN | |
32 }) | |
33 | |
34 ########### | |
35 # GET URL # | |
36 ########### | |
37 | |
38 MsPeakForestDb$methods( .get.url = function(url, params = NULL, ret.type = 'json') { | |
39 | |
40 res <- NULL | |
41 | |
42 # Add url prefix | |
43 if (substring(url, 1, 1) == '/') | |
44 url <- substring(url, 2) | |
45 url <- paste(.self$.url, url, sep = '/') | |
46 | |
47 # Add token | |
48 if ( ! is.na(.self$.token)) | |
49 params <- c(params, token = .self$.token) | |
50 | |
51 # Get URL | |
52 content <- .self$.url.scheduler$getUrl(url = url, params = params) | |
53 | |
54 if (ret.type == 'json') { | |
55 | |
56 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) | |
57 | |
58 if (is.null(res)) { | |
59 param.str <- if (is.null(params)) '' else paste('?', vapply(names(params), function(p) paste(p, params[[p]], sep = '='), FUN.VALUE = ''), collapse = '&', sep = '') | |
60 stop(paste0("Failed to run web service. URL was \"", url, param.str, "\".")) | |
61 } | |
62 } else { | |
63 if (ret.type == 'integer') { | |
64 if (grepl('^[0-9]+$', content, perl = TRUE)) | |
65 res <- as.integer(content) | |
66 else { | |
67 res <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE) | |
68 } | |
69 } | |
70 } | |
71 | |
72 return(res) | |
73 }) | |
74 | |
75 #################### | |
76 # GET MOLECULE IDS # | |
77 #################### | |
78 | |
79 MsPeakForestDb$methods( getMoleculeIds = function() { | |
80 | |
81 ids <- as.character(.self$.get.url(url = 'compounds/all/ids')) | |
82 | |
83 return(ids) | |
84 }) | |
85 | |
86 #################### | |
87 # GET NB MOLECULES # | |
88 #################### | |
89 | |
90 MsPeakForestDb$methods( getNbMolecules = function() { | |
91 | |
92 n <- .self$.get.url(url = 'compounds/all/count', ret.type = 'integer') | |
93 | |
94 return(n) | |
95 }) | |
96 | |
97 ############################### | |
98 # GET CHROMATOGRAPHIC COLUMNS # | |
99 ############################### | |
100 | |
101 MsPeakForestDb$methods( getChromCol = function(molid = NULL) { | |
102 | |
103 # Set URL | |
104 params <- NULL | |
105 if ( ! is.null(molid)) | |
106 params <- list(molids = paste(molid, collapse = ',')) | |
107 | |
108 # Call webservice | |
109 wscols <- .self$.get.url(url = 'metadata/lc/list-code-columns', params = params) | |
110 | |
111 # Build data frame | |
112 cols <- data.frame(id = character(), title = character()) | |
113 for(id in names(wscols)) | |
114 cols <- rbind(cols, data.frame(id = id, title = wscols[[id]]$name, stringsAsFactors = FALSE)) | |
115 | |
116 return(cols) | |
117 }) | |
118 | |
119 ####################### | |
120 # GET RETENTION TIMES # | |
121 ####################### | |
122 | |
123 MsPeakForestDb$methods( getRetentionTimes = function(molid, col = NA_character_) { | |
124 | |
125 if (is.null(molid) || is.na(molid) || length(molid) != 1) | |
126 stop("The parameter molid must consist only in a single value.") | |
127 | |
128 rt <- list() | |
129 | |
130 # Set URL | |
131 params <- NULL | |
132 if ( ! is.null(molid)) | |
133 params <- list(molids = paste(molid, collapse = ',')) | |
134 | |
135 # Call webservice | |
136 spectra <- .self$.get.url(url = 'spectra/lcms/search', params = params) | |
137 if (class(spectra) == 'list' && length(spectra) > 0) { | |
138 for (s in spectra) | |
139 if (is.na(col) || s$liquidChromatography$columnCode %in% col) { | |
140 ret.time <- (s$RTmin + s$RTmax) / 2 | |
141 ret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds | |
142 c <- s$liquidChromatography$columnCode | |
143 if (c %in% names(rt)) { | |
144 if ( ! ret.time %in% rt[[c]]) | |
145 rt[[c]] <- c(rt[[c]], ret.time) | |
146 } else | |
147 rt[[c]] <- ret.time | |
148 } | |
149 } | |
150 | |
151 return(rt) | |
152 }) | |
153 | |
154 ##################### | |
155 # GET MOLECULE NAME # | |
156 ##################### | |
157 | |
158 MsPeakForestDb$methods( getMoleculeName = function(molid) { | |
159 | |
160 library(RJSONIO) | |
161 | |
162 if (is.null(molid)) | |
163 return(NA_character_) | |
164 | |
165 # Initialize names | |
166 names <- as.character(molid) | |
167 | |
168 # Get non NA values | |
169 non.na.molid <- molid[ ! is.na(molid)] | |
170 | |
171 if (length(non.na.molid) > 0) { | |
172 # Set URL | |
173 params <- c(molids = paste(non.na.molid, collapse = ',')) | |
174 | |
175 # Call webservice | |
176 names[ ! is.na(molid)] <- .self$.get.url(url = 'compounds/all/names', params = params) | |
177 } | |
178 | |
179 return(names) | |
180 }) | |
181 | |
182 ################ | |
183 # FIND BY NAME # | |
184 ################ | |
185 | |
186 MsPeakForestDb$methods( findByName = function(name) { | |
187 | |
188 if (is.null(name)) | |
189 return(NA_character_) | |
190 | |
191 ids <- list() | |
192 | |
193 for (n in name) { | |
194 | |
195 if (is.na(n)) | |
196 ids <- c(ids, NA_character_) | |
197 | |
198 else { | |
199 compounds <- .self$.get.url(url = paste0('search/compounds/name/', curlEscape(n)))$compoundNames | |
200 ids <- c(ids, list(vapply(compounds, function(c) as.character(c$compound$id), FUN.VALUE = ''))) | |
201 } | |
202 } | |
203 | |
204 return(ids) | |
205 }) | |
206 | |
207 ################# | |
208 # GET NB PEAKS # | |
209 ################# | |
210 | |
211 MsPeakForestDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) { | |
212 | |
213 # Build URL | |
214 params <- NULL | |
215 if ( ! is.na(type)) | |
216 params <- c(params, mode = if (type == MSDB.TAG.POS) 'pos' else 'neg') | |
217 if ( ! is.null(molid) && (length(molid) > 1 || ! is.na(molid))) | |
218 params <- c(params, molids = paste(molid, collapse = ',')) | |
219 | |
220 # Run request | |
221 n <- .self$.get.url(url = 'spectra/lcms/count-peaks', params = params, ret.type = 'integer') | |
222 | |
223 return(sum(n)) | |
224 }) | |
225 | |
226 ################# | |
227 # GET MZ VALUES # | |
228 ################# | |
229 | |
230 MsPeakForestDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { | |
231 | |
232 # Query params | |
233 params <- NULL | |
234 if ( ! is.null(mode)) | |
235 params <- c(params, mode = if (mode == MSDB.TAG.POS) 'positive' else 'negative') | |
236 | |
237 # Get MZ valuels | |
238 mz <- .self$.get.url(url = 'spectra/lcms/peaks/list-mz', params = params) | |
239 | |
240 # Apply cut-off | |
241 if ( ! is.na(max.results)) | |
242 mz <- mz[1:max.results] | |
243 | |
244 return(mz) | |
245 }) | |
246 | |
247 ############################## | |
248 # DO SEARCH FOR MZ RT BOUNDS # | |
249 ############################## | |
250 | |
251 MsPeakForestDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) { | |
252 | |
253 # Build URL for mz search | |
254 url <- paste0('spectra/lcms/peaks/get-range/', mz.low, '/', mz.high) | |
255 | |
256 # Get spectra | |
257 spectra <- .self$.get.url(url = url) | |
258 | |
259 # Build result data frame | |
260 results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character()) | |
261 for (x in spectra) { | |
262 if ('source' %in% names(x) && is.list(x$source)) | |
263 mztheo <- if ('mz' %in% names(x) && ! is.null(x$mz)) as.numeric(x$mz) else NA_real_ | |
264 comp <- if ('composition' %in% names(x) && ! is.null(x$composition)) x$composition else NA_character_ | |
265 attr <- if ('attribution' %in% names(x) && ! is.null(x$attribution)) x$attribution else NA_character_ | |
266 if ('listOfCompounds' %in% names(x$source)) { | |
267 molids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') | |
268 molnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '') | |
269 mass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0) | |
270 inchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '') | |
271 inchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '') | |
272 chebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '') | |
273 chebi[chebi == 'CHEBI:null'] <- NA_character_ | |
274 hmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '') | |
275 hmdb[hmdb == 'HMDBnull'] <- NA_character_ | |
276 kegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '') | |
277 pubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '') | |
278 if (length(molids) > 0 && length(molids) == length(molnames)) | |
279 results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE)) | |
280 } | |
281 } | |
282 | |
283 # RT search | |
284 if ( ! is.null(rt.low) && ! is.null(rt.high)) { | |
285 | |
286 rt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric()) | |
287 | |
288 if (nrow(results) > 0) { | |
289 | |
290 # Build URL for rt search | |
291 url <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60) | |
292 params <- NULL | |
293 if ( ! is.null(col)) | |
294 params <- c(columns = paste(col, collapse = ',')) | |
295 | |
296 # Run query | |
297 rtspectra <- .self$.get.url(url = url, params = params) | |
298 | |
299 # Get compound/molecule IDs | |
300 for (x in rtspectra) | |
301 if (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) { | |
302 molids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '') | |
303 if (length(molids) > 0) { | |
304 col <- if ('columnCode' %in% names(x$liquidChromatography) && ! is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_ | |
305 rtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_ | |
306 rtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_ | |
307 colrt <- (rtmin + rtmax) / 2 | |
308 rt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids, | |
309 MSDB.TAG.COL = col, | |
310 MSDB.TAG.COLRT = colrt * 60, | |
311 stringsAsFactors = FALSE)) | |
312 } | |
313 } | |
314 } | |
315 | |
316 # Add retention times and column info | |
317 results <- merge(results, rt.res) | |
318 } | |
319 | |
320 # Rename columns with proper names | |
321 colnames(results) <- vapply(colnames(results), function(s) eval(parse(text=s)), FUN.VALUE = '') | |
322 | |
323 return(results) | |
324 }) | |
325 } |