comparison Ms4TabSqlDb.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children 20d69a062da3
comparison
equal deleted inserted replaced
-1:000000000000 0:e66bb061af06
1 if ( ! exists('Ms4TabSqlDb')) { # Do not load again if already loaded
2
3 library('methods')
4 source('msdb-common.R')
5 source('MsDb.R')
6
7 #####################
8 # CLASS DECLARATION #
9 #####################
10
11 Ms4TabSqlDb <- setRefClass("Ms4TabSqlDb", contains = "MsDb", fields = list(.host = "character", .port = "integer", .dbname = "character", .user = "character", .password = "character", .drv = "ANY", .conn = "ANY"))
12
13 ###############
14 # CONSTRUCTOR #
15 ###############
16
17 Ms4TabSqlDb$methods( initialize = function(host = NA_character_, port = NA_integer_, dbname = NA_character_, user = NA_character_, password = NA_character_, ...) {
18
19 # Initialize members
20 .host <<- if ( ! is.null(host)) host else NA_character_
21 .port <<- if ( ! is.null(port)) port else NA_integer_
22 .dbname <<- if ( ! is.null(dbname)) dbname else NA_character_
23 .user <<- if ( ! is.null(user)) user else NA_character_
24 .password <<- if ( ! is.null(password)) password else NA_character_
25 .drv <<- NULL
26 .conn <<- NULL
27
28 callSuper(...)
29 })
30
31 ##################
32 # GET CONNECTION #
33 ##################
34
35 Ms4TabSqlDb$methods( .get.connection = function() {
36
37 # Initialize connection
38 if (is.null(.self$.conn)) {
39 library('RPostgreSQL')
40 .drv <<- dbDriver("PostgreSQL")
41 .conn <<- dbConnect(.self$.drv, host = .self$.host, port = .self$.port, dbname = .self$.dbname, user = .self$.user, password = .self$.password)
42 }
43
44 return(.self$.conn)
45 })
46
47 ##############
48 # SEND QUERY #
49 ##############
50
51 Ms4TabSqlDb$methods( .send.query = function(query) {
52 conn <- .self$.get.connection() # Call it first separately, so library RPostgreSQL is loaded.
53 rs <- try(dbSendQuery(conn, query))
54 return(rs)
55 })
56
57 ####################
58 # GET MOLECULE IDS #
59 ####################
60
61 Ms4TabSqlDb$methods( getMoleculeIds = function() {
62
63 rs <- .self$.send.query('select pkmol.molecule_id as id from peaklist_name as pkmol;')
64 ids <- fetch(rs,n=-1)
65 ids <- ids[['id']] # Get 'id' column
66 ids <- vapply(ids, function(x) { if (substring(x, 1, 1) == 'N') as.integer(substring(x, 2)) else as.integer(x) } , FUN.VALUE = 1, USE.NAMES = FALSE)
67 ids <- (sort(ids))
68
69 return(ids)
70 })
71
72 ####################
73 # GET NB MOLECULES #
74 ####################
75
76 Ms4TabSqlDb$methods( getNbMolecules = function() {
77
78 rs <- .self$.send.query('select count(*) from peaklist_name;')
79 df <- fetch(rs,n=-1)
80 n <- df[[1]]
81
82 return(n)
83 })
84
85 #####################
86 # GET MOLECULE NAME #
87 #####################
88
89 Ms4TabSqlDb$methods( getMoleculeName = function(molid) {
90
91 # Build request
92 where <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
93 request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';')
94
95 # Run request
96 rs <- .self$.send.query(request)
97 df <- fetch(rs,n=-1)
98
99 # Get IDs
100 ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE)
101
102 # Get names in the same order as the input vector
103 names <- df[['name']][order(ids)[order(molid)]]
104
105 return(if (is.null(names)) NA_character_ else names)
106 })
107
108
109 ###############################
110 # GET CHROMATOGRAPHIC COLUMNS #
111 ###############################
112
113 Ms4TabSqlDb$methods( getChromCol = function(molid = NULL) {
114
115 # Get all columns
116 if (is.null(molid)) {
117 request <- 'select name from method;'
118
119 # Get columns of the specified molecules
120 } else {
121 where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
122 where <- paste0('pk.name_id = pkmol.id and pk.id = pkret.id_peak and pkret.id_method = method.id and (', where_molids, ')')
123 request <- paste0('select distinct method.name from method, peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret where ', where, ';')
124 }
125
126 # Run request
127 rs <- .self$.send.query(request)
128 df <- fetch(rs,n=-1)
129
130 # Gets column list
131 cols <- df[['name']]
132
133 # Remove FIA
134 cols <- cols[ cols != 'FIA']
135
136 # Normalize names
137 cols <- vapply(cols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE)
138
139 # Remove duplicates
140 cols <- cols[ ! duplicated(cols)]
141
142 # Make data frame
143 cols <- data.frame(id = cols, title = cols, stringsAsFactors = FALSE)
144
145 return(cols)
146 })
147
148 ################
149 # FIND BY NAME #
150 ################
151
152 Ms4TabSqlDb$methods( findByName = function(name) {
153
154 if (is.null(name)) return(NA_integer_)
155
156 # Put names in uppercase
157 uname <- toupper(name)
158
159 # Build request
160 where <- paste0(vapply(uname, function(n) paste0("upper(pkmol.name) = '", gsub("'", "''", n, perl = TRUE), "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = ' or ')
161 request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';')
162
163 # Run request
164 rs <- .self$.send.query(request)
165 df <- fetch(rs,n=-1)
166
167 # Adds missing names/IDs
168 missing_names <- uname[ ! uname %in% toupper(df[['name']])]
169 df <- rbind(df, data.frame(id = rep(NA_integer_, length(missing_names)), name = missing_names))
170
171 # Get IDs and names
172 ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE)
173 names <- toupper(as.character(df[['name']]))
174
175 # Get IDs in the same order as the input vector
176 ids[order(uname)] <- ids[order(names)]
177
178 return(if (is.null(ids)) NA_integer_ else ids)
179 })
180
181 #######################
182 # GET RETENTION TIMES #
183 #######################
184
185 Ms4TabSqlDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
186
187 if (is.null(molid) || is.na(molid) || length(molid) != 1)
188 stop("The parameter molid must consist only in a single integer.")
189
190 # Build request
191 request <- paste0("select distinct method.name as col, (pkret.retention * 60) as ret from peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret, method where pkret.id_peak = pk.id and pkmol.id = pk.name_id and pkret.id_method = method.id and pkmol.molecule_id = 'N", molid, "'")
192 if ( ! is.na(col)) {
193 where_cols <- paste0(vapply(col, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = ''), collapse = ' or ')
194 request <- paste0(request, ' and (', where_cols, ')')
195 }
196 request <- paste0(request, ';')
197
198 # Run request
199 rs <- .self$.send.query(request)
200 df <- fetch(rs,n=-1)
201
202 # Remove FIA
203 df <- df[df[['col']] != 'FIA', ]
204
205 # Normalize names
206 df[['col']] <- vapply(df[['col']], .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE)
207
208 # Build output list
209 lst <- list()
210 if (nrow(df) > 0)
211 for (i in 1:nrow(df)) {
212 c <- df[i, 'col']
213 lst[[c]] <- c(lst[[c]], df[i, 'ret'])
214 }
215
216 return(lst)
217 })
218
219 ################
220 # GET NB PEAKS #
221 ################
222
223 Ms4TabSqlDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {
224
225 # Build request
226 request <- paste0("select count(*) from peaklist as pk, peaklist_name as pkmol where pkmol.id = pk.name_id")
227 if ( length(molid) > 1 || ! is.na(molid)) {
228 where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
229 request <- paste0(request, ' and (', where_molids, ')')
230 }
231 if ( ! is.na(type)) {
232 request <- paste0(request, ' and ', if (type == MSDB.TAG.POS) '' else 'not ', 'ion_pos')
233 }
234 request <- paste0(request, ';')
235
236 # Run request
237 rs <- .self$.send.query(request)
238 df <- fetch(rs,n=-1)
239
240 return(df[1,1])
241 })
242
243 ###############################
244 # GET CHROMATOGRAPHIC COLUMNS #
245 ###############################
246
247 Ms4TabSqlDb$methods( .to.dbcols = function(col) {
248
249 # Get all column names
250 request <- 'select name from method;'
251 rs <- .self$.send.query(request)
252 df <- fetch(rs,n=-1)
253
254 # Get database column names
255 dbcols <- df[['name']]
256 dbcols <- dbcols[ dbcols != 'FIA']
257
258 # Get normalize names
259 normcols <- vapply(dbcols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE)
260
261 return(dbcols[normcols == tolower(col)])
262 })
263
264 #################
265 # GET MZ VALUES #
266 #################
267
268 # Returns a numeric vector of all masses stored inside the database.
269 Ms4TabSqlDb$methods( getMzValues = function(mode = NULL) {
270
271 # Build request
272 select <- paste0("select distinct pk.mass as ", MSDB.TAG.MZTHEO)
273 from <- " from peaklist as pk"
274 where <- ""
275 if ( ! is.null(mode))
276 where <- paste0(" where ", if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos')
277
278 # Assemble request
279 request <- paste0(select, from, where, ';')
280
281 # Run request
282 rs <- .self$.send.query(request)
283 df <- fetch(rs, n=-1)
284
285 return(df[[MSDB.TAG.MZTHEO]])
286 })
287
288 ##########
289 # SEARCH #
290 ##########
291
292 Ms4TabSqlDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {
293
294 # Build request
295 select <- paste0("select pkmol.molecule_id as ", MSDB.TAG.MOLID, ", pkmol.name as ", MSDB.TAG.MOLNAMES,", pk.mass as ", MSDB.TAG.MZTHEO, ", pk.composition as ", MSDB.TAG.COMP,", pk.attribution as ", MSDB.TAG.ATTR)
296 from <- " from peaklist as pk, peaklist_name as pkmol"
297 where <- paste0(" where pkmol.id = pk.name_id and pk.mass >= ", mz.low, " and pk.mass <= ", mz.high)
298 where <- paste0(where, ' and ', if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos')
299
300 # Insert where clause on attribs
301 if ( ! is.null(attribs)) {
302 where.attribs <- paste0(vapply(attribs, function(a) paste0("pk.attribution = '", a, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ")
303 where <- paste0(where, ' and (', where.attribs, ')')
304 }
305
306 # Insert where clause on molids
307 if ( ! is.null(molids)) {
308 where.molids <- paste0(vapply(molids, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ')
309 where <- paste0(where, ' and (', where.molids, ')')
310 }
311
312 # Insert where clause on columns
313 if ( ! is.null(col)) {
314 dbcols <- .self$.to.dbcols(col)
315 if ( ! is.null(dbcols)) {
316
317 # Can't find specified columns
318 if (length(dbcols) == 0 && length(col) > 0)
319 return(.get.empty.result.df(rt = TRUE))
320
321 select <- paste0(select, ", (60 * pkret.retention) as ", MSDB.TAG.COLRT, ", method.name as ", MSDB.TAG.COL)
322 from <- paste0(from, ", method, peaklist_ret as pkret")
323 where.cols <- if (length(dbcols) == 0) 'TRUE' else paste0(vapply(dbcols, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ")
324 where <- paste0(where, " and pk.id = pkret.id_peak and pkret.id_method = method.id and (", where.cols, ")")
325 if (! is.null(rt.low) && ! is.null(rt.high))
326 where <- paste0(where, " and pkret.retention * 60 >= ", rt.low, " and pkret.retention * 60 <= ", rt.high)
327 }
328 }
329
330 # Assemble request
331 request <- paste0(select, from, where, ';')
332
333 # Run request
334 rs <- .self$.send.query(request)
335 df <- fetch(rs,n=-1)
336
337 # No results
338
339 # Remove N prefix from IDs
340 if (nrow(df) > 0)
341 df[[MSDB.TAG.MOLID]] <- vapply(df[[MSDB.TAG.MOLID]], function(x) substring(x, 2), FUN.VALUE = '', USE.NAMES = FALSE)
342 else if (nrow(df) == 0)
343 df <- .get.empty.result.df(rt = ! is.null(col))
344
345 return(df)
346 })
347
348 } # end of load safe guard