Mercurial > repos > prog > lcmsmatching
comparison Ms4TabSqlDb.R @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author | prog |
---|---|
date | Tue, 12 Jul 2016 12:02:37 -0400 |
parents | |
children | 20d69a062da3 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e66bb061af06 |
---|---|
1 if ( ! exists('Ms4TabSqlDb')) { # Do not load again if already loaded | |
2 | |
3 library('methods') | |
4 source('msdb-common.R') | |
5 source('MsDb.R') | |
6 | |
7 ##################### | |
8 # CLASS DECLARATION # | |
9 ##################### | |
10 | |
11 Ms4TabSqlDb <- setRefClass("Ms4TabSqlDb", contains = "MsDb", fields = list(.host = "character", .port = "integer", .dbname = "character", .user = "character", .password = "character", .drv = "ANY", .conn = "ANY")) | |
12 | |
13 ############### | |
14 # CONSTRUCTOR # | |
15 ############### | |
16 | |
17 Ms4TabSqlDb$methods( initialize = function(host = NA_character_, port = NA_integer_, dbname = NA_character_, user = NA_character_, password = NA_character_, ...) { | |
18 | |
19 # Initialize members | |
20 .host <<- if ( ! is.null(host)) host else NA_character_ | |
21 .port <<- if ( ! is.null(port)) port else NA_integer_ | |
22 .dbname <<- if ( ! is.null(dbname)) dbname else NA_character_ | |
23 .user <<- if ( ! is.null(user)) user else NA_character_ | |
24 .password <<- if ( ! is.null(password)) password else NA_character_ | |
25 .drv <<- NULL | |
26 .conn <<- NULL | |
27 | |
28 callSuper(...) | |
29 }) | |
30 | |
31 ################## | |
32 # GET CONNECTION # | |
33 ################## | |
34 | |
35 Ms4TabSqlDb$methods( .get.connection = function() { | |
36 | |
37 # Initialize connection | |
38 if (is.null(.self$.conn)) { | |
39 library('RPostgreSQL') | |
40 .drv <<- dbDriver("PostgreSQL") | |
41 .conn <<- dbConnect(.self$.drv, host = .self$.host, port = .self$.port, dbname = .self$.dbname, user = .self$.user, password = .self$.password) | |
42 } | |
43 | |
44 return(.self$.conn) | |
45 }) | |
46 | |
47 ############## | |
48 # SEND QUERY # | |
49 ############## | |
50 | |
51 Ms4TabSqlDb$methods( .send.query = function(query) { | |
52 conn <- .self$.get.connection() # Call it first separately, so library RPostgreSQL is loaded. | |
53 rs <- try(dbSendQuery(conn, query)) | |
54 return(rs) | |
55 }) | |
56 | |
57 #################### | |
58 # GET MOLECULE IDS # | |
59 #################### | |
60 | |
61 Ms4TabSqlDb$methods( getMoleculeIds = function() { | |
62 | |
63 rs <- .self$.send.query('select pkmol.molecule_id as id from peaklist_name as pkmol;') | |
64 ids <- fetch(rs,n=-1) | |
65 ids <- ids[['id']] # Get 'id' column | |
66 ids <- vapply(ids, function(x) { if (substring(x, 1, 1) == 'N') as.integer(substring(x, 2)) else as.integer(x) } , FUN.VALUE = 1, USE.NAMES = FALSE) | |
67 ids <- (sort(ids)) | |
68 | |
69 return(ids) | |
70 }) | |
71 | |
72 #################### | |
73 # GET NB MOLECULES # | |
74 #################### | |
75 | |
76 Ms4TabSqlDb$methods( getNbMolecules = function() { | |
77 | |
78 rs <- .self$.send.query('select count(*) from peaklist_name;') | |
79 df <- fetch(rs,n=-1) | |
80 n <- df[[1]] | |
81 | |
82 return(n) | |
83 }) | |
84 | |
85 ##################### | |
86 # GET MOLECULE NAME # | |
87 ##################### | |
88 | |
89 Ms4TabSqlDb$methods( getMoleculeName = function(molid) { | |
90 | |
91 # Build request | |
92 where <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
93 request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';') | |
94 | |
95 # Run request | |
96 rs <- .self$.send.query(request) | |
97 df <- fetch(rs,n=-1) | |
98 | |
99 # Get IDs | |
100 ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE) | |
101 | |
102 # Get names in the same order as the input vector | |
103 names <- df[['name']][order(ids)[order(molid)]] | |
104 | |
105 return(if (is.null(names)) NA_character_ else names) | |
106 }) | |
107 | |
108 | |
109 ############################### | |
110 # GET CHROMATOGRAPHIC COLUMNS # | |
111 ############################### | |
112 | |
113 Ms4TabSqlDb$methods( getChromCol = function(molid = NULL) { | |
114 | |
115 # Get all columns | |
116 if (is.null(molid)) { | |
117 request <- 'select name from method;' | |
118 | |
119 # Get columns of the specified molecules | |
120 } else { | |
121 where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
122 where <- paste0('pk.name_id = pkmol.id and pk.id = pkret.id_peak and pkret.id_method = method.id and (', where_molids, ')') | |
123 request <- paste0('select distinct method.name from method, peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret where ', where, ';') | |
124 } | |
125 | |
126 # Run request | |
127 rs <- .self$.send.query(request) | |
128 df <- fetch(rs,n=-1) | |
129 | |
130 # Gets column list | |
131 cols <- df[['name']] | |
132 | |
133 # Remove FIA | |
134 cols <- cols[ cols != 'FIA'] | |
135 | |
136 # Normalize names | |
137 cols <- vapply(cols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) | |
138 | |
139 # Remove duplicates | |
140 cols <- cols[ ! duplicated(cols)] | |
141 | |
142 # Make data frame | |
143 cols <- data.frame(id = cols, title = cols, stringsAsFactors = FALSE) | |
144 | |
145 return(cols) | |
146 }) | |
147 | |
148 ################ | |
149 # FIND BY NAME # | |
150 ################ | |
151 | |
152 Ms4TabSqlDb$methods( findByName = function(name) { | |
153 | |
154 if (is.null(name)) return(NA_integer_) | |
155 | |
156 # Put names in uppercase | |
157 uname <- toupper(name) | |
158 | |
159 # Build request | |
160 where <- paste0(vapply(uname, function(n) paste0("upper(pkmol.name) = '", gsub("'", "''", n, perl = TRUE), "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = ' or ') | |
161 request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';') | |
162 | |
163 # Run request | |
164 rs <- .self$.send.query(request) | |
165 df <- fetch(rs,n=-1) | |
166 | |
167 # Adds missing names/IDs | |
168 missing_names <- uname[ ! uname %in% toupper(df[['name']])] | |
169 df <- rbind(df, data.frame(id = rep(NA_integer_, length(missing_names)), name = missing_names)) | |
170 | |
171 # Get IDs and names | |
172 ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE) | |
173 names <- toupper(as.character(df[['name']])) | |
174 | |
175 # Get IDs in the same order as the input vector | |
176 ids[order(uname)] <- ids[order(names)] | |
177 | |
178 return(if (is.null(ids)) NA_integer_ else ids) | |
179 }) | |
180 | |
181 ####################### | |
182 # GET RETENTION TIMES # | |
183 ####################### | |
184 | |
185 Ms4TabSqlDb$methods( getRetentionTimes = function(molid, col = NA_character_) { | |
186 | |
187 if (is.null(molid) || is.na(molid) || length(molid) != 1) | |
188 stop("The parameter molid must consist only in a single integer.") | |
189 | |
190 # Build request | |
191 request <- paste0("select distinct method.name as col, (pkret.retention * 60) as ret from peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret, method where pkret.id_peak = pk.id and pkmol.id = pk.name_id and pkret.id_method = method.id and pkmol.molecule_id = 'N", molid, "'") | |
192 if ( ! is.na(col)) { | |
193 where_cols <- paste0(vapply(col, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = ''), collapse = ' or ') | |
194 request <- paste0(request, ' and (', where_cols, ')') | |
195 } | |
196 request <- paste0(request, ';') | |
197 | |
198 # Run request | |
199 rs <- .self$.send.query(request) | |
200 df <- fetch(rs,n=-1) | |
201 | |
202 # Remove FIA | |
203 df <- df[df[['col']] != 'FIA', ] | |
204 | |
205 # Normalize names | |
206 df[['col']] <- vapply(df[['col']], .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) | |
207 | |
208 # Build output list | |
209 lst <- list() | |
210 if (nrow(df) > 0) | |
211 for (i in 1:nrow(df)) { | |
212 c <- df[i, 'col'] | |
213 lst[[c]] <- c(lst[[c]], df[i, 'ret']) | |
214 } | |
215 | |
216 return(lst) | |
217 }) | |
218 | |
219 ################ | |
220 # GET NB PEAKS # | |
221 ################ | |
222 | |
223 Ms4TabSqlDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) { | |
224 | |
225 # Build request | |
226 request <- paste0("select count(*) from peaklist as pk, peaklist_name as pkmol where pkmol.id = pk.name_id") | |
227 if ( length(molid) > 1 || ! is.na(molid)) { | |
228 where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
229 request <- paste0(request, ' and (', where_molids, ')') | |
230 } | |
231 if ( ! is.na(type)) { | |
232 request <- paste0(request, ' and ', if (type == MSDB.TAG.POS) '' else 'not ', 'ion_pos') | |
233 } | |
234 request <- paste0(request, ';') | |
235 | |
236 # Run request | |
237 rs <- .self$.send.query(request) | |
238 df <- fetch(rs,n=-1) | |
239 | |
240 return(df[1,1]) | |
241 }) | |
242 | |
243 ############################### | |
244 # GET CHROMATOGRAPHIC COLUMNS # | |
245 ############################### | |
246 | |
247 Ms4TabSqlDb$methods( .to.dbcols = function(col) { | |
248 | |
249 # Get all column names | |
250 request <- 'select name from method;' | |
251 rs <- .self$.send.query(request) | |
252 df <- fetch(rs,n=-1) | |
253 | |
254 # Get database column names | |
255 dbcols <- df[['name']] | |
256 dbcols <- dbcols[ dbcols != 'FIA'] | |
257 | |
258 # Get normalize names | |
259 normcols <- vapply(dbcols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) | |
260 | |
261 return(dbcols[normcols == tolower(col)]) | |
262 }) | |
263 | |
264 ################# | |
265 # GET MZ VALUES # | |
266 ################# | |
267 | |
268 # Returns a numeric vector of all masses stored inside the database. | |
269 Ms4TabSqlDb$methods( getMzValues = function(mode = NULL) { | |
270 | |
271 # Build request | |
272 select <- paste0("select distinct pk.mass as ", MSDB.TAG.MZTHEO) | |
273 from <- " from peaklist as pk" | |
274 where <- "" | |
275 if ( ! is.null(mode)) | |
276 where <- paste0(" where ", if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos') | |
277 | |
278 # Assemble request | |
279 request <- paste0(select, from, where, ';') | |
280 | |
281 # Run request | |
282 rs <- .self$.send.query(request) | |
283 df <- fetch(rs, n=-1) | |
284 | |
285 return(df[[MSDB.TAG.MZTHEO]]) | |
286 }) | |
287 | |
288 ########## | |
289 # SEARCH # | |
290 ########## | |
291 | |
292 Ms4TabSqlDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) { | |
293 | |
294 # Build request | |
295 select <- paste0("select pkmol.molecule_id as ", MSDB.TAG.MOLID, ", pkmol.name as ", MSDB.TAG.MOLNAMES,", pk.mass as ", MSDB.TAG.MZTHEO, ", pk.composition as ", MSDB.TAG.COMP,", pk.attribution as ", MSDB.TAG.ATTR) | |
296 from <- " from peaklist as pk, peaklist_name as pkmol" | |
297 where <- paste0(" where pkmol.id = pk.name_id and pk.mass >= ", mz.low, " and pk.mass <= ", mz.high) | |
298 where <- paste0(where, ' and ', if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos') | |
299 | |
300 # Insert where clause on attribs | |
301 if ( ! is.null(attribs)) { | |
302 where.attribs <- paste0(vapply(attribs, function(a) paste0("pk.attribution = '", a, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ") | |
303 where <- paste0(where, ' and (', where.attribs, ')') | |
304 } | |
305 | |
306 # Insert where clause on molids | |
307 if ( ! is.null(molids)) { | |
308 where.molids <- paste0(vapply(molids, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
309 where <- paste0(where, ' and (', where.molids, ')') | |
310 } | |
311 | |
312 # Insert where clause on columns | |
313 if ( ! is.null(col)) { | |
314 dbcols <- .self$.to.dbcols(col) | |
315 if ( ! is.null(dbcols)) { | |
316 | |
317 # Can't find specified columns | |
318 if (length(dbcols) == 0 && length(col) > 0) | |
319 return(.get.empty.result.df(rt = TRUE)) | |
320 | |
321 select <- paste0(select, ", (60 * pkret.retention) as ", MSDB.TAG.COLRT, ", method.name as ", MSDB.TAG.COL) | |
322 from <- paste0(from, ", method, peaklist_ret as pkret") | |
323 where.cols <- if (length(dbcols) == 0) 'TRUE' else paste0(vapply(dbcols, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ") | |
324 where <- paste0(where, " and pk.id = pkret.id_peak and pkret.id_method = method.id and (", where.cols, ")") | |
325 if (! is.null(rt.low) && ! is.null(rt.high)) | |
326 where <- paste0(where, " and pkret.retention * 60 >= ", rt.low, " and pkret.retention * 60 <= ", rt.high) | |
327 } | |
328 } | |
329 | |
330 # Assemble request | |
331 request <- paste0(select, from, where, ';') | |
332 | |
333 # Run request | |
334 rs <- .self$.send.query(request) | |
335 df <- fetch(rs,n=-1) | |
336 | |
337 # No results | |
338 | |
339 # Remove N prefix from IDs | |
340 if (nrow(df) > 0) | |
341 df[[MSDB.TAG.MOLID]] <- vapply(df[[MSDB.TAG.MOLID]], function(x) substring(x, 2), FUN.VALUE = '', USE.NAMES = FALSE) | |
342 else if (nrow(df) == 0) | |
343 df <- .get.empty.result.df(rt = ! is.null(col)) | |
344 | |
345 return(df) | |
346 }) | |
347 | |
348 } # end of load safe guard |