Mercurial > repos > prog > lcmsmatching
comparison Ms4TabSqlDb.R @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
| author | prog |
|---|---|
| date | Tue, 12 Jul 2016 12:02:37 -0400 |
| parents | |
| children | 20d69a062da3 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e66bb061af06 |
|---|---|
| 1 if ( ! exists('Ms4TabSqlDb')) { # Do not load again if already loaded | |
| 2 | |
| 3 library('methods') | |
| 4 source('msdb-common.R') | |
| 5 source('MsDb.R') | |
| 6 | |
| 7 ##################### | |
| 8 # CLASS DECLARATION # | |
| 9 ##################### | |
| 10 | |
| 11 Ms4TabSqlDb <- setRefClass("Ms4TabSqlDb", contains = "MsDb", fields = list(.host = "character", .port = "integer", .dbname = "character", .user = "character", .password = "character", .drv = "ANY", .conn = "ANY")) | |
| 12 | |
| 13 ############### | |
| 14 # CONSTRUCTOR # | |
| 15 ############### | |
| 16 | |
| 17 Ms4TabSqlDb$methods( initialize = function(host = NA_character_, port = NA_integer_, dbname = NA_character_, user = NA_character_, password = NA_character_, ...) { | |
| 18 | |
| 19 # Initialize members | |
| 20 .host <<- if ( ! is.null(host)) host else NA_character_ | |
| 21 .port <<- if ( ! is.null(port)) port else NA_integer_ | |
| 22 .dbname <<- if ( ! is.null(dbname)) dbname else NA_character_ | |
| 23 .user <<- if ( ! is.null(user)) user else NA_character_ | |
| 24 .password <<- if ( ! is.null(password)) password else NA_character_ | |
| 25 .drv <<- NULL | |
| 26 .conn <<- NULL | |
| 27 | |
| 28 callSuper(...) | |
| 29 }) | |
| 30 | |
| 31 ################## | |
| 32 # GET CONNECTION # | |
| 33 ################## | |
| 34 | |
| 35 Ms4TabSqlDb$methods( .get.connection = function() { | |
| 36 | |
| 37 # Initialize connection | |
| 38 if (is.null(.self$.conn)) { | |
| 39 library('RPostgreSQL') | |
| 40 .drv <<- dbDriver("PostgreSQL") | |
| 41 .conn <<- dbConnect(.self$.drv, host = .self$.host, port = .self$.port, dbname = .self$.dbname, user = .self$.user, password = .self$.password) | |
| 42 } | |
| 43 | |
| 44 return(.self$.conn) | |
| 45 }) | |
| 46 | |
| 47 ############## | |
| 48 # SEND QUERY # | |
| 49 ############## | |
| 50 | |
| 51 Ms4TabSqlDb$methods( .send.query = function(query) { | |
| 52 conn <- .self$.get.connection() # Call it first separately, so library RPostgreSQL is loaded. | |
| 53 rs <- try(dbSendQuery(conn, query)) | |
| 54 return(rs) | |
| 55 }) | |
| 56 | |
| 57 #################### | |
| 58 # GET MOLECULE IDS # | |
| 59 #################### | |
| 60 | |
| 61 Ms4TabSqlDb$methods( getMoleculeIds = function() { | |
| 62 | |
| 63 rs <- .self$.send.query('select pkmol.molecule_id as id from peaklist_name as pkmol;') | |
| 64 ids <- fetch(rs,n=-1) | |
| 65 ids <- ids[['id']] # Get 'id' column | |
| 66 ids <- vapply(ids, function(x) { if (substring(x, 1, 1) == 'N') as.integer(substring(x, 2)) else as.integer(x) } , FUN.VALUE = 1, USE.NAMES = FALSE) | |
| 67 ids <- (sort(ids)) | |
| 68 | |
| 69 return(ids) | |
| 70 }) | |
| 71 | |
| 72 #################### | |
| 73 # GET NB MOLECULES # | |
| 74 #################### | |
| 75 | |
| 76 Ms4TabSqlDb$methods( getNbMolecules = function() { | |
| 77 | |
| 78 rs <- .self$.send.query('select count(*) from peaklist_name;') | |
| 79 df <- fetch(rs,n=-1) | |
| 80 n <- df[[1]] | |
| 81 | |
| 82 return(n) | |
| 83 }) | |
| 84 | |
| 85 ##################### | |
| 86 # GET MOLECULE NAME # | |
| 87 ##################### | |
| 88 | |
| 89 Ms4TabSqlDb$methods( getMoleculeName = function(molid) { | |
| 90 | |
| 91 # Build request | |
| 92 where <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
| 93 request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';') | |
| 94 | |
| 95 # Run request | |
| 96 rs <- .self$.send.query(request) | |
| 97 df <- fetch(rs,n=-1) | |
| 98 | |
| 99 # Get IDs | |
| 100 ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE) | |
| 101 | |
| 102 # Get names in the same order as the input vector | |
| 103 names <- df[['name']][order(ids)[order(molid)]] | |
| 104 | |
| 105 return(if (is.null(names)) NA_character_ else names) | |
| 106 }) | |
| 107 | |
| 108 | |
| 109 ############################### | |
| 110 # GET CHROMATOGRAPHIC COLUMNS # | |
| 111 ############################### | |
| 112 | |
| 113 Ms4TabSqlDb$methods( getChromCol = function(molid = NULL) { | |
| 114 | |
| 115 # Get all columns | |
| 116 if (is.null(molid)) { | |
| 117 request <- 'select name from method;' | |
| 118 | |
| 119 # Get columns of the specified molecules | |
| 120 } else { | |
| 121 where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
| 122 where <- paste0('pk.name_id = pkmol.id and pk.id = pkret.id_peak and pkret.id_method = method.id and (', where_molids, ')') | |
| 123 request <- paste0('select distinct method.name from method, peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret where ', where, ';') | |
| 124 } | |
| 125 | |
| 126 # Run request | |
| 127 rs <- .self$.send.query(request) | |
| 128 df <- fetch(rs,n=-1) | |
| 129 | |
| 130 # Gets column list | |
| 131 cols <- df[['name']] | |
| 132 | |
| 133 # Remove FIA | |
| 134 cols <- cols[ cols != 'FIA'] | |
| 135 | |
| 136 # Normalize names | |
| 137 cols <- vapply(cols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) | |
| 138 | |
| 139 # Remove duplicates | |
| 140 cols <- cols[ ! duplicated(cols)] | |
| 141 | |
| 142 # Make data frame | |
| 143 cols <- data.frame(id = cols, title = cols, stringsAsFactors = FALSE) | |
| 144 | |
| 145 return(cols) | |
| 146 }) | |
| 147 | |
| 148 ################ | |
| 149 # FIND BY NAME # | |
| 150 ################ | |
| 151 | |
| 152 Ms4TabSqlDb$methods( findByName = function(name) { | |
| 153 | |
| 154 if (is.null(name)) return(NA_integer_) | |
| 155 | |
| 156 # Put names in uppercase | |
| 157 uname <- toupper(name) | |
| 158 | |
| 159 # Build request | |
| 160 where <- paste0(vapply(uname, function(n) paste0("upper(pkmol.name) = '", gsub("'", "''", n, perl = TRUE), "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = ' or ') | |
| 161 request <- paste0('select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where ', where, ';') | |
| 162 | |
| 163 # Run request | |
| 164 rs <- .self$.send.query(request) | |
| 165 df <- fetch(rs,n=-1) | |
| 166 | |
| 167 # Adds missing names/IDs | |
| 168 missing_names <- uname[ ! uname %in% toupper(df[['name']])] | |
| 169 df <- rbind(df, data.frame(id = rep(NA_integer_, length(missing_names)), name = missing_names)) | |
| 170 | |
| 171 # Get IDs and names | |
| 172 ids <- vapply(df[['id']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE) | |
| 173 names <- toupper(as.character(df[['name']])) | |
| 174 | |
| 175 # Get IDs in the same order as the input vector | |
| 176 ids[order(uname)] <- ids[order(names)] | |
| 177 | |
| 178 return(if (is.null(ids)) NA_integer_ else ids) | |
| 179 }) | |
| 180 | |
| 181 ####################### | |
| 182 # GET RETENTION TIMES # | |
| 183 ####################### | |
| 184 | |
| 185 Ms4TabSqlDb$methods( getRetentionTimes = function(molid, col = NA_character_) { | |
| 186 | |
| 187 if (is.null(molid) || is.na(molid) || length(molid) != 1) | |
| 188 stop("The parameter molid must consist only in a single integer.") | |
| 189 | |
| 190 # Build request | |
| 191 request <- paste0("select distinct method.name as col, (pkret.retention * 60) as ret from peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret, method where pkret.id_peak = pk.id and pkmol.id = pk.name_id and pkret.id_method = method.id and pkmol.molecule_id = 'N", molid, "'") | |
| 192 if ( ! is.na(col)) { | |
| 193 where_cols <- paste0(vapply(col, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = ''), collapse = ' or ') | |
| 194 request <- paste0(request, ' and (', where_cols, ')') | |
| 195 } | |
| 196 request <- paste0(request, ';') | |
| 197 | |
| 198 # Run request | |
| 199 rs <- .self$.send.query(request) | |
| 200 df <- fetch(rs,n=-1) | |
| 201 | |
| 202 # Remove FIA | |
| 203 df <- df[df[['col']] != 'FIA', ] | |
| 204 | |
| 205 # Normalize names | |
| 206 df[['col']] <- vapply(df[['col']], .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) | |
| 207 | |
| 208 # Build output list | |
| 209 lst <- list() | |
| 210 if (nrow(df) > 0) | |
| 211 for (i in 1:nrow(df)) { | |
| 212 c <- df[i, 'col'] | |
| 213 lst[[c]] <- c(lst[[c]], df[i, 'ret']) | |
| 214 } | |
| 215 | |
| 216 return(lst) | |
| 217 }) | |
| 218 | |
| 219 ################ | |
| 220 # GET NB PEAKS # | |
| 221 ################ | |
| 222 | |
| 223 Ms4TabSqlDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) { | |
| 224 | |
| 225 # Build request | |
| 226 request <- paste0("select count(*) from peaklist as pk, peaklist_name as pkmol where pkmol.id = pk.name_id") | |
| 227 if ( length(molid) > 1 || ! is.na(molid)) { | |
| 228 where_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
| 229 request <- paste0(request, ' and (', where_molids, ')') | |
| 230 } | |
| 231 if ( ! is.na(type)) { | |
| 232 request <- paste0(request, ' and ', if (type == MSDB.TAG.POS) '' else 'not ', 'ion_pos') | |
| 233 } | |
| 234 request <- paste0(request, ';') | |
| 235 | |
| 236 # Run request | |
| 237 rs <- .self$.send.query(request) | |
| 238 df <- fetch(rs,n=-1) | |
| 239 | |
| 240 return(df[1,1]) | |
| 241 }) | |
| 242 | |
| 243 ############################### | |
| 244 # GET CHROMATOGRAPHIC COLUMNS # | |
| 245 ############################### | |
| 246 | |
| 247 Ms4TabSqlDb$methods( .to.dbcols = function(col) { | |
| 248 | |
| 249 # Get all column names | |
| 250 request <- 'select name from method;' | |
| 251 rs <- .self$.send.query(request) | |
| 252 df <- fetch(rs,n=-1) | |
| 253 | |
| 254 # Get database column names | |
| 255 dbcols <- df[['name']] | |
| 256 dbcols <- dbcols[ dbcols != 'FIA'] | |
| 257 | |
| 258 # Get normalize names | |
| 259 normcols <- vapply(dbcols, .normalize_column_name, FUN.VALUE = '', USE.NAMES = FALSE) | |
| 260 | |
| 261 return(dbcols[normcols == tolower(col)]) | |
| 262 }) | |
| 263 | |
| 264 ################# | |
| 265 # GET MZ VALUES # | |
| 266 ################# | |
| 267 | |
| 268 # Returns a numeric vector of all masses stored inside the database. | |
| 269 Ms4TabSqlDb$methods( getMzValues = function(mode = NULL) { | |
| 270 | |
| 271 # Build request | |
| 272 select <- paste0("select distinct pk.mass as ", MSDB.TAG.MZTHEO) | |
| 273 from <- " from peaklist as pk" | |
| 274 where <- "" | |
| 275 if ( ! is.null(mode)) | |
| 276 where <- paste0(" where ", if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos') | |
| 277 | |
| 278 # Assemble request | |
| 279 request <- paste0(select, from, where, ';') | |
| 280 | |
| 281 # Run request | |
| 282 rs <- .self$.send.query(request) | |
| 283 df <- fetch(rs, n=-1) | |
| 284 | |
| 285 return(df[[MSDB.TAG.MZTHEO]]) | |
| 286 }) | |
| 287 | |
| 288 ########## | |
| 289 # SEARCH # | |
| 290 ########## | |
| 291 | |
| 292 Ms4TabSqlDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) { | |
| 293 | |
| 294 # Build request | |
| 295 select <- paste0("select pkmol.molecule_id as ", MSDB.TAG.MOLID, ", pkmol.name as ", MSDB.TAG.MOLNAMES,", pk.mass as ", MSDB.TAG.MZTHEO, ", pk.composition as ", MSDB.TAG.COMP,", pk.attribution as ", MSDB.TAG.ATTR) | |
| 296 from <- " from peaklist as pk, peaklist_name as pkmol" | |
| 297 where <- paste0(" where pkmol.id = pk.name_id and pk.mass >= ", mz.low, " and pk.mass <= ", mz.high) | |
| 298 where <- paste0(where, ' and ', if (mode == MSDB.TAG.POS) '' else 'not ', 'pk.ion_pos') | |
| 299 | |
| 300 # Insert where clause on attribs | |
| 301 if ( ! is.null(attribs)) { | |
| 302 where.attribs <- paste0(vapply(attribs, function(a) paste0("pk.attribution = '", a, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ") | |
| 303 where <- paste0(where, ' and (', where.attribs, ')') | |
| 304 } | |
| 305 | |
| 306 # Insert where clause on molids | |
| 307 if ( ! is.null(molids)) { | |
| 308 where.molids <- paste0(vapply(molids, function(id) paste0("pkmol.molecule_id = 'N", id, "'"), FUN.VALUE = ''), collapse = ' or ') | |
| 309 where <- paste0(where, ' and (', where.molids, ')') | |
| 310 } | |
| 311 | |
| 312 # Insert where clause on columns | |
| 313 if ( ! is.null(col)) { | |
| 314 dbcols <- .self$.to.dbcols(col) | |
| 315 if ( ! is.null(dbcols)) { | |
| 316 | |
| 317 # Can't find specified columns | |
| 318 if (length(dbcols) == 0 && length(col) > 0) | |
| 319 return(.get.empty.result.df(rt = TRUE)) | |
| 320 | |
| 321 select <- paste0(select, ", (60 * pkret.retention) as ", MSDB.TAG.COLRT, ", method.name as ", MSDB.TAG.COL) | |
| 322 from <- paste0(from, ", method, peaklist_ret as pkret") | |
| 323 where.cols <- if (length(dbcols) == 0) 'TRUE' else paste0(vapply(dbcols, function(c) paste0("method.name = '", c, "'"), FUN.VALUE = '', USE.NAMES = FALSE), collapse = " or ") | |
| 324 where <- paste0(where, " and pk.id = pkret.id_peak and pkret.id_method = method.id and (", where.cols, ")") | |
| 325 if (! is.null(rt.low) && ! is.null(rt.high)) | |
| 326 where <- paste0(where, " and pkret.retention * 60 >= ", rt.low, " and pkret.retention * 60 <= ", rt.high) | |
| 327 } | |
| 328 } | |
| 329 | |
| 330 # Assemble request | |
| 331 request <- paste0(select, from, where, ';') | |
| 332 | |
| 333 # Run request | |
| 334 rs <- .self$.send.query(request) | |
| 335 df <- fetch(rs,n=-1) | |
| 336 | |
| 337 # No results | |
| 338 | |
| 339 # Remove N prefix from IDs | |
| 340 if (nrow(df) > 0) | |
| 341 df[[MSDB.TAG.MOLID]] <- vapply(df[[MSDB.TAG.MOLID]], function(x) substring(x, 2), FUN.VALUE = '', USE.NAMES = FALSE) | |
| 342 else if (nrow(df) == 0) | |
| 343 df <- .get.empty.result.df(rt = ! is.null(col)) | |
| 344 | |
| 345 return(df) | |
| 346 }) | |
| 347 | |
| 348 } # end of load safe guard |
