Mercurial > repos > prog > lcmsmatching
comparison search-mz @ 1:253d531a0193 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
author | prog |
---|---|
date | Sat, 03 Sep 2016 17:02:01 -0400 |
parents | e66bb061af06 |
children | 20d69a062da3 |
comparison
equal
deleted
inserted
replaced
0:e66bb061af06 | 1:253d531a0193 |
---|---|
15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) | 15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) |
16 source(file.path(dirname(script.path), 'fshlp.R'), chdir = TRUE) | 16 source(file.path(dirname(script.path), 'fshlp.R'), chdir = TRUE) |
17 source(file.path(dirname(script.path), 'biodb-common.R'), chdir = TRUE) | 17 source(file.path(dirname(script.path), 'biodb-common.R'), chdir = TRUE) |
18 source(file.path(dirname(script.path), 'nethlp.R'), chdir = TRUE) | 18 source(file.path(dirname(script.path), 'nethlp.R'), chdir = TRUE) |
19 | 19 |
20 # Missing paste0() function in R 2.14.1 | |
21 if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15) | |
22 paste0 <- function(...) paste(..., sep = '') | |
23 | |
20 ############# | 24 ############# |
21 # CONSTANTS # | 25 # CONSTANTS # |
22 ############# | 26 ############# |
23 | 27 |
24 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE) | 28 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE) |
29 USERAGENT <- 'search-mz ; pierrick.roger@gmail.com' | |
25 | 30 |
26 # Authorized database types | 31 # Authorized database types |
27 MSDB.XLS <- 'xls' | 32 MSDB.XLS <- 'xls' |
28 MSDB.4TABSQL <- '4tabsql' | 33 MSDB.4TABSQL <- '4tabsql' |
29 MSDB.FILE <- 'file' | 34 MSDB.FILE <- 'file' |
185 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', | 190 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', |
186 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', | 191 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', |
187 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), | 192 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), |
188 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', | 193 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', |
189 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', | 194 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".', |
190 'useragent', NA_character_, 1, 'character', 'User agent. Used by the "Peakforest" database.', | |
191 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', | 195 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', |
192 'db-user', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', | 196 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.', |
193 'db-password', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.', | 197 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.', |
194 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'), | 198 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'), |
195 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'), | 199 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'), |
200 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.', | |
196 'debug', NA_character_, 0, 'logical', 'Set debug mode.' | 201 'debug', NA_character_, 0, 'logical', 'Set debug mode.' |
197 ) | 202 ) |
198 | 203 |
199 spec <- c(spec, make.getopt.spec.print.dft()) | 204 spec <- c(spec, make.getopt.spec.print.dft()) |
200 | 205 |
222 opt <- set.dft.arg.val(opt) # Set default values | 227 opt <- set.dft.arg.val(opt) # Set default values |
223 opt <- parse.arg.val(opt) # Parse list values | 228 opt <- parse.arg.val(opt) # Parse list values |
224 | 229 |
225 # Check values | 230 # Check values |
226 error <- .check.db.conn.opts(opt) | 231 error <- .check.db.conn.opts(opt) |
227 if (is.null(opt[['output-file']])) { | 232 if (is.null(opt[['output-file']]) && is.null(opt[['list-cols']])) { |
228 warning("You must set a path for the output file.") | 233 warning("You must set a path for the output file.") |
229 error <- TRUE | 234 error <- TRUE |
230 } | 235 } |
231 if (is.null(opt[['list-cols']])) { | 236 if (is.null(opt[['list-cols']])) { |
232 if (is.null(opt[['input-file']])) { | 237 if (is.null(opt[['input-file']])) { |
323 } | 328 } |
324 } | 329 } |
325 if (opt$database == MSDB.PEAKFOREST) { | 330 if (opt$database == MSDB.PEAKFOREST) { |
326 if (is.null(opt$url)) { | 331 if (is.null(opt$url)) { |
327 warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.") | 332 warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.") |
328 error <- TRUE | |
329 } | |
330 if (is.null(opt$useragent)) { | |
331 warning("When using PeakForest database, you must specify a user agent with option --useragent.") | |
332 error <- TRUE | 333 error <- TRUE |
333 } | 334 } |
334 } | 335 } |
335 | 336 |
336 return(error) | 337 return(error) |
361 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']] | 362 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']] |
362 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']] | 363 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']] |
363 } | 364 } |
364 | 365 |
365 db <- switch(opt$database, | 366 db <- switch(opt$database, |
366 peakforest = MsPeakForestDb$new(url = opt$url, useragent = opt$useragent), | 367 peakforest = MsPeakForestDb$new(url = opt$url, useragent = USERAGENT, token = opt[['db-token']]), |
367 xls = MsXlsDb(db_dir = opt$url, cache_dir = opt[['cache-dir']]), | 368 xls = MsXlsDb$new(db_dir = opt$url, cache_dir = opt[['cache-dir']]), |
368 '4tabsql' = Ms4TabSqlDb(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]), | 369 '4tabsql' = Ms4TabSqlDb$new(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]), |
369 file = MsFileDb(file = opt$url), | 370 file = MsFileDb$new(file = opt$url), |
370 NULL) | 371 NULL) |
371 db$setPrecursors(precursors) | 372 db$setPrecursors(precursors) |
372 if (db$areDbFieldsSettable()) | 373 if (db$areDbFieldsSettable()) |
373 db$setDbFields(opt[['db-fields']]) | 374 db$setDbFields(opt[['db-fields']]) |
374 if (db$areDbMsModesSettable()) | 375 if (db$areDbMsModesSettable()) |
387 # Replace public database IDs by URLs | 388 # Replace public database IDs by URLs |
388 if ( ! is.null(peaks)) | 389 if ( ! is.null(peaks)) |
389 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { | 390 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { |
390 field <- output.fields[[extdb]] | 391 field <- output.fields[[extdb]] |
391 if (field %in% colnames(peaks)) | 392 if (field %in% colnames(peaks)) |
392 peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = RBIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') | 393 peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') |
393 } | 394 } |
394 | 395 |
395 # Write HTML | 396 # Write HTML |
396 html <- HtmlWriter(file = file) | 397 html <- HtmlWriter(file = file) |
397 html$writeBegTag('html') | 398 html$writeBegTag('html') |
467 db <- .load.db(opt) | 468 db <- .load.db(opt) |
468 | 469 |
469 # Print columns | 470 # Print columns |
470 if ( ! is.null(opt[['list-cols']])) { | 471 if ( ! is.null(opt[['list-cols']])) { |
471 cols <- db$getChromCol() | 472 cols <- db$getChromCol() |
472 df.write.tsv(cols, file = opt[['output-file']]) | 473 df.write.tsv(cols, file = if (is.null(opt[['output-file']])) stdout() else opt[['output-file']]) |
473 q(status = 0) | 474 q(status = 0) |
474 } | 475 } |
475 | 476 |
476 # Read input | 477 # Read input |
477 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) | 478 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) |
504 if ( ! is.null(opt[['all-cols']])) | 505 if ( ! is.null(opt[['all-cols']])) |
505 opt$rtcol <- db$getChromCol() | 506 opt$rtcol <- db$getChromCol() |
506 | 507 |
507 # Check chrom columns | 508 # Check chrom columns |
508 if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) { | 509 if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) { |
509 dbcols <- db$getChromCol() | 510 dbcols <- db$getChromCol()[['id']] |
510 unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols] | 511 unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols] |
511 if (length(unknown.cols) > 0) { | 512 if (length(unknown.cols) > 0) { |
512 stop(paste0("Unknown chromatographic column", (if (length(unknown.cols) > 1) 's' else ''), ': ', paste(unknown.cols, collapse = ', '), ".\nAllowed chromatographic column names are:\n", paste(dbcols, collapse = "\n"))) | 513 stop(paste0("Unknown chromatographic column", (if (length(unknown.cols) > 1) 's' else ''), ': ', paste(unknown.cols, collapse = ', '), ".\nAllowed chromatographic column names are:\n", paste(dbcols, collapse = "\n"))) |
513 } | 514 } |
514 } | 515 } |