Mercurial > repos > prog > lcmsmatching
comparison search-mz @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
| author | prog | 
|---|---|
| date | Thu, 02 Mar 2017 08:55:00 -0500 | 
| parents | 253d531a0193 | 
| children | fb9c0409d85c | 
   comparison
  equal
  deleted
  inserted
  replaced
| 1:253d531a0193 | 2:20d69a062da3 | 
|---|---|
| 47 MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT | 47 MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT | 
| 48 MSDB.DFT[['precursor-rt-tol']] <- 5 | 48 MSDB.DFT[['precursor-rt-tol']] <- 5 | 
| 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP | 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP | 
| 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) | 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) | 
| 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) | 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) | 
| 52 MSDB.DFT[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) | |
| 53 MSDB.DFT[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) | |
| 54 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') | 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') | 
| 55 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') | 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') | 
| 54 DEFAULT.ARG.VALUES <- MSDB.DFT | |
| 55 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) | |
| 56 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) | |
| 56 | 57 | 
| 57 ############## | 58 ############## | 
| 58 # PRINT HELP # | 59 # PRINT HELP # | 
| 59 ############## | 60 ############## | 
| 60 | 61 | 
| 106 # Parse retention time columns | 107 # Parse retention time columns | 
| 107 if ( ! is.null(opt$rtcol)) | 108 if ( ! is.null(opt$rtcol)) | 
| 108 opt$rtcol <- strsplit(opt$rtcol, ',')[[1]] | 109 opt$rtcol <- strsplit(opt$rtcol, ',')[[1]] | 
| 109 | 110 | 
| 110 # Parse input column names | 111 # Parse input column names | 
| 111 if ( ! is.null(opt[['input-col-names']])) { | 112 if (is.null(opt[['input-col-names']])) { | 
| 113 opt[['input-col-names']] <- msdb.get.dft.input.fields() | |
| 114 } | |
| 115 else { | |
| 112 custcols <- split.kv.list(opt[['input-col-names']]) | 116 custcols <- split.kv.list(opt[['input-col-names']]) | 
| 113 dftcols <- split.kv.list(MSDB.DFT[['input-col-names']]) | 117 dftcols <- msdb.get.dft.input.fields() | 
| 114 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) | 118 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) | 
| 115 } | 119 } | 
| 116 | 120 | 
| 117 # Parse output column names | 121 # Parse output column names | 
| 118 if ( ! is.null(opt[['output-col-names']])) { | 122 if (is.null(opt[['output-col-names']])) { | 
| 123 # By default keep input col names for output | |
| 124 opt[['output-col-names']] <- msdb.get.dft.output.fields() | |
| 125 input.cols <- names(opt[['input-col-names']]) | |
| 126 output.cols <- names(opt[['output-col-names']]) | |
| 127 opt[['output-col-names']] <- c(opt[['input-col-names']][input.cols %in% output.cols], opt[['output-col-names']][ ! output.cols %in% input.cols]) | |
| 128 } | |
| 129 else { | |
| 119 custcols <- split.kv.list(opt[['output-col-names']]) | 130 custcols <- split.kv.list(opt[['output-col-names']]) | 
| 120 dftcols <- split.kv.list(MSDB.DFT[['output-col-names']]) | 131 dftcols <- msdb.get.dft.output.fields() | 
| 121 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) | 132 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) | 
| 122 } | 133 } | 
| 123 | 134 | 
| 124 # Parse lists of precursors | 135 # Parse lists of precursors | 
| 125 if ( ! is.null(opt[['pos-prec']])) | 136 if ( ! is.null(opt[['pos-prec']])) | 
| 134 # PRINT DEFAULT ARGUMENT VALUES # | 145 # PRINT DEFAULT ARGUMENT VALUES # | 
| 135 ################################# | 146 ################################# | 
| 136 | 147 | 
| 137 print.dft.arg.val <- function(opt) { | 148 print.dft.arg.val <- function(opt) { | 
| 138 | 149 | 
| 139 print.flags <- MSDB.DFT | 150 print.flags <- DEFAULT.ARG.VALUES | 
| 140 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '') | 151 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '') | 
| 141 for (f in names(print.flags)) | 152 for (f in names(print.flags)) | 
| 142 if ( ! is.null(opt[[f]])) { | 153 if ( ! is.null(opt[[f]])) { | 
| 143 cat(print.flags[[f]]) | 154 cat(print.flags[[f]]) | 
| 144 q(status = 0) | 155 q(status = 0) | 
| 147 | 158 | 
| 148 make.getopt.spec.print.dft <- function() { | 159 make.getopt.spec.print.dft <- function() { | 
| 149 | 160 | 
| 150 spec <- character() | 161 spec <- character() | 
| 151 | 162 | 
| 152 for (f in names(MSDB.DFT)) | 163 for (f in names(DEFAULT.ARG.VALUES)) | 
| 153 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)) | 164 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)) | 
| 154 | 165 | 
| 155 return(spec) | 166 return(spec) | 
| 156 } | 167 } | 
| 157 | 168 | 
| 182 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.', | 193 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.', | 
| 183 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.', | 194 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.', | 
| 184 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'), | 195 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'), | 
| 185 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'), | 196 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'), | 
| 186 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'), | 197 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'), | 
| 187 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', MSDB.DFT[['input-col-names']], '".'), | 198 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".'), | 
| 188 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', MSDB.DFT[['output-col-names']], '".'), | 199 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', DEFAULT.ARG.VALUES[['output-col-names']], '".'), | 
| 189 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'), | 200 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'), | 
| 190 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', | 201 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', | 
| 191 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', | 202 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', | 
| 192 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), | 203 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), | 
| 193 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', | 204 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', | 
| 384 ############### | 395 ############### | 
| 385 | 396 | 
| 386 output.html <- function(db, main, peaks, file, opt, output.fields) { | 397 output.html <- function(db, main, peaks, file, opt, output.fields) { | 
| 387 | 398 | 
| 388 # Replace public database IDs by URLs | 399 # Replace public database IDs by URLs | 
| 389 if ( ! is.null(peaks)) | 400 if ( ! is.null(peaks) || ! is.null(main)) { | 
| 401 # Conversion from extdb id field to extdb name | |
| 402 extdb2classdb = list() | |
| 403 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG | |
| 404 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB | |
| 405 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI | |
| 406 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP | |
| 407 | |
| 408 # Loop on all dbs | |
| 390 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { | 409 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { | 
| 391 field <- output.fields[[extdb]] | 410 field <- output.fields[[extdb]] | 
| 392 if (field %in% colnames(peaks)) | 411 if ( ! is.null(peaks) && field %in% colnames(peaks)) | 
| 393 peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') | 412 peaks[[field]] <- vapply(peaks[[field]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') | 
| 394 } | 413 if ( ! is.null(main) && field %in% colnames(main)) | 
| 414 main[[field]] <- vapply(main[[field]], function(ids) if (is.na(ids) || nchar(ids) == 0) '' else paste(vapply(strsplit(ids, opt[['molids-sep']])[[1]], function(id) paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = ''), collapse = opt[['molids-sep']]), FUN.VALUE = '') | |
| 415 } | |
| 416 } | |
| 395 | 417 | 
| 396 # Write HTML | 418 # Write HTML | 
| 397 html <- HtmlWriter(file = file) | 419 html <- HtmlWriter(file = file) | 
| 398 html$writeBegTag('html') | 420 html$writeBegTag('html') | 
| 399 html$writeBegTag('header') | 421 html$writeBegTag('header') | 
| 422 html$writeTag('meta', attr = c(charset = "UTF-8")) | |
| 400 html$writeTag('title', text = "LC/MS matching results") | 423 html$writeTag('title', text = "LC/MS matching results") | 
| 401 html$writeBegTag('style') | 424 html$writeBegTag('style') | 
| 402 html$write('table, th, td { border-collapse: collapse; }') | 425 html$write('table, th, td { border-collapse: collapse; }') | 
| 403 html$write('table, th { border: 1px solid black; }') | 426 html$write('table, th { border: 1px solid black; }') | 
| 404 html$write('td { border-left: 1px solid black; border-right: 1px solid black; }') | 427 html$write('td { border-left: 1px solid black; border-right: 1px solid black; }') | 
| 412 html$writeTag('h1', text = "LC/MS matching") | 435 html$writeTag('h1', text = "LC/MS matching") | 
| 413 | 436 | 
| 414 # Write parameters | 437 # Write parameters | 
| 415 html$writeTag('h2', text = "Parameters") | 438 html$writeTag('h2', text = "Parameters") | 
| 416 html$writeBegTag('ul') | 439 html$writeBegTag('ul') | 
| 417 html$writeTag('li', paste0("Mode = ", opt$mode, ".")) | 440 html$writeTag('li', text = paste0("Mode = ", opt$mode, ".")) | 
| 418 html$writeTag('li', paste0("M/Z precision = ", opt$mzprec, ".")) | 441 html$writeTag('li', text = paste0("M/Z precision = ", opt$mzprec, ".")) | 
| 419 html$writeTag('li', paste0("M/Z shift = ", opt$mzshift, ".")) | 442 html$writeTag('li', text = paste0("M/Z shift = ", opt$mzshift, ".")) | 
| 420 html$writeTag('li', paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), ".")) | 443 html$writeTag('li', text = paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), ".")) | 
| 421 if ( ! is.null(opt[['precursor-match']])) { | 444 if ( ! is.null(opt[['precursor-match']])) { | 
| 422 html$writeTag('li', paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), ".")) | 445 html$writeTag('li', text = paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), ".")) | 
| 423 html$writeTag('li', paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), ".")) | 446 html$writeTag('li', text = paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), ".")) | 
| 424 } | 447 } | 
| 425 if ( ! is.null(opt$rtcol)) { | 448 if ( ! is.null(opt$rtcol)) { | 
| 426 html$writeTag('li', paste0("Columns = ", paste(opt$rtcol, collapse = ", "), ".")) | 449 html$writeTag('li', text = paste0("Columns = ", paste(opt$rtcol, collapse = ", "), ".")) | 
| 427 html$writeTag('li', paste0("RTX = ", opt$rttolx, ".")) | 450 html$writeTag('li', text = paste0("RTX = ", opt$rttolx, ".")) | 
| 428 html$writeTag('li', paste0("RTY = ", opt$rttoly, ".")) | 451 html$writeTag('li', text = paste0("RTY = ", opt$rttoly, ".")) | 
| 429 if ( ! is.null(opt[['precursor-match']])) | 452 if ( ! is.null(opt[['precursor-match']])) | 
| 430 html$writeTag('li', paste0("RTZ = ", opt[['precursor-rt-tol']], ".")) | 453 html$writeTag('li', text = paste0("RTZ = ", opt[['precursor-rt-tol']], ".")) | 
| 431 } | 454 } | 
| 432 html$writeEndTag('ul') | 455 html$writeEndTag('ul') | 
| 433 | 456 | 
| 434 # Write results | 457 # Write results | 
| 435 html$writeTag('h2', text = "Results") | 458 html$writeTag('h2', text = "Results") | 
| 478 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) | 501 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) | 
| 479 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) | 502 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) | 
| 480 if (file.info(opt[['input-file']])$size > 0) { | 503 if (file.info(opt[['input-file']])$size > 0) { | 
| 481 | 504 | 
| 482 # Load file into data frame | 505 # Load file into data frame | 
| 483 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t") | 506 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE) | 
| 484 | 507 | 
| 485 # Convert each column that is identified by a number into a name | 508 # Convert each column that is identified by a number into a name | 
| 486 for (field in names(opt[['input-col-names']])) { | 509 for (field in names(opt[['input-col-names']])) { | 
| 487 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { | 510 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { | 
| 488 col.index <- as.integer(opt[['input-col-names']][[field]]) | 511 col.index <- as.integer(opt[['input-col-names']][[field]]) | 
| 531 # Search database | 554 # Search database | 
| 532 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG | 555 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG | 
| 533 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) | 556 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) | 
| 534 | 557 | 
| 535 # Write output | 558 # Write output | 
| 559 main.output$moveColumnsToBeginning(colnames(input)) | |
| 560 peaks.output$moveColumnsToBeginning(colnames(input)) | |
| 536 # TODO Create a class MsDbOutputCsvFileStream | 561 # TODO Create a class MsDbOutputCsvFileStream | 
| 537 df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE) | 562 df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE) | 
| 538 if ( ! is.null(opt[['peak-output-file']])) | 563 if ( ! is.null(opt[['peak-output-file']])) | 
| 539 # TODO Create a class MsDbOutputCsvFileStream | 564 # TODO Create a class MsDbOutputCsvFileStream | 
| 540 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) | 565 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) | 
