comparison search-mz @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents 253d531a0193
children fb9c0409d85c
comparison
equal deleted inserted replaced
1:253d531a0193 2:20d69a062da3
47 MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT 47 MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT
48 MSDB.DFT[['precursor-rt-tol']] <- 5 48 MSDB.DFT[['precursor-rt-tol']] <- 5
49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP
50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields())
51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
52 MSDB.DFT[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
53 MSDB.DFT[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
54 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
55 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
54 DEFAULT.ARG.VALUES <- MSDB.DFT
55 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
56 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
56 57
57 ############## 58 ##############
58 # PRINT HELP # 59 # PRINT HELP #
59 ############## 60 ##############
60 61
106 # Parse retention time columns 107 # Parse retention time columns
107 if ( ! is.null(opt$rtcol)) 108 if ( ! is.null(opt$rtcol))
108 opt$rtcol <- strsplit(opt$rtcol, ',')[[1]] 109 opt$rtcol <- strsplit(opt$rtcol, ',')[[1]]
109 110
110 # Parse input column names 111 # Parse input column names
111 if ( ! is.null(opt[['input-col-names']])) { 112 if (is.null(opt[['input-col-names']])) {
113 opt[['input-col-names']] <- msdb.get.dft.input.fields()
114 }
115 else {
112 custcols <- split.kv.list(opt[['input-col-names']]) 116 custcols <- split.kv.list(opt[['input-col-names']])
113 dftcols <- split.kv.list(MSDB.DFT[['input-col-names']]) 117 dftcols <- msdb.get.dft.input.fields()
114 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) 118 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)])
115 } 119 }
116 120
117 # Parse output column names 121 # Parse output column names
118 if ( ! is.null(opt[['output-col-names']])) { 122 if (is.null(opt[['output-col-names']])) {
123 # By default keep input col names for output
124 opt[['output-col-names']] <- msdb.get.dft.output.fields()
125 input.cols <- names(opt[['input-col-names']])
126 output.cols <- names(opt[['output-col-names']])
127 opt[['output-col-names']] <- c(opt[['input-col-names']][input.cols %in% output.cols], opt[['output-col-names']][ ! output.cols %in% input.cols])
128 }
129 else {
119 custcols <- split.kv.list(opt[['output-col-names']]) 130 custcols <- split.kv.list(opt[['output-col-names']])
120 dftcols <- split.kv.list(MSDB.DFT[['output-col-names']]) 131 dftcols <- msdb.get.dft.output.fields()
121 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) 132 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)])
122 } 133 }
123 134
124 # Parse lists of precursors 135 # Parse lists of precursors
125 if ( ! is.null(opt[['pos-prec']])) 136 if ( ! is.null(opt[['pos-prec']]))
134 # PRINT DEFAULT ARGUMENT VALUES # 145 # PRINT DEFAULT ARGUMENT VALUES #
135 ################################# 146 #################################
136 147
137 print.dft.arg.val <- function(opt) { 148 print.dft.arg.val <- function(opt) {
138 149
139 print.flags <- MSDB.DFT 150 print.flags <- DEFAULT.ARG.VALUES
140 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '') 151 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '')
141 for (f in names(print.flags)) 152 for (f in names(print.flags))
142 if ( ! is.null(opt[[f]])) { 153 if ( ! is.null(opt[[f]])) {
143 cat(print.flags[[f]]) 154 cat(print.flags[[f]])
144 q(status = 0) 155 q(status = 0)
147 158
148 make.getopt.spec.print.dft <- function() { 159 make.getopt.spec.print.dft <- function() {
149 160
150 spec <- character() 161 spec <- character()
151 162
152 for (f in names(MSDB.DFT)) 163 for (f in names(DEFAULT.ARG.VALUES))
153 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)) 164 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f))
154 165
155 return(spec) 166 return(spec)
156 } 167 }
157 168
182 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.', 193 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.',
183 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.', 194 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.',
184 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'), 195 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'),
185 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'), 196 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'),
186 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'), 197 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'),
187 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', MSDB.DFT[['input-col-names']], '".'), 198 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".'),
188 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', MSDB.DFT[['output-col-names']], '".'), 199 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', DEFAULT.ARG.VALUES[['output-col-names']], '".'),
189 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'), 200 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'),
190 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.', 201 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.',
191 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.', 202 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.',
192 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'), 203 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'),
193 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.', 204 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
384 ############### 395 ###############
385 396
386 output.html <- function(db, main, peaks, file, opt, output.fields) { 397 output.html <- function(db, main, peaks, file, opt, output.fields) {
387 398
388 # Replace public database IDs by URLs 399 # Replace public database IDs by URLs
389 if ( ! is.null(peaks)) 400 if ( ! is.null(peaks) || ! is.null(main)) {
401 # Conversion from extdb id field to extdb name
402 extdb2classdb = list()
403 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG
404 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB
405 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI
406 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP
407
408 # Loop on all dbs
390 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { 409 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) {
391 field <- output.fields[[extdb]] 410 field <- output.fields[[extdb]]
392 if (field %in% colnames(peaks)) 411 if ( ! is.null(peaks) && field %in% colnames(peaks))
393 peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '') 412 peaks[[field]] <- vapply(peaks[[field]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '')
394 } 413 if ( ! is.null(main) && field %in% colnames(main))
414 main[[field]] <- vapply(main[[field]], function(ids) if (is.na(ids) || nchar(ids) == 0) '' else paste(vapply(strsplit(ids, opt[['molids-sep']])[[1]], function(id) paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = ''), collapse = opt[['molids-sep']]), FUN.VALUE = '')
415 }
416 }
395 417
396 # Write HTML 418 # Write HTML
397 html <- HtmlWriter(file = file) 419 html <- HtmlWriter(file = file)
398 html$writeBegTag('html') 420 html$writeBegTag('html')
399 html$writeBegTag('header') 421 html$writeBegTag('header')
422 html$writeTag('meta', attr = c(charset = "UTF-8"))
400 html$writeTag('title', text = "LC/MS matching results") 423 html$writeTag('title', text = "LC/MS matching results")
401 html$writeBegTag('style') 424 html$writeBegTag('style')
402 html$write('table, th, td { border-collapse: collapse; }') 425 html$write('table, th, td { border-collapse: collapse; }')
403 html$write('table, th { border: 1px solid black; }') 426 html$write('table, th { border: 1px solid black; }')
404 html$write('td { border-left: 1px solid black; border-right: 1px solid black; }') 427 html$write('td { border-left: 1px solid black; border-right: 1px solid black; }')
412 html$writeTag('h1', text = "LC/MS matching") 435 html$writeTag('h1', text = "LC/MS matching")
413 436
414 # Write parameters 437 # Write parameters
415 html$writeTag('h2', text = "Parameters") 438 html$writeTag('h2', text = "Parameters")
416 html$writeBegTag('ul') 439 html$writeBegTag('ul')
417 html$writeTag('li', paste0("Mode = ", opt$mode, ".")) 440 html$writeTag('li', text = paste0("Mode = ", opt$mode, "."))
418 html$writeTag('li', paste0("M/Z precision = ", opt$mzprec, ".")) 441 html$writeTag('li', text = paste0("M/Z precision = ", opt$mzprec, "."))
419 html$writeTag('li', paste0("M/Z shift = ", opt$mzshift, ".")) 442 html$writeTag('li', text = paste0("M/Z shift = ", opt$mzshift, "."))
420 html$writeTag('li', paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), ".")) 443 html$writeTag('li', text = paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), "."))
421 if ( ! is.null(opt[['precursor-match']])) { 444 if ( ! is.null(opt[['precursor-match']])) {
422 html$writeTag('li', paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), ".")) 445 html$writeTag('li', text = paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), "."))
423 html$writeTag('li', paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), ".")) 446 html$writeTag('li', text = paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), "."))
424 } 447 }
425 if ( ! is.null(opt$rtcol)) { 448 if ( ! is.null(opt$rtcol)) {
426 html$writeTag('li', paste0("Columns = ", paste(opt$rtcol, collapse = ", "), ".")) 449 html$writeTag('li', text = paste0("Columns = ", paste(opt$rtcol, collapse = ", "), "."))
427 html$writeTag('li', paste0("RTX = ", opt$rttolx, ".")) 450 html$writeTag('li', text = paste0("RTX = ", opt$rttolx, "."))
428 html$writeTag('li', paste0("RTY = ", opt$rttoly, ".")) 451 html$writeTag('li', text = paste0("RTY = ", opt$rttoly, "."))
429 if ( ! is.null(opt[['precursor-match']])) 452 if ( ! is.null(opt[['precursor-match']]))
430 html$writeTag('li', paste0("RTZ = ", opt[['precursor-rt-tol']], ".")) 453 html$writeTag('li', text = paste0("RTZ = ", opt[['precursor-rt-tol']], "."))
431 } 454 }
432 html$writeEndTag('ul') 455 html$writeEndTag('ul')
433 456
434 # Write results 457 # Write results
435 html$writeTag('h2', text = "Results") 458 html$writeTag('h2', text = "Results")
478 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) 501 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']]))
479 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) 502 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist."))
480 if (file.info(opt[['input-file']])$size > 0) { 503 if (file.info(opt[['input-file']])$size > 0) {
481 504
482 # Load file into data frame 505 # Load file into data frame
483 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t") 506 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE)
484 507
485 # Convert each column that is identified by a number into a name 508 # Convert each column that is identified by a number into a name
486 for (field in names(opt[['input-col-names']])) { 509 for (field in names(opt[['input-col-names']])) {
487 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { 510 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) {
488 col.index <- as.integer(opt[['input-col-names']][[field]]) 511 col.index <- as.integer(opt[['input-col-names']][[field]])
531 # Search database 554 # Search database
532 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG 555 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG
533 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) 556 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']])
534 557
535 # Write output 558 # Write output
559 main.output$moveColumnsToBeginning(colnames(input))
560 peaks.output$moveColumnsToBeginning(colnames(input))
536 # TODO Create a class MsDbOutputCsvFileStream 561 # TODO Create a class MsDbOutputCsvFileStream
537 df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE) 562 df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE)
538 if ( ! is.null(opt[['peak-output-file']])) 563 if ( ! is.null(opt[['peak-output-file']]))
539 # TODO Create a class MsDbOutputCsvFileStream 564 # TODO Create a class MsDbOutputCsvFileStream
540 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) 565 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE)