comparison search-mz @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children 253d531a0193
comparison
equal deleted inserted replaced
-1:000000000000 0:e66bb061af06
1 #!/usr/bin/env Rscript
2 # vi: ft=R
# Full command line of the R process, interpreter options included: the
# `--file=` argument that carries the script path is not a trailing argument.
args <- commandArgs(trailingOnly = FALSE)
# Path of this script; used below to locate the sibling R files to source.
# The pattern is anchored so that only the interpreter's own `--file=` argument
# is picked up, never a user argument that merely contains that text.
script.path <- sub('^--file=', '', grep('^--file=', args, value = TRUE))
5 library(getopt)
6 source(file.path(dirname(script.path), 'msdb-common.R'), chdir = TRUE)
7 source(file.path(dirname(script.path), 'MsFileDb.R'), chdir = TRUE)
8 source(file.path(dirname(script.path), 'MsPeakForestDb.R'), chdir = TRUE)
9 source(file.path(dirname(script.path), 'MsXlsDb.R'), chdir = TRUE)
10 source(file.path(dirname(script.path), 'Ms4TabSqlDb.R'), chdir = TRUE)
11 source(file.path(dirname(script.path), 'MsDbLogger.R'), chdir = TRUE)
12 source(file.path(dirname(script.path), 'MsDbInputDataFrameStream.R'), chdir = TRUE)
13 source(file.path(dirname(script.path), 'MsDbOutputDataFrameStream.R'), chdir = TRUE)
14 source(file.path(dirname(script.path), 'htmlhlp.R'), chdir = TRUE)
15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE)
16 source(file.path(dirname(script.path), 'fshlp.R'), chdir = TRUE)
17 source(file.path(dirname(script.path), 'biodb-common.R'), chdir = TRUE)
18 source(file.path(dirname(script.path), 'nethlp.R'), chdir = TRUE)
19
20 #############
21 # CONSTANTS #
22 #############
23
# Program name displayed in the usage message. Derived from the script path
# computed at the top of the file rather than from a fixed position in
# commandArgs(), which changes when extra interpreter options are passed.
PROG <- basename(script.path)

# Authorized database types
MSDB.XLS <- 'xls'
MSDB.4TABSQL <- '4tabsql'
MSDB.FILE <- 'file'
MSDB.PEAKFOREST <- 'peakforest'
MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST)

# Authorized mode values
POS_MODE <- 'pos'
NEG_MODE <- 'neg'
MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE)

# Default values of the command line options, keyed by long option name.
# The order of the entries is the order in which the matching `--print-<name>`
# flags are generated. List-like defaults are stored in their serialized
# (string) form, i.e. the form in which they would be typed on the command line.
MSDB.DFT <- list()
MSDB.DFT[['mzshift']] <- 0 # in ppm
MSDB.DFT[['mzprec']] <- 5 # in ppm
MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT
MSDB.DFT[['precursor-rt-tol']] <- 5
MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP
MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields())
MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
MSDB.DFT[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
MSDB.DFT[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
51
52 ##############
53 # PRINT HELP #
54 ##############
55
# Print the usage message built from a getopt specification, then quit R.
#
# spec    getopt specification matrix.
# status  Exit status handed to q(); 0 by default.
#
# This function never returns.
print.help <- function(spec, status = 0) {
    usage <- getopt(spec, usage = TRUE, command = PROG)
    cat(usage)
    q(status = status)
}
60
61 ###############################
62 # SET DEFAULT ARGUMENT VALUES #
63 ###############################
64
# Fill in the options that were not given on the command line.
#
# opt  List of parsed options, as returned by getopt().
#
# Returns the option list where:
#   - every option listed in MSDB.DFT and absent from `opt` gets its default;
#   - for the Excel database, 'cache-dir' defaults to "<url>/cache" when a URL
#     was given and no cache directory was set;
#   - an empty 'rtcol' string is treated as unset.
set.dft.arg.val <- function(opt) {

    for (f in names(MSDB.DFT))
        if (is.null(opt[[f]]))
            opt[[f]] <- MSDB.DFT[[f]]

    # The database type may be missing at this point (it is validated later),
    # so guard against NULL before comparing it.
    is.xls <- ! is.null(opt[['database']]) && opt[['database']] == MSDB.XLS
    if (is.xls && ! is.null(opt[['url']]) && is.null(opt[['cache-dir']]))
        opt[['cache-dir']] <- file.path(opt[['url']], 'cache')

    if ( ! is.null(opt[['rtcol']]) && opt[['rtcol']] == '')
        opt[['rtcol']] <- NULL

    return(opt)
}
80
81 #########################
82 # PARSE ARGUMENT VALUES #
83 #########################
84
# Convert the options given as serialized strings into R structures.
#
# opt  List of options, defaults already applied.
#
# Returns the option list where, for each option that is set:
#   - 'db-fields' and 'db-ms-modes' are the parsed defaults, overridden by the
#     parsed custom key/value pairs;
#   - 'rtcol' is split on commas into a character vector;
#   - 'input-col-names' and 'output-col-names' are the parsed custom pairs
#     followed by the defaults whose keys were not customized;
#   - 'pos-prec' and 'neg-prec' are split with split.str().
parse.arg.val <- function(opt) {

    # Defaults first, custom values written over them.
    override.defaults <- function(key) {
        custom <- split.kv.list(opt[[key]])
        merged <- split.kv.list(MSDB.DFT[[key]])
        merged[names(custom)] <- custom
        merged
    }

    # Custom values first, then the defaults that were not redefined.
    complete.with.defaults <- function(key) {
        custom <- split.kv.list(opt[[key]])
        defaults <- split.kv.list(MSDB.DFT[[key]])
        missing.keys <- ! names(defaults) %in% names(custom)
        c(custom, defaults[missing.keys])
    }

    # Database fields and MS modes
    for (key in c('db-fields', 'db-ms-modes'))
        if ( ! is.null(opt[[key]]))
            opt[[key]] <- override.defaults(key)

    # Retention time columns
    if ( ! is.null(opt$rtcol))
        opt$rtcol <- strsplit(opt$rtcol, ',')[[1]]

    # Input and output column names
    for (key in c('input-col-names', 'output-col-names'))
        if ( ! is.null(opt[[key]]))
            opt[[key]] <- complete.with.defaults(key)

    # Lists of precursors
    for (key in c('pos-prec', 'neg-prec'))
        if ( ! is.null(opt[[key]]))
            opt[[key]] <- split.str(opt[[key]], unlist = TRUE)

    return(opt)
}
127
128 #################################
129 # PRINT DEFAULT ARGUMENT VALUES #
130 #################################
131
# Handle the `--print-<option>` flags.
#
# opt  List of parsed options, as returned by getopt().
#
# For the first option of MSDB.DFT whose 'print-<name>' flag is set in `opt`,
# the default value is written to standard output and R quits with status 0.
# When no such flag is set, the function does nothing.
print.dft.arg.val <- function(opt) {

    for (name in names(MSDB.DFT)) {
        flag <- paste0('print-', name)
        if (is.null(opt[[flag]]))
            next
        cat(MSDB.DFT[[name]])
        q(status = 0)
    }
}
142
# Build the getopt specification entries of the `--print-<option>` flags.
#
# Returns a flat character vector holding, for each option of MSDB.DFT, the
# five getopt fields: long name 'print-<option>', no short name, no argument
# (0), type 'logical' and a description.
make.getopt.spec.print.dft <- function() {

    entries <- vapply(
        names(MSDB.DFT),
        function(f) c(paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)),
        FUN.VALUE = character(5),
        USE.NAMES = FALSE)

    # One column per option: flattening column-wise yields the entries in order.
    return(as.vector(entries))
}
152
153 ##############################
154 # MAKE GETOPT SPECIFICATIONS #
155 ##############################
156
# Build the getopt specification of the program.
#
# Returns a character matrix with one row per option and the five getopt
# columns: long name, short name, argument flag (0: none, 1: required),
# type and description. The `--print-<option>` flags generated by
# make.getopt.spec.print.dft() are appended after the regular options.
make.getopt.spec <- function() {
    spec <- c(
        'help', 'h', 0, 'logical', 'Print this help.',
        'mode', 'm', 1, 'character', paste0('MS mode. Possible values are:', paste(MSDB.MODE.VALS, collapse = ", "), '.'),
        'mzshift', 's', 1, 'numeric', paste0('Shift on m/z, in ppm. Default is ', MSDB.DFT$mzshift,'.'),
        'mzprec', 'p', 1, 'numeric', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mzprec,'.'),
        # This option is the unit of the tolerance, not the tolerance itself.
        'mztolunit', NA_character_, 1, 'character', paste0('Unit of the tolerance on m/z. Default is ', MSDB.DFT$mztolunit,'.'),
        'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
        'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
        'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
        'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'),
        'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.',
        'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.',
        'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.',
        'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.',
        'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.',
        'input-file', 'i', 1, 'character', 'Set input file.',
        'output-file', 'o', 1, 'character', 'Set file to use for the main output.',
        'peak-output-file', NA_character_, 1, 'character', 'If set and if --same-rows is set, then output all matches inside the specified file, with one mz match per line. The output columns are: mz, rt, id, col, colrt, composition, attribution. This means that if an mz value is matched several times, then it will repeated on several lines, with one match description per line.',
        'html-output-file', NA_character_, 1, 'character', 'Set file to use for the HTML output.',
        'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.',
        'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.',
        'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'),
        'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'),
        'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'),
        'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', MSDB.DFT[['input-col-names']], '".'),
        'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', MSDB.DFT[['output-col-names']], '".'),
        'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'),
        'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.',
        'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.',
        'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'),
        'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
        'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".',
        'useragent', NA_character_, 1, 'character', 'User agent. Used by the "Peakforest" database.',
        'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.',
        'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.',
        'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.',
        # There is no --db-file option: the single file database is selected
        # with "--database file" and located with --url.
        'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --database file). Default is "', MSDB.DFT[['db-fields']], '".'),
        'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --database file). Default is "', MSDB.DFT[['db-ms-modes']], '".'),
        'debug', NA_character_, 0, 'logical', 'Set debug mode.'
    )

    spec <- c(spec, make.getopt.spec.print.dft())

    # Five fields per option, laid out row by row.
    return(matrix(spec, byrow = TRUE, ncol = 5))
}
206
207 #############
208 # READ ARGS #
209 #############
210
# Read, complete, parse and validate the command line arguments.
#
# Returns the list of options. The function does not return when:
#   - --help is set (usage printed, exit status 0);
#   - a --print-<option> flag is set (default value printed, exit status 0);
#   - a check fails (a warning is emitted for each problem, then the usage is
#     printed and R quits with status 1).
read_args <- function() {

    # options
    spec <- make.getopt.spec()
    opt <- getopt(spec)

    # help
    if ( ! is.null(opt$help))
        print.help(spec)

    print.dft.arg.val(opt) # Print default values
    opt <- set.dft.arg.val(opt) # Set default values
    opt <- parse.arg.val(opt) # Parse list values

    # Check values
    error <- .check.db.conn.opts(opt)
    if (is.null(opt[['output-file']])) {
        warning("You must set a path for the output file.")
        error <- TRUE
    }

    # The options below are only needed for a search, not for listing the
    # chromatographic columns of the database.
    if (is.null(opt[['list-cols']])) {
        if (is.null(opt[['input-file']])) {
            warning("You must provide an input file.")
            error <- TRUE
        }
        if (is.null(opt$mode) || ( ! opt$mode %in% MSDB.MODE.VALS)) {
            warning("You must specify a mode through the --mode option.")
            error <- TRUE
        }
        if (is.null(opt$mzprec)) {
            warning("You must set a precision in MZ with the --mzprec option.")
            error <- TRUE
        }
        rt.cols.set <- ! is.null(opt$rtcol) || ! is.null(opt[['all-cols']])
        if (rt.cols.set && (is.null(opt$rttolx) || is.null(opt$rttoly))) {
            warning("When chromatographic columns are set, you must provide values for --rttolx and --rttoly.")
            error <- TRUE
        }
        if (is.null(opt$mztolunit) || ( ! opt$mztolunit %in% MSDB.MZTOLUNIT.VALS)) {
            warning("You must specify an M/Z tolerance unit through the --mztolunit option.")
            error <- TRUE
        }
    }

    # help
    if (error)
        print.help(spec, status = 1)

    return(opt)
}
260
261 #####################################
262 # CHECK DATABASE CONNECTION OPTIONS #
263 #####################################
264
# Check the options describing the database connection.
#
# opt  List of options.
#
# Emits one warning per problem found and returns TRUE if at least one problem
# was found, FALSE otherwise. When the 'print-db-fields' or 'print-db-ms-modes'
# flag is set, prints the matching default value and quits with status 0.
.check.db.conn.opts <- function(opt) {

    # Print default values
    if ( ! is.null(opt[['print-db-fields']])) {
        cat(MSDB.DFT[['db-fields']])
        q(status = 0)
    }
    if ( ! is.null(opt[['print-db-ms-modes']])) {
        cat(MSDB.DFT[['db-ms-modes']])
        q(status = 0)
    }

    database <- opt[['database']]
    url <- opt[['url']]

    # Without a database type none of the checks below applies; comparing a
    # NULL value would fail with a zero-length condition.
    if (is.null(database)) {
        warning("You must provide a database type through --database option.")
        return(TRUE)
    }

    error <- FALSE
    if ( ! database %in% MSDB.VALS) {
        warning(paste0("Invalid value \"", database, "\" for --database option."))
        error <- TRUE
    }

    if (database == MSDB.FILE) {
        if (is.null(url)) {
            warning("When using single file database, you must specify the location of the database file with option --url.")
            error <- TRUE
        }
        # Only test the path when one was given: file.exists(NULL) is an error.
        else if ( ! file.exists(url)) {
            warning(paste0("The file path \"", url,"\" specified with --url option is not valid."))
            error <- TRUE
        }
    }

    if (database == MSDB.XLS) {
        if (is.null(url)) {
            warning("When using Excel database, you must specify the location of the Excel files directory with option --url.")
            error <- TRUE
        }
        else if ( ! file.exists(url)) {
            warning(paste0("The directory path \"", url,"\" specified with --url option is not valid."))
            error <- TRUE
        }
    }

    if (database == MSDB.4TABSQL) {
        if (is.null(url)) {
            warning("When using 4Tab SQL database, you must specify the URL of the SQL server with option --url.")
            error <- TRUE
        }
        if (is.null(opt[['db-name']])) {
            warning("When using 4Tab SQL database, you must specify the database name through the --db-name option.")
            error <- TRUE
        }
        if (is.null(opt[['db-user']])) {
            warning("When using 4Tab SQL database, you must specify the database user through the --db-user option.")
            error <- TRUE
        }
        if (is.null(opt[['db-password']])) {
            warning("When using 4Tab SQL database, you must specify the database user password through the --db-password option.")
            error <- TRUE
        }
    }

    if (database == MSDB.PEAKFOREST) {
        if (is.null(url)) {
            warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.")
            error <- TRUE
        }
        if (is.null(opt[['useragent']])) {
            warning("When using PeakForest database, you must specify a user agent with option --useragent.")
            error <- TRUE
        }
    }

    return(error)
}
338
339 #############################
340 # DISPLAY COMMAND LINE HELP #
341 #############################
342
# Print the usage message and quit with status 1 when help was requested or
# when an error was detected; otherwise do nothing.
#
# optspec  getopt specification matrix.
# opt      List of parsed options; only its 'help' entry is looked at.
# prog     Program name to display in the usage message.
# error    TRUE if an error was detected in the arguments.
.disp.cmd.line.help <- function(optspec, opt, prog, error = FALSE) {

    help.wanted <- ! is.null(opt$help)
    if (help.wanted || error) {
        usage <- getopt(optspec, usage = TRUE, command = prog)
        cat(usage)
        q(status = 1)
    }
}
350
351 #################
352 # LOAD DATABASE #
353 #################
354
# Create and configure the database connector selected by the options.
#
# opt  List of validated options.
#
# The connector class is chosen from opt$database. The connector then receives
# the precursor lists, the database fields and MS modes (when the connector
# reports them as settable) and a logger observer.
#
# Returns the configured database object.
.load.db <- function(opt) {

    # Precursors stay NULL unless at least one of the two lists is defined.
    pos.prec <- opt[['pos-prec']]
    neg.prec <- opt[['neg-prec']]
    precursors <- NULL
    if ( ! (is.null(pos.prec) && is.null(neg.prec))) {
        precursors <- list()
        precursors[[MSDB.TAG.POS]] <- pos.prec
        precursors[[MSDB.TAG.NEG]] <- neg.prec
    }

    # Instantiate the connector; only the selected branch is evaluated.
    db <- switch(opt$database,
                 peakforest = MsPeakForestDb$new(url = opt$url, useragent = opt$useragent),
                 xls = MsXlsDb(db_dir = opt$url, cache_dir = opt[['cache-dir']]),
                 '4tabsql' = Ms4TabSqlDb(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]),
                 file = MsFileDb(file = opt$url),
                 NULL)

    # Configure the connector.
    db$setPrecursors(precursors)
    if (db$areDbFieldsSettable()) {
        db$setDbFields(opt[['db-fields']])
    }
    if (db$areDbMsModesSettable()) {
        db$setDbMsModes(opt[['db-ms-modes']])
    }
    db$addObservers(MsDbLogger$new())

    return(db)
}
380
381 ###############
382 # OUTPUT HTML #
383 ###############
384
# Write the HTML report of a matching run.
#
# db             Database object (not used by this function).
# main           Data frame of the main output, or NULL.
# peaks          Data frame of the matched peaks, or NULL.
# file           Path of the HTML file to write.
# opt            List of options; used to report the run parameters.
# output.fields  Output column names, indexed by field tag.
#
# The report contains the run parameters, then the main table (unless the
# 'no-main-table-in-html-output' option is set) and the matched peaks table.
# When neither table has rows, "None." is written instead.
output.html <- function(db, main, peaks, file, opt, output.fields) {

    # Replace public database IDs by URLs
    if ( ! is.null(peaks))
        for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) {
            field <- output.fields[[extdb]]
            if (field %in% colnames(peaks))
                peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = RBIODB.HTML), '">', id, '</a>'), FUN.VALUE = '')
        }

    # Write HTML
    html <- HtmlWriter(file = file)
    html$writeBegTag('html')
    # The document metadata element is <head>; <header> is a body-level
    # sectioning element and does not belong here.
    html$writeBegTag('head')
    html$writeTag('title', text = "LC/MS matching results")
    html$writeBegTag('style')
    html$write('table, th, td { border-collapse: collapse; }')
    html$write('table, th { border: 1px solid black; }')
    html$write('td { border-left: 1px solid black; border-right: 1px solid black; }')
    html$write('th, td { padding: 5px; }')
    html$write('th { background-color: LightBlue; }')
    html$write('tr:nth-child(even) { background-color: LemonChiffon; }')
    html$write('tr:nth-child(odd) { background-color: LightGreen; }')
    html$writeEndTag('style')
    html$writeEndTag('head')
    html$writeBegTag('body')
    html$writeTag('h1', text = "LC/MS matching")

    # Write parameters
    precursor.match <- ! is.null(opt[['precursor-match']])
    html$writeTag('h2', text = "Parameters")
    html$writeBegTag('ul')
    html$writeTag('li', paste0("Mode = ", opt$mode, "."))
    html$writeTag('li', paste0("M/Z precision = ", opt$mzprec, "."))
    html$writeTag('li', paste0("M/Z shift = ", opt$mzshift, "."))
    html$writeTag('li', paste0("Precursor match = ", (if (precursor.match) "yes" else "no"), "."))
    if (precursor.match) {
        html$writeTag('li', paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), "."))
        html$writeTag('li', paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), "."))
    }
    if ( ! is.null(opt$rtcol)) {
        html$writeTag('li', paste0("Columns = ", paste(opt$rtcol, collapse = ", "), "."))
        html$writeTag('li', paste0("RTX = ", opt$rttolx, "."))
        html$writeTag('li', paste0("RTY = ", opt$rttoly, "."))
        if (precursor.match)
            html$writeTag('li', paste0("RTZ = ", opt[['precursor-rt-tol']], "."))
    }
    html$writeEndTag('ul')

    # Write results
    html$writeTag('h2', text = "Results")
    results <- FALSE
    if ( ! is.null(main) && nrow(main) > 0 && is.null(opt[['no-main-table-in-html-output']])) {
        html$writeTag('h3', text = "Main output")
        html$writeTable(main)
        results <- TRUE
    }
    if ( ! is.null(peaks) && nrow(peaks) > 0) {
        html$writeTag('h3', text = "Matched peaks")
        html$writeTable(peaks)
        results <- TRUE
    }
    if ( ! results)
        html$writeTag('p', 'None.')

    html$writeEndTag('body')
    html$writeEndTag('html')
}
452
453 ########
454 # MAIN #
455 ########
456
# While the command line is being read, warnings are promoted to errors
# (warn = 2) and any error prints a traceback before quitting with status 1.
# NOTE(review): read_args() reports invalid options through warning(); with
# warn = 2 those presumably abort before the usage text is printed — confirm.
options(
    warn = 2,
    error = function() {
        traceback(2)
        quit(status = 1)
    })

# Read command line arguments
opt <- read_args()

# Outside debug mode, warnings are plain warnings again and errors make the
# script quit with status 1 without a traceback.
if (is.null(opt$debug))
    options(warn = 0, error = function() quit(status = 1))

# Load database
db <- .load.db(opt)

# Column listing mode: write the chromatographic columns of the database to
# the output file, then stop here.
if ( ! is.null(opt[['list-cols']])) {
    df.write.tsv(db$getChromCol(), file = opt[['output-file']])
    q(status = 0)
}
475
# Read input
input.file <- opt[['input-file']]
if ( ! is.null(input.file) && ! file.exists(input.file))
    stop(paste0("Input file \"", input.file, "\" does not exist."))

if (file.info(input.file)$size > 0) {

    # Load the tab-separated file into a data frame
    input <- read.table(file = input.file, header = TRUE, sep = "\t")

    # A column may be designated by its position instead of its name: replace
    # each such position by the name of the column found there. A value that
    # is an actual column name is left untouched, even when made of digits.
    for (field in names(opt[['input-col-names']])) {
        col.spec <- opt[['input-col-names']][[field]]
        is.position <- length(grep('^[0-9]+$', col.spec)) > 0
        if (col.spec %in% colnames(input) || ! is.position)
            next
        col.index <- as.integer(col.spec)
        if (col.index < 1 || col.index > ncol(input))
            stop(paste0("No column n°", col.index, " for input field ", field, "."))
        opt[['input-col-names']][[field]] <- colnames(input)[[col.index]]
    }

} else {

    # Empty input file: build an empty data frame with the mz and rt columns
    input <- data.frame()
    for (field in c('mz', 'rt'))
        input[[opt[['input-col-names']][[field]]]] <- double()
}

# Check mz column
mz.col <- opt[['input-col-names']][['mz']]
if ( ! mz.col %in% colnames(input))
    stop(paste0('No column named "', mz.col, '" in input file.'))

# With 'all-cols', use every chromatographic column of the database
if ( ! is.null(opt[['all-cols']]))
    opt$rtcol <- db$getChromCol()

# Check that the requested chromatographic columns exist in the database
if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) {
    dbcols <- db$getChromCol()
    unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols]
    if (length(unknown.cols) > 0) {
        plural <- if (length(unknown.cols) > 1) 's' else ''
        stop(paste0("Unknown chromatographic column", plural, ': ', paste(unknown.cols, collapse = ', '), ".\nAllowed chromatographic column names are:\n", paste(dbcols, collapse = "\n")))
    }
}

# Check that an RT column exists when using MZ/RT matching
rt.col <- opt[['input-col-names']][['rt']]
if ( ! is.null(opt$rtcol) && ! rt.col %in% colnames(input))
    stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", rt.col,"' can be found inside your input file."))
519
# Set streams: the main output honours the 'same-rows' and 'molids-sep'
# options, the peaks output is built without them.
same.cols <- ! is.null(opt[['same-cols']])
first.val <- ! is.null(opt[['first-val']])
excel2011 <- ! is.null(opt[['excel2011comp']]) # one flag drives the four ASCII-related settings
input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']])
main.output <- MsDbOutputDataFrameStream$new(keep.unused = same.cols, output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = first.val, ascii = excel2011, nogreek = excel2011, noapostrophe = excel2011, noplusminus = excel2011)
peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = same.cols, output.fields = opt[['output-col-names']], first.val = first.val, ascii = excel2011, nogreek = excel2011, noapostrophe = excel2011, noplusminus = excel2011)
invisible(db$setInputStream(input.stream))
db$addOutputStreams(c(main.output, peaks.output))

# Set M/Z tolerance unit
db$setMzTolUnit(opt[['mztolunit']])

# Search database
# Options are read with [[ ]] (exact matching): `opt$rttol` would partially
# match 'rttolx' or 'rttoly' when --rttol is unset and only one of those two
# is given, silently passing the wrong value as rt.tol.
mode <- if (opt[['mode']] == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG
db$searchForMzRtList(mode = mode, shift = opt[['mzshift']], prec = opt[['mzprec']], rt.tol = opt[['rttol']], rt.tol.x = opt[['rttolx']], rt.tol.y = opt[['rttoly']], col = opt[['rtcol']], precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']])

# Write output
# TODO Create a class MsDbOutputCsvFileStream
df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE)
if ( ! is.null(opt[['peak-output-file']])) {
    # TODO Create a class MsDbOutputCsvFileStream
    df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE)
}
if ( ! is.null(opt[['html-output-file']])) {
    # TODO Create a class MsDbOutputHtmlFileStream
    output.html(db = db, main = main.output$getDataFrame(), peaks = peaks.output$getDataFrame(), file = opt[['html-output-file']], opt = opt, output.fields = opt[['output-col-names']])
}