Mercurial > repos > prog > lcmsmatching
comparison search-mz @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author | prog |
---|---|
date | Tue, 12 Jul 2016 12:02:37 -0400 |
parents | |
children | 253d531a0193 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e66bb061af06 |
---|---|
1 #!/usr/bin/env Rscript | |
2 # vi: ft=R | |
# Full command line of the R process (interpreter options included), needed to
# find the "--file=<script>" argument that Rscript passes to R.
args <- commandArgs(trailingOnly = FALSE)

# Path of this script, used below to locate the sibling R files to source.
# The pattern is anchored and only the first match is kept, so that a user
# argument that merely contains "--file=" cannot be mistaken for the script path.
script.path <- sub("^--file=", "", grep("^--file=", args, value = TRUE)[1])
5 library(getopt) | |
6 source(file.path(dirname(script.path), 'msdb-common.R'), chdir = TRUE) | |
7 source(file.path(dirname(script.path), 'MsFileDb.R'), chdir = TRUE) | |
8 source(file.path(dirname(script.path), 'MsPeakForestDb.R'), chdir = TRUE) | |
9 source(file.path(dirname(script.path), 'MsXlsDb.R'), chdir = TRUE) | |
10 source(file.path(dirname(script.path), 'Ms4TabSqlDb.R'), chdir = TRUE) | |
11 source(file.path(dirname(script.path), 'MsDbLogger.R'), chdir = TRUE) | |
12 source(file.path(dirname(script.path), 'MsDbInputDataFrameStream.R'), chdir = TRUE) | |
13 source(file.path(dirname(script.path), 'MsDbOutputDataFrameStream.R'), chdir = TRUE) | |
14 source(file.path(dirname(script.path), 'htmlhlp.R'), chdir = TRUE) | |
15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) | |
16 source(file.path(dirname(script.path), 'fshlp.R'), chdir = TRUE) | |
17 source(file.path(dirname(script.path), 'biodb-common.R'), chdir = TRUE) | |
18 source(file.path(dirname(script.path), 'nethlp.R'), chdir = TRUE) | |
19 | |
20 ############# | |
21 # CONSTANTS # | |
22 ############# | |
23 | |
# Program name displayed in the usage message. It is taken from the script path
# computed at the top of this file instead of a fixed position in commandArgs(),
# so it is correct even when the script is invoked without a directory component.
PROG <- basename(script.path)

# Authorized database types
MSDB.XLS <- 'xls'
MSDB.4TABSQL <- '4tabsql'
MSDB.FILE <- 'file'
MSDB.PEAKFOREST <- 'peakforest'
MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST)

# Authorized mode values
POS_MODE <- 'pos'
NEG_MODE <- 'neg'
MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE)

# Default values of the command line options, indexed by option name.
# Each entry also gets a matching --print-<name> flag in the getopt spec.
MSDB.DFT <- list()
MSDB.DFT[['mzshift']] <- 0 # in ppm
MSDB.DFT[['mzprec']] <- 5 # in ppm
MSDB.DFT[['mztolunit']] <- MSDB.DFT.MZTOLUNIT
MSDB.DFT[['precursor-rt-tol']] <- 5
MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP
# List-valued defaults are stored in their command line (string) form.
MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields())
MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
MSDB.DFT[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
MSDB.DFT[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields())
MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
51 | |
52 ############## | |
53 # PRINT HELP # | |
54 ############## | |
55 | |
# Print the usage message built from the getopt specification, then quit.
#   spec    The getopt specification matrix.
#   status  Exit status of the R process (0 by default).
print.help <- function(spec, status = 0) {
    usage <- getopt(spec, usage = TRUE, command = PROG)
    cat(usage)
    q(status = status)
}
60 | |
61 ############################### | |
62 # SET DEFAULT ARGUMENT VALUES # | |
63 ############################### | |
64 | |
# Fill in the default values of the options that were not set on the command line.
#   opt  The list of options, as returned by getopt().
# Returns the updated list of options.
set.dft.arg.val <- function(opt) {

    # Apply the defaults declared in MSDB.DFT to every option left unset
    for (f in names(MSDB.DFT)) {
        if (is.null(opt[[f]])) {
            opt[[f]] <- MSDB.DFT[[f]]
        }
    }

    # For the Excel database, the cache directory defaults to a "cache" folder
    # inside the database directory. The database type is tested for NULL first:
    # a missing --database must be reported by the validation step, not crash here.
    database <- opt[['database']]
    if ( ! is.null(database) && database == MSDB.XLS && ! is.null(opt[['url']]) && is.null(opt[['cache-dir']])) {
        opt[['cache-dir']] <- file.path(opt[['url']], 'cache')
    }

    # An empty column list means "no chromatographic column"
    if ( ! is.null(opt[['rtcol']]) && opt[['rtcol']] == '') {
        opt[['rtcol']] <- NULL
    }

    return(opt)
}
80 | |
81 ######################### | |
82 # PARSE ARGUMENT VALUES # | |
83 ######################### | |
84 | |
# Convert the list-valued options from their command line string form into R
# structures, completing user supplied key/value lists with the default entries.
#   opt  The list of options, with defaults already applied.
# Returns the updated list of options.
parse.arg.val <- function(opt) {

    # Default entries in their own order, overridden or extended by user entries.
    overlay.on.defaults <- function(key) {
        user.vals <- split.kv.list(opt[[key]])
        merged <- split.kv.list(MSDB.DFT[[key]])
        merged[names(user.vals)] <- user.vals
        merged
    }

    # User entries first, followed by the defaults the user did not redefine.
    prepend.to.defaults <- function(key) {
        user.vals <- split.kv.list(opt[[key]])
        dft.vals <- split.kv.list(MSDB.DFT[[key]])
        c(user.vals, dft.vals[ ! names(dft.vals) %in% names(user.vals)])
    }

    # Database field names and MS modes
    for (key in c('db-fields', 'db-ms-modes')) {
        if ( ! is.null(opt[[key]])) {
            opt[[key]] <- overlay.on.defaults(key)
        }
    }

    # Chromatographic columns: comma separated list
    if ( ! is.null(opt$rtcol)) {
        opt$rtcol <- strsplit(opt$rtcol, ',')[[1]]
    }

    # Input and output column names
    for (key in c('input-col-names', 'output-col-names')) {
        if ( ! is.null(opt[[key]])) {
            opt[[key]] <- prepend.to.defaults(key)
        }
    }

    # Lists of precursors, one per MS mode
    for (key in c('pos-prec', 'neg-prec')) {
        if ( ! is.null(opt[[key]])) {
            opt[[key]] <- split.str(opt[[key]], unlist = TRUE)
        }
    }

    return(opt)
}
127 | |
128 ################################# | |
129 # PRINT DEFAULT ARGUMENT VALUES # | |
130 ################################# | |
131 | |
# If one of the --print-<option> flags is set, print the default value of the
# corresponding option and quit with status 0. Otherwise do nothing.
#   opt  The list of options, as returned by getopt().
print.dft.arg.val <- function(opt) {

    for (name in names(MSDB.DFT)) {
        flag <- paste0('print-', name)
        if ( ! is.null(opt[[flag]])) {
            cat(MSDB.DFT[[name]])
            q(status = 0)
        }
    }
}
142 | |
# Build the getopt specification entries of the --print-<option> flags, one flag
# per option that has a default value in MSDB.DFT.
# Returns a flat character vector holding 5 values per flag (long name, short
# name, argument mask, type, description), in the order of MSDB.DFT.
make.getopt.spec.print.dft <- function() {

    entries <- lapply(names(MSDB.DFT), function(f) {
        c(paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f))
    })

    # as.character() guarantees an empty character vector when there is no default
    return(as.character(unlist(entries)))
}
152 | |
153 ############################## | |
154 # MAKE GETOPT SPECIFICATIONS # | |
155 ############################## | |
156 | |
# Build the getopt specification of all the command line options.
# Returns a character matrix with one row per option and 5 columns: long name,
# short name, argument mask (0 = flag, 1 = required argument), type, description.
# The --print-<option> flags are appended after the regular options.
make.getopt.spec <- function() {
    spec <- c(
        'help',             'h',            0,  'logical',      'Print this help.',
        'mode',             'm',            1,  'character',    paste0('MS mode. Possible values are: ', paste(MSDB.MODE.VALS, collapse = ", "), '.'),
        'mzshift',          's',            1,  'numeric',      paste0('Shift on m/z, in ppm. Default is ', MSDB.DFT$mzshift,'.'),
        'mzprec',           'p',            1,  'numeric',      paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mzprec,'.'),
        'mztolunit',        NA_character_,  1,  'character',    paste0('Unit of the tolerance on m/z. Possible values are: ', paste(MSDB.MZTOLUNIT.VALS, collapse = ", "), '. Default is ', MSDB.DFT$mztolunit,'.'),
        'rttol',            'r',            1,  'numeric',      paste0('Tolerance on retention times. Unset by default.'),
        'rttolx',           'x',            1,  'numeric',      paste0('Tolerance on retention times. Unset by default.'),
        'rttoly',           'y',            1,  'numeric',      paste0('Tolerance on retention times. Unset by default.'),
        'rtcol',            'c',            1,  'character',    paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'),
        'all-cols',         NA_character_,  0,  'logical',      'Use all available chromatographic columns to match retention times.',
        'check-cols',       NA_character_,  0,  'logical',      'Check that the chromatographic column names specified with option -c really exist.',
        'list-cols',        NA_character_,  0,  'logical',      'List all chromatographic columns present in the database. Write list inside the file specified by -o option.',
        'same-rows',        'a',            0,  'logical',      'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.',
        'same-cols',        'b',            0,  'logical',      'If set, output the same columns as inside the input. All input columns are copied to the output.',
        'input-file',       'i',            1,  'character',    'Set input file.',
        'output-file',      'o',            1,  'character',    'Set file to use for the main output.',
        'peak-output-file', NA_character_,  1,  'character',    'If set and if --same-rows is set, then output all matches inside the specified file, with one mz match per line. The output columns are: mz, rt, id, col, colrt, composition, attribution. This means that if an mz value is matched several times, then it will be repeated on several lines, with one match description per line.',
        'html-output-file', NA_character_,  1,  'character',    'Set file to use for the HTML output.',
        'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.',
        'precursor-match',  NA_character_,  0,  'logical',      'Remove peaks whose molecule precursor peak has not been matched. Unset by default.',
        'precursor-rt-tol', NA_character_,  1,  'numeric',      paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'),
        'pos-prec',         NA_character_,  1,  'character',    paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'),
        'neg-prec',         NA_character_,  1,  'character',    paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'),
        'input-col-names',  NA_character_,  1,  'character',    paste0('Set the input column names. Default is "', MSDB.DFT[['input-col-names']], '".'),
        'output-col-names', NA_character_,  1,  'character',    paste0('Set the output column names. Default is "', MSDB.DFT[['output-col-names']], '".'),
        'molids-sep',       NA_character_,  1,  'character',    paste0('Set character separator used when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'),
        'first-val',        NA_character_,  0,  'logical',      'Keep only the first value in multi-value fields. Unset by default.',
        'excel2011comp',    NA_character_,  0,  'logical',      'Excel 2011 compatibility mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are replaced with underscore.',
        'database',         'd',            1,  'character',    paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'),
        'url',              NA_character_,  1,  'character',    'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
        'cache-dir',        NA_character_,  1,  'character',    'Path to directory where to store cache files. Only used when database flag is set to "xls".',
        'useragent',        NA_character_,  1,  'character',    'User agent. Used by the "Peakforest" database.',
        'db-name',          NA_character_,  1,  'character',    'Name of the database. Used by the "4tabsql" database.',
        'db-user',          NA_character_,  1,  'character',    'User of the database. Used by the "4tabsql" database.',
        'db-password',      NA_character_,  1,  'character',    'Password of the database user. Used by the "4tabsql" database.',
        'db-fields',        NA_character_,  1,  'character',    paste0('Comma separated key/value list giving the field names to be used in the single file database (--database file). Default is "', MSDB.DFT[['db-fields']], '".'),
        'db-ms-modes',      NA_character_,  1,  'character',    paste0('Comma separated key/value list giving the MS modes to be used in the single file database (--database file). Default is "', MSDB.DFT[['db-ms-modes']], '".'),
        'debug',            NA_character_,  0,  'logical',      'Set debug mode.'
    )

    # Append one --print-<option> flag per option that has a default value
    spec <- c(spec, make.getopt.spec.print.dft())

    # getopt expects a matrix with 5 columns, one row per option
    spec <- matrix(spec, byrow = TRUE, ncol = 5)

    return(spec)
}
206 | |
207 ############# | |
208 # READ ARGS # | |
209 ############# | |
210 | |
# Read, complete and validate the command line options.
# Prints the usage and quits when help is requested (status 0) or when an option
# is missing or invalid (status 1).
# Returns the list of options, with defaults applied and list values parsed.
read_args <- function() {

    # Parse the command line
    optspec <- make.getopt.spec()
    opt <- getopt(optspec)

    # Help requested
    if ( ! is.null(opt$help)) {
        print.help(optspec)
    }

    # Print a default value (and quit) if asked to, then apply the default
    # values and parse the list-valued options.
    print.dft.arg.val(opt)
    opt <- set.dft.arg.val(opt)
    opt <- parse.arg.val(opt)

    # Validate the database connection options
    invalid <- .check.db.conn.opts(opt)

    # An output file is needed in all cases
    if (is.null(opt[['output-file']])) {
        warning("You must set a path for the output file.")
        invalid <- TRUE
    }

    # The remaining options are only needed for a matching run, not for
    # listing the chromatographic columns.
    matching.run <- is.null(opt[['list-cols']])
    if (matching.run) {

        if (is.null(opt[['input-file']])) {
            warning("You must provide an input file.")
            invalid <- TRUE
        }

        if (is.null(opt$mode) || ( ! opt$mode %in% MSDB.MODE.VALS)) {
            warning("You must specify a mode through the --mode option.")
            invalid <- TRUE
        }

        if (is.null(opt$mzprec)) {
            warning("You must set a precision in MZ with the --mzprec option.")
            invalid <- TRUE
        }

        rt.cols.requested <- ! is.null(opt$rtcol) || ! is.null(opt[['all-cols']])
        rt.tols.missing <- is.null(opt$rttolx) || is.null(opt$rttoly)
        if (rt.cols.requested && rt.tols.missing) {
            warning("When chromatographic columns are set, you must provide values for --rttolx and -rttoly.")
            invalid <- TRUE
        }

        if (is.null(opt$mztolunit) || ( ! opt$mztolunit %in% MSDB.MZTOLUNIT.VALS)) {
            warning("You must specify an M/Z tolerance unit through the --mztolunit option.")
            invalid <- TRUE
        }
    }

    # Print the usage and quit with a failure status on invalid options
    if (invalid) {
        print.help(optspec, status = 1)
    }

    return(opt)
}
260 | |
261 ##################################### | |
262 # CHECK DATABASE CONNECTION OPTIONS # | |
263 ##################################### | |
264 | |
# Check the options that describe the database connection.
# A warning is emitted for each missing or invalid option.
#   opt  The list of options.
# Returns TRUE if at least one option is missing or invalid, FALSE otherwise.
.check.db.conn.opts <- function(opt) {

    # Print default values
    if ( ! is.null(opt[['print-db-fields']])) {
        cat(MSDB.DFT[['db-fields']])
        q(status = 0)
    }
    if ( ! is.null(opt[['print-db-ms-modes']])) {
        cat(MSDB.DFT[['db-ms-modes']])
        q(status = 0)
    }

    database <- opt[['database']]
    url <- opt[['url']]

    # Without a valid database type, none of the type specific checks applies.
    # Returning here also avoids testing a NULL value in the conditions below.
    if (is.null(database)) {
        warning("You must provide a database type through --database option.")
        return(TRUE)
    }
    if ( ! database %in% MSDB.VALS) {
        warning(paste0("Invalid value \"", database, "\" for --database option."))
        return(TRUE)
    }

    error <- FALSE

    if (database == MSDB.FILE) {
        # The existence of the file can only be tested when a path was given
        if (is.null(url)) {
            warning("When using single file database, you must specify the location of the database file with option --url.")
            error <- TRUE
        } else if ( ! file.exists(url)) {
            warning(paste0("The file path \"", url,"\" specified with --url option is not valid."))
            error <- TRUE
        }
    }

    if (database == MSDB.XLS) {
        if (is.null(url)) {
            warning("When using Excel database, you must specify the location of the Excel files directory with option --url.")
            error <- TRUE
        } else if ( ! file.exists(url)) {
            warning(paste0("The directory path \"", url,"\" specified with --url option is not valid."))
            error <- TRUE
        }
    }

    if (database == MSDB.4TABSQL) {
        if (is.null(url)) {
            warning("When using 4Tab SQL database, you must specify the URL of the SQL server with option --url.")
            error <- TRUE
        }
        if (is.null(opt[['db-name']])) {
            warning("When using 4Tab SQL database, you must specify the database name through the --db-name option.")
            error <- TRUE
        }
        if (is.null(opt[['db-user']])) {
            warning("When using 4Tab SQL database, you must specify the database user through the --db-user option.")
            error <- TRUE
        }
        if (is.null(opt[['db-password']])) {
            warning("When using 4Tab SQL database, you must specify the database user password through the --db-password option.")
            error <- TRUE
        }
    }

    if (database == MSDB.PEAKFOREST) {
        if (is.null(url)) {
            warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.")
            error <- TRUE
        }
        if (is.null(opt[['useragent']])) {
            warning("When using PeakForest database, you must specify a user agent with option --useragent.")
            error <- TRUE
        }
    }

    return(error)
}
338 | |
339 ############################# | |
340 # DISPLAY COMMAND LINE HELP # | |
341 ############################# | |
342 | |
# Print the usage message and quit with status 1 when help was requested or
# when an error was detected. Otherwise do nothing.
#   optspec  The getopt specification matrix.
#   opt      The list of options.
#   prog     The program name to display in the usage message.
#   error    Whether an error was detected in the options.
.disp.cmd.line.help <- function(optspec, opt, prog, error = FALSE) {

    help.requested <- ! is.null(opt$help)

    # Nothing to display
    if ( ! (help.requested || error)) {
        return(invisible(NULL))
    }

    usage <- getopt(optspec, usage = TRUE, command = prog)
    cat(usage)
    q(status = 1)
}
350 | |
351 ################# | |
352 # LOAD DATABASE # | |
353 ################# | |
354 | |
# Create the database connector selected by the --database option and configure it.
#   opt  The list of options.
# Returns the database object, with precursors, optional field names and MS
# modes set, and a logger attached as observer.
.load.db <- function(opt) {

    # Precursors, by MS mode tag. NULL when no list is provided for either mode.
    pos.prec <- opt[['pos-prec']]
    neg.prec <- opt[['neg-prec']]
    precursors <- NULL
    if ( ! (is.null(pos.prec) && is.null(neg.prec))) {
        precursors <- list()
        precursors[[MSDB.TAG.POS]] <- pos.prec
        precursors[[MSDB.TAG.NEG]] <- neg.prec
    }

    # Instantiate the connector matching the database type.
    # The result is NULL for an unknown type.
    url <- opt$url
    db <- switch(opt$database,
        peakforest = MsPeakForestDb$new(url = url, useragent = opt$useragent),
        xls = MsXlsDb(db_dir = url, cache_dir = opt[['cache-dir']]),
        '4tabsql' = Ms4TabSqlDb(host = extract.address(url), port = extract.port(url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]),
        file = MsFileDb(file = url),
        NULL)

    # Configure the connector
    db$setPrecursors(precursors)
    if (db$areDbFieldsSettable()) {
        db$setDbFields(opt[['db-fields']])
    }
    if (db$areDbMsModesSettable()) {
        db$setDbMsModes(opt[['db-ms-modes']])
    }
    db$addObservers(MsDbLogger$new())

    return(db)
}
380 | |
381 ############### | |
382 # OUTPUT HTML # | |
383 ############### | |
384 | |
# Write the matching results inside an HTML file.
#   db             The database object (not used by this function).
#   main           The data frame of the main output, or NULL.
#   peaks          The data frame of the matched peaks, or NULL.
#   file           The path of the HTML file to write.
#   opt            The list of options, used to display the run parameters.
#   output.fields  The output column names, indexed by field tag.
output.html <- function(db, main, peaks, file, opt, output.fields) {

    # Replace public database IDs by URLs
    if ( ! is.null(peaks))
        for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) {
            field <- output.fields[[extdb]]
            if (field %in% colnames(peaks))
                peaks[[field]] <- vapply(peaks[[field]], function(id) paste0('<a href="', get.entry.url(class = extdb, accession = id, content.type = RBIODB.HTML), '">', id, '</a>'), FUN.VALUE = '')
        }

    # Write HTML
    html <- HtmlWriter(file = file)
    html$writeBegTag('html')
    # Document metadata (title and style sheet) belongs to the <head> element;
    # <header> is a content element of the body.
    html$writeBegTag('head')
    html$writeTag('title', text = "LC/MS matching results")
    html$writeBegTag('style')
    html$write('table, th, td { border-collapse: collapse; }')
    html$write('table, th { border: 1px solid black; }')
    html$write('td { border-left: 1px solid black; border-right: 1px solid black; }')
    html$write('th, td { padding: 5px; }')
    html$write('th { background-color: LightBlue; }')
    html$write('tr:nth-child(even) { background-color: LemonChiffon; }')
    html$write('tr:nth-child(odd) { background-color: LightGreen; }')
    html$writeEndTag('style')
    html$writeEndTag('head')
    html$writeBegTag('body')
    html$writeTag('h1', text = "LC/MS matching")

    # Write parameters
    html$writeTag('h2', text = "Parameters")
    html$writeBegTag('ul')
    html$writeTag('li', paste0("Mode = ", opt$mode, "."))
    html$writeTag('li', paste0("M/Z precision = ", opt$mzprec, "."))
    html$writeTag('li', paste0("M/Z shift = ", opt$mzshift, "."))
    html$writeTag('li', paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), "."))
    if ( ! is.null(opt[['precursor-match']])) {
        html$writeTag('li', paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), "."))
        html$writeTag('li', paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), "."))
    }
    # Retention time parameters are only displayed when chromatographic columns are set
    if ( ! is.null(opt$rtcol)) {
        html$writeTag('li', paste0("Columns = ", paste(opt$rtcol, collapse = ", "), "."))
        html$writeTag('li', paste0("RTX = ", opt$rttolx, "."))
        html$writeTag('li', paste0("RTY = ", opt$rttoly, "."))
        if ( ! is.null(opt[['precursor-match']]))
            html$writeTag('li', paste0("RTZ = ", opt[['precursor-rt-tol']], "."))
    }
    html$writeEndTag('ul')

    # Write results
    html$writeTag('h2', text = "Results")
    results <- FALSE
    if ( ! is.null(main) && nrow(main) > 0 && is.null(opt[['no-main-table-in-html-output']])) {
        html$writeTag('h3', text = "Main output")
        html$writeTable(main)
        results <- TRUE
    }
    if ( ! is.null(peaks) && nrow(peaks) > 0) {
        html$writeTag('h3', text = "Matched peaks")
        html$writeTable(peaks)
        results <- TRUE
    }
    # Neither table was written
    if ( ! results)
        html$writeTag('p', 'None.')

    html$writeEndTag('body')
    html$writeEndTag('html')
}
452 | |
453 ######## | |
454 # MAIN # | |
455 ######## | |
456 | |
# While reading the arguments, warnings are turned into errors (warn = 2) and
# any error prints a traceback before quitting with status 1.
options(error = function() { traceback(2) ; quit(status = 1) }, warn = 2 )

# Read command line arguments
opt <- read_args()

# Outside of debug mode, errors quit silently with status 1 and warnings are
# handled the default way again.
if (is.null(opt$debug)) {
    options(error = function() { quit(status = 1) }, warn = 0 )
}

# Load database
db <- .load.db(opt)

# Print columns
if ( ! is.null(opt[['list-cols']])) {
    cols <- db$getChromCol()
    df.write.tsv(cols, file = opt[['output-file']])
    q(status = 0)
}

# Read input
if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']]))
    stop(paste0("Input file \"", opt[['input-file']], "\" does not exist."))
if (file.info(opt[['input-file']])$size > 0) {

    # Load file into data frame
    input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t")

    # Convert each column that is identified by a number into a name
    for (field in names(opt[['input-col-names']])) {
        if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) {
            col.index <- as.integer(opt[['input-col-names']][[field]])
            if (col.index < 1 || col.index > length(colnames(input)))
                stop(paste0("No column n°", col.index, " for input field ", field, "."))
            opt[['input-col-names']][[field]] <- colnames(input)[[col.index]]
        }
    }
} else {
    # Empty input file: build an empty data frame with the m/z and RT columns
    input <- data.frame()
    input[[opt[['input-col-names']][['mz']]]] <- double()
    input[[opt[['input-col-names']][['rt']]]] <- double()
}

# Check mz column
if ( ! opt[['input-col-names']][['mz']] %in% colnames(input))
    stop(paste0('No column named "', opt[['input-col-names']][['mz']], '" in input file.'))

# Set columns 'all-cols' specified
if ( ! is.null(opt[['all-cols']]))
    opt$rtcol <- db$getChromCol()

# Check chrom columns
if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) {
    dbcols <- db$getChromCol()
    unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols]
    if (length(unknown.cols) > 0) {
        stop(paste0("Unknown chromatographic column", (if (length(unknown.cols) > 1) 's' else ''), ': ', paste(unknown.cols, collapse = ', '), ".\nAllowed chromatographic column names are:\n", paste(dbcols, collapse = "\n")))
    }
}

# Check that an RT column exists when using MZ/RT matching
if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input))
    stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file."))

# Set streams
input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']])
main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]))
peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]))
invisible(db$setInputStream(input.stream))
db$addOutputStreams(c(main.output, peaks.output))

# Set M/Z tolerance unit
db$setMzTolUnit(opt$mztolunit)

# Search database
# The retention time tolerances are read with [[ ]] (exact matching): on a list,
# opt$rttol would partially match "rttolx" or "rttoly" when only one of them is set.
mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG
db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt[['rttol']], rt.tol.x = opt[['rttolx']], rt.tol.y = opt[['rttoly']], col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']])

# Write output
# TODO Create a class MsDbOutputCsvFileStream
df.write.tsv(main.output$getDataFrame(), file = opt[['output-file']], row.names = FALSE)
if ( ! is.null(opt[['peak-output-file']]))
    # TODO Create a class MsDbOutputCsvFileStream
    df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE)
if ( ! is.null(opt[['html-output-file']]))
    # TODO Create a class MsDbOutputHtmlFileStream
    output.html(db = db, main = main.output$getDataFrame(), peaks = peaks.output$getDataFrame(), file = opt[['html-output-file']], opt = opt, output.fields = opt[['output-col-names']])