comparison search-mz @ 5:fb9c0409d85c draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author prog
date Wed, 19 Apr 2017 10:00:05 -0400
parents 20d69a062da3
children
comparison
equal deleted inserted replaced
4:b34c14151f25 5:fb9c0409d85c
1 #!/usr/bin/env Rscript 1 #!/usr/bin/env Rscript
2 # vi: ft=R 2 # vi: ft=R fdm=marker
3 args <- commandArgs(trailingOnly = F) 3 args <- commandArgs(trailingOnly = F)
4 script.path <- sub("--file=","",args[grep("--file=",args)]) 4 script.path <- sub("--file=","",args[grep("--file=",args)])
5 library(getopt) 5 library(getopt)
6 source(file.path(dirname(script.path), 'msdb-common.R'), chdir = TRUE) 6 source(file.path(dirname(script.path), 'msdb-common.R'), chdir = TRUE)
7 source(file.path(dirname(script.path), 'MsFileDb.R'), chdir = TRUE)
8 source(file.path(dirname(script.path), 'MsPeakForestDb.R'), chdir = TRUE)
9 source(file.path(dirname(script.path), 'MsXlsDb.R'), chdir = TRUE)
10 source(file.path(dirname(script.path), 'Ms4TabSqlDb.R'), chdir = TRUE)
11 source(file.path(dirname(script.path), 'MsDbLogger.R'), chdir = TRUE) 7 source(file.path(dirname(script.path), 'MsDbLogger.R'), chdir = TRUE)
12 source(file.path(dirname(script.path), 'MsDbInputDataFrameStream.R'), chdir = TRUE) 8 source(file.path(dirname(script.path), 'MsDbInputDataFrameStream.R'), chdir = TRUE)
13 source(file.path(dirname(script.path), 'MsDbOutputDataFrameStream.R'), chdir = TRUE) 9 source(file.path(dirname(script.path), 'MsDbOutputDataFrameStream.R'), chdir = TRUE)
14 source(file.path(dirname(script.path), 'htmlhlp.R'), chdir = TRUE) 10 source(file.path(dirname(script.path), 'htmlhlp.R'), chdir = TRUE)
15 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE) 11 source(file.path(dirname(script.path), 'strhlp.R'), chdir = TRUE)
19 15
20 # Missing paste0() function in R 2.14.1 16 # Missing paste0() function in R 2.14.1
21 if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15) 17 if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15)
22 paste0 <- function(...) paste(..., sep = '') 18 paste0 <- function(...) paste(..., sep = '')
23 19
24 ############# 20 # Constants {{{1
25 # CONSTANTS # 21 ################################################################
26 #############
27 22
28 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE) 23 PROG <- sub('^.*/([^/]+)$', '\\1', commandArgs()[4], perl = TRUE)
29 USERAGENT <- 'search-mz ; pierrick.roger@gmail.com' 24 USERAGENT <- 'search-mz ; pierrick.roger@gmail.com'
30 25
31 # Authorized database types 26 # Authorized database types
32 MSDB.XLS <- 'xls' 27 MSDB.XLS <- 'xls'
33 MSDB.4TABSQL <- '4tabsql' 28 MSDB.4TABSQL <- '4tabsql'
34 MSDB.FILE <- 'file' 29 MSDB.FILE <- 'file'
35 MSDB.PEAKFOREST <- 'peakforest' 30 MSDB.PEAKFOREST <- 'peakforest'
36 MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST) 31 MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST)
32 DB.SRC.FILE <- list ()
33 DB.SRC.FILE[[MSDB.FILE]] <- 'MsFileDb.R'
34 DB.SRC.FILE[[MSDB.PEAKFOREST]] <- 'MsPeakForestDb.R'
35 DB.SRC.FILE[[MSDB.XLS]] <- 'MsXlsDb.R'
36 DB.SRC.FILE[[MSDB.4TABSQL]] <- 'Ms4TabSqlDb.R'
37 37
38 # Authorized mode values 38 # Authorized mode values
39 POS_MODE <- 'pos' 39 POS_MODE <- 'pos'
40 NEG_MODE <- 'neg' 40 NEG_MODE <- 'neg'
41 MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE) 41 MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE)
49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP 49 MSDB.DFT[['molids-sep']] <- MSDB.DFT.MATCH.SEP
50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields()) 50 MSDB.DFT[['db-fields']] <- concat.kv.list(msdb.get.dft.db.fields())
51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES) 51 MSDB.DFT[['db-ms-modes']] <- concat.kv.list(MSDB.DFT.MODES)
52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',') 52 MSDB.DFT[['pos-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = ',')
53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',') 53 MSDB.DFT[['neg-prec']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = ',')
54 MSDB.DFT[['db-rt-unit']] <- MSDB.RTUNIT.SEC
55 MSDB.DFT[['rtunit']] <- MSDB.RTUNIT.SEC
54 DEFAULT.ARG.VALUES <- MSDB.DFT 56 DEFAULT.ARG.VALUES <- MSDB.DFT
55 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields()) 57 DEFAULT.ARG.VALUES[['input-col-names']] <- concat.kv.list(msdb.get.dft.input.fields())
56 DEFAULT.ARG.VALUES[['output-col-names']] <- concat.kv.list(msdb.get.dft.output.fields()) 58
57 59 # Print help {{{1
58 ############## 60 ################################################################
59 # PRINT HELP # 61
60 ############## 62 print.help <- function() {
61 63
62 print.help <- function(spec, status = 0) { 64 cat("USAGE:\n")
63 cat(getopt(spec, usage = TRUE, command = PROG)) 65 prog.mz.match <- paste(PROG, ' -d (', paste(MSDB.VALS, collapse = '|'), ') --url (file|dir|database URL) -i <file> -m (', paste(MSDB.MODE.VALS, collapse = '|'), ") -p <mz precision> -s <mz shift> -u (", paste(MSDB.MZTOLUNIT.VALS, collapse = '|'), ") -o <file>", sep = '')
64 q(status = status) 66 cat("\t(1) ", prog.mz.match, " ...\n", sep = '')
65 } 67 cat("\n")
66 68 cat("\t(2) ", prog.mz.match, "(--all-cols|-c <cols>) -x <X RT tolerance> -y <Y RT tolerance>", " ...\n", sep = '')
67 ############################### 69 cat("\n")
68 # SET DEFAULT ARGUMENT VALUES # 70 cat("\t(3) ", PROG, ' -d (', paste(MSDB.VALS, collapse = '|'), ") --url (file|dir|database URL) --list-cols\n", sep = '')
69 ############################### 71
72 cat("\nDETAILS:\n")
73 cat("Form (1) is for running an MZ match on a database.\n")
74 cat("Form (2) is for running an MZ/RT match on a database.\n")
75 cat("Form (3) is for getting a list of available chromatographic columns in a database.\n")
76
77 cat("\nOPTIONS:\n")
78 spec <- matrix(make.getopt.spec(), byrow = TRUE, ncol = 5)
79 max.length.opt.cols <- max(nchar(spec[,1])) + 1
80 sections <- list(database = "Database setting", input = "Input file", output = "Output files", mz = "M/Z matching", rt = "RT matching", precursor = "Precursor matching", misc = "Miscellaneous")
81 for (section in names(sections)) {
82 cat("\n\t", sections[[section]], ":\n", sep = '')
83 spec <- matrix(make.getopt.spec(section), byrow = TRUE, ncol = 5)
84 for (i in seq(nrow(spec))) {
85 opt <- ''
86 if ( ! is.na(spec[i,2]))
87 opt <- paste('-', spec[i,2], '|', sep = '')
88 opt <- paste(opt, '--', spec[i, 1], sep = '')
89 nb.space.padding <- max.length.opt.cols - nchar(opt) + 6
90 padding <- paste(rep(' ', nb.space.padding), sep = '')
91 cat("\t\t", opt, padding, "\t", spec[i, 5], "\n", sep = '')
92 }
93 }
94
95 cat("\nEXAMPLES:\n")
96
97 cat("\nSimple M/Z matching with a file database:\n")
98 cat("\t./", PROG, " -d file --url mydbfile.tsv -i input.tsv -m pos -o output.tsv\n", sep = '')
99
100 cat("\nFile database with M/Z tolerance:\n")
101 cat("\t./", PROG, " -d file --url mydbfile.tsv -i input.tsv -m pos -o output.tsv -p 0.5 -s 0\n", sep = '')
102
103 cat("\nFile database with M/Z tolerance unit:\n")
104 cat("\t./", PROG, " -d file --url mydbfile.tsv -i input.tsv -m pos -o output.tsv -p 1 -s 0.5 -u plain\n", sep = '')
105
106 cat("\nPeakforest database:\n")
107 cat("\t./", PROG, " -d peakforest --url https://rest.peakforest.org/ --db-token <your Peakforest token> -i input.tsv -m pos -o output.tsv\n", sep = '')
108 }
109
110 # Set default argument values {{{1
111 ################################################################
70 112
71 set.dft.arg.val <-function(opt) { 113 set.dft.arg.val <-function(opt) {
72 114
73 for (f in names(MSDB.DFT)) 115 for (f in names(MSDB.DFT))
74 if (is.null(opt[[f]])) 116 if (is.null(opt[[f]]))
82 opt$rtcol <- NULL 124 opt$rtcol <- NULL
83 125
84 return(opt) 126 return(opt)
85 } 127 }
86 128
87 ######################### 129 # Parse argument values {{{1
88 # PARSE ARGUMENT VALUES # 130 ################################################################
89 #########################
90 131
91 parse.arg.val <- function(opt) { 132 parse.arg.val <- function(opt) {
92 133
93 # Parse input column names 134 # Parse input column names
94 if ( ! is.null(opt[['db-fields']])) { 135 if ( ! is.null(opt[['db-fields']])) {
95 cust <- split.kv.list(opt[['db-fields']]) 136 cust <- split.kv.list(opt[['db-fields']])
137 cust <- cust[cust != 'NA']
96 opt[['db-fields']] <- split.kv.list(MSDB.DFT[['db-fields']]) 138 opt[['db-fields']] <- split.kv.list(MSDB.DFT[['db-fields']])
139 cust <- cust[names(cust) %in% names(opt[['db-fields']])]
97 opt[['db-fields']][names(cust)] <- cust 140 opt[['db-fields']][names(cust)] <- cust
98 } 141 }
99 142
100 # Parse MS modes 143 # Parse MS modes
101 if ( ! is.null(opt[['db-ms-modes']])) { 144 if ( ! is.null(opt[['db-ms-modes']])) {
112 if (is.null(opt[['input-col-names']])) { 155 if (is.null(opt[['input-col-names']])) {
113 opt[['input-col-names']] <- msdb.get.dft.input.fields() 156 opt[['input-col-names']] <- msdb.get.dft.input.fields()
114 } 157 }
115 else { 158 else {
116 custcols <- split.kv.list(opt[['input-col-names']]) 159 custcols <- split.kv.list(opt[['input-col-names']])
160 custcols <- custcols[custcols != 'NA']
117 dftcols <- msdb.get.dft.input.fields() 161 dftcols <- msdb.get.dft.input.fields()
118 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)]) 162 opt[['input-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)])
119 }
120
121 # Parse output column names
122 if (is.null(opt[['output-col-names']])) {
123 # By default keep input col names for output
124 opt[['output-col-names']] <- msdb.get.dft.output.fields()
125 input.cols <- names(opt[['input-col-names']])
126 output.cols <- names(opt[['output-col-names']])
127 opt[['output-col-names']] <- c(opt[['input-col-names']][input.cols %in% output.cols], opt[['output-col-names']][ ! output.cols %in% input.cols])
128 }
129 else {
130 custcols <- split.kv.list(opt[['output-col-names']])
131 dftcols <- msdb.get.dft.output.fields()
132 opt[['output-col-names']] <- c(custcols, dftcols[ ! names(dftcols) %in% names(custcols)])
133 } 163 }
134 164
135 # Parse lists of precursors 165 # Parse lists of precursors
136 if ( ! is.null(opt[['pos-prec']])) 166 if ( ! is.null(opt[['pos-prec']]))
137 opt[['pos-prec']] <- split.str(opt[['pos-prec']], unlist = TRUE) 167 opt[['pos-prec']] <- split.str(opt[['pos-prec']], unlist = TRUE)
139 opt[['neg-prec']] <- split.str(opt[['neg-prec']], unlist = TRUE) 169 opt[['neg-prec']] <- split.str(opt[['neg-prec']], unlist = TRUE)
140 170
141 return(opt) 171 return(opt)
142 } 172 }
143 173
144 ################################# 174 # Make getopt specifications {{{1
145 # PRINT DEFAULT ARGUMENT VALUES # 175 ################################################################
146 ################################# 176
147 177 make.getopt.spec <- function(sections = NULL) {
148 print.dft.arg.val <- function(opt) { 178
149 179 spec <- character(0)
150 print.flags <- DEFAULT.ARG.VALUES 180
151 names(print.flags) <- vapply(names(print.flags), function(x) paste0('print-', x), FUN.VALUE = '') 181 if (is.null(sections) || 'input' %in% sections)
152 for (f in names(print.flags)) 182 spec <- c(spec,
153 if ( ! is.null(opt[[f]])) { 183 'input-file', 'i', 1, 'character', 'Set input file.',
154 cat(print.flags[[f]]) 184 'input-col-names', 'j', 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".')
155 q(status = 0) 185 )
156 } 186
157 } 187 if (is.null(sections) || 'mz' %in% sections)
158 188 spec <- c(spec,
159 make.getopt.spec.print.dft <- function() { 189 'mode', 'm', 1, 'character', paste0('MS mode. Possible values are:', paste(MSDB.MODE.VALS, collapse = ", "), '.'),
160 190 'mzshift', 's', 1, 'numeric', paste0('Shift on m/z. Default is ', MSDB.DFT$mzshift,'.'),
161 spec <- character() 191 'mzprec', 'p', 1, 'numeric', paste0('Tolerance on m/z. Default is ', MSDB.DFT$mzprec,'.'),
162 192 'mztolunit', 'u', 1, 'character', paste0('Unit used for tolerance values (options -s and -p) on M/Z. Default is ', MSDB.DFT$mztolunit,'.')
163 for (f in names(DEFAULT.ARG.VALUES)) 193 )
164 spec <- c(spec, paste0('print-', f), NA_character_, 0, 'logical', paste0('Print default value of --', f)) 194
195 if (is.null(sections) || 'rt' %in% sections)
196 spec <- c(spec,
197 'all-cols', 'A', 0, 'logical', 'Use all available chromatographic columns to match retention times.',
198 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'),
199 'check-cols', 'k', 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.',
200 'list-cols', 'l', 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.',
201 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
202 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
203 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
204 'rtunit', 'v', 1, 'character', paste0('Retention time unit for the input file. Default is ', MSDB.DFT$rtunit, '. Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.')
205 )
206
207 if (is.null(sections) || 'precursor' %in% sections)
208 spec <- c(spec,
209 'precursor-match', 'Q', 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.',
210 'precursor-rt-tol', 'R', 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'),
211 'pos-prec', 'Y', 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'),
212 'neg-prec', 'Z', 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".')
213 )
214
215 if (is.null(sections) || 'output' %in% sections)
216 spec <- c(spec,
217 'output-file', 'o', 1, 'character', 'Set file to use for the main output.',
218 'peak-output-file', 'O', 1, 'character', 'If set and if --same-rows is set, then output all matches inside the specified file, with one mz match per line. The output columns are: mz, rt, id, col, colrt, composition, attribution. This means that if an mz value is matched several times, then it will repeated on several lines, with one match description per line.',
219 'html-output-file', 'H', 1, 'character', 'Set file to use for the HTML output.',
220 'no-main-table-in-html-output', 't', 0, 'logical', 'Do not display main table in HTML output.',
221 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.',
222 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.',
223 'molids-sep', 'S', 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'),
224 'first-val', '1', 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.',
225 'excel2011comp', 'X', 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.'
226 )
227
228 if (is.null(sections) || 'database' %in% sections)
229 spec <- c(spec,
230 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'),
231 'url', 'W', 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
232 'cache-dir', 'C', 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".',
233 'db-name', 'N', 1, 'character', 'Name of the database. Used by the "4tabsql" database.',
234 'db-user', 'U', 1, 'character', 'User of the database. Used by the "4tabsql" database.',
235 'db-password', 'P', 1, 'character', 'Password of the database user. Used by the "4tabsql" database.',
236 'db-ms-modes', 'M', 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database. Default is "', MSDB.DFT[['db-ms-modes']], '".'),
237 'db-rt-unit', 'V', 1, 'character', paste0('Retention time unit for the database, used in the single file database. Default is "', MSDB.DFT[['db-rt-unit']], '". Allowed values are:', paste(MSDB.RTUNIT.VALS, collapse = ", "), '.'),
238 'db-token', 'T', 1, 'character', 'Database token. Used by Peakforest database.',
239 'db-fields', 'F', 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database. Default is "', MSDB.DFT[['db-fields']], '".')
240 )
241
242 if (is.null(sections) || 'misc' %in% sections)
243 spec <- c(spec,
244 'help', 'h', 0, 'logical', 'Print this help.',
245 'debug', 'g', 0, 'logical', 'Set debug mode.'
246 )
165 247
166 return(spec) 248 return(spec)
167 } 249 }
168 250
169 ############################## 251 # Read args {{{1
170 # MAKE GETOPT SPECIFICATIONS # 252 ################################################################
171 ##############################
172
173 make.getopt.spec <- function() {
174 spec = c(
175 'help', 'h', 0, 'logical', 'Print this help.',
176 'mode', 'm', 1, 'character', paste0('MS mode. Possible values are:', paste(MSDB.MODE.VALS, collapse = ", "), '.'),
177 'mzshift', 's', 1, 'numeric', paste0('Shift on m/z, in ppm. Default is ', MSDB.DFT$mzshift,'.'),
178 'mzprec', 'p', 1, 'numeric', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mzprec,'.'),
179 'mztolunit', NA_character_, 1, 'character', paste0('Tolerance on m/z, in ppm. Default is ', MSDB.DFT$mztolunit,'.'),
180 'rttol', 'r', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
181 'rttolx', 'x', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
182 'rttoly', 'y', 1, 'numeric', paste0('Tolerance on retention times. Unset by default.'),
183 'rtcol', 'c', 1, 'character', paste0('Chromatographic column to use. Unset by default. If set, use the corresponding column to filter on retention times, if retention times are provided.'),
184 'all-cols', NA_character_, 0, 'logical', 'Use all available chromatographic columns to match retention times.',
185 'check-cols', NA_character_, 0, 'logical', 'Check that the chromatographic column names specified with option -c really exist.',
186 'list-cols', NA_character_, 0, 'logical', 'List all chromatographic columns present in the database. Write list inside the file specified by -o option.',
187 'same-rows', 'a', 0, 'logical', 'If set, output exactly the same number of rows as the input. This means that in case of multiple matches for one mz, then only one line is output (i.e.: the mz value is not duplicated on several lines). In the main output file, an "ms.matching" column is output with inside, for each mz, a comma separated list of matched component/molecule IDs. If unset, then only the main output file is used, and one single is written to it with one line per peak match, and eventual mz line duplicated if there are multiple matches for this mz.',
188 'same-cols', 'b', 0, 'logical', 'If set, output the same columns as inside the input. All input columns are copied to the output.',
189 'input-file', 'i', 1, 'character', 'Set input file.',
190 'output-file', 'o', 1, 'character', 'Set file to use for the main output.',
191 'peak-output-file', NA_character_, 1, 'character', 'If set and if --same-rows is set, then output all matches inside the specified file, with one mz match per line. The output columns are: mz, rt, id, col, colrt, composition, attribution. This means that if an mz value is matched several times, then it will repeated on several lines, with one match description per line.',
192 'html-output-file', NA_character_, 1, 'character', 'Set file to use for the HTML output.',
193 'no-main-table-in-html-output', NA_character_, 0, 'logical', 'Do not display main table in HTML output.',
194 'precursor-match', NA_character_, 0, 'logical', 'Remove peaks whose molecule precursor peak has not been matched. Unset by default.',
195 'precursor-rt-tol', NA_character_, 1, 'numeric', paste0('Precursor retention time tolerance. Only used when precursor-match is enabled. Default is ', MSDB.DFT[['precursor-rt-tol']], '.'),
196 'pos-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in positive mode. Default is "', MSDB.DFT[['pos-prec']], '".'),
197 'neg-prec', NA_character_, 1, 'character', paste0('Set the list of precursors to use in negative mode. Default is "', MSDB.DFT[['neg-prec']], '".'),
198 'input-col-names', NA_character_, 1, 'character', paste0('Set the input column names. Default is "', DEFAULT.ARG.VALUES[['input-col-names']], '".'),
199 'output-col-names', NA_character_, 1, 'character', paste0('Set the output column names. Default is "', DEFAULT.ARG.VALUES[['output-col-names']], '".'),
200 'molids-sep', NA_character_, 1, 'character', paste0('Set character separator used to when concatenating molecule IDs in output. Default is "', MSDB.DFT[['molids-sep']] , '".'),
201 'first-val', NA_character_, 0, 'logical', 'Keep only the first value in multi-value fields. Unset by default.',
202 'excel2011comp', NA_character_, 0, 'logical', 'Excel 2011 compatiblity mode. Output ASCII text files instead of UTF-8 files, where greek letters are replaced with their latin names, plusminus sign is replaced with +- and apostrophe is replaced with \"prime\". All other non-ASCII characters are repladed with underscore.',
203 'database', 'd', 1, 'character', paste0('Set database to use: "xls" for an Excel database, "file" for a single file database, "4tabsql" for a 4Tab SQL database, and "peakforest" for a connection to PeakForest database.'),
204 'url', NA_character_, 1, 'character', 'URL of database. For "peakforest" database it is the HTTP URL, for the "xls" database it is the path to the directory containing the Excel files, for the "file" database it is the path to the file database and for the "4tabsql" database it is the IP address of the server.',
205 'cache-dir', NA_character_, 1, 'character', 'Path to directory where to store cache files. Only used when database flag is set to "xls".',
206 'db-name', NA_character_, 1, 'character', 'Name of the database. Used by the "4tabsql" database.',
207 'db-user', NA_character_, 1, 'character', 'User of the database. Used by the "4tabsql" database.',
208 'db-password', NA_character_, 1, 'character', 'Password of the database user. Used by the "4tabsql" database.',
209 'db-fields', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the field names to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-fields']], '".'),
210 'db-ms-modes', NA_character_, 1, 'character', paste0('Comma separated key/value list giving the MS modes to be used in the single file database (option --db-file). Default is "', MSDB.DFT[['db-ms-modes']], '".'),
211 'db-token', NA_character_, 1, 'character', 'Database token. Used by Peakforest database.',
212 'debug', NA_character_, 0, 'logical', 'Set debug mode.'
213 )
214
215 spec <- c(spec, make.getopt.spec.print.dft())
216
217 if ( ! is.null(spec))
218 spec <- matrix(spec, byrow = TRUE, ncol = 5)
219
220 return(spec)
221 }
222
223 #############
224 # READ ARGS #
225 #############
226 253
227 read_args <- function() { 254 read_args <- function() {
228 255
229 # options 256 # Get options
230 spec <- make.getopt.spec() 257 opt <- getopt(matrix(make.getopt.spec(), byrow = TRUE, ncol = 5))
231 opt <- getopt(spec)
232 258
233 # help 259 # help
234 if ( ! is.null(opt$help)) 260 if ( ! is.null(opt$help)) {
235 print.help(spec) 261 print.help()
236 262 quit()
237 print.dft.arg.val(opt) # Print default values 263 }
264
238 opt <- set.dft.arg.val(opt) # Set default values 265 opt <- set.dft.arg.val(opt) # Set default values
239 opt <- parse.arg.val(opt) # Parse list values 266 opt <- parse.arg.val(opt) # Parse list values
240 267
241 # Check values 268 # Check values
242 error <- .check.db.conn.opts(opt) 269 error <- check.args(opt)
243 if (is.null(opt[['output-file']]) && is.null(opt[['list-cols']])) { 270
244 warning("You must set a path for the output file.") 271 return(opt)
245 error <- TRUE 272 }
246 } 273
274 # Check args {{{1
275 ################################################################
276
277 check.args <- function(opt) {
278
279 # Check database type
280 if (is.null(opt$database))
281 stop("You must provide a database type through --database option.")
282 if ( ! opt$database %in% MSDB.VALS)
283 stop(paste0("Invalid value \"", opt$database, "\" for --database option."))
284
285 # Check filedb database
286 if (opt$database == MSDB.FILE) {
287 if (is.null(opt$url))
288 stop("When using single file database, you must specify the location of the database file with option --url.")
289 if ( ! file.exists(opt$url))
290 stop(paste0("The file path \"", opt$url,"\" specified with --db-file option is not valid."))
291 }
292
293 # Check Excel database
294 if (opt$database == MSDB.XLS) {
295 if (is.null(opt$url))
296 stop("When using Excel database, you must specify the location of the Excel files directory with option --url.")
297 if ( ! file.exists(opt$url))
298 stop(paste0("The directory path \"", opt$url,"\" specified with --xls-dir option is not valid."))
299 }
300
301 # Check 4 tab database
302 if (opt$database == MSDB.4TABSQL) {
303 if (is.null(opt$url))
304 stop("When using 4Tab SQL database, you must specify the URL of the SQL server with option --url.")
305 if (is.null(opt[['db-name']]))
306 stop("When using 4Tab SQL database, you must specify the database name through the --db-name option.")
307 if (is.null(opt[['db-user']]))
308 stop("When using 4Tab SQL database, you must specify the database user through the --db-user option.")
309 if (is.null(opt[['db-password']]))
310 stop("When using 4Tab SQL database, you must specify the database user password through the --db-password option.")
311 }
312
313 # Check Peakforest database
314 if (opt$database == MSDB.PEAKFOREST) {
315 if (is.null(opt$url))
316 stop("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.")
317 }
318
247 if (is.null(opt[['list-cols']])) { 319 if (is.null(opt[['list-cols']])) {
248 if (is.null(opt[['input-file']])) { 320
249 warning("You must provide an input file.") 321 if (is.null(opt[['output-file']]))
250 error <- TRUE 322 stop("You must set a path for the output file.")
251 } 323
252 if (is.null(opt$mode) || ( ! opt$mode %in% MSDB.MODE.VALS)) { 324 if (is.null(opt[['input-file']]))
253 warning("You must specify a mode through the --mode option.") 325 stop("You must provide an input file.")
254 error <- TRUE 326
255 } 327 if (is.null(opt$mode) || ( ! opt$mode %in% MSDB.MODE.VALS))
256 if (is.null(opt$mzprec)) { 328 stop("You must specify a mode through the --mode option.")
257 warning("You must set a precision in MZ with the --mzprec option.") 329
258 error <- TRUE 330 if (is.null(opt$mzprec))
259 } 331 stop("You must set a precision in MZ with the --mzprec option.")
260 if ( ( ! is.null(opt$rtcol) || ! is.null(opt[['all-cols']])) && (is.null(opt$rttolx) || is.null(opt$rttoly))) { 332
261 warning("When chromatographic columns are set, you must provide values for --rttolx and -rttoly.") 333 if ( ( ! is.null(opt$rtcol) || ! is.null(opt[['all-cols']])) && (is.null(opt$rttolx) || is.null(opt$rttoly)))
262 error <- TRUE 334 stop("When chromatographic columns are set, you must provide values for --rttolx and -rttoly.")
263 } 335
264 if (is.null(opt$mztolunit) || ( ! opt$mztolunit %in% MSDB.MZTOLUNIT.VALS)) { 336 if (is.null(opt$mztolunit) || ( ! opt$mztolunit %in% MSDB.MZTOLUNIT.VALS))
265 warning("You must specify an M/Z tolerance unit through the --mztolunit option.") 337 stop("You must specify an M/Z tolerance unit through the --mztolunit option.")
266 error <- TRUE 338 }
267 } 339 }
268 } 340
269 341 # Load database {{{1
270 # help 342 ################################################################
271 if (error) 343
272 print.help(spec, status = 1) 344 .load.db <- function(opt) {
273 345
274 return(opt) 346 if (is.null(opt[['pos-prec']]) && is.null(opt[['neg-prec']])) {
275 } 347 precursors <- NULL
276 348 } else {
277 ##################################### 349 precursors <- list()
278 # CHECK DATABASE CONNECTION OPTIONS # 350 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']]
279 ##################################### 351 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']]
280 352 }
281 .check.db.conn.opts <- function(opt) { 353
282 354 db <- switch(opt$database,
283 # Print default values 355 peakforest = MsPeakForestDb$new(url = opt$url, useragent = USERAGENT, token = opt[['db-token']]),
284 if ( ! is.null(opt[['print-db-fields']])) { 356 xls = MsXlsDb$new(db_dir = opt$url, cache_dir = opt[['cache-dir']]),
285 cat(MSDB.DFT[['db-fields']]) 357 '4tabsql' = Ms4TabSqlDb$new(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]),
286 q(status = 0) 358 file = MsFileDb$new(file = opt$url),
287 } 359 NULL)
288 if ( ! is.null(opt[['print-db-ms-modes']])) { 360 db$setPrecursors(precursors)
289 cat(MSDB.DFT[['db-ms-modes']]) 361 if (db$areDbFieldsSettable())
290 q(status = 0) 362 db$setDbFields(opt[['db-fields']])
291 } 363 if (db$areDbMsModesSettable())
292 364 db$setDbMsModes(opt[['db-ms-modes']])
293 # Check values 365 db$addObservers(MsDbLogger$new())
294 error <- FALSE 366
295 if (is.null(opt$database)) { 367 return(db)
296 warning("You must provide a database type through --database option.") 368 }
297 error <- TRUE 369
298 } 370 # Output HTML {{{1
299 if ( ! opt$database %in% MSDB.VALS) { 371 ################################################################
300 warning(paste0("Invalid value \"", opt$database, "\" for --database option.")) 372
301 error <- TRUE 373 output.html <- function(db, peaks, file) {
302 }
303 if (opt$database == MSDB.FILE) {
304 if (is.null(opt$url)) {
305 warning("When using single file database, you must specify the location of the database file with option --url.")
306 error <- TRUE
307 }
308 if ( ! file.exists(opt$url)) {
309 warning(paste0("The file path \"", opt$url,"\" specified with --db-file option is not valid."))
310 error <- TRUE
311 }
312 }
313 if (opt$database == MSDB.XLS) {
314 if (is.null(opt$url)) {
315 warning("When using Excel database, you must specify the location of the Excel files directory with option --url.")
316 error <- TRUE
317 }
318 if ( ! file.exists(opt$url)) {
319 warning(paste0("The directory path \"", opt$url,"\" specified with --xls-dir option is not valid."))
320 error <- TRUE
321 }
322 }
323 if (opt$database == MSDB.4TABSQL) {
324 if (is.null(opt$url)) {
325 warning("When using 4Tab SQL database, you must specify the URL of the SQL server with option --url.")
326 error <- TRUE
327 }
328 if (is.null(opt[['db-name']])) {
329 warning("When using 4Tab SQL database, you must specify the database name through the --db-name option.")
330 error <- TRUE
331 }
332 if (is.null(opt[['db-user']])) {
333 warning("When using 4Tab SQL database, you must specify the database user through the --db-user option.")
334 error <- TRUE
335 }
336 if (is.null(opt[['db-password']])) {
337 warning("When using 4Tab SQL database, you must specify the database user password through the --db-password option.")
338 error <- TRUE
339 }
340 }
341 if (opt$database == MSDB.PEAKFOREST) {
342 if (is.null(opt$url)) {
343 warning("When using PeakForest database, you must specify the URL of the PeakForest server with option --url.")
344 error <- TRUE
345 }
346 }
347
348 return(error)
349 }
350
351 #############################
352 # DISPLAY COMMAND LINE HELP #
353 #############################
354
355 .disp.cmd.line.help <- function(optspec, opt, prog, error = FALSE) {
356
357 if ( ! is.null(opt$help) || error ) {
358 cat(getopt(optspec, usage = TRUE, command = prog))
359 q(status = 1)
360 }
361 }
362
363 #################
364 # LOAD DATABASE #
365 #################
366
367 .load.db <- function(opt) {
368
369 if (is.null(opt[['pos-prec']]) && is.null(opt[['neg-prec']])) {
370 precursors <- NULL
371 } else {
372 precursors <- list()
373 precursors[[MSDB.TAG.POS]] <- opt[['pos-prec']]
374 precursors[[MSDB.TAG.NEG]] <- opt[['neg-prec']]
375 }
376
377 db <- switch(opt$database,
378 peakforest = MsPeakForestDb$new(url = opt$url, useragent = USERAGENT, token = opt[['db-token']]),
379 xls = MsXlsDb$new(db_dir = opt$url, cache_dir = opt[['cache-dir']]),
380 '4tabsql' = Ms4TabSqlDb$new(host = extract.address(opt$url), port = extract.port(opt$url), dbname = opt[['db-name']], user = opt[['db-user']], password = opt[['db-password']]),
381 file = MsFileDb$new(file = opt$url),
382 NULL)
383 db$setPrecursors(precursors)
384 if (db$areDbFieldsSettable())
385 db$setDbFields(opt[['db-fields']])
386 if (db$areDbMsModesSettable())
387 db$setDbMsModes(opt[['db-ms-modes']])
388 db$addObservers(MsDbLogger$new())
389
390 return(db)
391 }
392
393 ###############
394 # OUTPUT HTML #
395 ###############
396
397 output.html <- function(db, main, peaks, file, opt, output.fields) {
398 374
399 # Replace public database IDs by URLs 375 # Replace public database IDs by URLs
400 if ( ! is.null(peaks) || ! is.null(main)) { 376 if ( ! is.null(peaks)) {
401 # Conversion from extdb id field to extdb name 377 # Conversion from extdb id field to extdb name
402 extdb2classdb = list() 378 extdb2classdb = list()
403 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG 379 extdb2classdb[MSDB.TAG.KEGG] = BIODB.KEGG
404 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB 380 extdb2classdb[MSDB.TAG.HMDB] = BIODB.HMDB
405 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI 381 extdb2classdb[MSDB.TAG.CHEBI] = BIODB.CHEBI
406 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP 382 extdb2classdb[MSDB.TAG.PUBCHEM] = BIODB.PUBCHEMCOMP
407 383
408 # Loop on all dbs 384 # Loop on all dbs
409 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) { 385 for (extdb in c(MSDB.TAG.KEGG, MSDB.TAG.HMDB, MSDB.TAG.CHEBI, MSDB.TAG.PUBCHEM)) {
410 field <- output.fields[[extdb]] 386 if ( ! is.null(peaks) && extdb %in% colnames(peaks))
411 if ( ! is.null(peaks) && field %in% colnames(peaks)) 387 peaks[[extdb]] <- vapply(peaks[[extdb]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '')
412 peaks[[field]] <- vapply(peaks[[field]], function(id) if (is.na(id)) '' else paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = '')
413 if ( ! is.null(main) && field %in% colnames(main))
414 main[[field]] <- vapply(main[[field]], function(ids) if (is.na(ids) || nchar(ids) == 0) '' else paste(vapply(strsplit(ids, opt[['molids-sep']])[[1]], function(id) paste0('<a href="', get.entry.url(class = extdb2classdb[[extdb]], accession = id, content.type = BIODB.HTML), '">', id, '</a>'), FUN.VALUE = ''), collapse = opt[['molids-sep']]), FUN.VALUE = '')
415 } 388 }
416 } 389 }
417 390
418 # Write HTML 391 # Write HTML
419 html <- HtmlWriter(file = file) 392 html <- HtmlWriter(file = file)
430 html$write('tr:nth-child(even) { background-color: LemonChiffon; }') 403 html$write('tr:nth-child(even) { background-color: LemonChiffon; }')
431 html$write('tr:nth-child(odd) { background-color: LightGreen; }') 404 html$write('tr:nth-child(odd) { background-color: LightGreen; }')
432 html$writeEndTag('style') 405 html$writeEndTag('style')
433 html$writeEndTag('header') 406 html$writeEndTag('header')
434 html$writeBegTag('body') 407 html$writeBegTag('body')
435 html$writeTag('h1', text = "LC/MS matching")
436
437 # Write parameters
438 html$writeTag('h2', text = "Parameters")
439 html$writeBegTag('ul')
440 html$writeTag('li', text = paste0("Mode = ", opt$mode, "."))
441 html$writeTag('li', text = paste0("M/Z precision = ", opt$mzprec, "."))
442 html$writeTag('li', text = paste0("M/Z shift = ", opt$mzshift, "."))
443 html$writeTag('li', text = paste0("Precursor match = ", (if (is.null(opt[['precursor-match']])) "no" else "yes"), "."))
444 if ( ! is.null(opt[['precursor-match']])) {
445 html$writeTag('li', text = paste0("Positive precursors = ", paste0(opt[['pos-prec']], collapse = ', '), "."))
446 html$writeTag('li', text = paste0("Negative precursors = ", paste0(opt[['neg-prec']], collapse = ', '), "."))
447 }
448 if ( ! is.null(opt$rtcol)) {
449 html$writeTag('li', text = paste0("Columns = ", paste(opt$rtcol, collapse = ", "), "."))
450 html$writeTag('li', text = paste0("RTX = ", opt$rttolx, "."))
451 html$writeTag('li', text = paste0("RTY = ", opt$rttoly, "."))
452 if ( ! is.null(opt[['precursor-match']]))
453 html$writeTag('li', text = paste0("RTZ = ", opt[['precursor-rt-tol']], "."))
454 }
455 html$writeEndTag('ul')
456 408
457 # Write results 409 # Write results
458 html$writeTag('h2', text = "Results")
459 results <- FALSE 410 results <- FALSE
460 if ( ! is.null(main) && nrow(main) > 0 && is.null(opt[['no-main-table-in-html-output']])) {
461 html$writeTag('h3', text = "Main output")
462 html$writeTable(main)
463 results <- TRUE
464 }
465 if ( ! is.null(peaks) && nrow(peaks) > 0) { 411 if ( ! is.null(peaks) && nrow(peaks) > 0) {
466 html$writeTag('h3', text = "Matched peaks") 412 html$writeTag('h3', text = "Matched peaks")
467 html$writeTable(peaks) 413 html$writeTable(peaks)
468 results <- TRUE 414 results <- TRUE
469 } 415 }
472 418
473 html$writeEndTag('body') 419 html$writeEndTag('body')
474 html$writeEndTag('html') 420 html$writeEndTag('html')
475 } 421 }
476 422
477 ######## 423 # MAIN {{{1
478 # MAIN # 424 ################################################################
479 ########
480
481 options(error = function() { traceback(2) ; quit(status = 1) }, warn = 2 )
482 425
483 # Read command line arguments 426 # Read command line arguments
484 opt <- read_args() 427 opt <- read_args()
485 428
486 if (is.null(opt$debug)) { 429 if (is.null(opt$debug)) {
487 options(error = function() { quit(status = 1) }, warn = 0 ) 430 options(error = function() { quit(status = 1) }, warn = 0 )
488 } 431 }
489 432
490 # Load database 433 # Load database
434 source(file.path(dirname(script.path), DB.SRC.FILE[[opt$database]]), chdir = TRUE)
491 db <- .load.db(opt) 435 db <- .load.db(opt)
492 436
493 # Print columns 437 # Print columns
494 if ( ! is.null(opt[['list-cols']])) { 438 if ( ! is.null(opt[['list-cols']])) {
495 cols <- db$getChromCol() 439 cols <- db$getChromCol()
501 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']])) 445 if ( ! is.null(opt[['input-file']]) && ! file.exists(opt[['input-file']]))
502 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist.")) 446 stop(paste0("Input file \"", opt[['input-file']], "\" does not exist."))
503 if (file.info(opt[['input-file']])$size > 0) { 447 if (file.info(opt[['input-file']])$size > 0) {
504 448
505 # Load file into data frame 449 # Load file into data frame
506 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE) 450 input <- read.table(file = opt[['input-file']], header = TRUE, sep = "\t", stringsAsFactor = FALSE, check.names = FALSE, comment.char = '')
507 451
508 # Convert each column that is identified by a number into a name 452 # Convert each column that is identified by a number into a name
509 for (field in names(opt[['input-col-names']])) { 453 for (field in names(opt[['input-col-names']])) {
510 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) { 454 if ( ! opt[['input-col-names']][[field]] %in% colnames(input) && length(grep('^[0-9]+$', opt[['input-col-names']][[field]])) > 0) {
511 col.index <- as.integer(opt[['input-col-names']][[field]]) 455 col.index <- as.integer(opt[['input-col-names']][[field]])
524 if ( ! opt[['input-col-names']][['mz']] %in% colnames(input)) 468 if ( ! opt[['input-col-names']][['mz']] %in% colnames(input))
525 stop(paste0('No column named "', opt[['input-col-names']][['mz']], '" in input file.')) 469 stop(paste0('No column named "', opt[['input-col-names']][['mz']], '" in input file.'))
526 470
527 # Set columns 'all-cols' specified 471 # Set columns 'all-cols' specified
528 if ( ! is.null(opt[['all-cols']])) 472 if ( ! is.null(opt[['all-cols']]))
529 opt$rtcol <- db$getChromCol() 473 opt$rtcol <- db$getChromCol()[['id']]
530 474
531 # Check chrom columns 475 # Check chrom columns
532 if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) { 476 if ( ! is.null(opt[['check-cols']]) && ! is.null(opt$rtcol)) {
533 dbcols <- db$getChromCol()[['id']] 477 dbcols <- db$getChromCol()[['id']]
534 unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols] 478 unknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols]
539 483
540 # Check that an RT column exists when using MZ/RT matching 484 # Check that an RT column exists when using MZ/RT matching
541 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input)) 485 if ( ! is.null(opt$rtcol) && ! opt[['input-col-names']][['rt']] %in% colnames(input))
542 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file.")) 486 stop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named '", opt[['input-col-names']][['rt']],"' can be found inside your input file."))
543 487
488 # Set output col names
489 output.col.names <- opt[['input-col-names']]
490
544 # Set streams 491 # Set streams
545 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']]) 492 input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[['input-col-names']], rtunit = opt[['rtunit']])
546 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) 493 main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = output.col.names, one.line = ! is.null(opt[['same-rows']]), match.sep = opt[['molids-sep']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]), rtunit = opt[['rtunit']])
547 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = opt[['output-col-names']], first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']])) 494 peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[['same-cols']]), output.fields = output.col.names, first.val = ! is.null(opt[['first-val']]), ascii = ! is.null(opt[['excel2011comp']]), nogreek = ! is.null(opt[['excel2011comp']]), noapostrophe = ! is.null(opt[['excel2011comp']]), noplusminus = ! is.null(opt[['excel2011comp']]), rtunit = opt[['rtunit']])
548 invisible(db$setInputStream(input.stream)) 495 invisible(db$setInputStream(input.stream))
549 db$addOutputStreams(c(main.output, peaks.output)) 496 db$addOutputStreams(c(main.output, peaks.output))
550 497
551 # Set M/Z tolerance unit 498 # Set database units
552 db$setMzTolUnit(opt$mztolunit) 499 db$setMzTolUnit(opt$mztolunit)
500 if ( ! is.null(opt[['db-rt-unit']]) && opt$database == 'file')
501 db$setRtUnit(opt[['db-rt-unit']])
553 502
554 # Search database 503 # Search database
555 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG 504 mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG
556 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']]) 505 db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[['precursor-match']]), precursor.rt.tol = opt[['precursor-rt-tol']])
557 506
563 if ( ! is.null(opt[['peak-output-file']])) 512 if ( ! is.null(opt[['peak-output-file']]))
564 # TODO Create a class MsDbOutputCsvFileStream 513 # TODO Create a class MsDbOutputCsvFileStream
565 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE) 514 df.write.tsv(peaks.output$getDataFrame(), file = opt[['peak-output-file']], row.names = FALSE)
566 if ( ! is.null(opt[['html-output-file']])) 515 if ( ! is.null(opt[['html-output-file']]))
567 # TODO Create a class MsDbOutputHtmlFileStream 516 # TODO Create a class MsDbOutputHtmlFileStream
568 output.html(db = db, main = main.output$getDataFrame(), peaks = peaks.output$getDataFrame(), file = opt[['html-output-file']], opt = opt, output.fields = opt[['output-col-names']]) 517 output.html(db = db, peaks = peaks.output$getDataFrame(), file = opt[['html-output-file']])