Galaxy |

Changeset 6:f86fec07f392 (2019-02-22)

Previous changeset 5:fb9c0409d85c (2017-04-19)

Commit message:
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce

modified:
README.md
lcmsmatching.xml
test-data/filedb.tsv

added:
lcmsmatching
test-data/mz-input-small_with_nas.tsv
test-data/mzrt-input-small.tsv
test-data/test_1_main_output.tsv
test-data/test_1_peaks_output.html
test-data/test_1_peaks_output.tsv
test-data/test_2_main_output.tsv
test-data/test_2_peaks_output.html
test-data/test_2_peaks_output.tsv
test-data/test_3_main_output.tsv
test-data/test_3_peaks_output.html
test-data/test_3_peaks_output.tsv

removed:
BiodbFactory.R
BiodbObject.R
MassFiledbConn.R
MassbankConn.R
MassbankEntry.R
MassdbConn.R
Ms4TabSqlDb.R
MsBioDb.R
MsDb.R
MsDbChecker.R
MsDbInputDataFrameStream.R
MsDbInputStream.R
MsDbLogger.R
MsDbObserver.R
MsDbOutputDataFrameStream.R
MsDbOutputStream.R
MsFileDb.R
MsPeakForestDb.R
MsXlsDb.R
PeakforestConn.R
PeakforestEntry.R
UrlRequestScheduler.R
biodb-common.R
build.xml
dfhlp.R
excelhlp.R
fshlp.R
htmlhlp.R
list-chrom-cols.py
list-file-cols.py
list-ms-mode-values.py
msdb-common.R
nethlp.R
search-mz
search.R
strhlp.R

diff -r fb9c0409d85c -r f86fec07f392 BiodbFactory.R
--- a/BiodbFactory.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,274 +0,0 @@\n-# vi: fdm=marker\n-\n-##########################\n-# CLASS DECLARATION {{{1 #\n-##########################\n-\n-BiodbFactory <- methods::setRefClass("BiodbFactory", contains = \'BiodbObject\', fields = list(.useragent = "character",\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t .conn = "list",\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t .cache.dir = "character",\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t .cache.mode = "character",\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t .debug = "logical",\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t .chunk.size = "integer",\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t .use.env.var = "logical"))\n-\n-###############\n-# CONSTRUCTOR #\n-###############\n-\n-BiodbFactory$methods( initialize = function(useragent = NA_character_, cache.dir = NA_character_, cache.mode = BIODB.CACHE.READ.WRITE, debug = FALSE, chunk.size = NA_integer_, use.env.var = FALSE, ...) {\n-\n-\t.useragent <<- useragent\n-\t.conn <<- list()\n-\t.cache.dir <<- cache.dir\n-\t.cache.mode <<- cache.mode\n-\t.debug <<- debug\n-\t.chunk.size <<- as.integer(chunk.size)\n-\t.use.env.var <<- use.env.var\n-\n-\tcallSuper(...) 
# calls super-class initializer with remaining parameters\n-})\n-\n-#######################\n-# PRINT DEBUG MESSAGE #\n-#######################\n-\n-BiodbFactory$methods( .print.debug.msg = function(msg) {\n-\tif (.self$.debug)\n-\t\t.print.msg(msg = msg, class = class(.self))\n-})\n-\n-##################\n-# GET USER AGENT #\n-##################\n-\n-BiodbFactory$methods( getUserAgent = function() {\n-\treturn(.self$.useragent)\n-})\n-\n-##################\n-# SET USER AGENT #\n-##################\n-\n-\tBiodbFactory$methods( setUserAgent = function(useragent) {\n-\t"Set useragent of BiodbFactory."\n-\t.useragent <<- useragent\n-})\n-\n-###############\n-# CREATE CONN #\n-###############\n-\n-BiodbFactory$methods( createConn = function(class, url = NA_character_, token = NA_character_) {\n- " Create connection to databases useful for metabolomics."\n-\tif (class %in% names(.self$.conn))\n-\t\tstop(paste0(\'A connection of type \', class, \' already exists. Please use method getConn() to access it.\'))\n-\n-\t# Use environment variables\n-\tif (.self$.use.env.var) {\n-\t\tif (is.na(url))\n-\t\t\turl <- .biodb.get.env.var(c(class, \'URL\'))\n-\t\tif (is.na(token))\n-\t\t\ttoken <- .biodb.get.env.var(c(class, \'TOKEN\'))\n-\t}\n-\n-\t# Create connection instance\n-\tconn <- switch(class,\n-\t\t chebi = ChebiConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t kegg = KeggConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t pubchemcomp = PubchemConn$new(useragent = .self$.useragent, db = BIODB.PUBCHEMCOMP, debug = .self$.debug),\n-\t\t pubchemsub = PubchemConn$new(useragent = .self$.useragent, db = BIODB.PUBCHEMSUB, debug = .self$.debug),\n-\t\t hmdb = HmdbConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t chemspider = ChemspiderConn$new(useragent = .self$.useragent, debug = .self$.debug, token = token),\n-\t\t enzyme = EnzymeConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t lipidmaps = 
LipidmapsConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t mirbase = MirbaseConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t ncbigene = NcbigeneConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t ncbiccds = NcbiccdsConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t uniprot = UniprotConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t massbank = MassbankConn$new(useragent = .self$.useragent, url = url, debug = .self$.debug),\n-\t\t\t\t\tmassfiledb = MassFiledbConn$new(file = url, debug = .self$.debug),\n-\t\t\t\t\tpeakforest = PeakforestConn$new(useragent = .self$.useragent, debug = .self$.debug),\n-\t\t \t NULL)\n-\n-\t# Unknown class\n-\tif (is.null(conn))\n-\t\tstop(paste0("Unknown r-biodb class \\"", class,"\\"."))\n-\n-\t# Register new class\n-\t.self$.conn[[class]] <- conn\n-\n-\treturn (.self$.conn[[class]])\n-})\n-\n-############\n-# GET CONN #\n-############\n-\n-BiodbFactory$methods( getConn = funct'..b'class, \'-\', x, \'.\', ext) }, FUN.VALUE = \'\')\n-\n-\t# set file paths\n-\tfile.paths <- vapply(filenames, function(x) { if (is.na(x)) NA_character_ else file.path(.self$.cache.dir, x) }, FUN.VALUE = \'\')\n-\n-\t# Create cache dir if needed\n-\tif ( ! is.na(.self$.cache.dir) && ! 
file.exists(.self$.cache.dir))\n-\t\tdir.create(.self$.cache.dir)\n-\n-\treturn(file.paths)\n-})\n-\n-###########################\n-# LOAD CONTENT FROM CACHE #\n-###########################\n-\n-BiodbFactory$methods( .load.content.from.cache = function(class, id) {\n-\n-\tcontent <- NULL\n-\n-\t# Read contents from files\n-\tfile.paths <- .self$.get.cache.file.paths(class, id)\n-\tcontent <- lapply(file.paths, function(x) { if (is.na(x)) NA_character_ else ( if (file.exists(x)) paste(readLines(x), collapse = "\\n") else NULL )} )\n-\n-\treturn(content)\n-})\n-\n-############################\n-# IS CACHE READING ENABLED #\n-############################\n-\n-BiodbFactory$methods( .is.cache.reading.enabled = function() {\n-\treturn( ! is.na(.self$.cache.dir) && .self$.cache.mode %in% c(BIODB.CACHE.READ.ONLY, BIODB.CACHE.READ.WRITE))\n-})\n-\n-############################\n-# IS CACHE WRITING ENABLED #\n-############################\n-\n-BiodbFactory$methods( .is.cache.writing.enabled = function() {\n-\treturn( ! is.na(.self$.cache.dir) && .self$.cache.mode %in% c(BIODB.CACHE.WRITE.ONLY, BIODB.CACHE.READ.WRITE))\n-})\n-\n-#########################\n-# SAVE CONTENT TO CACHE #\n-#########################\n-\n-BiodbFactory$methods( .save.content.to.cache = function(class, id, content) {\n-\n-\t# Write contents into files\n-\tfile.paths <- .self$.get.cache.file.paths(class, id)\n-\tmapply(function(c, f) { if ( ! 
is.null(c)) writeLines(c, f) }, content, file.paths)\n-})\n-\n-#####################\n-# GET ENTRY CONTENT #\n-#####################\n-\n-BiodbFactory$methods( getEntryContent = function(class, id) {\n-\n-\t# Debug\n-\t.self$.print.debug.msg(paste0("Get entry content(s) for ", length(id)," id(s)..."))\n-\n-\t# Initialize content\n-\tif (.self$.is.cache.reading.enabled()) {\n-\t\tcontent <- .self$.load.content.from.cache(class, id)\t\n-\t\tmissing.ids <- id[vapply(content, is.null, FUN.VALUE = TRUE)]\n-\t}\n-\telse {\n-\t\tcontent <- lapply(id, as.null)\n-\t\tmissing.ids <- id\n-\t}\n-\n-\t# Remove duplicates\n-\tn.duplicates <- sum(duplicated(missing.ids))\n-\tmissing.ids <- missing.ids[ ! duplicated(missing.ids)]\n-\n-\t# Debug\n-\tif (any(is.na(id)))\n-\t\t.self$.print.debug.msg(paste0(sum(is.na(id)), " entry ids are NA."))\n-\tif (.self$.is.cache.reading.enabled()) {\n-\t\t.self$.print.debug.msg(paste0(sum( ! is.na(id)) - length(missing.ids), " entry content(s) loaded from cache."))\n-\t\tif (n.duplicates > 0)\n-\t\t\t.self$.print.debug.msg(paste0(n.duplicates, " entry ids, whose content needs to be fetched, are duplicates."))\n-\t\t.self$.print.debug.msg(paste0(length(missing.ids), " entry content(s) need to be fetched."))\n-\t}\n-\n-\t# Get contents\n-\tif (length(missing.ids) > 0) {\n-\n-\t\t# Use connector to get missing contents\n-\t\tconn <- .self$getConn(class)\n-\n-\t\t# Divide list of missing ids in chunks (in order to save in cache regularly)\n-\t\tchunks.of.missing.ids = if (is.na(.self$.chunk.size)) list(missing.ids) else split(missing.ids, ceiling(seq_along(missing.ids) / .self$.chunk.size))\n-\n-\t\t# Loop on chunks\n-\t\tmissing.contents <- NULL\n-\t\tfor (ch.missing.ids in chunks.of.missing.ids) {\n-\n-\t\t\tch.missing.contents <- conn$getEntryContent(ch.missing.ids)\n-\n-\t\t\t# Save to cache\n-\t\t\tif ( ! 
is.null(ch.missing.contents) && .self$.is.cache.writing.enabled())\n-\t\t\t\t.self$.save.content.to.cache(class, ch.missing.ids, ch.missing.contents)\n-\n-\t\t\t# Append\n-\t\t\tmissing.contents <- c(missing.contents, ch.missing.contents)\n-\n-\t\t\t# Debug\n-\t\t\tif (.self$.is.cache.reading.enabled())\n-\t\t\t\t.self$.print.debug.msg(paste0("Now ", length(missing.ids) - length(missing.contents)," id(s) left to be retrieved..."))\n-\t\t}\n-\n-\t\t# Merge content and missing.contents\n-\t\tcontent[id %in% missing.ids] <- vapply(id[id %in% missing.ids], function(x) missing.contents[missing.ids %in% x], FUN.VALUE = \'\')\n-\t}\n-\n-\treturn(content)\n-})\n'

diff -r fb9c0409d85c -r f86fec07f392 BiodbObject.R
--- a/BiodbObject.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,32 +0,0 @@
-##########################
-# CLASS DECLARATION {{{1 #
-##########################
-
-BiodbObject <- methods::setRefClass("BiodbObject", fields = list( .observers = "ANY" )) # Base reference class; .observers holds the observers registered through addObservers().
-
-########################
-# ABSTRACT METHOD {{{1 #
-########################
-
-BiodbObject$methods( .abstract.method = function() { # Stop with an error naming the calling method; meant to be called from methods that subclasses must override.
-
- class <- class(.self)
- method <- paste(deparse(sys.call(length(sys.calls()) - 1)), collapse = '') # Deparse the caller's call into one string; sub() on the raw call object would be applied to its as.character() pieces and never see "name(".
- method <- sub('^[^$]*\\$([^(]*)\\(.*$', '\\1()', method) # Reduce "obj$name(args)" to "name()".
-
- stop(paste("Method", method, "is not implemented in", class, "class."))
-})
-
-######################
-# ADD OBSERVERS {{{1 #
-######################
-
-BiodbObject$methods( addObservers = function(obs) { # Register one observer, or a list of observers, on this object.
-
- # Check types of observers: obs must be a single BiodbObserver, or a list containing only BiodbObserver instances
- if ( ( ! is.list(obs) && ! inherits(obs, "BiodbObserver")) || (is.list(obs) && any( ! vapply(obs, function(o) inherits(o, "BiodbObserver"), FUN.VALUE = TRUE))))
- stop("Observers must inherit from BiodbObserver class.")
-
- # Add observers to current list; when .observers is still NULL the list starts from obs alone
- .observers <<- if (is.null(.self$.observers)) c(obs) else c(.self$.observers, obs)
-})

diff -r fb9c0409d85c -r f86fec07f392 MassFiledbConn.R
--- a/MassFiledbConn.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,275 +0,0 @@
-# LCMS File db.
-# In this type of database, a single file is provided in CSV format. Default separator is tabulation.
-# Each line is an MS peak measure.
-# The file contains molecule and spectrum information. Each spectrum has an accession id.
-
-# TODO Rename setField into setFieldName + addNewField, and setMsMode into setMsModeValue
-
-#############
-# CONSTANTS #
-#############
-
-# Default database fields
-.BIODB.DFT.DB.FIELDS <- list() # Named list: field tag -> column name.
-for (f in c(BIODB.ACCESSION, BIODB.NAME, BIODB.FULLNAMES, BIODB.COMPOUND.ID, BIODB.MSMODE, BIODB.PEAK.MZEXP, BIODB.PEAK.MZTHEO, BIODB.PEAK.COMP, BIODB.PEAK.ATTR, BIODB.CHROM.COL, BIODB.CHROM.COL.RT, BIODB.FORMULA, BIODB.MASS)) # By default each tag maps to a column of the same name.
- .BIODB.DFT.DB.FIELDS[[f]] <- f
-
-#####################
-# CLASS DECLARATION #
-#####################
-
-MassFiledbConn <- methods::setRefClass("MassFiledbConn", contains = "MassdbConn", fields = list(.file = "character", .file.sep = "character", .file.quote = "character", .field.multval.sep = 'character', .db = "ANY", .db.orig.colnames = "character", .fields = "list", .ms.modes = "character")) # File-backed mass database connection: .db caches the loaded table, .fields maps field tags to column names, .ms.modes maps MS mode tags to the values used in the file.
-
-###############
-# CONSTRUCTOR #
-###############
-
-MassFiledbConn$methods( initialize = function(file = NA_character_, file.sep = "\t", file.quote = "\"", ...) { # Validate the database file path and install the default field and MS mode mappings; the file is not read here.
-
- # Check file: it must be specified (neither NULL nor NA) and must exist
- (! is.null(file) && ! is.na(file)) || stop("You must specify a file database to load.")
- file.exists(file) || stop(paste0("Cannot locate the file database \"", file ,"\"."))
-
- # Set fields
- .db <<- NULL # Filled on first use by .init.db()
- .db.orig.colnames <<- NA_character_
- .file <<- file
- .file.sep <<- file.sep
- .file.quote <<- file.quote
- .fields <<- .BIODB.DFT.DB.FIELDS
- .field.multval.sep <<- ';'
- .ms.modes <<- c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)
- names(.self$.ms.modes) <- .self$.ms.modes # Each mode tag initially maps to itself
-
- callSuper(...) # Pass the remaining arguments to the parent class initializer
-})
-
-######################
-# Is valid field tag #
-######################
-
-MassFiledbConn$methods( isValidFieldTag = function(tag) { # Tell, for each element of tag, whether it is one of the names of .fields.
- return (tag %in% names(.self$.fields))
-})
-
-###########
-# INIT DB #
-###########
-
-MassFiledbConn$methods( .init.db = function() { # Load the database file into .db on first use; does nothing once .db is set.
-
- if (is.null(.self$.db)) {
-
- # Load database; the quote characters are passed by name so the call does not rely on the order of read.table()'s arguments
- .db <<- read.table(.self$.file, sep = .self$.file.sep, quote = .self$.file.quote, header = TRUE, stringsAsFactors = FALSE, row.names = NULL, comment.char = '')
-
- # Save column names as read from the file
- .db.orig.colnames <<- colnames(.self$.db)
- }
-})
-
-#############
-# Set field #
-#############
-
-MassFiledbConn$methods( setField = function(tag, colname) { # Map field tag `tag` onto file column(s) `colname`; several columns are merged into one new column.
-
- ( ! is.null(tag) && ! is.na(tag)) || stop("No tag specified.")
- ( ! is.null(colname) && ! any(is.na(colname))) || stop("No column name specified.") # any(): colname may hold several names, and && needs a single logical value.
-
- # Load database file
- .self$.init.db()
-
- # Check that this field tag is defined in the fields list
- .self$isValidFieldTag(tag) || stop(paste0("Database field tag \"", tag, "\" is not valid."))
-
- # Check that columns are defined in database file
- all(colname %in% names(.self$.db)) || stop(paste0("One or more columns among ", paste(colname, collapse = ", "), " are not defined in database file."))
-
- # Set new definition
- if (length(colname) == 1)
- .fields[[tag]] <<- colname
- else {
- new.col <- paste(colname, collapse = ".") # The merged column is named after its source columns joined with "."
- .self$.db[[new.col]] <- vapply(seq_len(nrow(.self$.db)), function(i) { paste(.self$.db[i, colname], collapse = '.') }, FUN.VALUE = '') # seq_len() iterates zero times on an empty table, where seq() would yield c(1, 0).
- .fields[[tag]] <<- new.col
- }
-
- # Update data frame column names: each original column that is mapped to a tag takes that tag as its name
- colnames(.self$.db) <- vapply(.self$.db.orig.colnames, function(c) if (c %in% .self$.fields) names(.self$.fields)[.self$.fields %in% c] else c, FUN.VALUE = '')
-})
-
-######################################
-# SET FIELD MULTIPLE VALUE SEPARATOR #
-######################################
-
-MassFiledbConn$methods( setFieldMultValSep = function(sep) { # Replace the stored multiple-value separator (set to ';' by the constructor).
- .field.multval.sep <<- sep
-})
-
-################
-# SET MS MODES #
-################
-
-MassFiledbConn$methods( setMsMode = function(mode, value) { # Set the value that stands for MS mode `mode` in the file; .select() filters rows against it.
- .self$.ms.modes[[mode]] <- value
-})
-
-##########################
-# GET ENTRY CONTENT TYPE #
-##########################
-
-MassFiledbConn$methods( getEntryContentType = function(type) { # Always returns BIODB.DATAFRAME; the type argument is not used.
- return(BIODB.DATAFRAME)
-})
-
-################
-# CHECK FIELDS #
-################
-
-MassFiledbConn$methods( .check.fields = function(fields) { # Stop if any of `fields` is not a known field tag or is not a column of the loaded database; empty or single-NA input is accepted without checks.
-
- if (length(fields) ==0 || (length(fields) == 1 && is.na(fields)))
- return(invisible(NULL)) # A bare `return` only evaluates to the function object and does not leave the method.
-
- # Check if fields are known
- unknown.fields <- fields[ ! fields %in% names(.self$.fields)] # Index `fields` itself so that only the offending tags are kept.
- if (length(unknown.fields) > 0)
- stop(paste0("Field(s) ", paste(unknown.fields, collapse = ", "), " is/are unknown."))
-
- # Init db
- .self$.init.db()
-
- # Check if fields are defined in file database
- undefined.fields <- fields[ ! fields %in% colnames(.self$.db)]
- if (length(undefined.fields) > 0)
- stop(paste0("Column(s) ", paste(undefined.fields, collapse = ", "), " is/are undefined in file database.")) # collapse belongs to the inner paste(), and the whole message is built as one string.
-})
-
-##########
-# SELECT #
-##########
-
-# Select data from database
-MassFiledbConn$methods( .select = function(cols = NULL, mode = NULL, compound.ids = NULL, drop = FALSE, uniq = FALSE, sort = FALSE, max.rows = NA_integer_) { # Return the database, optionally restricted to one MS mode, to the columns of the given field tags, and to the first max.rows entries. compound.ids is accepted but not used yet.
-
- x <- NULL
-
- # Init db
- .self$.init.db()
-
- # Get db
- db <- .self$.db
-
- # Filter db on mode
- if ( ! is.null(mode) && ! is.na(mode)) {
-
- # Check mode value
- mode %in% names(.self$.ms.modes) || stop(paste0("Unknown mode value '", mode, "'."))
- .self$.check.fields(BIODB.MSMODE)
-
- # Filter on mode: keep the rows whose mode column holds the value registered for this mode
- db <- db[db[[unlist(.self$.fields[BIODB.MSMODE])]] %in% .self$.ms.modes[[mode]], ]
- }
-
- # Filter db on compound ids
- # TODO
-
- if ( ! is.null(cols) && ! all(is.na(cols))) # all(): cols may hold several tags, and && needs a single logical value.
- .self$.check.fields(cols)
-
- # Get subset
- if (is.null(cols) || all(is.na(cols)))
- x <- db
- else
- x <- db[, unlist(.self$.fields[cols]), drop = drop]
-
- # Rearrange (only when the selection was dropped to a plain vector)
- if (drop && is.vector(x)) {
- if (uniq)
- x <- x[ ! duplicated(x)]
- if (sort)
- x <- sort(x)
- }
-
- # Cut to at most max.rows entries; never index past the end (which would pad with NA), and return nothing for max.rows = 0
- if ( ! is.na(max.rows))
- x <- if (is.vector(x)) x[seq_len(min(max.rows, length(x)))] else x[seq_len(min(max.rows, nrow(x))), ]
-
- return(x)
-})
-
-#################
-# GET ENTRY IDS #
-#################
-
-MassFiledbConn$methods( getEntryIds = function(type) { # Return the sorted, de-duplicated IDs of the spectra or compounds in the file, as character; NA_character_ for any other type.
-
- ids <- NA_character_
-
- if (type %in% c(BIODB.SPECTRUM, BIODB.COMPOUND))
- ids <- as.character(.self$.select(cols = if (type == BIODB.SPECTRUM) BIODB.ACCESSION else BIODB.COMPOUND.ID, drop = TRUE, uniq = TRUE, sort = TRUE)) # Spectra are identified by accession, compounds by compound ID
-
- return(ids)
-})
-
-##################
-# GET NB ENTRIES #
-##################
-
-MassFiledbConn$methods( getNbEntries = function(type) { # Number of IDs returned by getEntryIds(type); note this is 1 for an unsupported type, since getEntryIds() then returns a single NA.
- return(length(.self$getEntryIds(type)))
-})
-
-###############################
-# GET CHROMATOGRAPHIC COLUMNS #
-###############################
-
-# Inherited from MassdbConn.
-MassFiledbConn$methods( getChromCol = function(compound.ids = NULL) { # Return a data frame of the distinct chromatographic columns, optionally restricted to the given compounds.
-
- # Extract needed columns (compound ID and chromatographic column)
- db <- .self$.select(cols = c(BIODB.COMPOUND.ID, BIODB.CHROM.COL))
-
- # Filter on molecule IDs, when some were given
- if ( ! is.null(compound.ids))
- db <- db[db[[BIODB.COMPOUND.ID]] %in% compound.ids, ]
-
- # Get column names
- cols <- db[[BIODB.CHROM.COL]]
-
- # Remove duplicates
- cols <- cols[ ! duplicated(cols)]
-
- # Make data frame: the same values serve as both ID and title
- chrom.cols <- data.frame(cols, cols, stringsAsFactors = FALSE)
- colnames(chrom.cols) <- c(BIODB.ID, BIODB.TITLE)
-
- return(chrom.cols)
-})
-
-#################
-# GET MZ VALUES #
-#################
-
-# Inherited from MassdbConn.
-MassFiledbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { # Return the sorted, de-duplicated M/Z values, optionally for one MS mode and limited to max.results values.
-
- # Get mz values. NOTE(review): BIODB.PEAK.MZ is not among the tags listed in .BIODB.DFT.DB.FIELDS (which has BIODB.PEAK.MZEXP and BIODB.PEAK.MZTHEO) - confirm this tag passes .check.fields()
- mz <- .self$.select(cols = BIODB.PEAK.MZ, mode = mode, drop = TRUE, uniq = TRUE, sort = TRUE, max.rows = max.results)
-
- return(mz)
-})
-
-################
-# GET NB PEAKS #
-################
-
-# Inherited from MassdbConn.
-MassFiledbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) { # Count the peaks (rows) of the database, optionally for one MS mode; compound.ids is forwarded to .select(), which does not filter on it yet.
-
- # Get peaks as a plain vector: with the default drop = FALSE, .select() returns a one-column data frame, whose length() is its number of columns
- peaks <- .self$.select(cols = BIODB.PEAK.MZTHEO, mode = mode, compound.ids = compound.ids, drop = TRUE)
-
- return(length(peaks))
-})

diff -r fb9c0409d85c -r f86fec07f392 MassbankConn.R
--- a/MassbankConn.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,122 +0,0 @@
-#####################
-# CLASS DECLARATION #
-#####################
-
-MassbankConn <- methods::setRefClass("MassbankConn", contains = c("RemotedbConn", "MassdbConn"), fields = list( .url = "character" )) # Connection to a Massbank web service; .url is the base URL that requests are built from.
-
-###############
-# CONSTRUCTOR #
-###############
-
-MassbankConn$methods( initialize = function(url = NA_character_, ...) { # Store the web service URL and initialize the parent classes.
-
- # Set URL, falling back on BIODB.MASSBANK.EU.WS.URL when none is given (NULL or NA)
- .url <<- if (is.null(url) || is.na(url)) BIODB.MASSBANK.EU.WS.URL else url
-
- callSuper(...) # Pass the remaining arguments to the parent class initializer
-})
-
-##########################
-# GET ENTRY CONTENT TYPE #
-##########################
-
-MassbankConn$methods( getEntryContentType = function() { # Always returns BIODB.TXT.
- return(BIODB.TXT)
-})
-
-#####################
-# GET ENTRY CONTENT #
-#####################
-
-MassbankConn$methods( getEntryContent = function(ids) { # Fetch the text content of the entries `ids`; returns a character vector aligned with ids, NA where nothing was retrieved.
-
- # Debug
- .self$.print.debug.msg(paste0("Get entry content(s) for ", length(ids)," id(s)..."))
-
- URL.MAX.LENGTH <- 2083 # Passed to get.entry.url() as max.length
-
- # Initialize return values
- content <- rep(NA_character_, length(ids))
-
- # Loop on all ids; n counts the ids already covered by a request
- n <- 0
- while (n < length(ids)) {
-
- # Get list of accession ids to retrieve (all those not requested yet)
- accessions <- ids[(n + 1):length(ids)]
-
- # Create URL request; x$url is the request and x$n the number of ids it covers
- x <- get.entry.url(class = BIODB.MASSBANK, accession = accessions, content.type = BIODB.TXT, max.length = URL.MAX.LENGTH, base.url = .self$.url)
-
- # Debug
- .self$.print.debug.msg(paste0("Send URL request for ", x$n," id(s)..."))
-
- # Send request
- xmlstr <- .self$.get.url(x$url)
-
- # Increase number of entries retrieved. NOTE(review): if x$n could ever be 0 this loop would not terminate - confirm get.entry.url() always covers at least one id
- n <- n + x$n
-
- # Parse XML and get text
- if ( ! is.na(xmlstr)) {
- xml <-  xmlInternalTreeParse(xmlstr, asText = TRUE)
- ns <- c(ax21 = "http://api.massbank/xsd")
- returned.ids <- xpathSApply(xml, "//ax21:id", xmlValue, namespaces = ns)
- if (length(returned.ids) > 0)
- content[match(returned.ids, ids)] <- xpathSApply(xml, "//ax21:info", xmlValue, namespaces = ns) # Store each returned text at the position of its id in ids
- }
-
- # Debug
- .self$.print.debug.msg(paste0("Now ", length(ids) - n," id(s) left to be retrieved..."))
- }
-
- return(content)
-})
-
-################
-# CREATE ENTRY #
-################
-
-# Creates a Spectrum instance from file content.
-# content       A file content, downloaded from the public database.
-# RETURN        A spectrum instance.
-MassbankConn$methods( createEntry = function(content, drop = TRUE) { # Thin wrapper: delegates to createMassbankEntryFromTxt() with the same arguments.
- return(createMassbankEntryFromTxt(content, drop = drop))
-})
-
-#################
-# GET MZ VALUES #
-#################
-
-MassbankConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { # Not implemented: the body is empty, so a call returns NULL.
-})
-
-#################
-# GET ENTRY IDS #
-#################
-
-MassbankConn$methods( getEntryIds = function(max.results = NA_integer_) { # Return the entry IDs found by a searchPeak request on the web service; NULL when the request yields NA.
-
- # Set URL: a searchPeak request with fixed search parameters
- url <- paste0(.self$.url, 'searchPeak?mzs=1000&relativeIntensity=100&tolerance=1000&instrumentTypes=all&ionMode=Both')
- url <- paste0(url, '&maxNumResults=', (if (is.na(max.results)) 0 else max.results)) # 0 is sent when no limit is given - presumably meaning "no limit" for the service; confirm
-
- # Send request
- xmlstr <- .self$.get.url(url)
-
- # Parse XML and get text; there is no else branch, so an NA response makes the method return NULL
- if ( ! is.na(xmlstr)) {
- xml <-  xmlInternalTreeParse(xmlstr, asText = TRUE)
- ns <- c(ax21 = "http://api.massbank/xsd")
- returned.ids <- xpathSApply(xml, "//ax21:id", xmlValue, namespaces = ns)
- return(returned.ids)
- }
-})
-
-##################
-# GET NB ENTRIES #
-##################
-
-MassbankConn$methods( getNbEntries = function() { # Number of IDs returned by getEntryIds() called without a result limit.
- return(length(.self$getEntryIds()))
-})

diff -r fb9c0409d85c -r f86fec07f392 MassbankEntry.R
--- a/MassbankEntry.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,129 +0,0 @@
-###########################
-# MASSBANK SPECTRUM CLASS #
-###########################
-
-MassbankEntry <- methods::setRefClass("MassbankEntry", contains = "BiodbEntry") # Entry class for Massbank records; adds no field of its own to BiodbEntry.
-
-###########
-# FACTORY #
-###########
-
-createMassbankEntryFromTxt <- function(contents, drop = TRUE) { # Parse Massbank text records into MassbankEntry objects: returns a list with one element per record (NULL where no accession was found), or the single element itself when drop is TRUE and one record was given.
-
- entries <- list()
-
- # Define fields regex: one pattern per field, whose first capture group is the field value
- regex <- character()
- regex[[BIODB.ACCESSION]] <- "^ACCESSION: (.+)$"
- regex[[BIODB.MSDEV]] <- "^AC\\$INSTRUMENT: (.+)$"
- regex[[BIODB.MSDEVTYPE]] <- "^AC\\$INSTRUMENT_TYPE: (.+)$"
- regex[[BIODB.MSTYPE]] <- "^AC\\$MASS_SPECTROMETRY: MS_TYPE (.+)$"
- regex[[BIODB.MSPRECMZ]] <- "^MS\\$FOCUSED_ION: PRECURSOR_M/Z (.+)$"
- regex[[BIODB.NB.PEAKS]] <- "^PK\\$NUM_PEAK: ([0-9]+)$"
- regex[[BIODB.MSPRECANNOT]] <- "^MS\\$FOCUSED_ION: PRECURSOR_TYPE (.+)$"
- regex[[BIODB.CHEBI.ID]] <- "^CH\\$LINK: CHEBI\\s+(.+)$"
- regex[[BIODB.KEGG.ID]] <- "^CH\\$LINK: KEGG\\s+(.+)$"
- regex[[BIODB.INCHI]] <- "^CH\\$IUPAC:\\s+(.+)$"
- regex[[BIODB.INCHIKEY]] <- "^CH\\$LINK: INCHIKEY\\s+(.+)$"
- regex[[BIODB.CHEMSPIDER.ID]] <- "^CH\\$LINK: CHEMSPIDER\\s+(.+)$"
- regex[[BIODB.CAS.ID]] <- "^CH\\$LINK: CAS\\s+(.+)$"
- regex[[BIODB.FORMULA]] <- "^CH\\$FORMULA:\\s+(.+)$"
- regex[[BIODB.SMILES]] <- "^CH\\$SMILES:\\s+(.+)$"
- regex[[BIODB.MASS]] <- "^CH\\$EXACT_MASS:\\s+(.+)$"
- regex[[BIODB.PUBCHEMCOMP.ID]] <- "^CH\\$LINK: PUBCHEM\\s+.*CID:([0-9]+)"
- regex[[BIODB.PUBCHEMSUB.ID]] <- "^CH\\$LINK: PUBCHEM\\s+.*SID:([0-9]+)"
-
- for (text in contents) {
-
- # Create instance (left empty when text is NULL or NA)
- entry <- MassbankEntry$new()
-
- if ( ! is.null(text) && ! is.na(text)) {
-
- # Read text, one line at a time
- lines <- strsplit(text, "\n")
- for (s in lines[[1]]) {
-
- # Test generic regex: the first pattern that matches sets its field, and the line is then done
- parsed <- FALSE
- for (field in names(regex)) {
- g <- stringr::str_match(s, regex[[field]])
- if ( ! is.na(g[1,1])) {
- entry$setField(field, g[1,2])
- parsed <- TRUE
- break
- }
- }
- if (parsed)
- next
-
- # Name: only set while the entry has no name yet, so the first CH$NAME line wins
- if (is.na(entry$getField(BIODB.NAME))) {
- g <- stringr::str_match(s, "^CH\\$NAME:\\s+(.+)$")
- if ( ! is.na(g[1,1]))
- entry$setField(BIODB.NAME, g[1,2])
- }
-
- # PubChem: a bare number (no CID:/SID: prefix) is stored as the PubChem substance ID
- g <- stringr::str_match(s, "^CH\\$LINK: PUBCHEM\\s+([0-9]+)$")
- if ( ! is.na(g[1,1]))
- entry$setField(BIODB.PUBCHEMSUB.ID, g[1,2])
-
- # MS MODE: 'POSITIVE' maps to BIODB.MSMODE.POS, any other value to BIODB.MSMODE.NEG
- g <- stringr::str_match(s, "^AC\\$MASS_SPECTROMETRY: ION_MODE (.+)$")
- if ( ! is.na(g[1,1])) {
- entry$setField(BIODB.MSMODE, if (g[1,2] == 'POSITIVE') BIODB.MSMODE.POS else BIODB.MSMODE.NEG)
- next
- }
-
- # PEAKS
- if (.parse.peak.line(entry, s))
- next
- }
- }
-
- entries <- c(entries, entry)
- }
-
- # Replace elements with no accession id by NULL
- entries <- lapply(entries, function(x) if (is.na(x$getField(BIODB.ACCESSION))) NULL else x)
-
- # If the input was a single element, then output a single object
- if (drop && length(contents) == 1)
- entries <- entries[[1]]
-
- return(entries)
-}
-
-###################
-# PARSE PEAK LINE #
-###################
-
-.parse.peak.line <- function(entry, line) { # Try to read `line` as a peak or peak annotation line and append it to the entry's peak table; returns TRUE when a row was stored, FALSE otherwise.
-
- peaks <- BIODB.PEAK.DF.EXAMPLE # Template data frame for the new row
-
- # Annotation: five space-separated values read as m/z, formula, formula count, mass and error in ppm
- g <- stringr::str_match(line, "^\\s+([0-9][0-9.]*) ([A-Z0-9+-]+) ([0-9]+) ([0-9][0-9.]*) ([0-9][0-9.]*)$")
- if ( ! is.na(g[1,1]))
- peaks[1, c(BIODB.PEAK.MZ, BIODB.PEAK.FORMULA, BIODB.PEAK.FORMULA.COUNT, BIODB.PEAK.MASS, BIODB.PEAK.ERROR.PPM)] <- list(as.double(g[1,2]), g[1,3], as.integer(g[1,4]), as.double(g[1,5]), as.double(g[1,6]))
-
- # Peak: three space-separated values read as m/z, intensity and relative intensity
- g <- stringr::str_match(line, "^\\s+([0-9][0-9.]*) ([0-9][0-9.]*) ([0-9]+)$")
- if ( ! is.na(g[1,1]))
- peaks[1, c(BIODB.PEAK.MZ, BIODB.PEAK.INTENSITY, BIODB.PEAK.RELATIVE.INTENSITY)] <- list(as.double(g[1,2]), as.double(g[1,3]), as.integer(g[1,4]))
-
- if (nrow(peaks) > 0) { # NOTE(review): this only detects a matched line if BIODB.PEAK.DF.EXAMPLE has zero rows - confirm
-
- # Get current peaks and merge with new peaks
- current.peaks <- entry$getField(BIODB.PEAKS)
- if ( ! is.null(current.peaks))
- peaks <- rbind(current.peaks, peaks)
-
- entry$setField(BIODB.PEAKS, peaks)
-
- return(TRUE)
- }
-
- return(FALSE)
-}

diff -r fb9c0409d85c -r f86fec07f392 MassdbConn.R
--- a/MassdbConn.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,130 +0,0 @@
-#####################
-# CLASS DECLARATION #
-#####################
-
-MassdbConn <- methods::setRefClass("MassdbConn", contains = "BiodbConn") # Base class for mass spectra database connections; most of its methods are stubs that stop until a subclass overrides them.
-
-###############################
-# GET CHROMATOGRAPHIC COLUMNS #
-###############################
-
-# Get a list of chromatographic columns contained in this database.
-# compound.ids  A list of compound IDs used to filter results.
-# The returned value is a data.frame with two columns : one for the ID (BIODB.ID) and another one for the title (BIODB.TITLE).
-MassdbConn$methods( getChromCol = function(compound.ids = NULL) { # Stub: always stops; subclasses provide the implementation.
- stop("Method getChromCol() is not implemented in concrete class.")
-})
-
-#################
-# GET MZ VALUES #
-#################
-
-# Returns a numeric vector of all masses stored inside the database.
-MassdbConn$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) { # Stub: always stops; subclasses provide the implementation.
- stop("Method getMzValues() not implemented in concrete class.")
-})
-
-################
-# GET NB PEAKS #
-################
-
-# Returns the number of peaks contained in the database
-MassdbConn$methods( getNbPeaks = function(mode = NULL, compound.ids = NULL) { # Stub: always stops; subclasses provide the implementation.
- stop("Method getNbPeaks() not implemented in concrete class.")
-})
-
-#########################
-# FIND COMPOUND BY NAME #
-#########################
-
-# Find a molecule by name
-# name   A vector of molecule names to search for.
-# Return an integer vector of the same size as the name input vector, containing the found molecule IDs, in the same order.
-MassdbConn$methods( findCompoundByName = function(name) { # Stub: always stops; subclasses provide the implementation.
- stop("Method findCompoundByName() not implemented in concrete class.")
-})
-
-####################################
-# FIND SPECTRA IN GIVEN MASS RANGE #
-####################################
-# Find spectra in the given mass range.
-# rtype the type of return, objects, dfspecs data.frame of spectra, dfpeaks data.frame of peaks.
-MassdbConn$methods( searchMzRange = function(mzmin, mzmax, rtype = c("objects","dfspecs","dfpeaks")){ # Stub: always stops; subclasses provide the implementation.
- stop("Method searchMzRange() not implemented in concrete class.")
-})
-
-####################################
-# FIND SPECTRA IN GIVEN MASS RANGE #
-####################################
-MassdbConn$methods( searchMzTol = function(mz, tol, tolunit=BIODB.MZTOLUNIT.PLAIN, rtype = c("objects","dfspecs","dfpeaks")){ # Stub: always stops; subclasses provide the implementation.
- stop("Method searchMzTol() not implemented in concrete class.")
-})
-
-######################################################
-# FIND A MOLECULES WITH PRECURSOR WITHIN A TOLERANCE #
-######################################################
- MassdbConn$methods( searchSpecPrecTol = function(mz, tol, tolunit=BIODB.MZTOLUNIT.PLAIN, mode = NULL){ # Stub: always stops; subclasses provide the implementation. msmsSearch() relies on it.
- stop("Method searchSpecPrecTol not implemented in concrete class.")
- })
-
-#################################
-#perform a database MS-MS search#
-#################################
-
-### spec : the spectrum to match against the database.
-### precursor : the mass/charge of the precursor to be looked for.
-### mztol : the size of the window around the precursor to be looked for.
-### ppm : the matching ppm tolerance.
-### fun : the distance function (BIODB.MSMS.DIST.WCOSINE by default).
-### dmz : the mass tolerance is taken as the minimum between this quantity and the ppm.
-### npmin : the minimum number of peaks to detect a match (2 recommended)
-
-MassdbConn$methods( msmsSearch = function(spec, precursor, mztol, tolunit, # Match spectrum `spec` against the database spectra whose precursor is within mztol of `precursor`; returns a list (measure, matchedpeaks, id) ordered by res$ord, or NULL when compareSpectra() returns NULL.
- ppm, fun = BIODB.MSMS.DIST.WCOSINE, # NOTE(review): tolunit, ppm and dmz are not referenced anywhere in the body
- params = list(), npmin=2, dmz = 0.001,
- mode = BIODB.MSMODE.POS, return.ids.only = TRUE){
-
-
- # TODO replace by msms precursor search when available. NOTE(review): BIODB.MZTOLUNIT.PLAIN is always passed here instead of the tolunit argument - confirm this is intended
- lspec <- .self$searchSpecPrecTol( precursor, mztol, BIODB.MZTOLUNIT.PLAIN, mode = mode)
- rspec <- lapply(lspec,function(x){
-        peaks <- x$getFieldValue(BIODB.PEAKS)
-
- #### Getting the correct fields: keep the m/z and intensity columns that are present; at least two of the three are required
- vcomp <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.INTENSITY)
-
- foundfields <- vcomp %in% colnames(peaks)
- if(sum(foundfields ) < 2){
- stop(paste0("fields can't be coerced to mz and intensity : ",colnames(peaks)))
- }
-
- peaks <- peaks[ , vcomp[which( foundfields ) ] ]
-
- peaks
- })
-
- # TODO Import compareSpectra into biodb and put it inside massdb-helper.R or hide it as a private method.
- res <- compareSpectra(spec, rspec, npmin = npmin, fun = fun, params = params)
-
- if(is.null(res)) return(NULL) # To decide at MassdbConn level: return empty list (or empty data frame) or NULL.
- ### Adding the matched peaks and the similarity values to spectra.
-
- lret <-vector(length(lspec),mode = "list") # Overwritten in both branches below
- vsimilarity <- numeric( length( lspec ) ) # Assigned but never read afterwards
- vmatched <- vector( mode = "list", length( lspec ) ) # Assigned but never read afterwards
-
- if( return.ids.only ){
-     lret <- sapply( lspec, function( x ) {
-      x$getFieldValue( BIODB.ACCESSION )
-     })
- }else{
-     ### TODO implement three types of return.
-     lret <- lspec
- }
-
- ### Reordering the list with the order given by compareSpectra().
- lret <- lret[ res$ord ]
-
-
- return( list(measure = res$similarity[ res$ord ], matchedpeaks = res$matched [ res$ord ], id = lret))
-})

diff -r fb9c0409d85c -r f86fec07f392 Ms4TabSqlDb.R
--- a/Ms4TabSqlDb.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,351 +0,0 @@\n-if ( ! exists(\'Ms4TabSqlDb\')) { # Do not load again if already loaded\n-\n-\tlibrary(\'methods\')\n-\tsource(\'msdb-common.R\')\n-\tsource(\'MsDb.R\')\n-\t\n-\t#####################\n-\t# CLASS DECLARATION #\n-\t#####################\n-\t\n-\tMs4TabSqlDb <- setRefClass("Ms4TabSqlDb", contains = "MsDb", fields = list(.host = "character", .port = "integer", .dbname = "character", .user = "character", .password = "character", .drv = "ANY", .conn = "ANY"))\n-\t\n-\t###############\n-\t# CONSTRUCTOR #\n-\t###############\n-\t\n-\tMs4TabSqlDb$methods( initialize = function(host = NA_character_, port = NA_integer_, dbname = NA_character_, user = NA_character_, password = NA_character_, ...) {\n-\n-\t\t# Initialize members\n-\t\t.host <<- if ( ! is.null(host)) host else NA_character_\n-\t\t.port <<- if ( ! is.null(port)) port else NA_integer_\n-\t\t.dbname <<- if ( ! is.null(dbname)) dbname else NA_character_\n-\t\t.user <<- if ( ! is.null(user)) user else NA_character_\n-\t\t.password <<- if ( ! 
is.null(password)) password else NA_character_\n-\t\t.drv <<- NULL\n-\t\t.conn <<- NULL\n-\n-\t\tcallSuper(...)\n-\t})\n-\t\n-\t##################\n-\t# GET CONNECTION #\n-\t##################\n-\t\n-\tMs4TabSqlDb$methods( .get.connection = function() {\n-\n-\t\t# Initialize connection\n-\t\tif (is.null(.self$.conn)) {\n-\t\t\tlibrary(\'RPostgreSQL\')\n-\t\t\t.drv <<- dbDriver("PostgreSQL")\n-\t\t\t.conn <<- dbConnect(.self$.drv, host = .self$.host, port = .self$.port, dbname = .self$.dbname, user = .self$.user, password = .self$.password)\n-\t\t}\n-\n-\t\treturn(.self$.conn)\n-\t})\n-\t\n-\t##############\n-\t# SEND QUERY #\n-\t##############\n-\n-\tMs4TabSqlDb$methods( .send.query = function(query) {\n-\t\tconn <- .self$.get.connection() # Call it first separately, so library RPostgreSQL is loaded.\n-\t\trs <- try(dbSendQuery(conn, query))\n-\t\treturn(rs)\n-\t})\n-\n-\t####################\n-\t# GET MOLECULE IDS #\n-\t####################\n-\t\n-\tMs4TabSqlDb$methods( getMoleculeIds = function() {\n-\n-\t\trs <- .self$.send.query(\'select pkmol.molecule_id as id from peaklist_name as pkmol;\')\n-\t\tids <- fetch(rs,n=-1)\n-\t\tids <- ids[[\'id\']] # Get \'id\' column\n-\t\tids <- vapply(ids, function(x) { if (substring(x, 1, 1) == \'N\') as.integer(substring(x, 2)) else as.integer(x) } , FUN.VALUE = 1, USE.NAMES = FALSE)\n-\t\tids <- (sort(ids))\n-\n-\t\treturn(ids)\n-\t})\n-\n-\t####################\n-\t# GET NB MOLECULES #\n-\t####################\n-\t\n-\tMs4TabSqlDb$methods( getNbMolecules = function() {\n-\n-\t\trs <- .self$.send.query(\'select count(*) from peaklist_name;\')\n-\t\tdf <- fetch(rs,n=-1)\n-\t\tn <- df[[1]]\n-\n-\t\treturn(n)\n-\t})\n-\t\n-\t#####################\n-\t# GET MOLECULE NAME #\n-\t#####################\n-\t\n-\tMs4TabSqlDb$methods( getMoleculeName = function(molid) {\n-\n-\t\t# Build request\n-\t\twhere <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = \'N", id, "\'"), FUN.VALUE = \'\'), collapse = \' or 
\')\n-\t\trequest <- paste0(\'select pkmol.molecule_id as id, pkmol.name from peaklist_name as pkmol where \', where, \';\')\n-\n-\t\t# Run request\n-\t\trs <- .self$.send.query(request)\n-\t\tdf <- fetch(rs,n=-1)\n-\n-\t\t# Get IDs\n-\t\tids <- vapply(df[[\'id\']], function(x) as.integer(substring(x, 2)), FUN.VALUE = 1, USE.NAMES = FALSE)\n-\n-\t\t# Get names in the same order as the input vector\n-\t\tnames <- df[[\'name\']][order(ids)[order(molid)]]\n-\n-\t\treturn(if (is.null(names)) NA_character_ else names)\n-\t})\n-\n-\t\n-\t###############################\n-\t# GET CHROMATOGRAPHIC COLUMNS #\n-\t###############################\n-\t\n-\tMs4TabSqlDb$methods( getChromCol = function(molid = NULL) {\n-\n-\t\t# Get all columns\n-\t\tif (is.null(molid)) {\n-\t\t\trequest <- \'select name from method;\'\n-\n-\t\t# Get columns of the specified molecules\n-\t\t} else {\n-\t\t\twhere_molids <- paste0(vapply(molid, function(id) paste0("pkmol.molecule_id = \'N", id, "\'"), FUN.VALUE = \'\'), collapse = \' or \')\n-\t\t\twhere <- paste0(\'pk.name_id = pkmol.id and pk.id = pkret.id_peak and pkret.id_method = method.id and (\', where_molids, \')\')\n-\t\t\trequest <- paste0(\'select distinct method.name from method, peaklist as pk, peaklist_name as pkmol, peaklist_ret as pkret where \', where, \';\')\n-\t\t'..b'<- .self$.send.query(request)\n-\t\tdf <- fetch(rs,n=-1)\n-\n-\t\treturn(df[1,1])\n-\t})\n-\t\n-\t###############################\n-\t# GET CHROMATOGRAPHIC COLUMNS #\n-\t###############################\n-\t\n-\tMs4TabSqlDb$methods( .to.dbcols = function(col) {\n-\n-\t\t# Get all column names\n-\t\trequest <- \'select name from method;\'\n-\t\trs <- .self$.send.query(request)\n-\t\tdf <- fetch(rs,n=-1)\n-\n-\t\t# Get database column names\n-\t\tdbcols <- df[[\'name\']]\n-\t\tdbcols <- dbcols[ dbcols != \'FIA\']\n-\n-\t\t# Get normalize names\n-\t\tnormcols <- vapply(dbcols, .normalize_column_name, FUN.VALUE = \'\', USE.NAMES = FALSE)\n-\n-\t\treturn(dbcols[normcols == 
tolower(col)])\n-\t})\n-\t\n-\t#################\n-\t# GET MZ VALUES #\n-\t#################\n-\t\n-\t# Returns a numeric vector of all masses stored inside the database.\n-\tMs4TabSqlDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {\n-\n-\t\t# Build request\n-\t\tselect <- paste0("select distinct pk.mass as ", MSDB.TAG.MZTHEO)\n-\t\tfrom <- " from peaklist as pk"\n-\t\twhere <- ""\n-\t\tif ( ! is.null(mode))\n-\t\t\twhere <- paste0(" where ", if (mode == MSDB.TAG.POS) \'\' else \'not \', \'pk.ion_pos\')\n-\t\tlimit <- ""\n-\t\tif ( ! is.na(NA_integer_))\n-\t\t\tlimit <- paste(" limit", max.results)\n-\n-\t\t# Assemble request\n-\t\trequest <- paste0(select, from, where, \';\')\n-\n-\t\t# Run request\n-\t\trs <- .self$.send.query(request)\n-\t\tdf <- fetch(rs, n=-1)\n-\n-\t\treturn(df[[MSDB.TAG.MZTHEO]])\n-\t})\n-\n-\t##########\n-\t# SEARCH #\n-\t##########\n-\n-\tMs4TabSqlDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {\n-\n-\t\t# Build request\n-\t\tselect <- paste0("select pkmol.molecule_id as ", MSDB.TAG.MOLID, ", pkmol.name as ", MSDB.TAG.MOLNAMES,", pk.mass as ", MSDB.TAG.MZTHEO, ", pk.composition as ", MSDB.TAG.COMP,", pk.attribution as ", MSDB.TAG.ATTR)\n-\t\tfrom <- " from peaklist as pk, peaklist_name as pkmol"\n-\t\twhere <- paste0(" where pkmol.id = pk.name_id and pk.mass >= ", mz.low, " and pk.mass <= ", mz.high)\n-\t\twhere <- paste0(where, \' and \', if (mode == MSDB.TAG.POS) \'\' else \'not \', \'pk.ion_pos\')\n-\n-\t\t# Insert where clause on attribs\n-\t\tif ( ! is.null(attribs)) {\n-\t\t\twhere.attribs <- paste0(vapply(attribs, function(a) paste0("pk.attribution = \'", a, "\'"), FUN.VALUE = \'\', USE.NAMES = FALSE), collapse = " or ")\n-\t\t\twhere <- paste0(where, \' and (\', where.attribs, \')\')\n-\t\t}\n-\n-\t\t# Insert where clause on molids\n-\t\tif ( ! 
is.null(molids)) {\n-\t\t\twhere.molids <- paste0(vapply(molids, function(id) paste0("pkmol.molecule_id = \'N", id, "\'"), FUN.VALUE = \'\'), collapse = \' or \')\n-\t\t\twhere <- paste0(where, \' and (\', where.molids, \')\')\n-\t\t}\n-\n-\t\t# Insert where clause on columns\n-\t\tif ( ! is.null(col)) {\n-\t\t\tdbcols <- .self$.to.dbcols(col)\n-\t\t\tif ( ! is.null(dbcols)) {\n-\n-\t\t\t\t# Can\'t find specified columns\n-\t\t\t\tif (length(dbcols) == 0 && length(col) > 0)\n-\t\t\t\t\treturn(.get.empty.result.df(rt = TRUE))\n-\n-\t\t\t\tselect <- paste0(select, ", (60 * pkret.retention) as ", MSDB.TAG.COLRT, ", method.name as ", MSDB.TAG.COL)\n-\t\t\t\tfrom <- paste0(from, ", method, peaklist_ret as pkret")\n-\t\t\t\twhere.cols <- if (length(dbcols) == 0) \'TRUE\' else paste0(vapply(dbcols, function(c) paste0("method.name = \'", c, "\'"), FUN.VALUE = \'\', USE.NAMES = FALSE), collapse = " or ")\n-\t\t\t\twhere <- paste0(where, " and pk.id = pkret.id_peak and pkret.id_method = method.id and (", where.cols, ")")\n-\t\t\t\tif (! is.null(rt.low) && ! is.null(rt.high))\n-\t\t\t\t\twhere <- paste0(where, " and pkret.retention * 60 >= ", rt.low, " and pkret.retention * 60 <= ", rt.high)\n-\t\t\t}\n-\t\t}\n-\n-\t\t# Assemble request\n-\t\trequest <- paste0(select, from, where, \';\')\n-\n-\t\t# Run request\n-\t\trs <- .self$.send.query(request)\n-\t\tdf <- fetch(rs,n=-1)\n-\n-\t\t# No results\n-\n-\t\t# Remove N prefix from IDs\n-\t\tif (nrow(df) > 0)\n-\t\t\tdf[[MSDB.TAG.MOLID]] <- vapply(df[[MSDB.TAG.MOLID]], function(x) substring(x, 2), FUN.VALUE = \'\', USE.NAMES = FALSE)\n-\t\telse if (nrow(df) == 0)\n-\t\t\tdf <- .get.empty.result.df(rt = ! is.null(col))\n-\n-\t\treturn(df)\n-\t})\n-\t\n-} # end of load safe guard\n'

diff -r fb9c0409d85c -r f86fec07f392 MsBioDb.R
--- a/MsBioDb.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,100 +0,0 @@
-if ( ! exists('MsBioDb')) { # Do not load again if already loaded
-
- library(methods)
- source('MsDb.R')
- source('BiodbObject.R', chdir = TRUE)
- source('BiodbFactory.R', chdir = TRUE)
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsBioDb <- setRefClass("MsBioDb", contains = "MsDb", fields = list(.massdb = "ANY"))
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- MsBioDb$methods( initialize = function(massdb = NULL, ...) {
-
- # Check bio database
- ! is.null(massdb) || stop("You must set a bio database.")
- inherits(massdb, "MassdbConn") || stop("The bio database must inherit from MassdbConn class.")
- .massdb <<- massdb
-
- callSuper(...)
- })
-
- ####################
- # HANDLE COMPOUNDS #
- ####################
-
- MsBioDb$methods( handleCompounds = function() {
- return(.self$.massdb$handlesEntryType(BIODB.COMPOUND))
- })
-
- ####################
- # GET MOLECULE IDS #
- ####################
-
- MsBioDb$methods( getMoleculeIds = function(max.results = NA_integer_) {
- return(.self$.massdb$getEntryIds(type = BIODB.COMPOUND, max.results = max.results))
- })
-
- ####################
- # GET NB MOLECULES #
- ####################
-
- MsBioDb$methods( getNbMolecules = function() {
- return(.self$.massdb$getNbEntries(type = BIODB.COMPOUND))
- })
-
- #################
- # GET MZ VALUES #
- #################
-
- MsBioDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {
- return(.self$.massdb$getMzValues(mode = mode, max.results = max.results))
- })
-
- #####################
- # GET MOLECULE NAME #
- #####################
-
- MsBioDb$methods( getMoleculeName = function(molid) {
- return(.self$.massdb$getMoleculeName(molid))
- })
-
- ###############################
- # GET CHROMATOGRAPHIC COLUMNS #
- ###############################
-
- MsBioDb$methods( getChromCol = function(molid = NULL) {
- return(.self$.massdb$getChromCol(molid))
- })
-
- ################
- # FIND BY NAME #
- ################
-
- MsBioDb$methods( findByName = function(name) {
- return(.self$.massdb$findCompoundByName(name))
- })
-
- #######################
- # GET RETENTION TIMES #
- #######################
-
- MsBioDb$methods( getRetentionTimes = function(molid, col = NA_character_) {
- return(.self$.massdb$getRetentionTimes(molid, chrom.cols = col))
- })
-
- ################
- # GET NB PEAKS #
- ################
-
- MsBioDb$methods( getNbPeaks = function(molid = NA_integer_, mode = NA_character_) {
- return(.self$.massdb$getNbPeaks(compound.ids = molid, mode = mode))
- })
-
-}

diff -r fb9c0409d85c -r f86fec07f392 MsDb.R
--- a/MsDb.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,500 +0,0 @@\n-if ( ! exists(\'MsDb\')) { # Do not load again if already loaded\n-\t\n-\tlibrary(\'methods\')\n-\tsource(\'msdb-common.R\')\n-\tsource(\'MsDbObserver.R\')\n-\tsource(\'MsDbOutputStream.R\')\n-\n-\t#####################\n-\t# CLASS DECLARATION #\n-\t#####################\n-\t\n-\tMsDb <- setRefClass("MsDb", fields = list(.observers = "ANY", .prec = "list", .output.streams = "ANY", .input.stream = "ANY", .mz.tol.unit = "character", .rt.unit = "character"))\n-\t\n-\t###############\n-\t# CONSTRUCTOR #\n-\t###############\n-\t\n-\tMsDb$methods( initialize = function(...) {\n-\t\t\n-\t\tcallSuper(...)\n-\n-\t\t.observers <<- NULL\n-\t\t.output.streams <<- NULL\n-\t\t.input.stream <<- NULL\n-\t\t.prec <<- MSDB.DFT.PREC\n-\t\t.mz.tol.unit <<- MSDB.DFT.MZTOLUNIT\n-\t\t.rt.unit <<- MSDB.RTUNIT.SEC\n-\t})\n-\n-\t####################\n-\t# SET INPUT STREAM #\n-\t####################\n-\t\n-\tMsDb$methods( setInputStream = function(stream) {\n-\t\n-\t\t# Check types of input stream\n-\t\tif ( ! inherits(stream, "MsDbInputStream") && ! is.null(stream))\n-\t\t\tstop("Input stream must inherit from MsDbInputStream class.")\n-\t\n-\t\t# Save current stream\n-\t\tcur.stream <- .self$.input.stream\n-\n-\t\t# Set stream\n-\t\t.input.stream <<- stream\n-\n-\t\treturn(cur.stream)\n-\t})\n-\n-\t######################\n-\t# ADD OUTPUT STREAMS #\n-\t######################\n-\t\n-\tMsDb$methods( addOutputStreams = function(stream) {\n-\t\n-\t\t# Check types of output streams\n-\t\tif ( ( ! is.list(stream) && ! inherits(stream, "MsDbOutputStream")) || (is.list(stream) && any( ! 
vapply(stream, function(s) inherits(s, "MsDbOutputStream"), FUN.VALUE = TRUE))))\n-\t\t\tstop("Output streams must inherit from MsDbOutputStream class.")\n-\t\n-\t\t# Add streams to current list\n-\t\t.output.streams <<- if (is.null(.self$.output.streams)) c(stream) else c(.self$.output.streams, stream)\n-\t})\n-\n-\t#########################\n-\t# REMOVE OUTPUT STREAMS #\n-\t#########################\n-\t\n-\tMsDb$methods( removeOutputStreams = function(stream) {\n-\t\n-\t\t# Check types of output streams\n-\t\tif ( ( ! is.list(stream) && ! inherits(stream, "MsDbOutputStream")) || (is.list(stream) && any( ! vapply(stream, function(s) inherits(s, "MsDbOutputStream"), FUN.VALUE = TRUE))))\n-\n-\t\t# Remove streams from current list\n-\t\t.output.streams <<- .self$.output.streams[ ! stream %in% .self$.output.streams]\n-\t})\n-\n-\t########################\n-\t# RESET OUTPUT STREAMS #\n-\t########################\n-\t\n-\tMsDb$methods( resetOutputStreams = function(stream) {\n-\t\t.output.streams <<- NULL\n-\t})\n-\n-\t#################\n-\t# ADD OBSERVERS #\n-\t#################\n-\t\n-\tMsDb$methods( addObservers = function(obs) {\n-\t\n-\t\t# Check types of observers\n-\t\tif ( ( ! is.list(obs) && ! inherits(obs, "MsDbObserver")) || (is.list(obs) && any( ! 
vapply(obs, function(o) inherits(o, "MsDbObserver"), FUN.VALUE = TRUE))))\n-\t\t\tstop("Observers must inherit from MsDbObserver class.")\n-\t\n-\t\t# Add observers to current list\n-\t\t.observers <<- if (is.null(.self$.observers)) c(obs) else c(.self$.observers, obs)\n-\t})\n-\t\n-\t##################\n-\t# SET PRECURSORS #\n-\t##################\n-\t\n-\tMsDb$methods( setPrecursors = function(prec) {\n-\t\t.prec <<- prec\n-\t})\n-\t\n-\t#################\n-\t# SET DB FIELDS #\n-\t#################\n-\t\n-\tMsDb$methods( areDbFieldsSettable = function() {\n-\t\treturn(FALSE)\n-\t})\n-\t\n-\tMsDb$methods( setDbFields = function(fields) {\n-\t\tstop("Method setDbFields() not implemented in concrete class.")\n-\t})\n-\t\n-\t################\n-\t# SET MS MODES #\n-\t################\n-\t\n-\tMsDb$methods( areDbMsModesSettable = function() {\n-\t\treturn(FALSE)\n-\t})\n-\t\n-\tMsDb$methods( setDbMsModes = function(modes) {\n-\t\tstop("Method setDbMsModes() not implemented in concrete class.")\n-\t})\n-\t\n-\t###################\n-\t# SET MZ TOL UNIT #\n-\t###################\n-\n-\tMsDb$methods( setMzTolUnit = function(mztolunit) {\n-\n-\t\tif ( ! mztolunit %in% MSDB.MZTOLUNIT.VALS)\n-\t\t\tstop(paste0("M/Z tolerance unit must be one of: ", paste(MSDB.MZTOLUNIT.VALS, collapse = \', \'), "."))\n-\n-\t\t.mz.tol.unit <<- mztolunit\n-\t})\n-\n-\t###############\n-\t# SET RT UNIT #\n-\t###############\n-\n-\tMsDb$methods( setRtUnit = fun'..b'x.lines <- x[i, x.cols, drop = FALSE]\n-#\t\t\t\tx.lines <- rename.col(x.lines, unlist(x.colnames), unlist(.self$.output.fields[names(x.colnames)]))\n-#\t\t\t\tif (nrow(results) == 0) {\n-#\t\t\t\t\ty[r, colnames(x.lines)] <- x.lines\n-#\t\t\t\t}\n-#\t\t\t\telse {\n-#\t\t\t\t\tif (same.rows) {\n-#\t\t\t\t\t\ty[r, colnames(x.lines)] <- x.lines\n-#\t\t\t\t\t\tids <- results[[MSDB.TAG.MOLID]]\n-#\t\t\t\t\t\tids <- ids[ ! 
duplicated(ids)] # Remove duplicated values\n-#\t\t\t\t\t\ty[r, MSDB.TAG.msmatching] <- paste(ids, collapse = .self$.molids.sep)\n-#\t\t\t\t\t}\n-#\t\t\t\t\tif ( ! same.rows || peak.table) {\n-#\t\t\t\t\t\tnew.rows <- cbind(x.lines, results, row.names = NULL)\n-#\t\t\t\t\t\tif ( ! same.rows) {\n-#\t\t\t\t\t\t\trows <- r:(r+nrow(results)-1)\n-#\t\t\t\t\t\t\ty[rows, colnames(new.rows)] <- new.rows\n-#\t\t\t\t\t\t}\n-#\t\t\t\t\t\tif (peak.table) {\n-#\t\t\t\t\t\t\tzr <- nrow(z) + 1\n-#\t\t\t\t\t\t\tzrows <- zr:(zr+nrow(results)-1)\n-#\t\t\t\t\t\t\tz[zrows, colnames(new.rows)] <- new.rows\n-#\t\t\t\t\t\t}\n-#\t\t\t\t\t}\n-#\t\t\t\t}\n-\t\t}\n-\n-#\t\tresults <- if (peak.table) list(main = y, peaks = z) else y\n-\n-#\t\treturn(results)\n-\t\treturn(peaks)\n-\t})\n-\n-\t# rt Retention time in seconds.\n-\t# molids An option vector of molecule IDs, used to restrict the search.\n-\tMsDb$methods( searchForMzRtTols = function(mode, mz, rt = NULL, shift = NULL, prec = NULL, col = NULL, rt.tol = NULL, rt.tol.x = NULL, rt.tol.y = NULL, attribs = NULL, molids = NULL, molids.rt.tol = NULL, colnames = MSDB.DFT.INPUT.FIELDS) {\n-\n-\t\t# Set M/Z bounds\n-\t\tif (.self$.mz.tol.unit == MSDB.MZTOLUNIT.PPM) {\n-\t\t\tmz.low <- mz * (1 + (- shift - prec) * 1e-6)\n-\t\t\tmz.high <- mz * (1 + (- shift + prec) * 1e-6)\n-\t\t}\n-\t\telse { # PLAIN\n-\t\t\tmz.low <- mz - shift - prec\n-\t\t\tmz.high <- mz - shift + prec\n-\t\t}\n-\n-\t\t# Set retention time bounds\n-\t\trt.low <- NULL\n-\t\trt.high <- NULL\n-\t\tif ( ! is.null(rt.tol)) {\n-\t\t\tlow <- rt - rt.tol\n-\t\t\thigh <- rt + rt.tol\n-\t\t\trt.low <- if (is.null(rt.low)) low else max(low, rt.low)\n-\t\t\trt.high <- if (is.null(rt.high)) high else min(high, rt.high)\n-\t\t}\n-\t\tif ( ! 
is.null(rt.tol.x)) {\n-\t\t\tlow <- rt - rt.tol.x - rt ^ rt.tol.y\n-\t\t\thigh <- rt + rt.tol.x + rt ^ rt.tol.y\n-\t\t\trt.low <- if (is.null(rt.low)) low else max(low, rt.low)\n-\t\t\trt.high <- if (is.null(rt.high)) high else min(high, rt.high)\n-\t\t}\n-\n-\t\t# List molecule IDs\n-\t\tif ( ! is.null(molids.rt.tol) && is.data.frame(molids)) {\n-\t\t\tids <- molids[(rt >= molids[[MSDB.TAG.COLRT]] - molids.rt.tol) & (rt <= molids[[MSDB.TAG.COLRT]] + molids.rt.tol), MSDB.TAG.MOLID]\n-\t\t\tif (length(ids) == 0)\n-\t\t\t\t# No molecule ID match for this retention time\n-\t\t\t\treturn(data.frame()) # return empty result set\n-\t\t} else {\n-\t\t\tids <- molids\n-\t\t}\n-\n-\t\treturn(.self$searchForMzRtBounds(mode,\n-\t\t\t\t\t\t\t\t\t\t mz.low = mz * (1 + (- shift - prec) * 1e-6),\n-\t\t\t\t\t\t\t\t\t\t mz.high = mz * (1 + (- shift + prec) * 1e-6),\n-\t\t\t\t\t\t\t\t\t\t rt.low = rt.low,\n-\t\t\t\t\t\t\t\t\t\t rt.high = rt.high,\n-\t\t\t\t\t\t\t\t\t\t col = col,\n-\t\t\t\t\t\t\t\t\t\t attribs = attribs,\n-\t\t\t\t\t\t\t\t\t\t molids = ids))\n-\t})\n-\n-\t# rt.low Lower bound of the retention time in seconds.\n-\t# rt.high Higher bound of the retention time in seconds.\n-\tMsDb$methods( searchForMzRtBounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {\n-\n-\t\tresults <- .self$.do.search.for.mz.rt.bounds(mode = mode, mz.low = mz.low, mz.high = mz.high, rt.low = rt.low, rt.high = rt.high, col = col, attribs = attribs, molids = molids)\n-\n-\t\treturn(results)\n-\t})\n-\n-\t# TODO Write description of output: data frame with which columns ?\n-\tMsDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {\n-\t\tstop("Method .do.search.for.mz.rt.bounds() not implemented in concrete class.")\n-\t})\n-\n-\t# DEPRECATED\n-\tMsDb$methods( searchForMz = function(x, mode, tol = 5, col = NULL, rt.tol.x = 5, rt.tol.y = 0.80) 
{\n-\t\twarning("Method searchForMz() is deprecated. Use searchForMzRtList() instead.")\n-\t\t.self$searchForMzRtList(x = x, mode = mode, prec = tol, col = col, rt.tol.x = rt.tol.x, rt.tol.y = rt.tol.y)\n-\t})\n-\n-} # end of load safe guard\n'

diff -r fb9c0409d85c -r f86fec07f392 MsDbChecker.R
--- a/MsDbChecker.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,44 +0,0 @@
-if ( ! exists('MsDbChecker')) { # Do not load again if already loaded
-
- source('MsDbObserver.R')
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsDbChecker <- setRefClass("MsDbChecker", contains = 'MsDbObserver', fields = list(.fail = 'logical'))
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- # fail If set to TRUE, will fail (i.e.: quit application with a status set to 1) on error.
- MsDbChecker$methods( initialize = function(fail = FALSE, ...) {
-
- .fail <<- if ( ! is.null(fail) && ! is.na(fail)) fail else FALSE
-
- callSuper(...) # calls super-class initializer with remaining parameters
- })
-
- ###########
- # WARNING #
- ###########
-
- MsDbChecker$methods( warning = function(msg) {
- write(paste('WARNING: ', msg), stderr())
- })
-
- #########
- # ERROR #
- #########
-
- MsDbChecker$methods( error = function(msg) {
-
- write(paste('ERROR:', msg), stderr())
-
- # Fail
- if (.self$.fail)
- quit(status = 1)
- })
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsDbInputDataFrameStream.R
--- a/MsDbInputDataFrameStream.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,99 +0,0 @@
-if ( ! exists('MsDbInputDataFrameStream')) { # Do not load again if already loaded
-
- library(methods)
- source('MsDbInputStream.R')
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsDbInputDataFrameStream <- setRefClass("MsDbInputDataFrameStream", contains = 'MsDbInputStream', fields = list( .df = "ANY", .i = "integer", .rtunit = 'character'))
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- MsDbInputDataFrameStream$methods( initialize = function(df = data.frame(), input.fields = msdb.get.dft.input.fields(), rtunit = MSDB.RTUNIT.SEC, ...) {
-
- callSuper(input.fields = input.fields, ...)
-
- .df <<- df
- .i <<- 0L
- .rtunit <<- rtunit
- })
-
- ##########
- # GET MZ #
- ##########
-
- MsDbInputDataFrameStream$methods( getMz = function() {
-
- if (.self$.i > 0 && .self$.i <= nrow(.self$.df) && ! is.null(.self$.input.fields[[MSDB.TAG.MZ]]))
- return(.self$.df[.self$.i, .self$.input.fields[[MSDB.TAG.MZ]]])
-
- return(NULL)
- })
-
- ##########
- # GET RT #
- ##########
-
- MsDbInputDataFrameStream$methods( getRt = function() {
-
- rt <- NULL
-
- if (.self$.i > 0 && .self$.i <= nrow(.self$.df) && ! is.null(.self$.input.fields[[MSDB.TAG.RT]])) {
- rt <- .self$.df[.self$.i, .self$.input.fields[[MSDB.TAG.RT]]]
- if (.self$.rtunit == MSDB.RTUNIT.MIN)
- rt <- rt * 60
- }
-
- return(rt)
- })
-
- ###########
- # GET ALL #
- ###########
-
- MsDbInputDataFrameStream$methods( getAll = function(but = NULL) {
-
- if (.self$.i > 0 && .self$.i <= nrow(.self$.df)) {
-
- vals <- .self$.df[.self$.i, , drop = FALSE]
-
- if ( ! is.null(but))
- vals <- vals[, ! colnames(vals) %in% .self$.input.fields[but], drop = FALSE]
-
- return(vals)
- }
-
- return(NULL)
- })
-
- ###############
- # NEXT VALUES #
- ###############
-
- MsDbInputDataFrameStream$methods( nextValues = function() {
-
- if (.self$.i <= nrow(.self$.df))
- .i <<- .self$.i + 1L
- })
-
- ###################
- # HAS NEXT VALUES #
- ###################
-
- MsDbInputDataFrameStream$methods( hasNextValues = function() {
- return(.self$.i < nrow(.self$.df))
- })
-
- #########
- # RESET #
- #########
-
- MsDbInputDataFrameStream$methods( reset = function() {
- .i <<- 0L
- })
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsDbInputStream.R
--- a/MsDbInputStream.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,63 +0,0 @@
-if ( ! exists('MsDbInputStream')) { # Do not load again if already loaded
-
- library('methods')
- source('msdb-common.R')
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsDbInputStream <- setRefClass("MsDbInputStream", fields = list(.input.fields = "ANY"))
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- MsDbInputStream$methods( initialize = function(input.fields = msdb.get.dft.input.fields(), ...) {
-
- .input.fields <<- input.fields
-
- callSuper(...)
- })
-
- ##########
- # GET MZ #
- ##########
-
- MsDbInputStream$methods( getMz = function() {
- stop("Method getMz() not implemented in concrete class.")
- })
-
- ##########
- # GET RT #
- ##########
-
- MsDbInputStream$methods( getRt = function() {
- stop("Method getRt() not implemented in concrete class.")
- })
-
- ###########
- # GET ALL #
- ###########
-
- MsDbInputStream$methods( getAll = function(but = NULL) {
- stop("Method getUnused() not implemented in concrete class.")
- })
-
- ###############
- # NEXT VALUES #
- ###############
-
- MsDbInputStream$methods( nextValues = function() {
- stop("Method nextValues() not implemented in concrete class.")
- })
-
- ###################
- # HAS NEXT VALUES #
- ###################
-
- MsDbInputStream$methods( hasNextValues = function() {
- stop("Method hasNextValues() not implemented in concrete class.")
- })
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsDbLogger.R
--- a/MsDbLogger.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,32 +0,0 @@
-if ( ! exists('MsDbLogger')) { # Do not load again if already loaded
-
- source('MsDbObserver.R')
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsDbLogger <- setRefClass("MsDbLogger", contains = 'MsDbObserver', fields = list(.verbose = 'numeric', .file = 'ANY' ))
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- MsDbLogger$methods( initialize = function(verbose = 1, file = NULL, ...) {
-
- .verbose <<- if ( ! is.null(verbose) && ! is.na(verbose)) verbose else 1
- .file <<- if ( ! is.null(file) && ! is.na(file)) file else stderr()
-
- callSuper(...) # calls super-class initializer with remaining parameters
- })
-
- ############
- # PROGRESS #
- ############
-
- MsDbLogger$methods( progress = function(msg, level = 1) {
- if (.self$.verbose >= level)
- cat(msg, "\n", sep = '', file = .self$.file)
- })
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsDbObserver.R
--- a/MsDbObserver.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,32 +0,0 @@
-if ( ! exists('MsDbObserver')) { # Do not load again if already loaded
-
- library('methods')
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsDbObserver <- setRefClass("MsDbObserver", fields = list())
-
- ############
- # PROGRESS #
- ############
-
- MsDbObserver$methods( progress = function(msg, level = 1) {
- })
-
- ###########
- # WARNING #
- ###########
-
- MsDbObserver$methods( warning = function(msg) {
- })
-
- #########
- # ERROR #
- #########
-
- MsDbObserver$methods( error = function(msg) {
- })
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsDbOutputDataFrameStream.R
--- a/MsDbOutputDataFrameStream.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,145 +0,0 @@
-if ( ! exists('MsDbOutputDataFrameStream')) { # Do not load again if already loaded
-
- library(methods)
- source('MsDbOutputStream.R')
- source('dfhlp.R', chdir = TRUE)
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- MsDbOutputDataFrameStream <- setRefClass("MsDbOutputDataFrameStream", contains = 'MsDbOutputStream', fields = list( .df = "ANY", .output.fields = "ANY"))
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- MsDbOutputDataFrameStream$methods( initialize = function(keep.unused = FALSE, one.line = FALSE, match.sep = MSDB.DFT.MATCH.SEP, output.fields = NULL, multval.field.sep = MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP, first.val = FALSE, ascii = FALSE, noapostrophe = FALSE, noplusminus = FALSE, nogreek = FALSE, ...) {
-
- callSuper(keep.unused = keep.unused, one.line = one.line, match.sep = match.sep, multval.field.sep = multval.field.sep, first.val = first.val, ascii = ascii, noapostrophe = noapostrophe, noplusminus = noplusminus, nogreek = nogreek, ...)
-
- .df <<- data.frame()
- .output.fields <<- output.fields
- })
-
- ##################
- # GET DATA FRAME #
- ##################
-
- MsDbOutputDataFrameStream$methods( getDataFrame = function() {
-
- # Put at least a column name if empty
- if (nrow(.self$.df) == 0)
- .self$.df[[.self$.output.fields[[MSDB.TAG.MZ]]]] <- numeric()
-
- return(.self$.df)
- })
-
- # Move columns to beginning {{{1
-
- MsDbOutputDataFrameStream$methods( moveColumnsToBeginning = function(cols) {
- all.cols <- colnames(.self$.df)
- other.cols <- all.cols[ ! all.cols %in% cols]
- cols <- cols[cols %in% all.cols]
- .df <<- .self$.df[c(cols, other.cols)]
- })
-
- #################
- # MATCHED PEAKS #
- #################
-
- MsDbOutputDataFrameStream$methods( matchedPeaks = function(mz, rt = NULL, unused = NULL, peaks = NULL) {
-
- library(plyr)
-
- # Set input values
- x <- data.frame(mz = mz)
- colnames(x) <- MSDB.TAG.MZ
- if ( ! is.null(rt)) {
- x.rt <- data.frame(rt = rt)
- colnames(x.rt) <- MSDB.TAG.RT
- if (.self$.rtunit == MSDB.RTUNIT.MIN)
- x.rt[[MSDB.TAG.RT]] <- x.rt[[MSDB.TAG.RT]] / 60
- x <- cbind(x, x.rt)
- }
-
-
- # Merge input values with matched peaks
- if ( ! is.null(peaks)) {
-
- # No rows
- if (nrow(peaks) == 0) {
- # Add NA values
- peaks[1, ] <- NA
-
- # Process existing rows
- } else {
-
- # Convert RT
- if (.self$.rtunit == MSDB.RTUNIT.MIN)
- if (MSDB.TAG.COLRT %in% colnames(peaks))
- peaks[[MSDB.TAG.COLRT]] <- peaks[[MSDB.TAG.COLRT]] / 60
-
- # Process multi-value fields
- for (c in colnames(peaks))
- if (c %in% MSDB.MULTIVAL.FIELDS) {
-
- # Keep only first value in multi-value fields
- if (.self$.first.val)
- peaks[[c]] <- vapply(peaks[[c]], function(s) split.str(s, sep = MSDB.MULTIVAL.FIELD.SEP, unlist = TRUE)[[1]], FUN.VALUE = '')
-
- # Change separator
- else
- peaks[[c]] <- vapply(peaks[[c]], function(s) paste0(split.str(s, sep = MSDB.MULTIVAL.FIELD.SEP, unlist = TRUE), collapse = .self$.multval.field.sep), FUN.VALUE = '')
-
- }
-
- # Concatenate results in one line
- if (.self$.one.line) {
- # For each column, concatenate all values in one string.
- for (c in seq(peaks)) {
- v <- peaks[[c]]
- v <- v[ ! is.na(v)] # remove NA values
- v <- v[ ! duplicated(v)] # remove duplicates
- peaks[1, c] <- paste0(v, collapse = .self$.match.sep, FUN.VALUE = '')
- }
- peaks <- peaks[1, ] # Keep only first line
- }
- }
-
- # Merge
- x <- cbind(x, peaks, row.names = NULL)
- }
-
- # Rename columns for output
- x <- rename.col(x, names(.self$.output.fields), .self$.output.fields)
-
- # Add unused columns
- if ( .self$.keep.unused && ! is.null(unused)) {
- x <- cbind(x, unused, row.names = NULL)
- }
-
- # Convert strings to ASCII
- if (.self$.ascii || .self$.noapostrophe || .self$.noplusminus || .self$.nogreek)
- for (c in seq(x))
- if (class(x[[c]]) == 'character') {
- if (.self$.noapostrophe)
- x[[c]] <- gsub("'", 'prime', x[[c]], perl = TRUE)
- if (.self$.noplusminus)
- x[[c]] <- gsub('±', '+-', x[[c]], perl = TRUE)
- if (.self$.nogreek) {
- x[[c]] <- gsub('α', 'alpha', x[[c]], perl = TRUE)
- x[[c]] <- gsub('β', 'beta', x[[c]], perl = TRUE)
- x[[c]] <- gsub('γ', 'gamma', x[[c]], perl = TRUE)
- x[[c]] <- gsub('δ', 'delta', x[[c]], perl = TRUE)
- }
- if (.self$.ascii) {
- x[[c]] <- gsub('[^\u0001-\u007F]', '_', x[[c]], perl = TRUE)
- }
- }
-
- # Add new rows to data frame
- .df <<- rbind.fill(.self$.df, x)
- })
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsDbOutputStream.R
--- a/MsDbOutputStream.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,47 +0,0 @@
-if ( ! exists('MsDbOutputStream')) { # Do not load again if already loaded
-
- library('methods')
- source('msdb-common.R')
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- # Base class of the output streams. It only holds the output formatting
- # options; matchedPeaks() must be provided by the concrete classes.
- MsDbOutputStream <- setRefClass(
-   "MsDbOutputStream",
-   fields = list(
-     .keep.unused = "logical",
-     .one.line = "logical",
-     .match.sep = "character",
-     .multval.field.sep = "character",
-     .first.val = "logical",
-     .ascii = "logical",
-     .noapostrophe = "logical",
-     .noplusminus = "logical",
-     .nogreek = "logical",
-     .rtunit = 'character'
-   )
- )
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- #' Constructor.
- #'
- #' Forwards the extra arguments to the parent constructor, then stores each
- #' output option in the field of the same name (prefixed with a dot).
- #'
- #' @param keep.unused Set to \code{TRUE} to keep, in the output, the unused columns of the input.
- #' @param one.line Set to \code{TRUE} to output only one line for each input line.
- #' @param match.sep Character string stored in \code{.match.sep}. Defaults to \code{MSDB.DFT.MATCH.SEP}.
- #' @param multval.field.sep Character string stored in \code{.multval.field.sep}. Defaults to \code{MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP}.
- #' @param first.val Logical stored in \code{.first.val}.
- #' @param ascii Logical stored in \code{.ascii}.
- #' @param noapostrophe Logical stored in \code{.noapostrophe}.
- #' @param noplusminus Logical stored in \code{.noplusminus}.
- #' @param nogreek Logical stored in \code{.nogreek}.
- #' @param rtunit Retention time unit stored in \code{.rtunit}. Defaults to \code{MSDB.RTUNIT.SEC}.
- #' @param ... Arguments passed on to the parent constructor through \code{callSuper()}.
- #' @return A new instance of the class.
- #' @examples
- #' stream <- MsDbOutputDataFrameStream$new(one.line = TRUE)
- MsDbOutputStream$methods(
-   initialize = function(keep.unused = FALSE,
-                         one.line = FALSE,
-                         match.sep = MSDB.DFT.MATCH.SEP,
-                         multval.field.sep = MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP,
-                         first.val = FALSE,
-                         ascii = FALSE,
-                         noapostrophe = FALSE,
-                         noplusminus = FALSE,
-                         nogreek = FALSE,
-                         rtunit = MSDB.RTUNIT.SEC,
-                         ...) {
-
-     # Let the parent class initialize itself first.
-     callSuper(...)
-
-     # Layout of the output
-     .keep.unused <<- keep.unused
-     .one.line <<- one.line
-     .first.val <<- first.val
-
-     # Separators
-     .match.sep <<- match.sep
-     .multval.field.sep <<- multval.field.sep
-
-     # Character conversion switches
-     .ascii <<- ascii
-     .noapostrophe <<- noapostrophe
-     .noplusminus <<- noplusminus
-     .nogreek <<- nogreek
-
-     # Retention time unit
-     .rtunit <<- rtunit
-   }
- )
-
- #################
- # MATCHED PEAKS #
- #################
-
- # Placeholder for the method that receives matched peaks: this base
- # implementation always fails, so the concrete classes have to override it.
- MsDbOutputStream$methods(
-   matchedPeaks = function(mz, rt = NULL, unused = NULL, peaks = NULL) {
-     stop("Method matchedPeaks() not implemented in concrete class.")
-   }
- )
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 MsFileDb.R
--- a/MsFileDb.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,485 +0,0 @@\n-if ( ! exists(\'MsFileDb\')) { # Do not load again if already loaded\n-\n-\tlibrary(\'methods\')\n-\tsource(\'MsDb.R\')\n-\tsource(\'msdb-common.R\')\n-\tsource(\'search.R\', chdir = TRUE)\n-\n-\t#####################\n-\t# CLASS DECLARATION #\n-\t#####################\n-\t\n-\tMsFileDb <- setRefClass("MsFileDb", contains = "MsDb", fields = list(.file = "character", .db = "ANY", .fields = "list", .modes = "list", .name.to.id = "ANY"))\n-\t\n-\t###############\n-\t# CONSTRUCTOR #\n-\t###############\n-\t\n-\tMsFileDb$methods( initialize = function(file = NA_character_, ...) {\n-\n-\t\t# Initialize members\n-\t\t.file <<- if ( ! is.null(file)) file else NA_character_\n-\t\t.db <<- NULL\n-\t\t.fields <<- msdb.get.dft.db.fields()\n-\t\t.modes <<- MSDB.DFT.MODES\n-\t\t.name.to.id <<- NULL\n-\t\n-\t\tcallSuper(...)\n-\t})\n-\t\n-\t#################\n-\t# SET DB FIELDS #\n-\t#################\n-\t\n-\tMsFileDb$methods( areDbFieldsSettable = function() {\n-\t\treturn(TRUE)\n-\t})\n-\t\n-\tMsFileDb$methods( setDbFields = function(fields) {\n-\t\t.fields <<- as.list(fields)\n-\t})\n-\t\n-\t################\n-\t# CHECK FIELDS #\n-\t################\n-\t\n-\tMsFileDb$methods( .check.fields = function(fields) {\n-\n-\t\tif (is.null(fields))\n-\t\t\tstop("No fields specified for .check.fields()")\n-\n-\t\t# Check that fields are defined in the fields list\n-\t\tunknown <- fields[ ! fields %in% names(.self$.fields)]\n-\t\tif (length(unknown) > 0)\n-\t\t\tstop(paste0("Database field", if (length(unknown) == 1) "" else "s", " \\"", paste(unkown, collapse = ", "), "\\" ", if (length(unknown) == 1) "is" else "are", " not defined."))\n-\n-\t\t# Check that field values are real columns inside the database\n-\t\t.self$.init.db()\n-\t\tdb.col.names <- fields #vapply(fields, function(s) .self$.fields[[s]], FUN.VALUE = \'\')\n-\t\tunknown.cols <- db.col.names[ ! 
db.col.names %in% colnames(.self$.db)]\n-\t\tif (length(unknown.cols) > 0)\n-\t\t\tstop(paste0("Column", if (length(unknown.cols) == 1) "" else "s", " \\"", paste(unknown.cols, collapse = ", "), "\\" ", if (length(unknown.cols) == 1) "is" else "are", " not defined inside the database \\"", .self$.file, "\\"."))\n-\t})\n-\n-\t################\n-\t# SET MS MODES #\n-\t################\n-\t\n-\tMsFileDb$methods( areDbMsModesSettable = function() {\n-\t\treturn(TRUE)\n-\t})\n-\t\n-\tMsFileDb$methods( setDbMsModes = function(modes) {\n-\t\t.modes <<- as.list(modes)\n-\t})\n-\t\n-\t###########\n-\t# INIT DB #\n-\t###########\n-\n-\tMsFileDb$methods( .init.db = function() {\n-\n-\t\tif (is.null(.self$.db)) {\n-\n-\t\t\t# Load database\n-\t\t\t.db <<- read.table(.self$.file, sep = "\\t", quote = "\\"", header = TRUE, stringsAsFactors = FALSE, row.names = NULL, check.names = FALSE, comment.char = \'\')\n-\n-\t\t\t# Check that colnames are unique\n-\t\t\tdupcol <- duplicated(colnames(.self$.db))\n-\t\t\tif (any(dupcol))\n-\t\t\t\tstop(paste("Database header contains duplicated names: ", paste(unique(colnames(.self$.db)[dupcol]), collapse = \', \'), "."))\n-\n-\t\t\t# Check that columns names supplied through field map are unique\n-\t\t\tdupfields <- duplicated(.self$.fields)\n-\t\t\tif (any(dupfields))\n-\t\t\t\tstop(paste("Some db column names supplied are duplicated: ", paste(unique(.self$.fields[dupfields]), collapse = \', \'), "."))\n-\n-\t\t\t# Rename columns\n-\t\t\tcolnames(.self$.db) <- vapply(colnames(.self$.db), function(c) if (c %in% .self$.fields) names(.self$.fields)[.self$.fields %in% c] else c, FUN.VALUE = \'\')\n-\t\t}\n-\t})\n-\n-\t############\n-\t# GET DATA #\n-\t############\n-\n-\tMsFileDb$methods( .get = function(db = NULL, col = NULL) {\n-\t\n-\t\t# Init db\n-\t\tif (is.null(db)) {\n-\t\t\t.self$.init.db()\n-\t\t\tdb <- .self$.db\n-\t\t}\n-\n-\t\t# Check fields\n-\t\t.self$.check.fields(col)\n-\n-\t\t# Get database columns\n-#\t\tdb.cols <- 
unlist(.self$.fields[col])\n-\n-\t\treturn(db[, col])\n-\t})\n-\n-\t###########\n-\t# GET ROW #\n-\t###########\n-\n-\tMsFileDb$methods( .get.row = function(row, cols = NULL) {\n-\t\n-\t\t# Init db\n-\t\t.self$.init.db()\n-\n-\t\t# Check fields\n-\t\tif ( ! is.null(cols))\n-\t\t\t.self$.check.fields(cols)\n-\n-\t\tif ( ! is.null(cols)) {\n-\t\t\t#cols <- vapply(cols, function(c) .self$.fields[[c]], FUN.VALUE = \'\')\n-\t\t\treturn(.self$.db[row, cols])\n-\t\t}\n-\n-\t\treturn(.self$'..b'FileDb$methods( getNbPeaks = function(molid = NA_integer_, type = NA_character_) {\n-\n-\t\t# Init db\n-\t\t.self$.init.db()\n-\n-\t\t# Get database\n-\t\tdb <- .self$.db[, c(MSDB.TAG.MOLID, MSDB.TAG.MODE, MSDB.TAG.MZTHEO)]\n-\n-\t\t# Filter on mode\n-\t\tif ( ! is.null(type) && ! is.na(type))\n-\t\t\tdb <- db[db[[MSDB.TAG.MODE]] == (if (type == MSDB.TAG.POS) .self$.modes$pos else .self$.modes$neg), ]\n-\n-\t\t# Filter on molecule IDs\n-\t\tif ( ! is.null(molid) && ! is.na(molid))\n-\t\t\tdb <- db[db[[MSDB.TAG.MOLID]] %in% molid,]\n-\n-\t\t# Get mz values\n-\t\tmz <- db[[MSDB.TAG.MZTHEO]]\n-\n-\t\t# Count number of unique values\n-\t\tn <- sum(as.integer(! duplicated(mz)))\n-\n-\t\treturn(n)\n-\t})\n-\n-\t##########\n-\t# SEARCH #\n-\t##########\n-\n-\tMsFileDb$methods( .do.search.for.mz.rt.bounds = function(mode, mz.low, mz.high, rt.low = NULL, rt.high = NULL, col = NULL, attribs = NULL, molids = NULL) {\n-\n-\t\t# Init db\n-\t\t.self$.init.db()\n-\t\tdb <- .self$.db\n-\n-\t\t# Filter on mode\n-\t\tif ( ! is.null(mode) && ! is.na(mode))\n-\t\t\tdb <- db[db[[MSDB.TAG.MODE]] == (if (mode == MSDB.TAG.POS) .self$.modes$pos else .self$.modes$neg), ]\n-\n-\t\t# Filter on molecule IDs\n-\t\tif ( ! is.null(molids))\n-\t\t\tdb <- db[db[[MSDB.TAG.MOLID]] %in% molids,]\n-\n-\t\t# Filter on attributions\n-\t\tif ( ! is.null(attribs) && ! is.na(attribs))\n-\t\t\tdb <- db[db[[MSDB.TAG.ATTR]] %in% attribs,]\n-\n-\t\t# Filter on columns\n-\t\tif ( ! is.null(col) && ! 
is.na(col))\n-\t\t\tdb <- db[db[[MSDB.TAG.COL]] %in% col,]\n-\n-\t\t# Filter on retention time\n-\t\tif ( ! is.null(rt.low) && ! is.na(rt.low) && ! is.null(rt.high) && ! is.na(rt.high)) {\n-\t\t\tscale <- if (.self$getRtUnit() == MSDB.RTUNIT.MIN) 60 else 1\n-\t\t\tdb <- db[db[[MSDB.TAG.COLRT]] * scale >= rt.low & db[[MSDB.TAG.COLRT]] * scale <= rt.high, ]\n-\t\t}\n-\n-\t\t# Remove retention times and column information\n-\t\tif (is.null(col) || is.na(col) || is.null(rt.low) || is.na(rt.low) || is.null(rt.high) || is.na(rt.high)) {\n-\t\t\tdb <- db[, ! (colnames(db) %in% c(MSDB.TAG.COL, MSDB.TAG.COLRT))]\n-\n-\t\t\t# Remove duplicates\n-\t\t\tdb <- db[ ! duplicated(db), ]\n-\t\t}\n-\n-\t\t# Filter on mz\n-\t\tdb <- db[db[[MSDB.TAG.MZTHEO]] >= mz.low & db[[MSDB.TAG.MZTHEO]] <= mz.high, ]\n-\n-\t\treturn(db)\n-\t})\n-\t\n-\t#################\n-\t# GET MZ VALUES #\n-\t#################\n-\t\n-\t# Returns a numeric vector of all masses stored inside the database.\n-\tMsFileDb$methods( getMzValues = function(mode = NULL, max.results = NA_integer_) {\n-\n-\t\t# Init db\n-\t\t.self$.init.db()\n-\t\tdb <- .self$.db\n-\n-\t\t# Filter on mode\n-\t\tif ( ! is.null(mode) && ! is.na(mode)) {\n-\t\t\tmode.tag <- if (mode == MSDB.TAG.POS) .self$.modes$pos else .self$.modes$neg\n-\t\t\tselected.lines <- (.self$.get(db, col = MSDB.TAG.MODE) == mode.tag)\n-\t\t\tdb <- db[selected.lines, ]\n-\t\t}\n-\n-\t\t# Get masses\n-\t\tmz <- .self$.get(db, col = MSDB.TAG.MZTHEO)\n-\n-\t\t# Remove duplicates\n-\t\tmz <- mz[ ! duplicated(mz)]\n-\n-\t\t# Apply cut-off\n-\t\tif ( ! is.na(max.results))\n-\t\t\tmz <- mz[1:max.results]\n-\n-\t\treturn(mz)\n-\t})\n-\t\n-\t#######################\n-\t# GET RETENTION TIMES #\n-\t#######################\n-\t\n-\t# Get the retention times of a molecule.\n-\t# Returns a list of numeric vectors. The list has for keys/names the columns, and for values vectors of numerics (the retention times). 
If no retention times are registered for this molecule, then returns an empty list.\n-\tMsFileDb$methods( getRetentionTimes = function(molid, col = NA_character_) {\n-\n-\t\tif (is.null(molid) || is.na(molid))\n-\t\t\treturn(list())\n-\n-\t\t# Init db\n-\t\t.self$.init.db()\n-\t\tdb <- .self$.db[, c(MSDB.TAG.MOLID, MSDB.TAG.COL, MSDB.TAG.COLRT)]\n-\n-\t\t# Filter on molecule ID\n-\t\tif ( ! is.null(molid) && ! is.na(molid))\n-\t\t\tdb <- db[db[[MSDB.TAG.MOLID]] %in% molid,]\n-\n-\t\t# Remove duplicates\n-\t\tdb <- db[! duplicated(db), ]\n-\n-\t\t# Build retention time list\n-\t\trt <- list()\n-\t\tcols <- db[[MSDB.TAG.COL]]\n-\t\tcols <- cols[ ! duplicated(cols)]\n-\t\tfor (col in cols) {\n-\t\t\tcolrts <- db[db[[MSDB.TAG.COL]] %in% col, MSDB.TAG.COLRT]\n-\t\t\trt[col] <- list(colrts)\n-\t\t}\n-\n-\t\tif (.self$getRtUnit() == MSDB.RTUNIT.MIN)\n-\t\t\trt <- 60 * rt\n-\n-\t\treturn(rt)\n-\t})\n-\n-} # end of load safe guard\n'

diff -r fb9c0409d85c -r f86fec07f392 MsPeakForestDb.R
--- a/MsPeakForestDb.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,325 +0,0 @@\n-if ( ! exists(\'MsPeakForestDb\')) { # Do not load again if already loaded\n-\n-\tlibrary(methods)\n-\tsource(\'MsDb.R\')\n-\tsource(\'UrlRequestScheduler.R\')\n-\n-\t#####################\n-\t# CLASS DECLARATION #\n-\t#####################\n-\t\n-\tMsPeakForestDb <- setRefClass("MsPeakForestDb", contains = "MsDb", fields = list(.url = "character", .url.scheduler = "ANY", .token = "character"))\n-\t\n-\t###############\n-\t# CONSTRUCTOR #\n-\t###############\n-\t\n-\tMsPeakForestDb$methods( initialize = function(url = NA_character_, useragent = NA_character_, token = NA_character_, ...) {\n-\n-\t\tcallSuper(...)\n-\n-\t\t# Check URL\n-\t\tif (is.null(url) || is.na(url))\n-\t\t stop("No URL defined for new MsPeakForestDb instance.")\n-\n-\t\tif (substring(url, nchar(url) - 1, 1) == \'/\')\n-\t\t\turl <- substring(url, nchar(url) - 1)\n-\t\t.url <<- url\n-\t\t.url.scheduler <<- UrlRequestScheduler$new(n = 3, useragent = useragent)\n-\t\t.self$.url.scheduler$setVerbose(1L)\n-\t\t.token <<- token\n-\t\t.rt.unit <<- MSDB.RTUNIT.MIN\n-\t})\n-\n-\t###########\n-\t# GET URL #\n-\t###########\n-\n-\tMsPeakForestDb$methods( .get.url = function(url, params = NULL, ret.type = \'json\') {\n-\n-\t\tres <- NULL\n-\n-\t\t# Add url prefix\n-\t\tif (substring(url, 1, 1) == \'/\')\n-\t\t\turl <- substring(url, 2)\n-\t\turl <- paste(.self$.url, url, sep = \'/\')\n-\n-\t\t# Add token\n-\t\tif ( ! is.na(.self$.token))\n-\t\t\tparams <- c(params, token = .self$.token)\n-\n-\t\t# Get URL\n-\t\tcontent <- .self$.url.scheduler$getUrl(url = url, params = params)\n-\n-\t\tif (ret.type == \'json\') {\n-\n-\t\t\tres <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)\n-\n-\t\t\tif (is.null(res)) {\n-\t\t\t\tparam.str <- if (is.null(params)) \'\' else paste(\'?\', vapply(names(params), function(p) paste(p, params[[p]], sep = \'=\'), FUN.VALUE = \'\'), collapse = \'&\', sep = \'\')\n-\t\t\t\tstop(paste0("Failed to run web service. 
URL was \\"", url, param.str, "\\"."))\n-\t\t\t}\n-\t\t} else {\n-\t\t\tif (ret.type == \'integer\') {\n-\t\t\t\tif (grepl(\'^[0-9]+$\', content, perl = TRUE))\n-\t\t\t\t\tres <- as.integer(content)\n-\t\t\t\telse {\n-\t\t\t\t\tres <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\n-\t\treturn(res)\n-\t})\n-\n-\t####################\n-\t# GET MOLECULE IDS #\n-\t####################\n-\t\n-\tMsPeakForestDb$methods( getMoleculeIds = function() {\n-\n-\t\tids <- as.character(.self$.get.url(url = \'compounds/all/ids\'))\n-\n-\t\treturn(ids)\n-\t})\n-\n-\t####################\n-\t# GET NB MOLECULES #\n-\t####################\n-\t\n-\tMsPeakForestDb$methods( getNbMolecules = function() {\n-\n-\t\tn <- .self$.get.url(url = \'compounds/all/count\', ret.type = \'integer\')\n-\n-\t\treturn(n)\n-\t})\n-\t\n-\t###############################\n-\t# GET CHROMATOGRAPHIC COLUMNS #\n-\t###############################\n-\t\n-\tMsPeakForestDb$methods( getChromCol = function(molid = NULL) {\n-\n-\t\t# Set URL\n-\t\tparams <- NULL\n-\t\tif ( ! is.null(molid))\n-\t\t\tparams <- list(molids = paste(molid, collapse = \',\'))\n-\n-\t\t# Call webservice\n-\t\twscols <- .self$.get.url(url = \'metadata/lc/list-code-columns\', params = params)\n-\n-\t\t# Build data frame\n-\t\tcols <- data.frame(id = character(), title = character())\n-\t\tfor(id in names(wscols))\n-\t\t\tcols <- rbind(cols, data.frame(id = id, title = wscols[[id]]$name, stringsAsFactors = FALSE))\n-\n-\t\treturn(cols)\n-\t})\n-\t\n-\t#######################\n-\t# GET RETENTION TIMES #\n-\t#######################\n-\t\n-\tMsPeakForestDb$methods( getRetentionTimes = function(molid, col = NA_character_) {\n-\n-\t\tif (is.null(molid) || is.na(molid) || length(molid) != 1)\n-\t\t\tstop("The parameter molid must consist only in a single value.")\n-\n-\t\trt <- list()\n-\n-\t\t# Set URL\n-\t\tparams <- NULL\n-\t\tif ( ! 
is.null(molid))\n-\t\t\tparams <- list(molids = paste(molid, collapse = \',\'))\n-\n-\t\t# Call webservice\n-\t\tspectra <- .self$.get.url(url = \'spectra/lcms/search\', params = params)\n-\t\tif (class(spectra) == \'list\' && length(spectra) > 0) {\n-\t\t\tfor (s in spectra)\n-\t\t\t\tif (is.na(col) || s$liquidChromatography$columnCode %in% col) {\n-\t\t\t\t\tret.time <- (s$RTmin + s$RTmax) / 2\n-\t\t\t\t\tret.time <- ret.time * 60 # Retention time are in minutes in Peakforest, but we want them in seconds\n-\t\t'..b"ter_\n-\t\t\t\tif ('listOfCompounds' %in% names(x$source)) {\n-\t\t\t\t\tmolids <- vapply(x$source$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')\n-\t\t\t\t\tmolnames <- vapply(x$source$listOfCompounds, function(c) if ('names' %in% names(c) && ! is.null(c$names)) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP) else NA_character_, FUN.VALUE = '')\n-\t\t\t\t\tmass <- vapply(x$source$listOfCompounds, function(c) if ( ! 'averageMass' %in% names(c) || is.null(c$averageMass)) NA_real_ else as.double(c$averageMass), FUN.VALUE = 0.0)\n-\t\t\t\t\tinchi <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChI' %in% names(c) || is.null(c$inChI)) NA_character_ else as.character(c$inChI), FUN.VALUE = '')\n-\t\t\t\t\tinchikey <- vapply(x$source$listOfCompounds, function(c) if ( ! 'inChIKey' %in% names(c) || is.null(c$inChIKey)) NA_character_ else as.character(c$inChIKey), FUN.VALUE = '')\n-\t\t\t\t\tchebi <- vapply(x$source$listOfCompounds, function(c) if ('ChEBI' %in% names(c) && ! is.null(c$ChEBI)) as.character(c$ChEBI) else NA_character_, FUN.VALUE = '')\n-\t\t\t\t\tchebi[chebi == 'CHEBI:null'] <- NA_character_\n-\t\t\t\t\thmdb <- vapply(x$source$listOfCompounds, function(c) if ('HMDB' %in% names(c) && ! 
is.null(c$HMDB)) as.character(c$HMDB) else NA_character_, FUN.VALUE = '')\n-\t\t\t\t\thmdb[hmdb == 'HMDBnull'] <- NA_character_\n-\t\t\t\t\tkegg <- vapply(x$source$listOfCompounds, function(c) if ( ! 'KEGG' %in% names(c) || is.null(c$KEGG)) NA_character_ else as.character(c$KEGG), FUN.VALUE = '')\n-\t\t\t\t\tpubchem <- vapply(x$source$listOfCompounds, function(c) if ( ! 'PubChemCID' %in% names(c) || is.null(c$PubChemCID)) NA_character_ else as.character(c$PubChemCID), FUN.VALUE = '')\n-\t\t\t\t\tif (length(molids) > 0 && length(molids) == length(molnames))\n-\t\t\t\t\t\tresults <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE))\n-\t\t\t\t}\n-\t\t}\n-\n-\t\t# RT search\n-\t\tif ( ! is.null(rt.low) && ! is.null(rt.high)) {\n-\n-\t\t\trt.res <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.COL = character(), MSDB.TAG.COLRT = numeric())\n-\n-\t\t\tif (nrow(results) > 0) {\n-\n-\t\t\t\t# Build URL for rt search\n-\t\t\t\turl <- paste0('spectra/lcms/range-rt-min/', rt.low / 60, '/', rt.high / 60)\n-\t\t\t\tparams <- NULL\n-\t\t\t\tif ( ! is.null(col))\n-\t\t\t\t\tparams <- c(columns = paste(col, collapse = ','))\n-\n-\t\t\t\t# Run query\n-\t\t\t\trtspectra <- .self$.get.url(url = url, params = params)\n-\n-\t\t\t\t# Get compound/molecule IDs\n-\t\t\t\tfor (x in rtspectra)\n-\t\t\t\t\tif (all(c('listOfCompounds', 'liquidChromatography') %in% names(x))) {\n-\t\t\t\t\t\tmolids <- vapply(x$listOfCompounds, function(c) if ('id' %in% names(c) && ! is.null(c$id)) as.character(c$id) else NA_character_, FUN.VALUE = '')\n-\t\t\t\t\t\tif (length(molids) > 0) {\n-\t\t\t\t\t\t\tcol <- if ('columnCode' %in% names(x$liquidChromatography) && ! 
is.null(x$liquidChromatography$columnCode)) as.character(x$liquidChromatography$columnCode) else NA_character_\n-\t\t\t\t\t\t\trtmin <- if ('RTmin' %in% names(x) && ! is.null(x$RTmin)) as.double(x$RTmin) else NA_real_\n-\t\t\t\t\t\t\trtmax <- if ('RTmax' %in% names(x) && ! is.null(x$RTmax)) as.double(x$RTmax) else NA_real_\n-\t\t\t\t\t\t\tcolrt <- (rtmin + rtmax) / 2\n-\t\t\t\t\t\t\trt.res <- rbind(rt.res, data.frame(MSDB.TAG.MOLID = molids,\n-\t\t\t\t \t \t \t MSDB.TAG.COL = col,\n-\t\t\t\t \t \t \t MSDB.TAG.COLRT = colrt * 60,\n-\t\t\t\t\t \t \t stringsAsFactors = FALSE))\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t}\t\n-\n-\t\t\t# Add retention times and column info\n-\t\t\tresults <- merge(results, rt.res)\n-\t\t}\n-\t\t\n-\t\t# Rename columns with proper names\n-\t\tcolnames(results) <- vapply(colnames(results), function(s) eval(parse(text=s)), FUN.VALUE = '')\n-\n-\t\treturn(results)\n-\t})\n-}\n"

diff -r fb9c0409d85c -r f86fec07f392 MsXlsDb.R
--- a/MsXlsDb.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,852 +0,0 @@\n-if ( ! exists(\'MsXlsDb\')) { # Do not load again if already loaded\n-\n-\tlibrary(\'methods\')\n-\tlibrary(\'stringr\')\n-\tsource(\'msdb-common.R\')\n-\tsource(\'MsDb.R\')\n-\tsource(\'strhlp.R\', chdir = TRUE)\n-\tsource(\'dfhlp.R\', chdir = TRUE)\n-\tsource(\'search.R\', chdir = TRUE)\n-\tsource(\'excelhlp.R\', chdir = TRUE)\n-\t\n-\t#############\n-\t# CONSTANTS #\n-\t#############\n-\t\n-\t.THIS.FILE.PATH <- getwd() # We suppose that the file has been sourced with the option chdir = TRUE\n-\n-\t.XLS_PEAKS_ROW_OFFSET <- 8\n-\t.XLS_PEAKS_RT_COL_START <- 11\n-\t.XLS_MSPOS_TAB <- \'MS_POS\'\n-\t.XLS_MSNEG_TAB <- \'MS_NEG\'\n-\t.XLS_MZ_COL <- 1\n-\t.XLS_INTENSITY_COL <- 2\n-\t.XLS_RELATIVE_COL <- 3\n-\t.XLS_THEORETICAL_MZ_COL <- 5\n-\t.XLS_COMPOSITION_COL <- 8\n-\t.XLS_ATTRIBUTION_COL <- 9\n-\t\n-\t#####################\n-\t# CLASS DECLARATION #\n-\t#####################\n-\t\n-\tMsXlsDb <- setRefClass("MsXlsDb", contains = "MsDb", fields = list(.mz.index = "ANY", .name_index = "ANY", .db_dir = "character", .limit = "numeric", .files = "ANY", .cache_dir = "character", .db = "ANY"))\n-\t\n-\t###############\n-\t# CONSTRUCTOR #\n-\t###############\n-\t\n-\tMsXlsDb$methods( initialize = function(db_dir = NA_character_, limit = NA_integer_, cache_dir = NA_character_, cache = FALSE, ...) {\n-\n-\t\t# Initialize members\n-\t\t # TODO check that db_dir is not null neither na, and tests that it exists and is a directory.\n-\t\t.db_dir <<- if ( ! is.null(db_dir)) db_dir else NA_character_\n-\t\t.limit <<- if ( ! is.null(limit) && ! is.na(limit) && limit > 0) limit else NA_integer_\n-\t\tcache_dir <- if (cache && is.na(cache_dir) && ! is.na(db_dir)) file.path(db_dir, \'cache\') else cache_dir\n-\t\t.cache_dir <<- if ( cache || ! 
is.null(cache_dir)) cache_dir else NA_character_\n-\t\t.files <<- NULL\n-\t\t.db <<- NULL\n-\t\t.mz.index <<- NULL\n-\t\t.name_index <<- NULL\n-\t\n-\t\tcallSuper(...)\n-\t})\n-\t\n-\t####################\n-\t# GET MOLECULE IDS #\n-\t####################\n-\t\n-\tMsXlsDb$methods( getMoleculeIds = function(max.results = NA_integer_) {\n-\t\n-\t\t# Init file list\n-\t\t.self$.init.file.list()\n-\n-\t\t# Get IDs\n-\t\tmol.ids <- as.integer(which( ! is.na(.self$.files)))\n-\n-\t\t# Cut\n-\t\tif ( ! is.na(max.results) && length(mol.ids) > max.results)\n-\t\t\tmol.ids <- mol.ids[max.results, ]\n-\n-\t\treturn(mol.ids)\n-\t})\n-\t\n-\t####################\n-\t# GET NB MOLECULES #\n-\t####################\n-\t\n-\t# Returns a list of all molecule names\n-\tMsXlsDb$methods( getNbMolecules = function() {\n-\t\treturn(length(.self$getMoleculeIds()))\n-\t})\n-\n-\t#####################\n-\t# GET MOLECULE NAME #\n-\t#####################\n-\t\n-\tMsXlsDb$methods( getMoleculeName = function(molid) {\n-\t\treturn(vapply(molid, function(m) .self$.get.mol.name(m), FUN.VALUE = ""))\n-\t})\n-\t\n-\t###############################\n-\t# GET CHROMATOGRAPHIC COLUMNS #\n-\t###############################\n-\t\n-\t# Returns a list of all chromatographic columns used\n-\tMsXlsDb$methods( getChromCol = function(molid = NULL) {\n-\t\n-\t cn <- character()\n-\n-\t\t# If no molecule IDs provided, then look at all molecules\n-\t if\t(is.null(molid))\n-\t \tmolid <- .self$getMoleculeIds()\n-\n-\t\t# Loop on molecules\n-\t\tfor (mid in molid) {\n-\n-\t \trt <- .self$getRetentionTimes(mid)\n-\n-\t\t\tif ( ! is.null(rt))\n-\t\t\t\tcn <- c(cn, names(rt))\n-\t\t}\n-\t\n-\t\t# Remove duplicates\n-\t\tcn <- cn[ ! 
duplicated(cn)]\n-\n-\t\t# Make data frame\n-\t\tcn <- data.frame(id = cn, title = cn, stringsAsFactors = FALSE)\n-\n-\t\treturn(cn)\n-\t})\n-\n-\t################\n-\t# FIND BY NAME #\n-\t################\n-\n-\tMsXlsDb$methods( findByName = function(name) {\n-\n-\t\t# NULL entry\n-\t\tif (is.null(name))\n-\t\t\treturn(NA_integer_)\n-\t\n-\t\t# Initialize output list\n-\t\tids <- NULL\n-\n-\t\tfor (n in name) {\n-\n-\t\t\tid <- NA_integer_\n-\n-\t\t\tif ( ! is.na(n)) {\n-\n-\t\t\t\t# Get index\n-\t\t\t\tindex <- .self$.get.name.index()\n-\n-\t\t\t\t# Search for name in index\n-\t\t\t\ti <- binary.search(toupper(n), index[[\'name\']])\n-\n-\t\t\t\tid <- if (is.na(i)) NA_integer_ else index[i, \'id\']\n-\t\t\t}\n-\n-\t\t\tids <- c(ids, id)\n-\t\t}\n-\n-\t\treturn(ids)\n-\t})\n-\t\n-\t#######################\n-\t# GET RETENTION TIMES #\n-\t#######################\n-\t\n-\tMsXlsDb$'..b'#################\n-\t# CHECK RETENTION TIMES #\n-\t#########################\n-\t\n-\tMsXlsDb$methods( .check_retention_times = function(id, tab_name, column_name, rt, n) {\n-\t\n-\t\tif (n >= 1 && ! is.null(.self$.observers) && length(.self$.observers) > 0)\n-\t\n-\t\t\t# Check column only if there is at least one value inside\n-\t\t\tif (sum( ! 
is.na(rt)) > 0)\n-\t\n-\t\t\t\t# Loop on all values\n-\t\t\t\tfor(i in 1:n) {\n-\t\n-\t\t\t\t\t# Check that it\'s defined\n-\t\t\t\t\tif (i > 1 && is.na(rt[[i]]))\n-\t\t\t\t\t\tfor (obs in .self$.observers)\n-\t\t\t\t\t\t\tobs$warning(paste0("Retention times undefined for column ", column_name, " at row ", i + .XLS_PEAKS_ROW_OFFSET, " of tab ", tab_name, " in file ", .self$.get.file(id), "."))\n-\t\n-\t\t\t\t\telse if (i > 1)\n-\t\t\t\t\t\t# Check the value (it must be constant)\n-\t\t\t\t\t\tif (rt[[i-1]] != rt[[i]])\n-\t\t\t\t\t\t\tfor (obs in .self$.observers)\n-\t\t\t\t\t\t\t\tobs$error(paste0("Retention times not constant for column ", column_name, " between row ", i - 1 + .XLS_PEAKS_ROW_OFFSET, " and row ", i + .XLS_PEAKS_ROW_OFFSET, "o tab", tab_name, "in file", .self$.get.file(id)))\n-\t\t\t\t}\n-\t})\n-\t\n-\t####################\n-\t# GET FILE FROM ID #\n-\t####################\n-\t\n-\tMsXlsDb$methods( .get.file = function(id) {\n-\t\n-\t\t# List files\n-\t\t.self$.init.file.list()\n-\t\n-\t\treturn( if (id > 0 && id <= length(.self$.files)) .self$.files[id] else NA_character_)\n-\t})\n-\t\n-\t###########\n-\t# MEM GET #\n-\t###########\n-\n-\t# Get database data from memory\n-\tMsXlsDb$methods( .mem.get = function(molid, field, second.field = NA_character_) {\n-\n-\t\tdata <- .self$.db[[as.character(molid)]][[field]]\n-\n-\t\tif ( ! is.na(second.field))\n-\t\t\tdata <- data[[second.field]]\n-\n-\t\treturn(data)\n-\t})\n-\t\n-\t###########\n-\t# MEM SET #\n-\t###########\n-\n-\t# Set database data into memory\n-\tMsXlsDb$methods( .mem.set = function(data, molid, field, second.field = NA_character_) {\n-\n-\t\tid <- as.character(molid)\n-\n-\t\t# Create db\n-\t\tif (is.null(.self$.db))\n-\t\t .db <<- list()\n-\n-\t\t# Create first level\n-\t\tif (is.null(.self$.db[[id]]))\n-\t\t\t.self$.db[[id]] <- list()\n-\n-\t\t# Create second level\n-\t\tif ( ! 
is.na(second.field) && is.null(.self$.db[[id]][[field]]))\n-\t\t\t.self$.db[[id]][[field]] <- list()\n-\n-\t\t# Store data\n-\t\tif (is.na(second.field)) {\n-\t\t\t.self$.db[[id]][[field]] <- data\n-\t\t} else {\n-\t\t\t.self$.db[[id]][[field]][[second.field]] <- data\n-\t\t}\n-\t})\n-\n-\t#################\n-\t# SEARCH FOR RT #\n-\t#################\n-\n-\t# Find molecules matching a certain retention time.\n-\t# col A list of chromatographic columns to use.\n-\t# rt.low The lower bound of the rt value.\n-\t# rt.high The higher bound of the rt value.\n-\t# mols A list of molecule IDs to process. If unset, then take all molecules.\n-\t# Return a data frame with the following columns: id, col, colrt.\n-\tMsXlsDb$methods( .search.for.rt = function(col, rt.low, rt.high, mols = NULL) {\n-\n-\t\t# Use all molecules if no list is provided\n-\t\tif (is.null(mols))\n-\t\t\tmols <- .self$getMoleculeIds()\n-\n-\t\tresults <- data.frame(id = integer(), col = character(), colrt = double(), stringsAsFactors = FALSE)\n-\t\tcolnames(results) <- c(MSDB.TAG.MOLID, MSDB.TAG.COL, MSDB.TAG.COLRT)\n-\n-\t\t# Loop on all molecules\n-\t\tfor (molid in mols) {\n-\t\t\tno.col <- TRUE\n-\t\t\tfor (c in col) {\n-\t\t\t\tmolrts <- .self$getRetentionTimes(molid, c)\n-\t\t\t\tif ( ! is.null(molrts)) {\n-\t\t\t\t\tno.col <- FALSE\n-\t\t\t\t\tfor (molrt in molrts) {\n-\t\t\t\t\t\tif (molrt >= rt.low && molrt <= rt.high) {\n-\t\t\t\t\t\t\tr <- nrow(results) + 1\n-\t\t\t\t\t\t\tresults[r, ] <- c(id = molid, col = c, colrt = molrt)\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t}\n-\n-\t\t\tif (no.col) {\n-\t\t\t\tr <- nrow(results) + 1\n-\t\t\t\tresults[r, c(MSDB.TAG.MOLID)] <- c(id = molid)\n-\t\t\t}\n-\t\t}\n-\n-\t\treturn(results)\n-\t})\n-\n-\t############################\n-\t# EXTRACT ID FROM FILENAME #\n-\t############################\n-\t\n-\t.extract_molecule_id_from_filename <- function(filename) {\n-\t\n-\t\tid <- NA_integer_\n-\t\n-\t\tif ( ! 
is.na(filename)) {\n-\t\t\tg <- str_match(filename, "N(\\\\d+)[._-]")\n-\t\t\tif ( ! is.na(g[1,1]))\n-\t\t\t\tid <- as.numeric(g[1,2])\n-\t\t}\n-\t\n-\t\treturn(id)\n-\t}\n-\t\n-} # end of load safe guard\n'

diff -r fb9c0409d85c -r f86fec07f392 PeakforestConn.R
--- a/PeakforestConn.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,176 +0,0 @@
-#####################
-# CLASS DECLARATION #
-#####################
-#' Connection class for the PeakForest database.
-#'
-#' Inherits from both \code{RemotedbConn} and \code{MassdbConn}.
-#'
-#' @field .url The URL of the database.
-#' @export
-PeakforestConn <- methods::setRefClass(
- "PeakforestConn",
- contains = c("RemotedbConn", "MassdbConn"),
- fields = list(.url = "character")
-)
-
-##########################
-# GET ENTRY CONTENT TYPE #
-##########################
-
-# Entry contents are always JSON, whatever the requested entry type.
-PeakforestConn$methods(
- getEntryContentType = function(type) {
-   BIODB.JSON
- }
-)
-
-#####################
-# GET ENTRY CONTENT #
-#####################
-
-# Downloads the JSON content of each requested entry.
-# id        A vector of entry IDs.
-# RETURN    A character vector of the same length as id. An element is left
-#           to NA when no usable JSON content was obtained for its ID.
-PeakforestConn$methods( getEntryContent = function(id) {
-
- # Initialize return values
- content <- rep(NA_character_, length(id))
-
- # Request each entry in turn. seq_along() keeps the loop empty when no ID
- # is given.
- for (i in seq_along(id)) {
-
-   url <- get.entry.url(BIODB.PEAKFOREST, id[i], BIODB.JSON, token = .self$.token)
-   jsonstr <- .self$.get.url(url)
-
-   # Skip this ID when the answer is missing or is an HTML page instead of
-   # JSON (presumably an error page of the server).
-   if (length(jsonstr) != 1 || is.na(jsonstr) || startsWith(jsonstr, "<html>")) {
-     next
-   }
-
-   content[i] <- jsonstr
- }
-
- return(content)
-})
-
-
-##########################################
-# SEARCH FOR SPECTRA IN GIVEN MASS RANGE #
-##########################################
-
-# Searches for the spectra that have a peak inside an m/z range.
-# mzmin     The lower bound of the m/z range.
-# mzmax     The upper bound of the m/z range.
-# rtype     The type of output: "object" for the entries as returned by
-#           createEntry(), "spec" for a data frame built from the fields of
-#           the entries, "peak" for a data frame of the peaks with an
-#           additional "accession" column.
-# RETURN    NULL if nothing is found, otherwise the output selected by rtype.
-PeakforestConn$methods( searchMzRange = function(mzmin, mzmax, rtype = c("object","spec","peak")){
-
- rtype <- match.arg(rtype)
- if (mzmin > mzmax) {
-   stop("mzmin should be inferior to mzmax in searchMzRange.")
- }
-
- url <- paste0("https://rest.peakforest.org/spectra/lcms/peaks/get-range/", mzmin, "/", mzmax)
-
- contents <- .self$.get.url(url)
-
- jsontree <- fromJSON(contents)
-
- # No match in the output.
- if (length(jsontree) == 0) return(NULL)
-
- # Get the IDs of all the matched spectra. The type of the IDs is not known
- # here, hence no typed vapply().
- lid <- sapply(jsontree, function(x) {
-   x$source$id
- })
-
- # Get the content of all the spectra
- contents <- .self$getEntryContent(lid)
-
- entries <- .self$createEntry(contents)
-
- # Check the return type
- if (rtype == "object") {
-   return(entries)
- }
-
- # Nothing to convert
- if (is.null(entries)) return(NULL)
-
- # createEntry() is called with its default drop = TRUE, so a single result
- # may come back as a bare entry instead of a list of entries.
- if ( ! is.list(entries)) entries <- list(entries)
-
- # Build one data frame per entry. lapply() is used for both types, since
- # sapply() may simplify the data frames into a matrix that cannot be bound.
- toreturn <- NULL
- if (rtype == "spec") {
-   toreturn <- lapply(entries, function(x) {
-     x$getFieldsAsDataFrame()
-   })
- }
- if (rtype == "peak") {
-   toreturn <- lapply(entries, function(x) {
-     temp <- as.data.frame(x$getFieldValue(BIODB.PEAKS))
-     temp$accession <- x$getFieldValue(BIODB.ACCESSION)
-     return(temp)
-   })
- }
-
- # Bind all the data frames into a single one, keeping the column names of
- # the first one.
- temp <- colnames(toreturn[[1]])
- toreturn <- do.call("rbind.fill", toreturn)
- colnames(toreturn) <- temp
-
- return(toreturn)
-})
-
-
-#################################################
-# SEARCH FOR SPECTRA IN A TOLERANCE AROUND A MZ #
-#################################################
-
-# Searches around an m/z value: the tolerance is turned into an m/z range
-# and the search itself is delegated to searchMzRange().
-# mz        The m/z value to search for.
-# tol       The tolerance, expressed in the unit given by tolunit.
-# tolunit   The unit of the tolerance, one of BIODB.MZTOLUNIT.VALS.
-# rtype     The type of output, passed as is to searchMzRange().
-# RETURN    The value returned by searchMzRange().
-PeakforestConn$methods(
- searchMzTol = function(mz, tol, tolunit = BIODB.MZTOLUNIT.VALS,
-                        rtype = c("object", "spec", "peak")) {
-
-   rtype <- match.arg(rtype)
-   tolunit <- match.arg(tolunit)
-
-   # A tolerance in ppm is relative to mz: convert it into an absolute width.
-   delta <- if (tolunit == BIODB.MZTOLUNIT.PPM) tol * mz * 10^-6 else tol
-
-   .self$searchMzRange(mz - delta, mz + delta, rtype = rtype)
- }
-)
-
-##################################################
-# SEARCH FOR MSMS SPECTRA PRECUSOR AROUND A MASS #
-##################################################
-
-
-# Searches through the "search-naive" web service around a mass.
-# mz        The mass to search for.
-# tol       The tolerance around mz.
-# tolunit   The unit of the tolerance. A tolerance in ppm is converted into
-#           an absolute value.
-# mode      The MS mode, BIODB.MSMODE.NEG or BIODB.MSMODE.POS. Any other
-#           value, or NULL, means no filtering on polarity.
-# RETURN    The value returned by createReducedEntry().
-PeakforestConn$methods(
- searchSpecPrecTol = function(mz, tol, tolunit = "plain", mode = NULL) {
-   #TODO handle the units
-   #tolunit <- match.arg(tolunit)
-
-   # Polarity filter, appended to the URL only for a recognized mode
-   polarity.query <-
-     if (is.null(mode)) {
-       ''
-     } else if (mode %in% c(BIODB.MSMODE.NEG, BIODB.MSMODE.POS)) {
-       paste0('?polarity=', mode)
-     } else {
-       ''
-     }
-
-   if (tolunit == BIODB.MZTOLUNIT.PPM) {
-     tol <- tol * mz * 10 ^ -6
-   }
-
-   # This request returns peaks, not spectra.
-   service <- "https://rest.peakforest.org/spectra/lcms/search-naive/"
-   url <- paste0(service, mz, "/", tol, polarity.query)
-
-   contents <- .self$.get.url(url)
-   reduced <- .self$createReducedEntry(contents, drop = TRUE)
-   return(reduced)
- }
-)
-
-
-################
-# CREATE ENTRY #
-################
-
-# Builds spectrum entries from downloaded content, by delegating to
-# createPeakforestSpectraFromJSON().
-# content   The content downloaded from the database.
-# drop      Passed as is to createPeakforestSpectraFromJSON().
-# RETURN    The value returned by createPeakforestSpectraFromJSON().
-PeakforestConn$methods(
- createEntry = function(content, drop = TRUE) {
-   createPeakforestSpectraFromJSON(content, drop = drop)
- }
-)
-
-# Builds reduced entries from downloaded content, by delegating to
-# createReducedSpectraFromJSON().
-# content   The content downloaded from the database.
-# drop      Passed as is to createReducedSpectraFromJSON().
-# RETURN    The value returned by createReducedSpectraFromJSON().
-PeakforestConn$methods(
- createReducedEntry = function(content, drop = TRUE) {
-   reduced <- createReducedSpectraFromJSON(content, drop = drop)
-   return(reduced)
- }
-)

diff -r fb9c0409d85c -r f86fec07f392 PeakforestEntry.R
--- a/PeakforestEntry.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,250 +0,0 @@
-#####################
-# CLASS DECLARATION #
-#####################
-
-# Entry classes for PeakForest spectra and compounds. Both extend BiodbEntry and declare no field of their own here.
-PeakForestSpectrumEntry <- methods::setRefClass("PeakForestSpectrumEntry", contains = "BiodbEntry")
-
-PeakForestCompoundEntry <- methods::setRefClass("PeakForestCompoundEntry", contains = "BiodbEntry")
-
-
-###########
-# FACTORY #
-###########
-
-
-# Build PeakForest compound entries from JSON content.
-# contents  Either a JSON text (parsed with jsonlite::fromJSON()) or an already parsed list with one element per compound.
-# drop      If TRUE and there is exactly one compound, return the entry itself instead of a list.
-# RETURN    A list of PeakForestCompoundEntry instances, or a single instance (see `drop`).
-createPeakforestCompoundFromJSON <- function(contents, drop = FALSE) {
-
-    if (is.character(contents))
-        contents <- jsonlite::fromJSON(contents, simplifyDataFrame = FALSE)
-
-    # Map each entry field onto the name of the JSON field holding its value.
-    jsonfields <- list()
-    jsonfields[[BIODB.ACCESSION]] <- "id"
-    jsonfields[[BIODB.PUBCHEMCOMP.ID]] <- "PubChemCID"
-    jsonfields[[BIODB.CHEBI.ID]] <- "ChEBI"
-    jsonfields[[BIODB.HMDB.ID]] <- "HMDB"
-    jsonfields[[BIODB.KEGG.ID]] <- "KEGG"
-    jsonfields[[BIODB.FORMULA]] <- "formula"
-    jsonfields[[BIODB.SMILES]] <- "canSmiles"
-    jsonfields[[BIODB.AVERAGE.MASS]] <- "averageMass"
-    jsonfields[[BIODB.MONOISOTOPIC.MASS]] <- "monoisotopicMass"
-    jsonfields[[BIODB.INCHI]] <- "inChI"
-    # NOTE(review): "inchiIKey" looks like a typo for the InChIKey field name; confirm against the PeakForest JSON.
-    jsonfields[[BIODB.INCHIKEY]] <- "inchiIKey"
-    jsonfields[[BIODB.NAME]] <- "mainName"
-
-    entries <- vector(length(contents), mode = "list")
-
-    for (i in seq_along(contents)) {
-
-        jsontree <- contents[[i]]
-        entry <- PeakForestCompoundEntry$new()
-
-        for (field in names(jsonfields)) {
-
-            # Index with [[ ]] so that the name stored in `tosearch` is used.
-            # `jsontree$tosearch` looks for a field literally called "tosearch" and thus always yields NULL.
-            tosearch <- jsonfields[[field]]
-            value <- jsontree[[tosearch]]
-            entry$setField(field, value)
-        }
-
-        entries[[i]] <- entry
-    }
-
-    if (drop && length(contents) == 1)
-        entries <- entries[[1]]
-
-    entries
-}
-
-# Build PeakForest spectrum entries from JSON content.
-# contents  Either a single text (an HTML page or a JSON text), or a list whose elements are JSON texts or already parsed JSON trees.
-# drop      If TRUE and there is exactly one element, return the entry itself instead of a list.
-# checkSub  Not used by this function.
-# RETURN    NULL if a single HTML page was received. Otherwise a list with one slot per element of the (parsed) contents,
-#           holding a PeakForestSpectrumEntry instance, or NULL for an element that is an HTML page or the text "null".
-createPeakforestSpectraFromJSON <- function(contents, drop = FALSE, checkSub = TRUE) {
-
-    # Map each entry field onto the name of the JSON field holding its value.
-    jsonfields <- character()
-    jsonfields[[BIODB.ACCESSION]] <- "id"
-    jsonfields[[BIODB.MSMODE]] <- "polarity"
-
-    # Column names of the peak table, identical for all entries.
-    cnames <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.FORMULA, BIODB.PEAK.MZTHEO, BIODB.PEAK.ERROR.PPM)
-
-    # A single text is raw server output: either an HTML page or a JSON text to parse.
-    # The is.character() test avoids calling startsWith() on an already parsed tree.
-    if (length(contents) == 1 && is.character(contents[[1]])) {
-        if (startsWith(contents[[1]], "<html>"))
-            return(NULL)
-        contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
-    }
-
-    # Allocate only now, since parsing may have changed the number of elements.
-    entries <- vector(length(contents), mode = "list")
-
-    for (i in seq_along(contents)) {
-
-        content <- contents[[i]]
-        jsontree <- NULL
-        if (typeof(content) == "character") {
-
-            # Leave the preallocated NULL slot untouched. Assigning NULL with [[<- would delete
-            # the element and shift the following ones.
-            if (startsWith(content, "<html>") || content == "null")
-                next
-
-            jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
-        } else {
-            jsontree <- content
-        }
-
-        entry <- PeakForestSpectrumEntry$new()
-
-        # Setting the mass analyzer
-        entry$setField(BIODB.MSDEV, jsontree$analyzerMassSpectrometerDevice$instrumentName)
-        entry$setField(BIODB.MSDEVTYPE, jsontree$analyzerMassSpectrometerDevice$ionAnalyzerType)
-
-        for (field in names(jsonfields)) {
-
-            # Index with [[ ]] so that the name stored in `tosearch` is used.
-            # `jsontree$tosearch` looks for a field literally called "tosearch" and thus always yields NULL.
-            tosearch <- jsonfields[[field]]
-            value <- jsontree[[tosearch]]
-            entry$setField(field, value)
-        }
-
-        ######################
-        # TREATING THE PEAKS #
-        ######################
-
-        entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))
-        peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
-        colnames(peaks) <- cnames
-
-        # Parsing peaks.
-        if (length(jsontree$peaks) != 0) {
-            peaks <- sapply(jsontree$peaks, function(x) {
-                return(list(as.double(x$mz),
-                            as.integer(x$ri),
-                            as.character(x$composition),
-                            as.double(x$theoricalMass),
-                            as.double(x$deltaPPM)
-                            ))
-            })
-
-            # Removing all whitespaces from the formula.
-            peaks[3, ] <- vapply(peaks[3, ], function(x) {
-                gsub(" ", "", trimws(x))
-            }, FUN.VALUE = NA_character_)
-
-            peaks <- t(peaks)
-            colnames(peaks) <- cnames
-        }
-
-        entry$setField(BIODB.PEAKS, peaks)
-
-        ##################################
-        # TREATING THE LIST OF COMPOUNDS #
-        ##################################
-
-        entry$setField(BIODB.NB.COMPOUNDS, length(jsontree$listOfCompounds))
-        compounds <- list()
-
-        # Parsing compounds.
-        if (length(jsontree$listOfCompounds) != 0) {
-            compounds <- lapply(jsontree$listOfCompounds, function(x) {
-                createPeakforestCompoundFromJSON(x)
-            })
-        }
-
-        entry$setField(BIODB.COMPOUNDS, compounds)
-
-        entries[[i]] <- entry
-    }
-
-    if (drop && length(contents) == 1)
-        entries <- entries[[1]]
-
-    entries
-}
-
-
-# TODO Clean this: largely duplicates createPeakforestSpectraFromJSON().
-
-# Build reduced PeakForest spectrum entries (accession and peaks only) from JSON content.
-# contents  Either a single text (an HTML page or a JSON text), or a list whose elements are JSON texts or already parsed JSON trees.
-# drop      If TRUE and there is exactly one element, return the entry itself instead of a list.
-# checkSub  Not used by this function.
-# RETURN    NULL if a single HTML page was received. Otherwise a list with one slot per element of the (parsed) contents,
-#           holding a PeakForestSpectrumEntry instance, or NULL for an element that is an HTML page or the text "null".
-createReducedSpectraFromJSON <- function(contents,
-                                         drop = FALSE,
-                                         checkSub = TRUE) {
-
-    # Column names of the peak table, identical for all entries.
-    cnames <-
-        c(
-            BIODB.PEAK.MZ,
-            BIODB.PEAK.RELATIVE.INTENSITY,
-            BIODB.PEAK.FORMULA,
-            BIODB.PEAK.MZTHEO,
-            BIODB.PEAK.ERROR.PPM
-        )
-
-    # A single text is raw server output: either an HTML page or a JSON text to parse.
-    # The is.character() test avoids calling startsWith() on an already parsed tree.
-    if (length(contents) == 1 && is.character(contents[[1]])) {
-        if (startsWith(contents[[1]], "<html>"))
-            return(NULL)
-        contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
-    }
-
-    # Allocate only now, since parsing may have changed the number of elements.
-    entries <- vector(length(contents), mode = "list")
-
-    for (i in seq_along(contents)) {
-
-        content <- contents[[i]]
-        jsontree <- NULL
-        if (typeof(content) == "character") {
-
-            # Leave the preallocated NULL slot untouched. Assigning NULL with [[<- would delete
-            # the element and shift the following ones.
-            if (startsWith(content, "<html>") || content == "null")
-                next
-
-            jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
-        } else {
-            jsontree <- content
-        }
-
-        entry <- PeakForestSpectrumEntry$new()
-        entry$setField(BIODB.ACCESSION, jsontree$id)
-
-        ######################
-        # TREATING THE PEAKS #
-        ######################
-
-        entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))
-        peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
-        colnames(peaks) <- cnames
-
-        # Parsing peaks.
-        if (length(jsontree$peaks) != 0) {
-            peaks <- sapply(jsontree$peaks, function(x) {
-                return(
-                    list(
-                        as.double(x$mz),
-                        as.integer(x$ri),
-                        as.character(x$composition),
-                        as.double(x$theoricalMass),
-                        as.double(x$deltaPPM)
-                    )
-                )
-            })
-
-            # Removing all whitespaces from the formula.
-            peaks[3, ] <- vapply(peaks[3, ], function(x) {
-                gsub(" ", "", trimws(x))
-            }, FUN.VALUE = NA_character_)
-
-            peaks <- as.data.frame(t(peaks))
-            colnames(peaks) <- cnames
-        }
-
-        entry$setField(BIODB.PEAKS, peaks)
-
-        entries[[i]] <- entry
-    }
-
-    if (drop && length(contents) == 1)
-        entries <- entries[[1]]
-
-    entries
-}

diff -r fb9c0409d85c -r f86fec07f392 README.md
--- a/README.md Wed Apr 19 10:00:05 2017 -0400
+++ b/README.md Fri Feb 22 16:04:22 2019 -0500

@@ -9,28 +9,41 @@

For more information, see the galaxy tool page, help section, available inside `galaxy/lcmsmatching.xml`.

-## search-mz
+## lcmsmatching script

-This is the script, included in this repository, that allows run on command line an MZ matching on one of the available database types.
+This is the script, included in this repository, that lets you run an MZ matching from the command line against one of the available database types.

-Please run `search-mz -h` for a help page listing all options and presenting some examples.
+Please run `lcmsmatching -h` for a help page listing all options and presenting some examples.

## Dependencies

- * `libssl-dev`.
- * `libcurl4-openssl-dev`.
- * `libxml2-dev`.
- * `R` version `3.2.2`.
+ * `R` version `3.5.1`.
  * `R` packages:
    - `getopt` >= `1.20.0`.
-   - `stringr` >= `1.0.0`.
-   - `plyr` >= `1.8.3`.
-   - `XML` >= `3.98`.
-   - `bitops` >= `1.0_6`.
-   - `RCurl` >= `1.95`.
-   - `jsonlite` >= `1.1`.
+   - `biodb` >= `1.2.0rc2`.
+
+## Changelog
+
+### 4.0.2
+
+   * Increase getopt version to 1.20.2.
+
+### 4.0.1
+
+   * Downgrade to Galaxy 18.05. Test in both 18.05 and 18.09.

-## Updates
+### 4.0.0
+
+   * Switch to biodb R library (<http://github.com/pkrog/biodb>).
+   * Remove Excel and 4TabSql databases from script.
+   * Remove all dynamic fields in XML (i.e. fields computed using Python scripts, like the list of chromatographic columns).
+   * Now use a single field for the column names of in-house file databases; its value is a comma-separated list of key/value pairs.
+   * Update Peakforest URL.
+
+### 3.4.3
+
+   * Returns empty match in case of NA values in mz.low and mz.high.
+   * Speed up HTML output writing.

### 3.3.1

diff -r fb9c0409d85c -r f86fec07f392 UrlRequestScheduler.R
--- a/UrlRequestScheduler.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,126 +0,0 @@
-if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded
-
- #############
- # CONSTANTS #
- #############
-
- RLIB.GET  <- 'GET'
- RLIB.POST <- 'POST'
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer"))
-
- # n: number of connections
- # t: time (in seconds)
-
- # The scheduler restrict the number of connections at n per t seconds.
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- # Set up a scheduler that restricts the number of connections to n per t seconds.
- # n               Number of connections.
- # t               Time, in seconds.
- # useragent       The user agent, stored for later requests.
- # ssl.verifypeer  Stored for later requests.
- # ...             Remaining parameters, passed to the super-class initializer.
- UrlRequestScheduler$methods(
-     initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) {
-
-         # Rate limit
-         .n <<- n
-         .t <<- t
-
-         # Connection settings
-         .useragent <<- useragent
-         .ssl.verifypeer <<- ssl.verifypeer
-
-         # Internal state: -1 marks that no request has been sent yet.
-         .time.of.last.request <<- -1
-         .nb.max.tries <<- 10L
-         .verbose <<- 0L
-
-         # Call the super-class initializer with the remaining parameters.
-         callSuper(...)
-     }
- )
-
- ##################
- # SET USER AGENT #
- ##################
-
- # Set the user agent stored in the scheduler; it is read when the curl options are built.
- UrlRequestScheduler$methods(
-     setUserAgent = function(useragent) {
-         .useragent <<- useragent
-     }
- )
-
- ###############
- # SET VERBOSE #
- ###############
-
- # Set the verbosity level. With a level above 0, getUrl() prints a message each time a try fails.
- UrlRequestScheduler$methods(
-     setVerbose = function(verbose) {
-         .verbose <<- verbose
-     }
- )
-
- ##################
- # WAIT AS NEEDED #
- ##################
-
- # Wait, if needed, so that two consecutive requests are separated by at least t / n seconds.
- UrlRequestScheduler$methods( .wait.as.needed = function() {
-
-     # Compute minimum waiting time (in seconds) between two URL requests
-     waiting_time <- .self$.t / .self$.n
-
-     # Wait, if needed, between previous URL request and this new URL request.
-     # The field holds -1 as long as no request has been made.
-     if (.self$.time.of.last.request > 0) {
-
-         # Force the unit to seconds. A plain subtraction of two times yields a difftime whose unit
-         # (secs, mins, hours, ...) depends on the elapsed time, so its value cannot be compared to seconds.
-         spent_time <- as.numeric(difftime(Sys.time(), .self$.time.of.last.request, units = 'secs'))
-         if (spent_time < waiting_time)
-             Sys.sleep(waiting_time - spent_time)
-     }
-
-     # Store current time
-     .time.of.last.request <<- Sys.time()
- })
-
- ####################
- # GET CURL OPTIONS #
- ####################
-
- # Build the curl options used for requests: stored user agent, 60000 ms timeout, non-verbose.
- # url   Not used by this method.
- UrlRequestScheduler$methods(
-     .get_curl_opts = function(url) {
-         curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE)
-     }
- )
-
- ###########
- # GET URL #
- ###########
-
- # Send one request and return the content received.
- # url     The URL to request.
- # params  Optional form parameters. When NULL, empty or only NA, the URL is requested as is.
- # method  RLIB.GET or RLIB.POST; only used when parameters are sent. Any other value is an error.
- # RETURN  The content returned by getForm(), postForm() or getURL().
- UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) {
-
-     content <- NA_character_
-
-     # `params` usually holds several values, so it must be reduced to a single logical
-     # before being used as a condition.
-     has.params <- length(params) > 0 && ! all(is.na(params))
-
-     # Use form to send URL request
-     if (has.params)
-         switch(method,
-                GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) },
-                POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) },
-                stop(paste0('Unknown method "', method, '".'))
-               )
-
-     # Get URL normally
-     else
-         content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer)
-
-     return(content)
- })
-
- # Request a URL, waiting first as required by the scheduler and trying again on error.
- # url     The URL to request.
- # params  Optional form parameters, passed to .doGetUrl().
- # method  The request method, passed to .doGetUrl().
- # RETURN  The content received, or NA if all tries failed.
- UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) {
-
-     # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored.
-     library(bitops)
-     library(RCurl)
-
-     content <- NA_character_
-
-     # Wait required time between two requests
-     .self$.wait.as.needed()
-
-     # Run query. seq_len() yields no iteration at all for a maximum of 0, whereas seq(0) would give two.
-     for (i in seq_len(.self$.nb.max.tries)) {
-         tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) },
-                  error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } )
-
-         # Stop as soon as something other than the initial single NA has been obtained.
-         # The length test keeps the condition a single logical whatever was received.
-         if (length(content) != 1 || ! is.na(content))
-             break
-     }
-
-     return(content)
- })
-}

diff -r fb9c0409d85c -r f86fec07f392 biodb-common.R
--- a/biodb-common.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b"@@ -1,350 +0,0 @@\n-if ( ! exists('BIODB.XML')) {\n-\n-\t###############\n-\t# CACHE MODES #\n-\t###############\n-\n-\tBIODB.CACHE.READ.ONLY <- 'read-only'\n-\tBIODB.CACHE.READ.WRITE <- 'read-write'\n-\tBIODB.CACHE.WRITE.ONLY <- 'write-only'\n-\n-\t#######################\n-\t# ENTRY CONTENT TYPES #\n-\t#######################\n-\n-\tBIODB.HTML <- 'html'\n-\tBIODB.TXT <- 'txt'\n-\tBIODB.XML <- 'xml'\n-\tBIODB.CSV <- 'csv'\n-\tBIODB.DATAFRAME <- 'dataframe'\n-\tBIODB.JSON <- 'json'\n-\n-\t#############\n-\t# DATABASES #\n-\t#############\n-\n-\tBIODB.CHEBI <- 'chebi'\n-\tBIODB.KEGG <- 'kegg'\n-\tBIODB.PUBCHEMCOMP <- 'pubchemcomp' # Compound database\n-\tBIODB.PUBCHEMSUB <- 'pubchemsub' # Substance database\n-\tBIODB.HMDB <- 'hmdb'\n-\tBIODB.CHEMSPIDER <- 'chemspider'\n-\tBIODB.ENZYME <- 'enzyme'\n-\tBIODB.LIPIDMAPS <- 'lipidmaps'\n-\tBIODB.MIRBASE <- 'mirbase'\n-\tBIODB.NCBIGENE <- 'ncbigene'\n-\tBIODB.NCBICCDS <- 'ncbiccds'\n-\tBIODB.UNIPROT <- 'uniprot'\n-\tBIODB.MASSBANK <- 'massbank'\n-\tBIODB.MASSFILEDB <- 'massfiledb'\n-\tBIODB.PEAKFOREST <- 'peakforest'\n-\n-\tBIODB.DATABASES <- c(BIODB.CHEBI, BIODB.KEGG, BIODB.PUBCHEMCOMP, BIODB.PUBCHEMSUB, BIODB.HMDB, BIODB.CHEMSPIDER, BIODB.ENZYME, BIODB.LIPIDMAPS, BIODB.MIRBASE, BIODB.NCBIGENE, BIODB.NCBICCDS, BIODB.UNIPROT, BIODB.MASSBANK, BIODB.MASSFILEDB, BIODB.PEAKFOREST)\n-\n-\t##########\n-\t# FIELDS #\n-\t##########\n-\n-\tBIODB.ACCESSION <- 'accession'\n-\tBIODB.DESCRIPTION <- 'description'\n-\tBIODB.PROTEIN.DESCRIPTION <- 'protdesc'\n-\tBIODB.NAME <- 'name'\n-\tBIODB.COMP.IUPAC.NAME.ALLOWED <- 'comp.iupac.name.allowed'\n-\tBIODB.COMP.IUPAC.NAME.TRAD <- 'comp.iupac.name.trad'\n-\tBIODB.COMP.IUPAC.NAME.SYST <- 'comp.iupac.name.syst'\n-\tBIODB.COMP.IUPAC.NAME.PREF <- 'comp.iupac.name.pref'\n-\tBIODB.COMP.IUPAC.NAME.CAS <- 'comp.iupac.name.cas'\n-\tBIODB.FULLNAMES <- 'fullnames'\n-\tBIODB.SYNONYMS <- 'synonyms'\n-\tBIODB.SYMBOL <- 'symbol'\n-\tBIODB.GENE.SYMBOLS <- 'genesymbols'\n-\tBIODB.CHEBI.ID <- 
'chebiid'\n-\tBIODB.LIPIDMAPS.ID <- 'lipidmapsid'\n-\tBIODB.KEGG.ID <- 'keggid'\n-\tBIODB.HMDB.ID <- 'hmdbid'\n-\tBIODB.ENZYME.ID <- 'enzymeid'\n-\tBIODB.NCBI.CCDS.ID <- 'ncbiccdsid'\n-\tBIODB.NCBI.GENE.ID <- 'ncbigeneid'\n-\tBIODB.PUBCHEMCOMP.ID <- 'pubchemcompid'\n-\tBIODB.PUBCHEMSUB.ID <- 'pubchemsubid'\n-\tBIODB.CHEMSPIDER.ID <- 'chemspiderid'\n-\tBIODB.UNIPROT.ID <- 'uniprotid'\n-\tBIODB.CAS.ID <- 'casid'\n-\tBIODB.PEAKFOREST.ID <- 'peakforestid'\n-\tBIODB.SMILES <- 'smiles'\n-\tBIODB.INCHI <- 'inchi'\n-\tBIODB.INCHIKEY <- 'inchikey'\n-\tBIODB.MSDEV <- 'msdev'\n-\tBIODB.MSDEVTYPE <- 'msdevtype'\n-\tBIODB.MSTYPE <- 'mstype'\n-\tBIODB.MSMODE <- 'msmode'\n-\tBIODB.MSPRECMZ <- 'msprecmz' # numeric\n-\tBIODB.MSPRECANNOT <- 'msprecannot'\n-\tBIODB.FORMULA <- 'formula'\n-\tBIODB.SUPER.CLASS <- 'superclass'\n-\tBIODB.MASS <- 'mass'\n-\tBIODB.AVERAGE.MASS <- 'averagemass'\n-\tBIODB.MONOISOTOPIC.MASS <- 'monoisotopicmass'\n-\tBIODB.SEQUENCE <- 'sequence'\n-\tBIODB.LOCATION <- 'location'\n-\tBIODB.LENGTH <- 'length'\n-\tBIODB.NB.PEAKS <- 'nbpeaks'\n-\tBIODB.PEAKS <- 'peaks'\n-\tBIODB.COMPOUNDS <- 'compounds'\n- BIODB.NB.COMPOUNDS <- 'nbcompounds'\n-\tBIODB.COMPOUND.ID <- 'compoundid'\n-\tBIODB.COMPOUND.MASS <- 'compoundmass'\n-\tBIODB.COMPOUND.COMP <- 'compoundcomp'\n-\tBIODB.CHROM.COL <- 'chromcol' # Chromatographic column\n-\tBIODB.CHROM.COL.RT <- 'chromcolrt' # Retention time measured on chromatographic column\n-\tBIODB.ID <- 'id'\n-\tBIODB.TITLE <- 'title'\n-\tBIODB.PEAK.MZ <- 'mz'\n-\tBIODB.PEAK.RT <- 'rt'\n-\tBIODB.PEAK.MZEXP <- 'mzexp'\n-\tBIODB.PEAK.MZTHEO <- 'mztheo'\n-\tBIODB.PEAK.FORMULA <- 'formula'\n-\tBIODB.PEAK.FORMULA.COUNT <- 'formula.count'\n-\tBIODB.PEAK.COMP <- 'peakcomp' # Peak composition\n-\tBIODB.PEAK.ATTR <- 'peakattr' # Peak attribution\n-\tBIODB.PEAK.MASS <- 'mass'\n-#\tBIODB.PEAK.ATTR <- 'attr'\n-\tBIODB.PEAK.ERROR.PPM <- 'error.ppm'\n-\tBIODB.PEAK.INTENSITY <- 'intensity'\n-\tBIODB.PEAK.RELATIVE.INTENSITY <- 'relati"..b' = 
switch(content.type,\n-\t\t\t xml = paste0(\'http://www.hmdb.ca/metabolites/\', accession, \'.xml\'),\n-\t\t\t html = paste0(\'http://www.hmdb.ca/metabolites/\', accession),\n-\t\t\t NULL),\n-\t\t\tkegg = switch(content.type,\n-\t\t\t txt = paste0(\'http://rest.kegg.jp/get/\', accession),\n-\t\t\t html = paste0(\'http://www.genome.jp/dbget-bin/www_bget?cpd:\', accession),\n-\t\t\t NULL),\n-\t\t\tlipidmaps = if (content.type == BIODB.CSV) paste0(\'http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=\', accession, \'&OutputType=CSV&OutputQuote=No\') else NULL, \n-\t\t\tmassbank = if (content.type == BIODB.TXT) paste0((if (is.na(base.url)) BIODB.MASSBANK.EU.WS.URL else base.url), \'getRecordInfo?ids=\', paste(accession, collapse = \',\')) else NULL,\n-\t\t\tmirbase = if (content.type == BIODB.HTML) paste0(\'http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=\', accession) else NULL,\n-\t\t\tpubchemcomp = switch(content.type,\n-\t\t\t xml = paste0(\'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/\', paste(accession, collapse = \',\'), \'/XML\'),\n-\t\t\t html = paste0(\'http://pubchem.ncbi.nlm.nih.gov/compound/\', accession),\n-\t\t\t NULL),\n-\t\t\tpubchemsub = switch(content.type,\n-\t\t\t xml = paste0(\'https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/\', paste(accession, collapse = \',\'), \'/XML\'),\n-\t\t\t html = paste0(\'http://pubchem.ncbi.nlm.nih.gov/substance/\', accession),\n-\t\t\t NULL),\n-\t\t\tncbigene = if (content.type == BIODB.XML) paste0(\'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=\', accession, \'&rettype=xml&retmode=text\') else NULL,\n-\t\t\tncbiccds = if (content.type == BIODB.HTML) paste0(\'https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=\', accession),\n-\t\t\tuniprot = if (content.type == BIODB.XML) paste0(\'http://www.uniprot.org/uniprot/\', accession, \'.xml\'),\n-\t\t\tpeakforest = switch(content.type,\n-\t\t\t html= 
paste0(\'https://peakforest.org/home?PFs=\',accession),\n-\t\t\t json= paste0(\'https://peakforest-alpha.inra.fr/rest/spectra/lcms/ids/\',paste(accession,sep=\',\'),\'?token=\',token),\n-\t\t\t \n-\t\t\tNULL\n-\t\t\t)\n-\t\t)\n-\t\treturn(url)\n-\t}\n-\n-\tget.entry.url <- function(class, accession, content.type = BIODB.HTML, max.length = 0, base.url = NA_character_, token = NA_character_) {\n-\n-\t\tif (length(accession) == 0)\n-\t\t\treturn(NULL)\n-\n-\t\tfull.url <- .do.get.entry.url(class, accession, content.type = content.type, base.url = base.url, token = token)\n-\t\tif (max.length == 0 || nchar(full.url) <= max.length)\n-\t\t\treturn(if (max.length == 0) full.url else list(url = full.url, n = length(accession)))\n-\n-\t\t# Find max size URL\n-\t\ta <- 1\n-\t\tb <- length(accession)\n-\t\twhile (a < b) {\n-\t\t\tm <- as.integer((a + b) / 2)\n-\t\t\turl <- .do.get.entry.url(class, accession[1:m], content.type = content.type, base.url = base.url, token = token)\n-\t\t\tif (nchar(url) <= max.length && m != a)\n-\t\t\t\ta <- m\n-\t\t\telse\n-\t\t\t\tb <- m\n-\t\t}\n-\t\turl <- .do.get.entry.url(class, accession[1:a], content.type = content.type, base.url = base.url, token = token)\n-\t\t\t\n-\t\treturn(list( url = url, n = a))\n-\t}\n-\n-\t#################\n-\t# PRINT MESSAGE #\n-\t#################\n-\n-\tBIODB.DEBUG <- 1\n-\tBIODB.LEVEL.NAMES <- c(\'DEBUG\')\n-\n-\t.print.msg <- function(msg, level = BIODB.DEBUG, class = NA_character_) {\n-\t\tcat(paste0(BIODB.LEVEL.NAMES[[level]], if (is.na(class)) \'\' else paste0(", ", class), ": ", msg, "\\n"), file = stderr())\n-\t}\n-\n-\t#####################\n-\t# BIODB GET ENV VAR #\n-\t#####################\n-\n-\t.biodb.get.env.var <- function(v) {\n-\n-\t\t# Get all env vars\n-\t\tenv <- Sys.getenv()\n-\n-\t\t# Make env var name\n-\t\tenv.var <- paste(c(\'BIODB\', toupper(v)), collapse = \'_\')\n-\n-\t\t# Look if this env var exists\n-\t\tif (env.var %in% 
names(env))\n-\t\t\treturn(env[[env.var]])\n-\n-\t\treturn(NA_character_)\n-\t}\n-}\n'

diff -r fb9c0409d85c -r f86fec07f392 build.xml
--- a/build.xml Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

b'@@ -1,396 +0,0 @@\n-<project name="w4m.tool.lcmsmatching" default="all">\n-\n-\t<dirname property="this.dir" file="${ant.file.w4m.tool.lcmsmatching}"/>\n-\t<property name="conda.dir" value="${user.home}/w4m-conda"/>\n-\t<property name="planemo.dir" value="${user.home}/.planemo"/>\n-\n-\t\n-\n-\t\n-\n-\t<property name="TIMESTAMP" value="true"/>\n-\t<property name="VERSION" value="true"/>\n-\t<property name="DIST.TEST" value="true"/>\n-\t<property name="TOOL.PREFIX" value="$__tool_directory__/"/>\n-\t<property name="PKG.PREFIX" value="w4m-tool-lcmsmatching"/>\n-\n-\t\n-\n-\t\n-\t<property name="version" value="2.1.3"/>\n-\t<condition property="version.suffix" value="" else="-${version}">\n-\t\t<isfalse value="${VERSION}"/>\n-\t</condition>\n-\n-\t\n-\t<property name="dist.dir" value="dist"/>\n-\t<property name="dist.code.dir" value="${dist.dir}/code"/>\n-\t<property name="dist.test.dir" value="${dist.dir}/test"/>\n-\n-\t\n-\t<property name="tool.xml" value="lcmsmatching.xml"/>\n-\t<property name="orig.tool.xml" value="${tool.xml}"/>\n-\t<property name="dest.tool.xml" value="${dist.code.dir}/${tool.xml}"/>\n-\n-\t\n-\t<tstamp/>\n-\t<property name="timestamp" value="${DSTAMP}-${TSTAMP}"/>\n-\t<condition property="timestamp.suffix" value="" else="-${timestamp}">\n-\t\t<isfalse value="${TIMESTAMP}"/>\n-\t</condition>\n-\n-\t\n-\t<property name="pkg.ext" value="tar.gz"/>\n-\t<property name="pkg.name" value="${PKG.PREFIX}${version.suffix}${timestamp.suffix}"/>\n-\t<property name="pkg.path" value="${dist.dir}/${pkg.name}.${pkg.ext}"/>\n-\n-\t\n-\n-\t<target name="all"/>\n-\n-\t\n-\n-\t<target name="dist" depends="dist.code,dist.tar,dist.test"/>\n-\n-\t\n-\n-\t<target name="dist.w4m" depends="w4m.code,dist.tar,dist.test"/>\n-\n-\t\n-\n-\t<target name="dist.test" if="${DIST.TEST}">\n-\n-\t\t\n-\t\t<delete dir="${dist.test.dir}"/>\n-\t\t<mkdir dir="${dist.test.dir}"/>\n-\n-\t\t\n-\t\t<untar src="${pkg.path}" dest="${dist.test.dir}" compression="gzip"/>\n-\t\t<chmod 
file="${dist.test.dir}/search-mz" perm="u+x"/> \n-\n-\t\t\n-\t\t<exec executable="${dist.test.dir}/search-mz" failonerror="true">\n-\t\t\t<arg value="-d"/>\n-\t\t\t<arg value="file"/>\n-\t\t\t<arg value="--url"/>\n-\t\t\t<arg value="test/filedb.tsv"/>\n-\t\t\t<arg value="-m"/>\n-\t\t\t<arg value="pos"/>\n-\t\t\t<arg value="-i"/>\n-\t\t\t<arg value="test/mzrt-input.tsv"/>\n-\t\t\t<arg value="-o"/>\n-\t\t\t<arg value="mzrt-output.tsv"/>\n-\t\t</exec>\n-\n-\t</target>\n-\n-\t\n-\n-\t<target name="w4m.code" depends="dist.code">\n-\n-\t\t\n-\t\t<copy file="${orig.tool.xml}" tofile="${dest.tool.xml}"/>\n-\n-\t\t\n-\t\t<copy todir="${dist.code.dir}">\n-\t\t\t<fileset dir="." includes="*.py"/>\n-\t\t</copy>\n-\t</target>\n-\n-\t\n-\n-\t<target name="dist.code">\n-\n-\t\t\n-\t\t<delete dir="${dist.code.dir}"/>\n-\t\t<mkdir dir="${dist.code.dir}"/>\n-\n-\t\t\n-\t\t<copy todir="${dist.code.dir}">\n-\t\t\t<fileset dir="." includes="search-mz,*.R"/>\n-\t\t</copy>\n-\n-\t</target>\n-\n-\t\n-\n-\t<target name="dist.tar">\n-\n-\t\t\n-\t\t<tar destfile="${pkg.path}" compression="gzip">\n-\n-\t\t\t\n-\t\t\t<tarfileset dir="${dist.code.dir}" filemode="755">\n-\t\t\t\t<include name="search-mz"/>\n-\t\t\t</tarfileset>\n-\n-\t\t\t\n-\n-\t<target name="planemo.testtoolshed.test" depends="planemo.env">\n-\t\t<exec executable="planemo" dir="${dist.code.dir}" failonerror="true">\n-\t\t\t<arg value="shed_test"/>\n-\t\t\t<arg value="--shed_target"/>\n-\t\t\t<arg value="testtoolshed"/>\n-\t\t\t<arg value="--install_galaxy"/>\n-\t\t\t<arg value="--galaxy_branch"/>\n-\t\t\t<arg value="release_16.01"/>\n-\t\t</exec>\n-\t</target>\n-\n-\t\n-\n-\t<target name="planemo.toolshed.create" depends="planemo.env">\n-\t\t<exec executable="planemo" dir="${dist.code.dir}" failonerror="true">\n-\t\t\t<arg value="shed_create"/>\n-\t\t\t<arg value="--shed_target"/>\n-\t\t\t<arg value="toolshed"/>\n-\t\t</exec>\n-\t</target>\n-\n-\t\n-\n-\t<target name="planemo.toolshed.diff" depends="planemo.env">\n-\t\t<exec 
executable="planemo" dir="${dist.code.dir}" failonerror="true">\n-\t\t\t<arg value="shed_diff"/>\n-\t\t\t<arg value="--shed_target"/>\n-\t\t\t<arg value="toolshed"/>\n-\t\t</exec>\n-\t</target>\n-\n-\t\n-\n-\t<target name="planemo.toolshed.update" depends="planemo.env">\n-\t\t<exec executable="planemo" dir="${dist.code.dir}" failonerror="true">\n-\t\t\t<arg value="shed_update"/>\n-\t\t\t<arg value="--check_diff"/>\n-\t\t\t<arg value="--shed_target"/>\n-\t\t\t<arg value="toolshed"/>\n-\t\t</exec>\n-\t</target>\n-\n-\t\n-\n-\t<target name="planemo.env" depends="w4m.code">\n-\t\t<chmod file="${dist.code.dir}/search-mz" perm="u+x"/>\n-\t\t<ant dir="test" target="input.files"/>\n-\t\t<mkdir dir="${dist.code.dir}/test-data"/>\n-\t\t<copy todir="${dist.code.dir}/test-data">\n-\t\t\t<fileset dir="test" includes="filedb.tsv"/>\n-\t\t\t<fileset dir="test" includes="mz-input-small.tsv"/>\n-\t\t\t<fileset dir="test/res" includes="filedb-small-mz-match-*"/>\n-\t\t</copy>\n-\t\t<copy file="shed.yml" tofile="${dist.code.dir}/.shed.yml"/>\n-\t</target>\n-\n-\t\n-\n-\t\n-\n-\t\n-\t<target name="update.w4m.vm" depends="clean,dist">\n-\n-\t\t<property name="w4m.login" value="galaxy@w4m"/>\n-\t\t<property name="tool.path" value="galaxy-pfem/tools/metabolomics/annotation/lcmsmatching"/>\n-\n-\t\t\n-\t\t<exec executable="ssh" failonerror="true">\n-\t\t\t<arg value="${w4m.login}"/>\n-\t\t\t<arg value="/sbin/service galaxy stop"/>\n-\t\t</exec>\n-\n-\t\t\n-\t\t<exec executable="ssh" failonerror="true">\n-\t\t\t<arg value="${w4m.login}"/>\n-\t\t\t<arg value="rm -rf ${tool.path}"/>\n-\t\t</exec>\n-\n-\t\t\n-\t\t<exec executable="ssh" failonerror="true">\n-\t\t\t<arg value="${w4m.login}"/>\n-\t\t\t<arg value="rm -f ${PKG.PREFIX}-*.${pkg.ext}"/>\n-\t\t</exec>\n-\n-\t\t\n-\t\t<exec executable="scp" failonerror="true">\n-\t\t\t<arg value="${dist.dir}/${pkg.name}.${pkg.ext}"/>\n-\t\t\t<arg value="${w4m.login}:."/>\n-\t\t</exec>\n-\n-\t\t\n-\t\t<exec executable="ssh" 
failonerror="true">\n-\t\t\t<arg value="${w4m.login}"/>\n-\t\t\t<arg value="mkdir -p ${tool.path}"/>\n-\t\t</exec>\n-\n-\t\t\n-\t\t<exec executable="ssh" failonerror="true">\n-\t\t\t<arg value="${w4m.login}"/>\n-\t\t\t<arg value="tar -xzf ${pkg.name}.${pkg.ext} -C ${tool.path}"/>\n-\t\t</exec>\n-\n-\t\t\n-\t\t<exec executable="ssh" failonerror="true">\n-\t\t\t<arg value="${w4m.login}"/>\n-\t\t\t<arg value="/sbin/service galaxy start"/>\n-\t\t</exec>\n-\n-\t</target>\n-\n-</project>\n'

diff -r fb9c0409d85c -r f86fec07f392 dfhlp.R
--- a/dfhlp.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,101 +0,0 @@
-if ( ! exists('remove.na.rows')) { # Do not load again if already loaded
-
- source('strhlp.R')
-
- #################
- # RENAME COLUMN #
- #################
-
- # Rename columns of a data frame.
- # df      The data frame.
- # cur     The current names of the columns to rename.
- # new     The new names, in the same order as `cur`.
- # RETURN  The data frame, where each name of `cur` matching exactly one column has been replaced.
- rename.col <- function(df, cur, new) {
-
-     for (k in seq_along(cur)) {
-         hits <- which(colnames(df) == cur[k])
-
-         # Rename only on an unambiguous match.
-         if (length(hits) == 1)
-             colnames(df)[hits] <- new[k]
-     }
-
-     df
- }
-
- ##################
- # REMOVE NA ROWS #
- ##################
-
- # Remove the rows that contain only NA values.
- # df      The data frame.
- # RETURN  The data frame without its all-NA rows. It remains a data frame even with a single column.
- remove.na.rows <- function(df) {
-     keep <- ! apply(is.na(df), MARGIN = 1, all)
-     df[keep, , drop = FALSE]
- }
-
- ######################
- # MOVE COLUMNS FIRST #
- ######################
-
- # Put the given columns first, the other columns keeping their relative order.
- # df      The data frame.
- # cols    The names of the columns to move.
- # RETURN  The data frame with reordered columns.
- df.move.col.first <- function(df, cols) {
-     df[c(cols, setdiff(names(df), cols))]
- }
-
- #####################
- # MOVE COLUMNS LAST #
- #####################
-
- # Put the given columns last, the other columns keeping their relative order.
- # df      The data frame.
- # cols    The names of the columns to move.
- # RETURN  The data frame with reordered columns.
- df.move.col.last <- function(df, cols) {
-     df[c(setdiff(names(df), cols), cols)]
- }
-
- #################
- # READ CSV FILE #
- #################
-
- # Read a CSV file with read.csv() and clean the result with df.clean().
- # file                  The path to the CSV file.
- # header                Passed to read.csv(): if TRUE, use first line as header line.
- # check.names           Passed to read.csv().
- # stringsAsFactors      Passed to read.csv().
- # trim.header           Passed to df.clean() as trim.colnames.
- # trim.values           Passed to df.clean().
- # remove.na.rows        Passed to df.clean(): if TRUE, remove all lines that contain only NA values.
- # RETURN                The cleaned data frame.
- df.read.csv <- function(file, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE) {
-
-     # Call built-in read.csv()
-     raw <- read.csv(file, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors)
-
-     # Clean data frame
-     df.clean(raw, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows)
- }
-
- ##################
- # WRITE TSV FILE #
- ##################
-
- # Write a data frame into a file with write.table(), using tabulation as separator.
- # df         The data frame to write.
- # file       The destination, passed to write.table().
- # row.names  Passed to write.table().
- # col.names  Passed to write.table().
- df.write.tsv <- function(df, file, row.names = FALSE, col.names = TRUE) {
-     write.table(
-         df,
-         file = file,
-         row.names = row.names,
-         col.names = col.names,
-         sep = "\t"
-     )
- }
-
- ####################
- # CLEAN DATA FRAME #
- ####################
-
- # Clean a data frame: optionally remove all-NA rows, trim column names and trim character values.
- # df              The data frame to clean.
- # trim.colnames   If TRUE, apply trim() to the column names.
- # trim.values     If TRUE, apply trim() to each column of type character.
- # remove.na.rows  If TRUE, remove the rows that contain only NA values.
- # RETURN          The cleaned data frame.
- df.clean <- function(df, trim.colnames = FALSE, trim.values = FALSE, remove.na.rows = FALSE) {
-
-     # Remove NA lines. The flag has the same name as the function: the call still reaches the
-     # function, since R skips non-function objects when looking up the name of a called function.
-     if (remove.na.rows)
-         df <- remove.na.rows(df)
-
-     # Trim header
-     if (trim.colnames)
-         colnames(df) <- trim(colnames(df))
-
-     # Trim values. seq_len() gives no iteration for a data frame without columns,
-     # whereas 1:ncol(df) would iterate over 1 and 0 and fail.
-     if (trim.values)
-         for (c in seq_len(ncol(df)))
-             if (typeof(df[[c]]) == 'character')
-                 df[[c]] <- trim(df[[c]])
-
-     return(df)
- }
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 excelhlp.R
--- a/excelhlp.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,83 +0,0 @@
-if ( ! exists('read.excel')) { # Do not load again if already loaded
-
- source('strhlp.R')
- source('dfhlp.R')
-
- ###############
- # GET NB ROWS #
- ###############
-
- # Get the index of the last row that does not contain only NA values.
- # file    The path to the Excel file.
- # tab     The sheet to read, passed to read.xlsx().
- # RETURN  The index of that row inside the data frame returned by read.xlsx(), or 0 if no row holds a value.
- get.nbrows <- function(file, tab) {
-
-     library(rJava)
-     library(xlsxjars)
-     library(xlsx, quietly = TRUE)
-
-     df <- read.xlsx(file, tab)
-
-     # Nothing was read at all.
-     if (is.null(df) || nrow(df) == 0)
-         return(0L)
-
-     na_rows <- apply(is.na(df), MARGIN = 1, FUN = all) # look for rows that contain only NA values.
-     last_row <- tail(which(! na_rows), n = 1)
-
-     # Return 0 rather than an empty vector when no row holds a value,
-     # so that callers can compare the result with a row number.
-     if (length(last_row) == 0)
-         return(0L)
-
-     return(last_row)
- }
-
- ##############
- # READ EXCEL #
- ##############
-
- # Read a sheet of an Excel xlsx file with read.xlsx() and clean the result with df.clean().
- # file                  The path to the Excel file.
- # sheet                 The sheet to read, passed to read.xlsx().
- # start.row             Passed to read.xlsx() as startRow. NULL is returned if it is beyond the last row found by get.nbrows().
- # end.row               Passed to read.xlsx() as endRow. NULL is returned if it is beyond the last row found by get.nbrows().
- # header                Passed to read.xlsx(). If FALSE, the column names of the result are removed.
- # check.names           Passed to read.xlsx().
- # stringsAsFactors      Passed to read.xlsx().
- # trim.header           Passed to df.clean() as trim.colnames.
- # trim.values           Passed to df.clean().
- # remove.na.rows        Passed to df.clean(): if TRUE, remove all lines that contain only NA values.
- # col.index             Passed to read.xlsx() as colIndex.
- # RETURN                The cleaned data frame, or NULL (see start.row and end.row).
- read.excel <- function(file, sheet, start.row = NULL, end.row = NULL, header = TRUE, remove.na.rows = TRUE, check.names = TRUE, stringsAsFactors = TRUE, trim.header = FALSE, trim.values = FALSE, col.index = NULL) {
-
-     library(rJava)
-     library(xlsxjars)
-     library(xlsx, quietly = TRUE)
-
-     # Check that start row and end row exist
-     has.start <- ! is.null(start.row)
-     has.end <- ! is.null(end.row)
-     if (has.start || has.end) {
-         last.row <- get.nbrows(file, sheet)
-         if (has.start && start.row > last.row)
-             return(NULL)
-         if (has.end && end.row > last.row)
-             return(NULL)
-     }
-
-     # Call xlsx package
-     sheet.df <- read.xlsx(file, sheet, startRow = start.row, endRow = end.row, header = header, check.names = check.names, stringsAsFactors = stringsAsFactors, colIndex = col.index)
-
-     # Remove column default names if header was set to false
-     if ( ! header)
-         colnames(sheet.df) <- NULL
-
-     # Clean data frame
-     df.clean(sheet.df, trim.colnames = trim.header, trim.values = trim.values, remove.na.rows = remove.na.rows)
- }
-
- #######################
- # CHECK IF TAB EXISTS #
- #######################
-
- # Test if a sheet exists inside an Excel file.
- # file    The path to the Excel file.
- # tab     The name of the sheet to look for.
- # RETURN  FALSE if `file` or `tab` is NULL or NA, otherwise whether `tab` is among the names of the sheets of the workbook.
- tab.exists <- function(file, tab) {
-
-     # Nothing to look for without both a file and a tab.
-     missing.arg <- is.null(file) || is.na(file) || is.null(tab) || is.na(tab)
-     if (missing.arg)
-         return(FALSE)
-
-     library(rJava)
-     library(xlsxjars)
-     library(xlsx, quietly = TRUE)
-
-     tab %in% names(getSheets(loadWorkbook(file)))
- }
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 fshlp.R
--- a/fshlp.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,20 +0,0 @@
-if ( ! exists('extname')) { # Do not load again if already loaded
-
- source('strhlp.R')
-
- ###########
- # EXTNAME #
- ###########
-
- # Return what follows the last dot of `path`. A path containing no dot is returned unchanged.
- extname <- function(path) {
-     # Greedy prefix up to the last dot, then capture everything after it
-     ext.pattern <- '^.*\\.([^.]*)$'
-     sub(ext.pattern, '\\1', path, perl = TRUE)
- }
-
- ##############
- # REMOVE EXT #
- ##############
-
- # Return `path` without its last dot and the characters that follow it.
- remove.ext <- function(path) {
-     trailing.ext <- '\\.[^.]*$'
-     sub(trailing.ext, '', path)
- }
-}

diff -r fb9c0409d85c -r f86fec07f392 htmlhlp.R
--- a/htmlhlp.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,114 +0,0 @@
-if ( ! exists('HtmlWriter')) { # Do not load again if already loaded
-
- library(methods)
-
- #####################
- # CLASS DECLARATION #
- #####################
-
- # Reference class writing HTML text to a file.
- #   .file          path of the output file.
- #   .auto.indent   current automatic indentation level, or NA when automatic indentation is off.
- HtmlWriter <- setRefClass("HtmlWriter",
-     fields = list(
-         .file = "character",
-         .auto.indent = "numeric"
-     )
- )
-
-
- ###############
- # CONSTRUCTOR #
- ###############
-
- # file          Path of the HTML file to write. The file is created empty (any previous content is erased).
- # auto.indent   If TRUE, the indentation level starts at 0 and follows the opened/closed tags.
- HtmlWriter$methods( initialize = function(file = NA_character_, auto.indent = TRUE, ...) {
-
-     .file <<- file
-     .auto.indent <<- if (auto.indent) 0 else NA_integer_
-
-     # Create empty file.
-     # Skipped when no file is given: cat() fails on an NA file name, and a
-     # reference class initializer must remain callable without arguments.
-     if (length(.self$.file) == 1 && ! is.na(.self$.file))
-         cat('', file = .self$.file, append = FALSE)
-
-     callSuper(...) # calls super-class initializer with remaining parameters
- })
-
- #########
- # WRITE #
- #########
-
- # text      Text to append to the file.
- # indent    Number of tabulations written before the text. NA means: use the automatic indentation level (0 if disabled).
- # newline   If TRUE, end the text with a newline character.
- # escape    If TRUE, replace the HTML special characters &, < and > of a character text by their entities.
- HtmlWriter$methods( write = function(text, indent = NA_integer_, newline = TRUE, escape = FALSE) {
-
-     # Compute indentation
-     if (is.na(indent))
-         indent <- if (is.na(.self$.auto.indent)) 0 else .self$.auto.indent
-
-     # Escape special characters. Only character values are touched, so that
-     # numbers keep the formatting cat() gives them. '&' is replaced first,
-     # otherwise the entities inserted afterwards would be escaped again.
-     if (escape && is.character(text)) {
-         text <- gsub('&', '&amp;', text, fixed = TRUE)
-         text <- gsub('<', '&lt;', text, fixed = TRUE)
-         text <- gsub('>', '&gt;', text, fixed = TRUE)
-     }
-
-     cat(rep("\t", indent), text, if (newline) "\n" else "", sep = '', file = .self$.file, append = TRUE)
- })
-
- #############
- # WRITE TAG #
- #############
-
- # tag       Name of the HTML element.
- # attr      Named character vector of attributes, or NA for none.
- # text      Content of the element. When NA, a self-closing element is written.
- # indent    Indentation of the opening tag (NA: automatic indentation).
- # newline   If TRUE, write a newline after the element.
- HtmlWriter$methods( writeTag = function(tag, attr = NA_character_, text = NA_character_, indent = NA_integer_, newline = TRUE) {
-
-     if (is.na(text)) {
-         # all() keeps the condition scalar when several attributes are given
-         attributes <- if (all(is.na(attr))) '' else paste0(' ', paste(vapply(names(attr), function(a) paste0(a, '="', attr[[a]], '"'), FUN.VALUE=''), collapse = ' '))
-         .self$write(paste0("<", tag, attributes, "/>"), indent = indent, newline = newline, escape = FALSE)
-     }
-     else {
-         # Opening tag, text and closing tag are written on the same line
-         .self$writeBegTag(tag, attr = attr, indent = indent, newline = FALSE)
-         .self$write(text, escape = TRUE , indent = 0, newline = FALSE)
-         .self$writeEndTag(tag, indent = 0, newline = newline)
-     }
- })
-
- ###################
- # WRITE BEGIN TAG #
- ###################
-
- # tag       Name of the HTML element to open.
- # attr      Named character vector of attributes, or NA for none.
- # indent    Indentation of the tag (NA: automatic indentation).
- # newline   If TRUE, write a newline after the tag.
- HtmlWriter$methods( writeBegTag = function(tag, attr = NA_character_, indent = NA_integer_, newline = TRUE) {
-
-     # Write opening tag
-     # all() keeps the condition scalar when several attributes are given
-     attributes <- if (all(is.na(attr))) '' else paste0(' ', paste(vapply(names(attr), function(a) paste0(a, '="', attr[[a]], '"'), FUN.VALUE=''), collapse = ' '))
-     .self$write(paste0("<", tag, attributes, ">"), indent = indent, newline = newline, escape = FALSE)
-
-     # Increment auto-indent
-     if ( ! is.na(.self$.auto.indent))
-         .auto.indent <<- .self$.auto.indent + 1
- })
-
- #################
- # WRITE END TAG #
- #################
-
- # tag       Name of the HTML element to close.
- # indent    Indentation of the tag (NA: automatic indentation).
- # newline   If TRUE, write a newline after the tag.
- HtmlWriter$methods( writeEndTag = function(tag, indent = NA_integer_, newline = TRUE) {
-
-     # The automatic level is stepped back before writing, hence the closing tag is written one level lower
-     auto.indent.enabled <- ! is.na(.self$.auto.indent)
-     if (auto.indent.enabled)
-         .auto.indent <<- .self$.auto.indent - 1
-
-     closing.tag <- paste0("</", tag, ">")
-     .self$write(closing.tag, indent = indent, newline = newline, escape = FALSE)
- })
-
- ###############
- # WRITE TABLE #
- ###############
-
- # x         Data frame or matrix to write as an HTML table. Column names, if any, make the header row.
- # indent    Indentation of the table tag (NA: automatic indentation).
- # newline   If TRUE, write a newline after each tag.
- HtmlWriter$methods( writeTable = function(x, indent = NA_integer_, newline = TRUE) {
-
-     .self$writeBegTag('table', indent = indent, newline = newline)
-
-     # Write table header
-     if ( ! is.null(colnames(x))) {
-         .self$writeBegTag('tr', indent = indent + 1, newline = newline)
-         for (field in colnames(x))
-             .self$writeTag('th', text = field, indent = indent + 2, newline = newline)
-         .self$writeEndTag('tr', indent = indent + 1, newline = newline)
-     }
-
-     # Write values
-     if (nrow(x) > 0 && ncol(x) > 0)
-         for (i in seq_len(nrow(x))) {
-             .self$writeBegTag('tr', indent = indent + 1, newline = newline)
-             for (j in seq_len(ncol(x))) {
-                 value <- x[i, j]
-                 # cat() prints a factor as its integer code: write the label instead
-                 if (is.factor(value))
-                     value <- as.character(value)
-                 .self$writeTag('td', text = value, indent = indent + 2, newline = newline)
-             }
-             .self$writeEndTag('tr', indent = indent + 1, newline = newline)
-         }
-     .self$writeEndTag('table', indent = indent, newline = newline)
- })
-
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 lcmsmatching
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lcmsmatching Fri Feb 22 16:04:22 2019 -0500

[

b'@@ -0,0 +1,866 @@\n+#!/usr/bin/env Rscript\n+# vi: ft=r fdm=marker\n+\n+args <- commandArgs(trailingOnly = F)\n+script.path <- sub("--file=","",args[grep("--file=",args)])\n+\n+library(getopt)\n+library(methods)\n+library(biodb)\n+\n+# HTML Writer {{{1\n+################################################################\n+\n+HtmlWriter <- methods::setRefClass("HtmlWriter", fields = list(.con = "ANY", .auto.indent = "numeric"))\n+\n+# Constructor {{{2\n+################################################################\n+\n+HtmlWriter$methods( initialize = function(auto.indent = TRUE, ...) {\n+\n+\t.auto.indent <<- if (auto.indent) 0 else NA_integer_\n+\t.con <<- NULL\n+\n+\tcallSuper(...) # calls super-class initializer with remaining parameters\n+})\n+\n+# Open {{{2\n+################################################################################\n+\n+HtmlWriter$methods( file.opn = function(file) {\n+\t.con <<- file(file, open = "w")\n+})\n+\n+# Close {{{2\n+################################################################################\n+\n+HtmlWriter$methods( file.close = function() {\n+\tclose(.self$.con)\n+})\n+\n+# Write {{{2\n+################################################################\n+\n+HtmlWriter$methods( write = function(text, indent = NA_integer_, newline = TRUE, escape = FALSE) {\n+\n+\t# Compute indentation\n+\tif (is.na(indent))\n+\t\tindent <- if (is.na(.self$.auto.indent)) 0 else .self$.auto.indent\n+\n+\tcat(rep("\\t", indent), text, if (newline) "\\n" else "", sep = \'\', file = .self$.con)\n+})\n+\n+# Write tag {{{2\n+################################################################\n+\n+HtmlWriter$methods( writeTag = function(tag, attr = NA_character_, text = NA_character_, indent = NA_integer_, newline = TRUE) {\n+\n+\tif (is.na(text)) {\n+\t\tattributes <- if (is.na(attr)) \'\' else paste0(\' \', paste(vapply(names(attr), function(a) paste0(a, \'="\', attr[[a]], \'"\'), FUN.VALUE=\'\'), collapse = \' \'))\n+\t\t.self$write(paste0("<", 
tag, attributes, "/>"), indent = indent, newline = newline, escape = FALSE)\n+\t}\n+\telse {\n+\t\t.self$writeBegTag(tag, attr = attr, indent = indent, newline = FALSE)\n+\t\t.self$write(text, escape = TRUE , indent = 0, newline = FALSE)\n+\t\t.self$writeEndTag(tag, indent = 0, newline = newline)\n+\t}\n+})\n+\n+# Write begin tag {{{2\n+###################################################################################\n+\n+HtmlWriter$methods( writeBegTag = function(tag, attr = NA_character_, indent = NA_integer_, newline = TRUE) {\n+\n+\t# Write opening tag\n+\tattributes <- if (is.na(attr)) \'\' else paste0(\' \', paste(vapply(names(attr), function(a) paste0(a, \'="\', attr[[a]], \'"\'), FUN.VALUE=\'\'), collapse = \' \'))\n+\t.self$write(paste0("<", tag, attributes, ">"), indent = indent, newline = newline, escape = FALSE)\n+\n+\t# Increment auto-indent\n+\tif ( ! is.na(.self$.auto.indent))\n+\t\t.auto.indent <<- .self$.auto.indent + 1\n+})\n+\n+# Write end tag {{{2\n+################################################################\n+\n+HtmlWriter$methods( writeEndTag = function(tag, indent = NA_integer_, newline = TRUE) {\n+\n+\t# Decrement auto-indent\n+\tif ( ! is.na(.self$.auto.indent))\n+\t\t.auto.indent <<- .self$.auto.indent - 1\n+\n+\t# Write closing tag\n+\t.self$write(paste0("</", tag, ">"), indent = indent, newline = newline, escape = FALSE)\n+})\n+\n+# Write table {{{2\n+################################################################\n+\n+HtmlWriter$methods( writeTable = function(x, indent = NA_integer_, newline = TRUE) {\n+\n+\t.self$writeBegTag(\'table\', indent = indent, newline = newline)\n+\n+\t# Write table header\n+\tif ( ! 
is.null(colnames(x))) {\n+\t\t.self$writeBegTag(\'tr\', indent = indent + 1, newline = newline)\n+\t\tfor (field in colnames(x))\n+\t\t\t.self$writeTag(\'th\', text = field, indent = indent + 2, newline = newline)\n+\t\t.self$writeEndTag(\'tr\', indent = indent + 1, newline = newline)\n+\t}\n+\n+\t# Write values\n+\tif (nrow(x) > 0 && ncol(x) > 0)\n+\t\tfor (i in 1:nrow(x)) {\n+\t\t\t.self$writeBegTag(\'tr\', indent = indent + 1, newline = newline)\n+\t\t\tfor (j in 1:ncol(x))\n+\t\t\t\t.self$writeTag(\'td\', text = (if (j == 1 && is.na(x[i, '..b') && ! all(is.na(chrom.cols))\n+\n+\t# Load input file\n+\tinput <- load.input.file(input.file, col.names = input.colnames)\n+\n+\t# Check input column names\n+\tinput.colnames <- check.input.colnames(input.colnames, input = input, needs.rt = rt.search)\n+\n+\t# Restrict input to essential columns\n+\tinput <- restrict.input.cols(input, col.names = input.colnames, same.cols = same.cols, keep.rt = rt.search)\n+\n+\t# Update RT search flag\n+\trt.search <- rt.search && \'rt\' %in% names(input.colnames) && input.colnames$rt %in% names(input)\n+\n+\t# Run MZ/RT matching\n+\trt.unit <- if (rt.search) (if (rt.unit == MSDB.RTUNIT.SEC) \'s\' else \'min\') else NA_character_\n+\trt.tol <- if (rt.search && ! is.null(rt.tol)) rt.tol else NA_real_\n+\trt.tol.exp <- if (rt.search && ! 
is.null(rt.tol.exp)) rt.tol.exp else NA_real_\n+\n+\t# Force type for input columns\n+\tinput[[input.colnames$mz]] <- as.numeric(input[[input.colnames$mz]])\n+\tif (rt.search)\n+\t\tinput[[input.colnames$rt]] <- as.numeric(input[[input.colnames$rt]])\n+\n+\tpeaks <- conn$searchMsPeaks(input.df = input, mz.shift = mz.shift, mz.tol = mz.tol, mz.tol.unit = mz.tol.unit, ms.mode = ms.mode, chrom.col.ids = chrom.cols, rt.unit = rt.unit, rt.tol = rt.tol, rt.tol.exp = rt.tol.exp, precursor = precursor, precursor.rt.tol = precursor.rt.tol, insert.input.values = TRUE, compute = FALSE, prefix.on.result.cols = \'lcmsmatching.\', input.df.colnames = c(mz = input.colnames$mz, rt = input.colnames$rt), match.rt = rt.search)\n+\n+\t# Build outputs\n+\tmain <- NULL\n+\tif ( ! is.null(peaks))\n+\t\tmain <- conn$collapseResultsDataFrame(results.df = peaks, sep = results.sep, mz.col = input.colnames$mz, rt.col = input.colnames$rt)\n+\n+\t# Write main output\n+\tif ( ! is.null(main.output))\n+\t\twrite.table(main, file = main.output, row.names = FALSE, sep = "\\t", quote = FALSE)\n+\n+\t# Write peaks output\n+\tif ( ! is.null(peaks.output))\n+\t\twrite.table(peaks, file = peaks.output, row.names = FALSE, sep = "\\t", quote = FALSE)\n+\n+\t# Write HTML output\n+\tif ( ! 
is.null(html.output))\n+\t\toutput.html(biodb = conn$getBiodb(), peaks = peaks, file = html.output)\n+}\n+\n+# MAIN {{{1\n+################################################################\n+\n+# Read command line arguments\n+opt <- read.args()\n+\n+# Set error function for debugging\n+if (is.null(opt$debug)) {\n+\toptions(error = function() { quit(status = 1) }, warn = 0 )\n+}\n+\n+# Create Biodb instance\n+biodb <- create.biodb.instance(quiet = opt$quiet, ms.modes = opt[[\'db-ms-modes\']])\n+\n+# Get database connector\n+conn <- get.db.conn(biodb, db.name = opt$database, url = opt$url, token = opt[[\'db-token\']], fields = opt[[\'db-fields\']], pos.prec = opt[[\'pos-prec\']], neg.prec = opt[[\'neg-prec\']])\n+\n+# Print columns\n+if ( ! is.null(opt[[\'list-cols\']])) {\n+\tprint.chrom.cols(conn, opt[[\'output-file\']])\n+\tquit(status = 0)\n+}\n+\n+# MS mode\n+ms.mode <- (if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG)\n+\n+# Set RT unit\n+rt.search <- ! is.null(opt$rtcol) || ! is.null(opt[[\'all-cols\']])\n+if ( rt.search && opt$database == \'file\' && ! conn$hasField(\'chrom.rt.unit\'))\n+\tconn$addField(\'chrom.rt.unit\', (if (opt[[\'db-rt-unit\']] == MSDB.RTUNIT.SEC) \'s\' else \'min\'))\n+\n+# Select chromatographic columns\n+chrom.cols <- get.chrom.cols(conn, check.cols = ! is.null(opt[[\'check-cols\']]), chrom.cols = opt$rtcol, all.cols = ! is.null(opt[[\'all-cols\']]))\n+\n+# Search\n+search(conn, input.file = opt[[\'input-file\']], input.colnames = opt[[\'input-col-names\']], same.cols = ! is.null(opt[[\'same-cols\']]), mz.tol = opt$mzprec, mz.tol.unit = opt$mztolunit, mz.shift = - opt$mzshift,\n+\t ms.mode = ms.mode,\n+\t chrom.cols = chrom.cols, rt.unit = opt$rtunit, rt.tol = (if (is.null(opt$rttol)) opt$rttolx else opt$rttol), rt.tol.exp = opt$rttoly,\n+\t results.sep = opt[[\'molids-sep\']], precursor = ! 
is.null(opt[[\'precursor-match\']]), precursor.rt.tol = opt[[\'precursor-rt-tol\']],\n+\t main.output = opt[[\'output-file\']], peaks.output = opt[[\'peak-output-file\']], html.output = opt[[\'html-output-file\']])\n+\n+# Terminate Biodb instance\n+biodb$terminate()\n'

diff -r fb9c0409d85c -r f86fec07f392 lcmsmatching.xml
--- a/lcmsmatching.xml Wed Apr 19 10:00:05 2017 -0400
+++ b/lcmsmatching.xml Fri Feb 22 16:04:22 2019 -0500

[

b'@@ -1,43 +1,38 @@\n-<tool id="lcmsmatching" name="LC/MS matching" version="3.3.1" profile="16.01">\n+\n+<tool id="lcmsmatching" name="LCMS matching" version="4.0.2" profile="18.05">\n \n-\t<description>Annotation of MS peaks using matching on a spectra database.</description>\n+\t<description>Annotation of LCMS peaks using matching on a in-house spectra database or on PeakForest spectra database.</description>\n \n+\t\n+\t\n \t<requirements>\n-\t\t\n-\t\t<requirement type="package" version="7.0">readline</requirement> \n-\t\t<requirement type="package" version="1.20.0">r-getopt</requirement>\n-\t\t<requirement type="package" version="1.0.0">r-stringr</requirement>\n-\t\t<requirement type="package" version="1.8.3">r-plyr</requirement>\n-\t\t<requirement type="package" version="3.98">r-xml</requirement>\n-\t\t<requirement type="package" version="1.0_6">r-bitops</requirement>\n-\t\t<requirement type="package" version="1.95">r-rcurl</requirement>\n-\t\t<requirement type="package" version="1.1">r-jsonlite</requirement>\n+\t\t<requirement type="package" version="1.2.2">r-biodb</requirement>\n+\t\t<requirement type="package" version="1.20.2">r-getopt</requirement>\n+\t\t<requirement type="package" version="0.2_15">r-codetools</requirement>  \n+\t\t\n \t</requirements>\n \n-\t<code file="list-chrom-cols.py"/>\n-\t<code file="list-file-cols.py"/>\n-\t<code file="list-ms-mode-values.py"/>\n-\n-\t\n+\t\n+\t\n \n \t<command>\n \t\t<![CDATA[\n \t\t## @@@BEGIN_CHEETAH@@@\n-\t\t$__tool_directory__/search-mz\n+\t\t$__tool_directory__/lcmsmatching\n+\n+\t\t--log-to-stdout\n \n \t\t## Input file\n \t\t-i "$mzrtinput"\n-\t\t--input-col-names "mz=$inputmzfield,rt=$inputrtfield"\n+\t\t--input-col-names "$inputfields"\n \t\t--rtunit "$inputrtunit"\n \n \t\t## Database\n \t\t#if $db.dbtype == "inhouse"\n \t\t\t-d file\n-\t\t\t--db-fields 
"mztheo=$db.dbmzreffield,chromcolrt=$db.dbchromcolrtfield,compoundid=$db.dbspectrumidfield,chromcol=$db.dbchromcolfield,msmode=$db.dbmsmodefield,peakattr=$db.dbpeakattrfield,pubchemcompid=$db.dbpubchemcompidfield,chebiid=$db.dbchebiidfield,hmdbid=$db.dbhmdbidfield,keggid=$db.dbkeggidfield"\n-\t\t\t--db-ms-modes "pos=$db.dbmsposmode,neg=$db.dbmsnegmode"\n-\t\t\t--db-rt-unit $db.dbrtunit\n+\t\t\t--db-fields "$db.dbfields"\n+\t\t\t--db-ms-modes "$db.dbmsmodes"\n+\t\t\t--db-rt-unit "$db.dbrtunit"\n \t\t#end if\n \t\t#if $db.dbtype == "peakforest"\n \t\t\t-d peakforest\n@@ -46,7 +41,7 @@\n \t\t\t--url "$db.dburl"\n \n \t\t## M/Z matching\n-\t\t-m $mzmode -p $mzprec -s $mzshift\n+\t\t-m $mzmode -p $mzprec -s $mzshift -u $mztolunit\n \n \t\t## Precursor matching\n \t\t#if $prec.match == "true"\n@@ -72,14 +67,13 @@\n \t\t## @@@END_CHEETAH@@@\n \t]]></command>\n \n-\t\n+\t\n+\t\n \n \t<inputs>\n \n-\t\t\n-\n+\t\t\n+\t\t\n \t\t<conditional name="db">\n \n \t\t\t<param name="dbtype" label="Database" type="select" refresh_on_change="true">\n@@ -87,25 +81,17 @@\n \t\t\t\t<option value="peakforest">Peakforest</option>\n \t\t\t</param>\n \n+\t\t\t\n+\t\t\t\n \t\t\t<when value="inhouse">\n \t\t\t\t\n \t\t\t\t<param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="'..b' | | match are concatenated using the provided separator |\n+| | | character. |\n +-------------+--------------------------------------+--------------------------------------------------------+\n-| Peak list | lcmsmatching_peaks_{input_file_name} | Contains all matched database peaks. |\n+| Peak list | lcmsmatching_{input_file_name}_peaks | Contains the same data as the input dataset, with |\n+| | | match result included on each row. If more than one |\n+| | | match is found for a row, then the row is duplicated. |\n+| | | Hence there is either no match for a row, or one |\n+| | | single match. 
|\n +-------------+--------------------------------------+--------------------------------------------------------+\n-| HTML output | lcmsmatching_{input_file_name}.html | Contains the two tables on one page. |\n+| HTML output | lcmsmatching_{input_file_name}.html | Contains the same table as *Peak list* but in HTML |\n+| | | format and with links to external databases if columns |\n+| | | for PubChem Compound, ChEBI, HMDB Metabolites or KEGG |\n+| | | Compounds are provided. |\n +-------------+--------------------------------------+--------------------------------------------------------+\n \n-The **main** output is identical to the input file, to which is added an *msmatching* column. This column contains a list of IDs of the compounds that have been matched for this couple of (m/z, rt) values.\n-\n-The **peak list** output contains all database peaks that have been matched, for each (m/z, rt) input couple. Thus for each (m/z, rt) couple, there will be zero, one or more matched peaks output. The columns output are *mz*, *rt*, *id*, *mztheo*, *col*, *colrt*, *attribution* and *composition*, where *id* is the compound ID, *mztheo* is the theoretical mass of the fragment, *col* is the matched column and *colrt* is the retention time measured on the column for the reference compound.\n-\n-The **HTML** output contains the peak table with links toward HMDB, KEGG, ChEBI and PubChem public databases, when IDs are available.\n+The match results are output as new columns appended to the columns provided inside the MZ/RT input dataset, and prefixed with "lcmsmatching.".\n \n =====\n About\n@@ -455,15 +484,30 @@\n .. class:: infomark\n \n **Please cite**\n-\tR Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org\n+\tR Core Team (2013). R: A language and Environment for Statistical Computing. 
http://www.r-project.org.\n+\n+==============\n+Changelog/News\n+==============\n+\n+**Version 4.0.0 - 02/01/2019**\n+\n+- NEW: Use of R biodb library. Connection to databases and matching have been moved to biodb library, which is maintained separately at http://github.com/pkrog/biodb.\n \n \n \t</help>\n \n-\t\n+\t\n+\t\n \n-\t<citations/>\n+\t<citations>\n+\t\t<citation type="bibtex">@unpublished{FGiacomoni2017,\n+\t\t\ttitle = {PeakForest [Internet], a spectral data portal for Metabolomics community - storing, curating and annotation services for metabolic profiles of biological matrix.},\n+\t\t\tauthor = {Franck Giacomoni, Nils Paulhe},\n+\t\t\tinstitution = {INRA / MetaboHUB},\n+\t\t\tyear = {2017},\n+\t\t\tnote = {Unpublished paper, available from: https://peakforest.org/.}\n+\t\t\t}</citation>\n+\t</citations>\n \n </tool>\n'

diff -r fb9c0409d85c -r f86fec07f392 list-chrom-cols.py
--- a/list-chrom-cols.py Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,62 +0,0 @@
-#!/usr/bin/env python
-# vi: fdm=marker
-
-import argparse
-import subprocess
-import re
-import urllib2
-import json
-import csv
-
-# Get chrom cols {{{1
-################################################################
-
-def get_chrom_cols(dbtype, dburl, dbtoken = None, col_field = 'chromcol'):
-
-    cols = []
-
-    if dbtype == 'peakforest':
-        url = dburl + ( '' if dburl[-1] == '/' else '/' ) + 'metadata/lc/list-code-columns'
-        if dbtoken is not None:
-            url += '?token=' + dbtoken
-        result = urllib2.urlopen(url).read()
-        v = json.JSONDecoder().decode(result)
-        i = 0
-        for colid, coldesc in v.iteritems():
-            s = coldesc['name'] + ' - ' + coldesc['constructor'] + ' - L' + str(coldesc['length']) + ' - diam. ' + str(coldesc['diameter']) + ' - part. ' + str(coldesc['particule_size']) + ' - flow ' + str(coldesc['flow_rate'])
-            cols.append( (s , colid, i == 0) )
-            ++i
-
-    elif dbtype == 'inhouse':
-
-        # Get all column names from file
-        with open(dburl if isinstance(dburl, str) else dburl.get_file_name(), 'r') as dbfile:
-            reader = csv.reader(dbfile, delimiter = "\t", quotechar='"')
-            header = reader.next()
-            if col_field in header:
-                i = header.index(col_field)
-                allcols = []
-                for row in reader:
-                    col = row[i]
-                    if col not in allcols:
-                        allcols.append(col)
-                for i, c in enumerate(allcols):
-                    cols.append( (c, c, i == 0) )
-
-    return cols
-
-# Main {{{1
-################################################################
-
-if __name__ == '__main__':
-
-    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='Script for getting chromatographic columns of an RMSDB database for Galaxy tool lcmsmatching.')
-    parser.add_argument('-d', help = 'Database type',       dest = 'dbtype',    required = True)
-    parser.add_argument('-u', help = 'Database URL',        dest = 'dburl',     required = True)
-    parser.add_argument('-t', help = 'Database token',      dest = 'dbtoken',   required = False)
-    parser.add_argument('-f', help = 'Chromatogrphic column field name',     dest = 'col_field',  required = False)
-    args = parser.parse_args()
-    args_dict = vars(args)
-
-    print(get_chrom_cols(**args_dict))

diff -r fb9c0409d85c -r f86fec07f392 list-file-cols.py
--- a/list-file-cols.py Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-# vi: fdm=marker
-
-import csv
-import re
-import argparse
-
-# Get file cols {{{1
-################################################################
-
-def get_file_cols(file, preferred):
-
- cols = []
-
- with open(file if isinstance(file, str) else file.get_file_name(), 'r') as f:
-
- # Read file header
- reader = csv.reader(f, delimiter = "\t", quotechar='"')
- header = reader.next()
-
- preferred = preferred.split(',')
-
- # Determine default value
- perfect_matches = []
- partial_matches = []
- for p in preferred:
- for c in header:
- if c == p:
- perfect_matches.append(c) # Perfect match !
- elif re.match(p, c):
- partial_matches.append(c) # Keep this partial match in case we find no perfect match
-
- ordered_cols = perfect_matches + partial_matches
- for c in header:
- if not c in ordered_cols:
- ordered_cols.append(c)
- ordered_cols.append('NA')
-
- default = 0
- if len(perfect_matches) + len(partial_matches) == 0:
- default = len(ordered_cols) - 1
-
- # Build list of cols
- for i, c in enumerate(ordered_cols):
- cols.append( (c, c, i == default) )
-
- return cols
-
-# Main {{{1
-################################################################
-
-if __name__ == '__main__':
-
-    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='Script for getting column names in a csv file.')
-    parser.add_argument('-f', help = 'CSV File (separator must be TAB)',       dest = 'file',    required = True)
-    parser.add_argument('-p', help = 'List (comma separated values) of preferred column names for default one.',        dest = 'preferred',     required = True)
-    args = parser.parse_args()
-    args_dict = vars(args)
-
-    print(get_file_cols(**args_dict))

diff -r fb9c0409d85c -r f86fec07f392 list-ms-mode-values.py
--- a/list-ms-mode-values.py Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-# vi: fdm=marker
-
-import csv
-import re
-import argparse
-
-# Get MS mode values {{{1
-################################################################
-
-def get_ms_mode_value(file, col, preferred):
-
- modes = []
- cols = []
- preferred = preferred.split(',')
-
- with open(file if isinstance(file, str) else file.get_file_name(), 'r') as f:
-
- # Read file header
- reader = csv.reader(f, delimiter = "\t", quotechar='"')
- header = reader.next()
- try:
- index = header.index(col)
- for row in reader:
- v = row[index]
- if v not in modes:
- modes.append(v)
-
- # Find default value
- default = 0
- for p in preferred:
- for i, m in enumerate(modes):
- if m == p:
- default = i
- break
- if default != 0:
- break
-
- # Build list of cols
- for i, c in enumerate(modes):
- cols.append( (c, c, i == default) )
- except:
- pass
-
- return cols
-
-# Main {{{1
-################################################################
-
-if __name__ == '__main__':
-
-    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='Script for getting column names in a csv file.')
-    parser.add_argument('-f', help = 'CSV File (separator must be TAB)',       dest = 'file',    required = True)
-    parser.add_argument('-c', help = 'MS mode column name.',        dest = 'col',     required = True)
-    parser.add_argument('-p', help = 'List (comma separated values) of preferred column names for default one.',        dest = 'preferred',     required = True)
-    args = parser.parse_args()
-    args_dict = vars(args)
-
-    print(get_ms_mode_value(**args_dict))

diff -r fb9c0409d85c -r f86fec07f392 msdb-common.R
--- a/msdb-common.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,212 +0,0 @@
-if ( ! exists('.parse_chrom_col_desc')) { # Do not load again if already loaded
-
- library('stringr')
- source('strhlp.R', chdir = TRUE)
- source('biodb-common.R', chdir = TRUE)
-
- #############
- # CONSTANTS #
- #############
-
- # Field tags
- MSDB.TAG.MZ <- 'mz'
- MSDB.TAG.MZEXP <- 'mzexp'
- MSDB.TAG.MZTHEO <- 'mztheo'
- MSDB.TAG.RT <- 'rt'
- MSDB.TAG.MODE <- 'msmode'
- MSDB.TAG.MOLID <- 'compoundid'
- MSDB.TAG.COL <- 'chromcol'
- MSDB.TAG.COLRT <- 'chromcolrt'
- MSDB.TAG.ATTR <- 'peakattr'
- MSDB.TAG.INT <- 'intensity'
- MSDB.TAG.REL <- 'relative.intensity'
- MSDB.TAG.COMP <- 'peakcomp'
- MSDB.TAG.MOLNAMES <- 'fullnames'
- # Composition and mass tags each carry their own name (they were swapped)
- MSDB.TAG.MOLCOMP <- 'compoundcomp'
- MSDB.TAG.MOLMASS <- 'compoundmass'
- MSDB.TAG.INCHI <- 'inchi'
- MSDB.TAG.INCHIKEY <- 'inchikey'
- MSDB.TAG.PUBCHEM <- 'pubchemcompid'
- MSDB.TAG.CHEBI <- 'chebiid'
- MSDB.TAG.HMDB <- 'hmdbid'
- MSDB.TAG.KEGG <- 'keggid'
-
- # Mode tags
- # The values match the 'pos' and 'neg' keys of MSDB.DFT.MODES (they were swapped)
- MSDB.TAG.POS           <- 'pos'
- MSDB.TAG.NEG           <- 'neg'
-
- # Fields containing multiple values
- MSDB.MULTIVAL.FIELDS <- c(MSDB.TAG.MOLNAMES)
- MSDB.MULTIVAL.FIELD.SEP <- ';'
-
- # Authorized mz tolerance unit values
- MSDB.MZTOLUNIT.PPM <- 'ppm'
- MSDB.MZTOLUNIT.PLAIN <- 'plain' # same as mz: mass-to-charge ratio
- MSDB.MZTOLUNIT.VALS <- c(MSDB.MZTOLUNIT.PPM, MSDB.MZTOLUNIT.PLAIN)
-
- # Authorized rt units
- MSDB.RTUNIT.SEC <- 'sec'
- MSDB.RTUNIT.MIN <- 'min'
- MSDB.RTUNIT.VALS <- c(MSDB.RTUNIT.SEC ,MSDB.RTUNIT.MIN)
-
- # Default values
- # Default precursor peak attributions, indexed by mode tag
- MSDB.DFT.PREC <- list()
- MSDB.DFT.PREC[[MSDB.TAG.POS]] <- c("[(M+H)]+", "[M+H]+", "[(M+Na)]+", "[M+Na]+", "[(M+K)]+", "[M+K]+")
- MSDB.DFT.PREC[[MSDB.TAG.NEG]] <- c("[(M-H)]-", "[M-H]-", "[(M+Cl)]-", "[M+Cl]-")
- MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP <- MSDB.MULTIVAL.FIELD.SEP
- MSDB.DFT.MATCH.FIELDS <- list( molids = 'molid', molnames = 'molnames')
- MSDB.DFT.MATCH.SEP <- ','
- MSDB.DFT.MODES <- list( pos = 'POS', neg = 'NEG')
- MSDB.DFT.MZTOLUNIT <- MSDB.MZTOLUNIT.PPM
-
- ############################
- # GET DEFAULT INPUT FIELDS #
- ############################
-
- # Return the default input fields as a list in which each tag is mapped to itself.
- msdb.get.dft.input.fields <- function () {
-
-     tags <- unique(c(MSDB.TAG.MZ, MSDB.TAG.RT))
-
-     # One element per tag, named after the tag
-     dft.fields <- as.list(tags)
-     names(dft.fields) <- tags
-
-     dft.fields
- }
-
- #########################
- # GET DEFAULT DB FIELDS #
- #########################
-
- # Return the default database fields as a list in which each tag is mapped to itself.
- msdb.get.dft.db.fields <- function () {
-
-     tags <- unique(c(MSDB.TAG.MZTHEO, MSDB.TAG.COLRT, MSDB.TAG.MOLID, MSDB.TAG.COL, MSDB.TAG.MODE, MSDB.TAG.ATTR, MSDB.TAG.COMP, MSDB.TAG.MOLNAMES, MSDB.TAG.MOLCOMP, MSDB.TAG.MOLMASS, MSDB.TAG.INCHI, MSDB.TAG.INCHIKEY, MSDB.TAG.PUBCHEM, MSDB.TAG.CHEBI, MSDB.TAG.HMDB, MSDB.TAG.KEGG))
-
-     # One element per tag, named after the tag
-     dft.fields <- as.list(tags)
-     names(dft.fields) <- tags
-
-     dft.fields
- }
-
- ##################
- # MAKE DB FIELDS #
- ##################
-
- # Complete `fields` with the default database fields whose tags it does not define.
- msdb.make.db.fields <- function(fields) {
-
-     defaults <- msdb.get.dft.db.fields()
-
-     # Mask of the default tags missing from the provided fields
-     is.absent <- ! (names(defaults) %in% names(fields))
-
-     # Appending is unconditional: the mask always has one element per default
-     # tag, and an all-FALSE mask simply appends nothing.
-     c(fields, defaults[is.absent])
- }
-
- #########################
- # MAKE INPUT DATA FRAME #
- #########################
-
- # Build an input data frame from M/Z values and, optionally, retention times.
- # mz        Numeric vector of M/Z values (may be empty).
- # rt        Numeric vector of retention times, or NULL for no RT column.
- # rtunit    Unit of rt. Values given in minutes are multiplied by 60.
- msdb.make.input.df <- function(mz, rt = NULL, rtunit = MSDB.RTUNIT.SEC) {
-
-     field <- msdb.get.dft.input.fields()
-
-     x <- data.frame()
-
-     # Set mz
-     if (length(mz) > 1)
-         x[seq_along(mz), field[[MSDB.TAG.MZ]]] <- mz
-     else if (length(mz) == 1)
-         x[1, field[[MSDB.TAG.MZ]]] <- mz
-     else
-         x[, field[[MSDB.TAG.MZ]]] <- numeric()
-
-     # Set rt
-     if ( ! is.null(rt)) {
-         # Convert the values: multiplying the unit name itself raised an error
-         if (rtunit == MSDB.RTUNIT.MIN)
-             rt <- rt * 60
-         if (length(rt) > 1)
-             x[seq_along(rt), field[[MSDB.TAG.RT]]] <- rt
-         else if (length(rt) == 1)
-             x[1, field[[MSDB.TAG.RT]]] <- rt
-         else
-             x[, field[[MSDB.TAG.RT]]] <- numeric()
-     }
-
-     return(x)
- }
-
- ###############################
- # GET EMPTY RESULT DATA FRAME #
- ###############################
-
- # Return a result data frame with no row.
- # rt    If TRUE, the retention time, column and column retention time fields are included.
- .get.empty.result.df <- function(rt = FALSE) {
-
-     # Zero-length prototype of each column, in output order
-     col.names <- c(MSDB.TAG.MOLID, MSDB.TAG.MOLNAMES, MSDB.TAG.MZ, MSDB.TAG.MZTHEO, MSDB.TAG.ATTR, MSDB.TAG.COMP)
-     col.protos <- list(character(), character(), numeric(), numeric(), character(), character())
-     if (rt) {
-         col.names <- c(col.names, MSDB.TAG.RT, MSDB.TAG.COL, MSDB.TAG.COLRT)
-         col.protos <- c(col.protos, list(numeric(), character(), numeric()))
-     }
-
-     # Add the columns one by one to an empty data frame
-     df <- data.frame(stringsAsFactors = FALSE)
-     for (k in seq_along(col.names))
-         df[col.names[k]] <- col.protos[[k]]
-
-     df
- }
-
- ############################
- # PARSE COLUMN DESCRIPTION #
- ############################
-
- # Parse the description of a chromatographic column.
- # desc      Description string of the column.
- # Returns a list with the fields type, stationary_phase, time and msdevice.
- # Fields absent from the description are set to NA. Execution stops if the
- # description does not match the expected pattern.
- .parse_chrom_col_desc <- function(desc) {
-
- # Clean string
- s <- desc
- s <- gsub('\\.+', ' ', s, perl = TRUE) # Replace '.' characters by spaces
- s <- gsub('[*-]', ' ', s, perl = TRUE) # Replace dashes and asterisks by spaces
- s <- gsub('[)(]', '', s, perl = TRUE) # Remove parentheses
- s <- trim(s)
- s <- tolower(s) # put in lowercase
-
- # The numbers below give, for each opening parenthesis of the pattern, the column index of the group in the matrix returned by str_match (column 1 is the whole match).
- # Match      2                         3 4                   5 6         7 8                                           9 10        1112         13
- pattern <- "^(uplc|hsf5|hplc|zicphilic)( (c8|c18|150 5 2 1))?( (\\d+)mn)?( (orbitrap|exactive|qtof|shimadzu exactive))?( (\\d+)mn)?( (bis|ter))?( 1)?$"
- g <- str_match(s, pattern)
- if (is.na(g[1, 1]))
- stop(paste0("Impossible to parse column description \"", desc, "\"."))
-
- # Column type, stationary phase and MS device
- type <- g[1, 2]
- stationary_phase <- if ( ! is.na(g[1, 4]) && nchar(g[1, 4]) > 0) g[1, 4] else NA_character_
- msdevice <- if ( ! is.na(g[1, 8]) && nchar(g[1, 8]) > 0) g[1, 8] else NA_character_
- # The duration may be written either before (group 6) or after (group 10) the MS device
- time <- if ( ! is.na(g[1,6]) && nchar(g[1, 6]) > 0) as.integer(g[1, 6]) else ( if ( ! is.na(g[1, 10]) && nchar(g[1, 10]) > 0) as.integer(g[1, 10]) else NA_integer_ )
-
- # Correct values
- # The cleaning step replaced '*' and '.' by spaces: restore the original notation
- if ( ! is.na(stationary_phase) && stationary_phase == '150 5 2 1') stationary_phase <- '150*5*2.1'
- if ( ! is.na(msdevice)) msdevice <- gsub(' ', '', msdevice) # remove spaces
-
- return(list( type = type, stationary_phase = stationary_phase, time = time, msdevice = msdevice))
-
- }
-
- #########################
- # NORMALIZE COLUMN NAME #
- #########################
-
- # Build a normalized column name from a column description: the parsed type,
- # stationary phase, time (suffixed with "min") and MS device, joined with
- # dashes. Parts that are NA are left out.
- .normalize_column_name <- function(desc) {
-
-     parsed <- .parse_chrom_col_desc(desc)
-
-     # An `if` without `else` yields NULL, which c() drops
-     parts <- c(
-         parsed$type,
-         if ( ! is.na(parsed$stationary_phase)) parsed$stationary_phase,
-         if ( ! is.na(parsed$time)) paste0(parsed$time, "min"),
-         if ( ! is.na(parsed$msdevice)) parsed$msdevice
-     )
-
-     paste(parts, collapse = '-')
- }
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 nethlp.R
--- a/nethlp.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,24 +0,0 @@
-if ( ! exists('extract.address')) {
-
- ###################
- # EXTRACT ADDRESS #
- ###################
-
- extract.address <- function(url) {
-
- addr <- sub('^([0-9A-Za-z.]+).*$', '\\1', url, perl = TRUE)
-
- return(addr)
- }
-
- ################
- # EXTRACT PORT #
- ################
-
- extract.port <- function(url) {
-
- port <- sub('^.*:([0-9]+)$', '\\1', url, perl = TRUE)
-
- return(as.integer(port))
- }
-}

diff -r fb9c0409d85c -r f86fec07f392 search-mz
--- a/search-mz Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,517 +0,0 @@\n-#!/usr/bin/env Rscript\n-# vi: ft=R fdm=marker\n-args <- commandArgs(trailingOnly = F)\n-script.path <- sub("--file=","",args[grep("--file=",args)])\n-library(getopt)\n-source(file.path(dirname(script.path), \'msdb-common.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'MsDbLogger.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'MsDbInputDataFrameStream.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'MsDbOutputDataFrameStream.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'htmlhlp.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'strhlp.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'fshlp.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'biodb-common.R\'), chdir = TRUE)\n-source(file.path(dirname(script.path), \'nethlp.R\'), chdir = TRUE)\n-\n-# Missing paste0() function in R 2.14.1\n-if (as.integer(R.Version()$major) == 2 && as.numeric(R.Version()$minor) < 15)\n-\tpaste0 <- function(...) 
paste(..., sep = \'\')\n-\n-# Constants {{{1\n-################################################################\n-\n-PROG <- sub(\'^.*/([^/]+)$\', \'\\\\1\', commandArgs()[4], perl = TRUE)\n-USERAGENT <- \'search-mz ; pierrick.roger@gmail.com\'\n-\n-# Authorized database types\n-MSDB.XLS <- \'xls\'\n-MSDB.4TABSQL <- \'4tabsql\'\n-MSDB.FILE <- \'file\'\n-MSDB.PEAKFOREST <- \'peakforest\'\n-MSDB.VALS <- c(MSDB.XLS, MSDB.4TABSQL, MSDB.FILE, MSDB.PEAKFOREST)\n-DB.SRC.FILE <- list ()\n-DB.SRC.FILE[[MSDB.FILE]] <- \'MsFileDb.R\'\n-DB.SRC.FILE[[MSDB.PEAKFOREST]] <- \'MsPeakForestDb.R\'\n-DB.SRC.FILE[[MSDB.XLS]] <- \'MsXlsDb.R\'\n-DB.SRC.FILE[[MSDB.4TABSQL]] <- \'Ms4TabSqlDb.R\'\n-\n-# Authorized mode values\n-POS_MODE <- \'pos\'\n-NEG_MODE <- \'neg\'\n-MSDB.MODE.VALS <- c(POS_MODE, NEG_MODE)\n-\n-# Default \n-MSDB.DFT <- list()\n-MSDB.DFT[[\'mzshift\']] <- 0 # in ppm\n-MSDB.DFT[[\'mzprec\']] <- 5 # in ppm\n-MSDB.DFT[[\'mztolunit\']] <- MSDB.DFT.MZTOLUNIT\n-MSDB.DFT[[\'precursor-rt-tol\']] <- 5\n-MSDB.DFT[[\'molids-sep\']] <- MSDB.DFT.MATCH.SEP\n-MSDB.DFT[[\'db-fields\']] <- concat.kv.list(msdb.get.dft.db.fields())\n-MSDB.DFT[[\'db-ms-modes\']] <- concat.kv.list(MSDB.DFT.MODES)\n-MSDB.DFT[[\'pos-prec\']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.POS]], collapse = \',\')\n-MSDB.DFT[[\'neg-prec\']] <- paste(MSDB.DFT.PREC[[MSDB.TAG.NEG]], collapse = \',\')\n-MSDB.DFT[[\'db-rt-unit\']] <- MSDB.RTUNIT.SEC\n-MSDB.DFT[[\'rtunit\']] <- MSDB.RTUNIT.SEC\n-DEFAULT.ARG.VALUES <- MSDB.DFT\n-DEFAULT.ARG.VALUES[[\'input-col-names\']] <- concat.kv.list(msdb.get.dft.input.fields())\n-\n-# Print help {{{1\n-################################################################\n-\n-print.help <- function() {\n-\n-\tcat("USAGE:\\n")\n-\tprog.mz.match <- paste(PROG, \' -d (\', paste(MSDB.VALS, collapse = \'|\'), \') --url (file|dir|database URL) -i <file> -m (\', paste(MSDB.MODE.VALS, collapse = \'|\'), ") -p <mz precision> -s <mz shift> -u (", paste(MSDB.MZTOLUNIT.VALS, collapse = \'|\'), ") -o <file>", 
sep = \'\')\n-\tcat("\\t(1) ", prog.mz.match, " ...\\n", sep = \'\')\n-\tcat("\\n")\n-\tcat("\\t(2) ", prog.mz.match, "(--all-cols|-c <cols>) -x <X RT tolerance> -y <Y RT tolerance>", " ...\\n", sep = \'\')\n-\tcat("\\n")\n-\tcat("\\t(3) ", PROG, \' -d (\', paste(MSDB.VALS, collapse = \'|\'), ") --url (file|dir|database URL) --list-cols\\n", sep = \'\')\n-\n-\tcat("\\nDETAILS:\\n")\n-\tcat("Form (1) is for running an MZ match on a database.\\n")\n-\tcat("Form (2) is for running an MZ/RT match on a database.\\n")\n-\tcat("Form (3) is for getting a list of available chromatographic columns in a database.\\n")\n-\n-\tcat("\\nOPTIONS:\\n")\n-\tspec <- matrix(make.getopt.spec(), byrow = TRUE, ncol = 5)\n-\tmax.length.opt.cols <- max(nchar(spec[,1])) + 1\n-\tsections <- list(database = "Database setting", input = "Input file", output = "Output files", mz = "M/Z matching", rt = "RT matching", precursor = "Precursor matching", misc = "Miscellaneous")\n-\tfor (section in names(sections)) {\n-\t\tcat("\\n\\t", sections[[section]], ":\\n", sep = \'\')\n-\t\tspec <- matrix(make.getopt.spec(section), byrow = TRUE, ncol = 5)\n-\t\tfor (i in seq(nrow(spec))) {\n-\t\t\topt <- \'\'\n-\t\t\tif ( ! is.na(spec[i,2]))'..b'ified by a number into a name\n-\tfor (field in names(opt[[\'input-col-names\']])) {\n-\t\tif ( ! opt[[\'input-col-names\']][[field]] %in% colnames(input) && length(grep(\'^[0-9]+$\', opt[[\'input-col-names\']][[field]])) > 0) {\n-\t\t\tcol.index <- as.integer(opt[[\'input-col-names\']][[field]])\n-\t\t\tif (col.index < 1 || col.index > length(colnames(input)))\n-\t\t\t\tstop(paste0("No column n\xc2\xb0", col.index, " for input field ", field, "."))\n-\t\t\topt[[\'input-col-names\']][[field]] <- colnames(input)[[col.index]]\n-\t\t}\n-\t}\n-} else {\n-\tinput <- data.frame()\n-\tinput[[opt[[\'input-col-names\']][[\'mz\']]]] <- double()\n-\tinput[[opt[[\'input-col-names\']][[\'rt\']]]] <- double()\n-}\n-\n-# Check mz column\n-if ( ! 
opt[[\'input-col-names\']][[\'mz\']] %in% colnames(input))\n-\tstop(paste0(\'No column named "\', opt[[\'input-col-names\']][[\'mz\']], \'" in input file.\'))\n-\n-# Set columns \'all-cols\' specified\n-if ( ! is.null(opt[[\'all-cols\']]))\n-\topt$rtcol <- db$getChromCol()[[\'id\']]\n-\n-# Check chrom columns\n-if ( ! is.null(opt[[\'check-cols\']]) && ! is.null(opt$rtcol)) {\n-\tdbcols <- db$getChromCol()[[\'id\']]\n-\tunknown.cols <- opt$rtcol[ ! opt$rtcol %in% dbcols]\n-\tif (length(unknown.cols) > 0) {\n-\t\tstop(paste0("Unknown chromatographic column", (if (length(unknown.cols) > 1) \'s\' else \'\'), \': \', paste(unknown.cols, collapse = \', \'), ".\\nAllowed chromatographic column names are:\\n", paste(dbcols, collapse = "\\n")))\n-\t}\n-}\n-\n-# Check that an RT column exists when using MZ/RT matching\n-if ( ! is.null(opt$rtcol) && ! opt[[\'input-col-names\']][[\'rt\']] %in% colnames(input))\n-\tstop(paste0("You are running an MZ/RT match run on your input data, but no retention time column named \'", opt[[\'input-col-names\']][[\'rt\']],"\' can be found inside your input file."))\n-\n-# Set output col names\n-output.col.names <- opt[[\'input-col-names\']]\n-\n-# Set streams\n-input.stream <- MsDbInputDataFrameStream$new(df = input, input.fields = opt[[\'input-col-names\']], rtunit = opt[[\'rtunit\']])\n-main.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[[\'same-cols\']]), output.fields = output.col.names, one.line = ! is.null(opt[[\'same-rows\']]), match.sep = opt[[\'molids-sep\']], first.val = ! is.null(opt[[\'first-val\']]), ascii = ! is.null(opt[[\'excel2011comp\']]), nogreek = ! is.null(opt[[\'excel2011comp\']]), noapostrophe = ! is.null(opt[[\'excel2011comp\']]), noplusminus = ! is.null(opt[[\'excel2011comp\']]), rtunit = opt[[\'rtunit\']])\n-peaks.output <- MsDbOutputDataFrameStream$new(keep.unused = ! is.null(opt[[\'same-cols\']]), output.fields = output.col.names, first.val = ! is.null(opt[[\'first-val\']]), ascii = ! 
is.null(opt[[\'excel2011comp\']]), nogreek = ! is.null(opt[[\'excel2011comp\']]), noapostrophe = ! is.null(opt[[\'excel2011comp\']]), noplusminus = ! is.null(opt[[\'excel2011comp\']]), rtunit = opt[[\'rtunit\']])\n-invisible(db$setInputStream(input.stream))\n-db$addOutputStreams(c(main.output, peaks.output))\n-\n-# Set database units\n-db$setMzTolUnit(opt$mztolunit)\n-if ( ! is.null(opt[[\'db-rt-unit\']]) && opt$database == \'file\')\n-\tdb$setRtUnit(opt[[\'db-rt-unit\']])\n-\n-# Search database\n-mode <- if (opt$mode == POS_MODE) MSDB.TAG.POS else MSDB.TAG.NEG\n-db$searchForMzRtList(mode = mode, shift = opt$mzshift, prec = opt$mzprec, rt.tol = opt$rttol, rt.tol.x = opt$rttolx, rt.tol.y = opt$rttoly, col = opt$rtcol, precursor.match = ! is.null(opt[[\'precursor-match\']]), precursor.rt.tol = opt[[\'precursor-rt-tol\']])\n-\n-# Write output\n-main.output$moveColumnsToBeginning(colnames(input))\n-peaks.output$moveColumnsToBeginning(colnames(input))\n-# TODO Create a class MsDbOutputCsvFileStream\n-df.write.tsv(main.output$getDataFrame(), file = opt[[\'output-file\']], row.names = FALSE)\n-if ( ! is.null(opt[[\'peak-output-file\']]))\n-\t# TODO Create a class MsDbOutputCsvFileStream\n-\tdf.write.tsv(peaks.output$getDataFrame(), file = opt[[\'peak-output-file\']], row.names = FALSE)\n-if ( ! is.null(opt[[\'html-output-file\']]))\n-\t# TODO Create a class MsDbOutputHtmlFileStream\n-\toutput.html(db = db, peaks = peaks.output$getDataFrame(), file = opt[[\'html-output-file\']])\n'

diff -r fb9c0409d85c -r f86fec07f392 search.R
--- a/search.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,64 +0,0 @@
-if ( ! exists('binary.search')) { # Do not load again if already loaded
-
- # Run a binary search on a sorted array.
- # val       The value to search.
- # tab       The array of values, sorted in ascending order.
- # lower     If set to NA, then search for the first value found by the binary search. If set to TRUE, find the value with the lowest index in the array. If set to FALSE, find the value with the highest index in the array.
- # first     The index of the array from which to start (1 by default).
- # last      The index of the array where to stop searching (end of the array by default).
- # Returns the index of the found value, or NA.
- binary.search <- function(val, tab, lower = NA, first = 1L, last = length(tab))
- {
- # Check array & value
- if (is.null(tab))
- stop('Argument "tab" is NULL.')
- if (is.null(val))
- stop('Argument "val" is NULL.')
-
- # Wrong arguments
- if (is.na(val) || last < first || length(tab) == 0)
- return(NA_integer_)
-
- # Find value
- l <- first
- h <- last
- while (h >= l) {
-
- # Take middle point
- m <- (h + l) %/% 2
- # Found value
- if (tab[m] == val) {
- if (is.na(lower))
- return(m)
- if (lower && m > first) {
- for (i in (m-1):first)
- if (tab[i] != val)
- return(i+1)
- }
- else if ( ! lower && m < last)
- for (i in (m+1):last)
- if (tab[i] != val)
- return(i-1)
- return(m)
- }
-
- # Decrease higher bound
- else if (tab[m] > val) h <- m - 1
-
- # Increase lower bound
- else l <- m + 1
- }
-
- # Value not found
- if ( ! is.na(lower)) {
- # Look for lower or higher bound
- if (lower)
- return(if (h < first) NA_integer_ else h)
- else
- return(if (l > last) NA_integer_ else l)
- }
-
- return(NA_integer_)
- }
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 strhlp.R
--- a/strhlp.R Wed Apr 19 10:00:05 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,72 +0,0 @@
-if ( ! exists('trim')) { # Do not load again if already loaded
-
- #######################
- # WHITESPACE TRIMMING #
- #######################
-
- # Trim leading whitespaces
- trim.leading <- function (x)  sub("^\\s+", "", x)
-
- # Trim trailing whitespaces
- trim.trailing <- function (x) sub("\\s+$", "", x)
-
- # Trim leading and trailing whitespaces
- trim <- function (x) gsub("^\\s+|\\s+$", "", x)
-
- #############
- # SPLITTING #
- #############
-
- # s         The string to split.
- # sep       The separator on which to split.
- # trim      Trim whitespaces for the resulting elements.
- # unlist    Unlist the result, So that for a single string (i.e.: s has length 1), it returns a vector of strings instead of a list of vectors of strings.
- # RETURN    A list of strings.
- split.str <- function(s, sep = ',', trim = TRUE, unlist = FALSE) {
- v <- strsplit(s, sep)
- if (trim) v <- lapply(v, trim)
- if (unlist) v <- unlist(v)
- return(v)
- }
-
- ########################
- # SPLIT KEY/VALUE LIST #
- ########################
-
- split.kv.list <- function(s, sep = ',', kvsep = '=') {
-
- # Split
- kvs <- strsplit(strsplit(s, sep)[[1]], kvsep)
-
- # Get keys
- k <- vapply(kvs, function(x) x[[1]], FUN.VALUE = '')
- v <- vapply(kvs, function(x) x[[2]], FUN.VALUE = '')
-
- # Set names
- names(v) <- k
-
- return(v)
- }
-
- #########################
- # CONCAT KEY/VALUE LIST #
- #########################
-
- concat.kv.list <- function(x, sep = ',', kvsep = '=') {
-
- k <- names(x)
-
- s = paste(paste(names(x), x, sep = kvsep), collapse = sep)
-
- return(s)
- }
-
- #################
- # REMOVE QUOTES #
- #################
-
- remove.quotes <- function(s) {
- return(sub('^["\']?([^\'"]*)["\']?$', '\\1', s, perl = TRUE))
- }
-
-} # end of load safe guard

diff -r fb9c0409d85c -r f86fec07f392 test-data/filedb.tsv
--- a/test-data/filedb.tsv Wed Apr 19 10:00:05 2017 -0400
+++ b/test-data/filedb.tsv Fri Feb 22 16:04:22 2019 -0500

[

@@ -19,7 +19,7 @@
A10 "POS" 84.080775 "P9Z5W410 O0" "[(M+H)-(NH3)-(HCOOH)]+" "colzz" 5.69 "J114L6M62O2" 146.10553 "Blablaine"
A10 "POS" 84.080775 "P9Z5W410 O0" "[(M+H)-(NH3)-(HCOOH)]+" "colzz3" 4.54 "J114L6M62O2" 146.10553 "Blablaine"
A10 "POS" 84.080775 "P9Z5W410 O0" "[(M+H)-(NH3)-(HCOOH)]+" "colpp" 0.89 "J114L6M62O2" 146.10553 "Blablaine"
-A10 "POS" 148.116159 "U513P92ZW415 O2" "[(M+H)]+ (13C)" "hcoltt" 0.8 "J114L6M62O2" 146.10553 "Blablaine"
+A10 "POS" 148.116159 "U513P92ZW415 O2" "[(M+H)]+ (13C)" "coltt" 0.8 "J114L6M62O2" 146.10553 "Blablaine"
A10 "POS" 148.116159 "U513P92ZW415 O2" "[(M+H)]+ (13C)" "colzz" 5.69 "J114L6M62O2" 146.10553 "Blablaine"
A10 "POS" 145.097154 "P92Z6W413 O2" "[(M+H)-(H2)]+" "somecol" 8.97 "J114L6M62O2" 146.10553 "Blablaine"
A10 "POS" 148.116159 "U513P92ZW415 O2" "[(M+H)]+ (13C)" "colpp" 0.89 "J114L6M62O2" 146.10553 "Blablaine"

diff -r fb9c0409d85c -r f86fec07f392 test-data/mz-input-small_with_nas.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mz-input-small_with_nas.tsv Fri Feb 22 16:04:22 2019 -0500

@@ -0,0 +1,31 @@
+mz
+80.04959021
+82.04819461
+83.01343941
+84.05585475
+87.05536392
+89.50682004
+90.97680734
+NA
+94.57331384
+97.07602789
+99.5429594
+101.0708987
+102.066292
+NA
+104.0034256
+104.5317528
+105.4460999
+105.7271343
+106.0231437
+106.2399954
+106.5116177
+106.7629705
+NA
+107.2424051
+107.4569385
+107.6884734
+107.9272908
+108.1575604
+109.0777249
+NA

diff -r fb9c0409d85c -r f86fec07f392 test-data/mzrt-input-small.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mzrt-input-small.tsv Fri Feb 22 16:04:22 2019 -0500

@@ -0,0 +1,32 @@
+mz rt
+80.04959021 339.9725632
+82.04819461 1593.540123
+83.01343941 654.9535891
+84.05585475 4.748268943
+87.05536392 3.480291112
+89.50682004 39.62335341
+90.97680734 1598.991244
+92.98092987 46.13716368
+94.57331384 44.37587921
+97.07602789 655.2993307
+99.5429594 42.19533608
+101.0708987 733.3084926
+102.066292 52.02654598
+102.2845376 1601.345355
+104.0034256 48.82052248
+104.5317528 1602.886534
+105.4460999 1611.919675
+105.7271343 1611.835039
+106.0231437 64.49318885
+106.2399954 1612.325904
+106.5116177 1612.17329
+106.7629705 1611.850322
+106.9814579 1611.648399
+107.2424051 1611.574767
+107.4569385 1611.778713
+107.6884734 1611.621904
+107.9272908 1611.145653
+108.1575604 1611.664677
+109.0777249 3.299196943
+110.0599023 3.456417112
+147.112804 48

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_1_main_output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_1_main_output.tsv Fri Feb 22 16:04:22 2019 -0500

[

@@ -0,0 +1,31 @@
+mz lcmsmatching.accession lcmsmatching.chrom.col.id lcmsmatching.chrom.col.name lcmsmatching.chrom.rt lcmsmatching.compound.id lcmsmatching.formula lcmsmatching.mass.csv.file.id lcmsmatching.molecular.mass lcmsmatching.ms.level lcmsmatching.ms.mode lcmsmatching.msprecmz lcmsmatching.name lcmsmatching.peak.attr lcmsmatching.peak.comp lcmsmatching.peak.mz lcmsmatching.peak.mztheo
+80.04959021 U761.pos.col12.1.32|U761.pos.colpp.0.95|U761.pos.colzz2.4.24|U761.pos.colzz3.4.3|U761.pos.hcoltt.2.5 col12|colpp|colzz2|colzz3|hcoltt col12|colpp|colzz2|colzz3|hcoltt 1.32|0.95|4.24|4.3|2.5 U761 J16L6M62O U761.pos.col12.1.32|U761.pos.colpp.0.95|U761.pos.colzz2.4.24|U761.pos.colzz3.4.3|U761.pos.hcoltt.2.5 122.04801 1 pos 123.055289 Coquelicol;Paquerettol [(M+H)-(NHCO)]+ P9Z5W46 O0 80.049475 80.049475
+82.04819461 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+83.01343941 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+84.05585475 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+87.05536392 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+89.50682004 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+90.97680734 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+92.98092987 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+94.57331384 A10.pos.col12.0.8|A10.pos.colAA.1.58|A10.pos.somecol.8.97 col12|colAA|somecol col12|colAA|somecol 0.8|1.58|8.97 A10 J114L6M62O2 A10.pos.col12.0.8|A10.pos.colAA.1.58|A10.pos.somecol.8.97 146.10553 1 pos NA Blablaine|Blablaine';Blablaine|Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+97.07602789 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+99.5429594 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+101.0708987 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.066292 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.2845376 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.0034256 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.5317528 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.4460999 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.7271343 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.0231437 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.2399954 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.5116177 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.7629705 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.9814579 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.2424051 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.4569385 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.6884734 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.9272908 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+108.1575604 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+109.0777249 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+110.0599023 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_1_peaks_output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_1_peaks_output.html Fri Feb 22 16:04:22 2019 -0500

b'@@ -0,0 +1,723 @@\n+<html>\n+\t<header>\n+\t\t<meta charset="UTF-8"/>\n+\t\t<title>LC/MS matching results</title>\n+\t\t<style>\n+\t\t\ttable, th, td { border-collapse: collapse; }\n+\t\t\ttable, th { border: 1px solid black; }\n+\t\t\ttd { border-left: 1px solid black; border-right: 1px solid black; }\n+\t\t\tth, td { padding: 5px; }\n+\t\t\tth { background-color: LightBlue; }\n+\t\t\ttr:nth-child(even) { background-color: LemonChiffon; }\n+\t\t\ttr:nth-child(odd) { background-color: LightGreen; }\n+\t\t</style>\n+\t</header>\n+\t<body>\n+\t\t<h3>Matched peaks</h3>\n+\t\t<table>\n+\t\t\t<tr>\n+\t\t\t\t<th>mz</th>\n+\t\t\t\t<th>lcmsmatching.accession</th>\n+\t\t\t\t<th>lcmsmatching.chrom.col.id</th>\n+\t\t\t\t<th>lcmsmatching.chrom.col.name</th>\n+\t\t\t\t<th>lcmsmatching.chrom.rt</th>\n+\t\t\t\t<th>lcmsmatching.compound.id</th>\n+\t\t\t\t<th>lcmsmatching.formula</th>\n+\t\t\t\t<th>lcmsmatching.mass.csv.file.id</th>\n+\t\t\t\t<th>lcmsmatching.molecular.mass</th>\n+\t\t\t\t<th>lcmsmatching.ms.level</th>\n+\t\t\t\t<th>lcmsmatching.ms.mode</th>\n+\t\t\t\t<th>lcmsmatching.msprecmz</th>\n+\t\t\t\t<th>lcmsmatching.name</th>\n+\t\t\t\t<th>lcmsmatching.peak.attr</th>\n+\t\t\t\t<th>lcmsmatching.peak.comp</th>\n+\t\t\t\t<th>lcmsmatching.peak.mz</th>\n+\t\t\t\t<th>lcmsmatching.peak.mztheo</th>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>U761.pos.col12.1.32</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>1.32</td>\n+\t\t\t\t<td>U761</td>\n+\t\t\t\t<td>J16L6M62O</td>\n+\t\t\t\t<td>U761.pos.col12.1.32</td>\n+\t\t\t\t<td>122.048</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>123.0553</td>\n+\t\t\t\t<td>Coquelicol;Paquerettol</td>\n+\t\t\t\t<td>4</td>\n+\t\t\t\t<td>P9Z5W46 
O0</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>U761.pos.colpp.0.95</td>\n+\t\t\t\t<td>colpp</td>\n+\t\t\t\t<td>colpp</td>\n+\t\t\t\t<td>0.95</td>\n+\t\t\t\t<td>U761</td>\n+\t\t\t\t<td>J16L6M62O</td>\n+\t\t\t\t<td>U761.pos.colpp.0.95</td>\n+\t\t\t\t<td>122.048</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>123.0553</td>\n+\t\t\t\t<td>Coquelicol;Paquerettol</td>\n+\t\t\t\t<td>4</td>\n+\t\t\t\t<td>P9Z5W46 O0</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>U761.pos.colzz2.4.24</td>\n+\t\t\t\t<td>colzz2</td>\n+\t\t\t\t<td>colzz2</td>\n+\t\t\t\t<td>4.24</td>\n+\t\t\t\t<td>U761</td>\n+\t\t\t\t<td>J16L6M62O</td>\n+\t\t\t\t<td>U761.pos.colzz2.4.24</td>\n+\t\t\t\t<td>122.048</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>123.0553</td>\n+\t\t\t\t<td>Coquelicol;Paquerettol</td>\n+\t\t\t\t<td>4</td>\n+\t\t\t\t<td>P9Z5W46 O0</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>U761.pos.colzz3.4.3</td>\n+\t\t\t\t<td>colzz3</td>\n+\t\t\t\t<td>colzz3</td>\n+\t\t\t\t<td>4.3</td>\n+\t\t\t\t<td>U761</td>\n+\t\t\t\t<td>J16L6M62O</td>\n+\t\t\t\t<td>U761.pos.colzz3.4.3</td>\n+\t\t\t\t<td>122.048</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>123.0553</td>\n+\t\t\t\t<td>Coquelicol;Paquerettol</td>\n+\t\t\t\t<td>4</td>\n+\t\t\t\t<td>P9Z5W46 
O0</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>U761.pos.hcoltt.2.5</td>\n+\t\t\t\t<td>hcoltt</td>\n+\t\t\t\t<td>hcoltt</td>\n+\t\t\t\t<td>2.5</td>\n+\t\t\t\t<td>U761</td>\n+\t\t\t\t<td>J16L6M62O</td>\n+\t\t\t\t<td>U761.pos.hcoltt.2.5</td>\n+\t\t\t\t<td>122.048</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>123.0553</td>\n+\t\t\t\t<td>Coquelicol;Paquerettol</td>\n+\t\t\t\t<td>4</td>\n+\t\t\t\t<td>P9Z5W46 O0</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t\t<td>80.04948</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>82.04819</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>83.01344</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>84.05585</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>87.05536</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t'..b'd/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>102.0663</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td
/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>102.2845</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>104.0034</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>104.5318</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>105.4461</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>105.7271</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.0231</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.24</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/
>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.5116</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.763</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.9815</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.2424</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.4569</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.6885</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\
t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.9273</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>108.1576</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>109.0777</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>110.0599</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t</table>\n+\t</body>\n+</html>\n'

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_1_peaks_output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_1_peaks_output.tsv Fri Feb 22 16:04:22 2019 -0500

[

@@ -0,0 +1,37 @@
+mz lcmsmatching.accession lcmsmatching.chrom.col.id lcmsmatching.chrom.col.name lcmsmatching.chrom.rt lcmsmatching.compound.id lcmsmatching.formula lcmsmatching.mass.csv.file.id lcmsmatching.molecular.mass lcmsmatching.ms.level lcmsmatching.ms.mode lcmsmatching.msprecmz lcmsmatching.name lcmsmatching.peak.attr lcmsmatching.peak.comp lcmsmatching.peak.mz lcmsmatching.peak.mztheo
+80.04959021 U761.pos.col12.1.32 col12 col12 1.32 U761 J16L6M62O U761.pos.col12.1.32 122.04801 1 pos 123.055289 Coquelicol;Paquerettol [(M+H)-(NHCO)]+ P9Z5W46 O0 80.049475 80.049475
+80.04959021 U761.pos.colpp.0.95 colpp colpp 0.95 U761 J16L6M62O U761.pos.colpp.0.95 122.04801 1 pos 123.055289 Coquelicol;Paquerettol [(M+H)-(NHCO)]+ P9Z5W46 O0 80.049475 80.049475
+80.04959021 U761.pos.colzz2.4.24 colzz2 colzz2 4.24 U761 J16L6M62O U761.pos.colzz2.4.24 122.04801 1 pos 123.055289 Coquelicol;Paquerettol [(M+H)-(NHCO)]+ P9Z5W46 O0 80.049475 80.049475
+80.04959021 U761.pos.colzz3.4.3 colzz3 colzz3 4.3 U761 J16L6M62O U761.pos.colzz3.4.3 122.04801 1 pos 123.055289 Coquelicol;Paquerettol [(M+H)-(NHCO)]+ P9Z5W46 O0 80.049475 80.049475
+80.04959021 U761.pos.hcoltt.2.5 hcoltt hcoltt 2.5 U761 J16L6M62O U761.pos.hcoltt.2.5 122.04801 1 pos 123.055289 Coquelicol;Paquerettol [(M+H)-(NHCO)]+ P9Z5W46 O0 80.049475 80.049475
+82.04819461 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+83.01343941 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+84.05585475 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+87.05536392 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+89.50682004 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+90.97680734 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+92.98092987 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+94.57331384 A10.pos.col12.0.8 col12 col12 0.8 A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos NA Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+94.57331384 A10.pos.colAA.1.58 colAA colAA 1.58 A10 J114L6M62O2 A10.pos.colAA.1.58 146.10553 1 pos NA Blablaine';Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+94.57331384 A10.pos.somecol.8.97 somecol somecol 8.97 A10 J114L6M62O2 A10.pos.somecol.8.97 146.10553 1 pos NA Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+97.07602789 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+99.5429594 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+101.0708987 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.066292 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.2845376 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.0034256 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.5317528 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.4460999 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.7271343 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.0231437 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.2399954 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.5116177 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.7629705 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.9814579 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.2424051 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.4569385 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.6884734 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.9272908 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+108.1575604 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+109.0777249 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+110.0599023 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_2_main_output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2_main_output.tsv Fri Feb 22 16:04:22 2019 -0500

[

@@ -0,0 +1,32 @@
+mz rt lcmsmatching.accession lcmsmatching.chrom.col.id lcmsmatching.chrom.col.name lcmsmatching.chrom.rt lcmsmatching.chrom.rt.unit lcmsmatching.compound.id lcmsmatching.formula lcmsmatching.mass.csv.file.id lcmsmatching.molecular.mass lcmsmatching.ms.level lcmsmatching.ms.mode lcmsmatching.name lcmsmatching.peak.attr lcmsmatching.peak.comp lcmsmatching.peak.mz lcmsmatching.peak.mztheo
+80.04959021 339.9725632 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+82.04819461 1593.540123 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+83.01343941 654.9535891 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+84.05585475 4.748268943 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+87.05536392 3.480291112 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+89.50682004 39.62335341 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+90.97680734 1598.991244 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+92.98092987 46.13716368 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+94.57331384 44.37587921 A10.pos.col12.0.8 col12 col12 0.8 min A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+97.07602789 655.2993307 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+99.5429594 42.19533608 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+101.0708987 733.3084926 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.066292 52.02654598 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.2845376 1601.345355 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.0034256 48.82052248 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.5317528 1602.886534 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.4460999 1611.919675 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.7271343 1611.835039 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.0231437 64.49318885 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.2399954 1612.325904 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.5116177 1612.17329 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.7629705 1611.850322 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.9814579 1611.648399 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.2424051 1611.574767 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.4569385 1611.778713 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.6884734 1611.621904 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.9272908 1611.145653 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+108.1575604 1611.664677 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+109.0777249 3.299196943 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+110.0599023 3.456417112 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+147.112804 48 A10.pos.col12.0.8 col12 col12 0.8 min A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos Blablaine [(M+H)]+ P92Z6W415 O2 147.112804 147.112804

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_2_peaks_output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2_peaks_output.html Fri Feb 22 16:04:22 2019 -0500

b'@@ -0,0 +1,660 @@\n+<html>\n+\t<header>\n+\t\t<meta charset="UTF-8"/>\n+\t\t<title>LC/MS matching results</title>\n+\t\t<style>\n+\t\t\ttable, th, td { border-collapse: collapse; }\n+\t\t\ttable, th { border: 1px solid black; }\n+\t\t\ttd { border-left: 1px solid black; border-right: 1px solid black; }\n+\t\t\tth, td { padding: 5px; }\n+\t\t\tth { background-color: LightBlue; }\n+\t\t\ttr:nth-child(even) { background-color: LemonChiffon; }\n+\t\t\ttr:nth-child(odd) { background-color: LightGreen; }\n+\t\t</style>\n+\t</header>\n+\t<body>\n+\t\t<h3>Matched peaks</h3>\n+\t\t<table>\n+\t\t\t<tr>\n+\t\t\t\t<th>mz</th>\n+\t\t\t\t<th>rt</th>\n+\t\t\t\t<th>lcmsmatching.accession</th>\n+\t\t\t\t<th>lcmsmatching.chrom.col.id</th>\n+\t\t\t\t<th>lcmsmatching.chrom.col.name</th>\n+\t\t\t\t<th>lcmsmatching.chrom.rt</th>\n+\t\t\t\t<th>lcmsmatching.chrom.rt.unit</th>\n+\t\t\t\t<th>lcmsmatching.compound.id</th>\n+\t\t\t\t<th>lcmsmatching.formula</th>\n+\t\t\t\t<th>lcmsmatching.mass.csv.file.id</th>\n+\t\t\t\t<th>lcmsmatching.molecular.mass</th>\n+\t\t\t\t<th>lcmsmatching.ms.level</th>\n+\t\t\t\t<th>lcmsmatching.ms.mode</th>\n+\t\t\t\t<th>lcmsmatching.name</th>\n+\t\t\t\t<th>lcmsmatching.peak.attr</th>\n+\t\t\t\t<th>lcmsmatching.peak.comp</th>\n+\t\t\t\t<th>lcmsmatching.peak.mz</th>\n+\t\t\t\t<th>lcmsmatching.peak.mztheo</th>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>339.9726</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>82.04819</td>\n+\t\t\t\t<td>1593.54</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t
\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>83.01344</td>\n+\t\t\t\t<td>654.9536</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>84.05585</td>\n+\t\t\t\t<td>4.748269</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>87.05536</td>\n+\t\t\t\t<td>3.480291</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>89.50682</td>\n+\t\t\t\t<td>39.62335</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>90.97681</td>\n+\t\t\t\t<td>1598.991</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>92.98093</td>\n+\t\t\t\t<td>46.13716</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t
\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>94.57331</td>\n+\t\t\t\t<td>44.37588</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>0.8</td>\n+\t\t\t\t<td>min</td>\n+\t\t\t\t<td>A10</td>\n+\t\t\t\t<td>J114L6M62O2</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>146.1055</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>Blablaine</td>\n+\t\t\t\t<td>19</td>\n+\t\t\t\t<td>P93Z8W419 O2</td>\n+\t\t\t\t<td>94.57331</td>\n+\t\t\t\t<td>94.57331</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>97.07603</td>\n+\t\t\t\t<td>655.2993</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>99.54296</td>\n+\t\t\t\t<td>42.19534</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td'..b'\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>105.4461</td>\n+\t\t\t\t<td>1611.92</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>105.7271</td>\n+\t\t\t\t<td>1611.835</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\
t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.0231</td>\n+\t\t\t\t<td>64.49319</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.24</td>\n+\t\t\t\t<td>1612.326</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.5116</td>\n+\t\t\t\t<td>1612.173</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.763</td>\n+\t\t\t\t<td>1611.85</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.9815</td>\n+\t\t\t\t<td>1611.648</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.2424</td>\n+\t\t\t\t<td>1611.575</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\
t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.4569</td>\n+\t\t\t\t<td>1611.779</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.6885</td>\n+\t\t\t\t<td>1611.622</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.9273</td>\n+\t\t\t\t<td>1611.146</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>108.1576</td>\n+\t\t\t\t<td>1611.665</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>109.0777</td>\n+\t\t\t\t<td>3.299197</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>110.0599</td>\n+\t\t\t\t<td>3.456417</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\
t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t\t<td>48</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>0.8</td>\n+\t\t\t\t<td>min</td>\n+\t\t\t\t<td>A10</td>\n+\t\t\t\t<td>J114L6M62O2</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>146.1055</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>Blablaine</td>\n+\t\t\t\t<td>34</td>\n+\t\t\t\t<td>P92Z6W415 O2</td>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t</tr>\n+\t\t</table>\n+\t</body>\n+</html>\n'

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_2_peaks_output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_2_peaks_output.tsv Fri Feb 22 16:04:22 2019 -0500

[

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_3_main_output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_3_main_output.tsv Fri Feb 22 16:04:22 2019 -0500

[

@@ -0,0 +1,32 @@
+mz rt lcmsmatching.accession lcmsmatching.chrom.col.id lcmsmatching.chrom.col.name lcmsmatching.chrom.rt lcmsmatching.chrom.rt.unit lcmsmatching.compound.id lcmsmatching.formula lcmsmatching.mass.csv.file.id lcmsmatching.molecular.mass lcmsmatching.ms.level lcmsmatching.ms.mode lcmsmatching.msprecmz lcmsmatching.name lcmsmatching.peak.attr lcmsmatching.peak.comp lcmsmatching.peak.mz lcmsmatching.peak.mztheo
+80.04959021 339.9725632 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+82.04819461 1593.540123 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+83.01343941 654.9535891 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+84.05585475 4.748268943 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+87.05536392 3.480291112 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+89.50682004 39.62335341 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+90.97680734 1598.991244 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+92.98092987 46.13716368 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+94.57331384 44.37587921 A10.pos.col12.0.8 col12 col12 0.8 min A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos 147.112804 Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+97.07602789 655.2993307 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+99.5429594 42.19533608 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+101.0708987 733.3084926 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.066292 52.02654598 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.2845376 1601.345355 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.0034256 48.82052248 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.5317528 1602.886534 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.4460999 1611.919675 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.7271343 1611.835039 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.0231437 64.49318885 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.2399954 1612.325904 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.5116177 1612.17329 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.7629705 1611.850322 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.9814579 1611.648399 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.2424051 1611.574767 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.4569385 1611.778713 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.6884734 1611.621904 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.9272908 1611.145653 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+108.1575604 1611.664677 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+109.0777249 3.299196943 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+110.0599023 3.456417112 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+147.112804 48 A10.pos.col12.0.8 col12 col12 0.8 min A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos 147.112804 Blablaine [(M+H)]+ P92Z6W415 O2 147.112804 147.112804

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_3_peaks_output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_3_peaks_output.html Fri Feb 22 16:04:22 2019 -0500

b'@@ -0,0 +1,692 @@\n+<html>\n+\t<header>\n+\t\t<meta charset="UTF-8"/>\n+\t\t<title>LC/MS matching results</title>\n+\t\t<style>\n+\t\t\ttable, th, td { border-collapse: collapse; }\n+\t\t\ttable, th { border: 1px solid black; }\n+\t\t\ttd { border-left: 1px solid black; border-right: 1px solid black; }\n+\t\t\tth, td { padding: 5px; }\n+\t\t\tth { background-color: LightBlue; }\n+\t\t\ttr:nth-child(even) { background-color: LemonChiffon; }\n+\t\t\ttr:nth-child(odd) { background-color: LightGreen; }\n+\t\t</style>\n+\t</header>\n+\t<body>\n+\t\t<h3>Matched peaks</h3>\n+\t\t<table>\n+\t\t\t<tr>\n+\t\t\t\t<th>mz</th>\n+\t\t\t\t<th>rt</th>\n+\t\t\t\t<th>lcmsmatching.accession</th>\n+\t\t\t\t<th>lcmsmatching.chrom.col.id</th>\n+\t\t\t\t<th>lcmsmatching.chrom.col.name</th>\n+\t\t\t\t<th>lcmsmatching.chrom.rt</th>\n+\t\t\t\t<th>lcmsmatching.chrom.rt.unit</th>\n+\t\t\t\t<th>lcmsmatching.compound.id</th>\n+\t\t\t\t<th>lcmsmatching.formula</th>\n+\t\t\t\t<th>lcmsmatching.mass.csv.file.id</th>\n+\t\t\t\t<th>lcmsmatching.molecular.mass</th>\n+\t\t\t\t<th>lcmsmatching.ms.level</th>\n+\t\t\t\t<th>lcmsmatching.ms.mode</th>\n+\t\t\t\t<th>lcmsmatching.msprecmz</th>\n+\t\t\t\t<th>lcmsmatching.name</th>\n+\t\t\t\t<th>lcmsmatching.peak.attr</th>\n+\t\t\t\t<th>lcmsmatching.peak.comp</th>\n+\t\t\t\t<th>lcmsmatching.peak.mz</th>\n+\t\t\t\t<th>lcmsmatching.peak.mztheo</th>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>80.04959</td>\n+\t\t\t\t<td>339.9726</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>82.04819</td>\n+\t\t\t\t<td>1593.54</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td
/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>83.01344</td>\n+\t\t\t\t<td>654.9536</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>84.05585</td>\n+\t\t\t\t<td>4.748269</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>87.05536</td>\n+\t\t\t\t<td>3.480291</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>89.50682</td>\n+\t\t\t\t<td>39.62335</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>90.97681</td>\n+\t\t\t\t<td>1598.991</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>92.98093</td>\n+\t\t\t\t<td>46.13716</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<
td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>94.57331</td>\n+\t\t\t\t<td>44.37588</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>0.8</td>\n+\t\t\t\t<td>min</td>\n+\t\t\t\t<td>A10</td>\n+\t\t\t\t<td>J114L6M62O2</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>146.1055</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t\t<td>Blablaine</td>\n+\t\t\t\t<td>19</td>\n+\t\t\t\t<td>P93Z8W419 O2</td>\n+\t\t\t\t<td>94.57331</td>\n+\t\t\t\t<td>94.57331</td>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>97.07603</td>\n+\t\t\t\t<td>655.2993</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>99.54296</td>\n+\t\t\t\t<td>42.19534</td>\n+\t\t\t'..b'\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>105.4461</td>\n+\t\t\t\t<td>1611.92</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>105.7271</td>\n+\t\t\t\t<td>1611.835</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.0231</td>\n+\t\t\t\t<td>64.49319</td>\n+
\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.24</td>\n+\t\t\t\t<td>1612.326</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.5116</td>\n+\t\t\t\t<td>1612.173</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.763</td>\n+\t\t\t\t<td>1611.85</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>106.9815</td>\n+\t\t\t\t<td>1611.648</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.2424</td>\n+\t\t\t\t<td>1611.575</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\
t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.4569</td>\n+\t\t\t\t<td>1611.779</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.6885</td>\n+\t\t\t\t<td>1611.622</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>107.9273</td>\n+\t\t\t\t<td>1611.146</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>108.1576</td>\n+\t\t\t\t<td>1611.665</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>109.0777</td>\n+\t\t\t\t<td>3.299197</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>110.0599</td>\n+\t\t\t\t<td>3.456417</td>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\
t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t\t<td/>\n+\t\t\t</tr>\n+\t\t\t<tr>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t\t<td>48</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>col12</td>\n+\t\t\t\t<td>0.8</td>\n+\t\t\t\t<td>min</td>\n+\t\t\t\t<td>A10</td>\n+\t\t\t\t<td>J114L6M62O2</td>\n+\t\t\t\t<td>A10.pos.col12.0.8</td>\n+\t\t\t\t<td>146.1055</td>\n+\t\t\t\t<td>1</td>\n+\t\t\t\t<td>pos</td>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t\t<td>Blablaine</td>\n+\t\t\t\t<td>34</td>\n+\t\t\t\t<td>P92Z6W415 O2</td>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t\t<td>147.1128</td>\n+\t\t\t</tr>\n+\t\t</table>\n+\t</body>\n+</html>\n'

diff -r fb9c0409d85c -r f86fec07f392 test-data/test_3_peaks_output.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_3_peaks_output.tsv Fri Feb 22 16:04:22 2019 -0500

[

@@ -0,0 +1,32 @@
+mz rt lcmsmatching.accession lcmsmatching.chrom.col.id lcmsmatching.chrom.col.name lcmsmatching.chrom.rt lcmsmatching.chrom.rt.unit lcmsmatching.compound.id lcmsmatching.formula lcmsmatching.mass.csv.file.id lcmsmatching.molecular.mass lcmsmatching.ms.level lcmsmatching.ms.mode lcmsmatching.msprecmz lcmsmatching.name lcmsmatching.peak.attr lcmsmatching.peak.comp lcmsmatching.peak.mz lcmsmatching.peak.mztheo
+80.04959021 339.9725632 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+82.04819461 1593.540123 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+83.01343941 654.9535891 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+84.05585475 4.748268943 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+87.05536392 3.480291112 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+89.50682004 39.62335341 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+90.97680734 1598.991244 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+92.98092987 46.13716368 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+94.57331384 44.37587921 A10.pos.col12.0.8 col12 col12 0.8 min A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos 147.112804 Blablaine [(M+2H)+(CH3CN)]++ P93Z8W419 O2 94.5733145 94.5733145
+97.07602789 655.2993307 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+99.5429594 42.19533608 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+101.0708987 733.3084926 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.066292 52.02654598 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+102.2845376 1601.345355 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.0034256 48.82052248 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+104.5317528 1602.886534 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.4460999 1611.919675 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+105.7271343 1611.835039 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.0231437 64.49318885 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.2399954 1612.325904 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.5116177 1612.17329 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.7629705 1611.850322 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+106.9814579 1611.648399 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.2424051 1611.574767 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.4569385 1611.778713 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.6884734 1611.621904 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+107.9272908 1611.145653 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+108.1575604 1611.664677 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+109.0777249 3.299196943 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+110.0599023 3.456417112 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
+147.112804 48 A10.pos.col12.0.8 col12 col12 0.8 min A10 J114L6M62O2 A10.pos.col12.0.8 146.10553 1 pos 147.112804 Blablaine [(M+H)]+ P92Z6W415 O2 147.112804 147.112804