changeset 0:a174cbbb12dd draft

" master branch Updating"
author lain
date Tue, 24 Nov 2020 18:55:08 +0000
parents
children 1c2ce385b84e
files Dockerfile README.md XSeekerPreparator.R data/SERUM_v2019Jan17.tabular data/models.R format_versionning.MD galaxy/config/datatype_conf.xml galaxy/config/tool_conf.xml galaxy/lib/galaxy/datatypes/binary.py galaxy/lib/galaxy/datatypes/text.py galaxy/tools/LC-MSMS/XSeekerPreparator.R galaxy/tools/LC-MSMS/XSeekerPreparator.xml test/recreate_full.R test/test.sh
diffstat 14 files changed, 3031 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,56 @@
+
+FROM python:3.8-buster
+
+# set author (LABEL replaces the deprecated MAINTAINER instruction)
+LABEL maintainer="Lain Pavot <lain.pavot@inra.fr>"
+
+# set encoding
+ENV LC_ALL=en_US.UTF-8
+ENV LANG=en_US.UTF-8
+# NOTE(review): R_BASE_VERSION is not referenced by any instruction below;
+# the R that gets installed is whatever the configured apt sources provide.
+ENV R_BASE_VERSION=4.0.3
+
+ENV PLANEMO_VENV_LOCATION=/planemo-venv
+ENV CONDA=/tmp/conda
+
+# Single layer: system packages, locale, R, planemo virtualenv, conda, cleanup.
+# Cleanup notes:
+#  - RUN uses /bin/sh, which has no brace expansion, so the former
+#    "rm -rf /var/lib/{apt,dpkg,cache,log}/" removed nothing. Only the apt
+#    package lists are dropped now (the dpkg database must stay for apt/dpkg
+#    to keep working).
+#  - $CONDA lives under /tmp, so /tmp is emptied of everything except the
+#    conda prefix that "planemo conda_init" has just created.
+RUN \
+        apt-get update                                                                                         \
+    &&  apt-get install -y --no-install-recommends                                                             \
+        ed                                                                                                     \
+        less                                                                                                   \
+        locales                                                                                                \
+        vim-tiny                                                                                               \
+        wget                                                                                                   \
+        ca-certificates                                                                                        \
+        fonts-texgyre                                                                                          \
+    &&  echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen                                                            \
+    &&  locale-gen en_US.utf8                                                                                  \
+    &&  /usr/sbin/update-locale LANG=en_US.UTF-8                                                               \
+    &&  echo "deb http://http.debian.net/debian buster main" > /etc/apt/sources.list.d/debian-unstable.list    \
+    &&  echo 'APT::Default-Release "stable";' > /etc/apt/apt.conf.d/default                                    \
+    &&  echo 'APT::Install-Recommends "false";' > /etc/apt/apt.conf.d/90local-no-recommends                    \
+    &&  apt-get update                                                                                         \
+    &&  apt-get upgrade -y                                                                                     \
+    &&  apt-get install -y --no-install-recommends                                                             \
+        git                                                                                                    \
+        littler                                                                                                \
+        libhdf5-dev                                                                                            \
+        r-cran-littler                                                                                         \
+        r-base                                                                                                 \
+        r-base-dev                                                                                             \
+        r-recommended                                                                                          \
+        python-virtualenv                                                                                      \
+    &&  R -e 'install.packages("batch", repos="http://cran.us.r-project.org")'                                 \
+    &&  pip install --no-cache-dir virtualenv                                                                  \
+    &&  python -m virtualenv "$PLANEMO_VENV_LOCATION"                                                          \
+    &&  . "$PLANEMO_VENV_LOCATION"/bin/activate                                                                \
+    &&  pip install --no-cache-dir --upgrade pip setuptools                                                    \
+    &&  pip install --no-cache-dir planemo numpy                                                               \
+    &&  planemo conda_init --conda_prefix "$CONDA"                                                             \
+    &&  apt-get clean autoclean                                                                                \
+    &&  apt-get autoremove --yes                                                                               \
+    &&  rm -rf /var/lib/apt/lists/*                                                                            \
+    &&  rm -rf /usr/bin/X11                                                                                    \
+    &&  find /tmp -mindepth 1 -maxdepth 1 ! -path "$CONDA" -exec rm -rf {} +                                   ;
+
+CMD []
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,115 @@
+INTRODUCTION
+============
+
+This tool is part of the xcms/camera/XSeeker workflow, and it sits
+between camera and XSeeker. It takes as input an rdata file produced by camera,
+gathers some data from the original mzxml files, and creates a database
+containing all this information, organized in such a way that XSeeker
+only has to display the data without manipulating it any further, which
+makes it a lot faster and easier to use.
+
+
+PREREQUISITES
+=============
+There is no real prerequisite to understand how this tool works
+and to modify it. There is a single R script - XSeekerPreparator.R - that does
+all the work.
+
+You can run this tool on its own, or you can integrate it with galaxy.
+In the latter case, you need to have a galaxy instance you can configure.
+It is also recommended to have access to the datatypes definitions (in
+the python files) so that galaxy recognizes the outputs as XSeeker's
+database files.
+
+
+DESIGN
+======
+This tool is very simple and does only a small number of things, so there is
+only a single file: XSeekerPreparator.R .
+The test/test.sh file can be used as an example of how
+to use this tool.
+
+You will find the galaxy patches in the galaxy/ directory. These files
+are not meant to be copied and pasted as-is. The content of these files must be
+added to the existing files of your galaxy instance.
+
+
+REQUIREMENT
+=====
+
+ - R-4.0.0
+ - optparse
+ - xcms
+ - blob
+ - fst
+ - DBModelR
+ - stringr
+ - optparse
+ - galaxy (optional)
+
+#### R 4.0.0
+ - `export R_MAJOR=4 R_VERSION=4.0.0 BUILD_TARGET_DIR=~/R/`
+ - `wget https://cran.rstudio.com/src/base/R-${R_MAJOR}/R-${R_VERSION}.tar.gz`
+ - `tar -xf "R-${R_VERSION}.tar.gz"`
+ - `cd ./R-${R_VERSION}/`
+ - `./configure --prefix="${BUILD_TARGET_DIR}" --with-readline="no" --with-x="no"`
+ - `export CC="gcc -fPIC"`
+ - `make`
+ - `make install`
+
+#### Packages
+```bash
+~/R/bin/R -e "
+install.packages(
+    c(
+        'optparse',
+        'blob',
+        'fst',
+        'stringr',
+        'optparse',
+        'RSQLite',
+        'remotes',
+        'BiocManager'
+    ), repos='https://cloud.r-project.org'
+)" && \
+~/R/bin/R -e '
+    remotes::install_github("LainPavot/DBModelR", force=TRUE)
+' && \
+~/R/bin/R -e '
+    BiocManager::install("xcms")
+'
+```
+
+#### galaxy
+```bash
+git clone https://github.com/galaxyproject/galaxy
+```
+
+DEPLOY
+=====
+
+Install the tool in galaxy:  
+Open each file in the galaxy/ directory and copy their content to their
+respective files in your galaxy instance.  
+Copy XSeekerPreparator.R into galaxy/tools/LC-MSMS/
+
+METADATA
+--------
+
+ - **@name**: XSeekerPreparator
+ - **@version**: 1.1.2
+ - **@authors**: Lain Pavot
+ - **@date creation**: 15/09/2020
+
+NOTES
+-----
+Developed and tested using:
+
+ - R 4.0.0
+ - optparse 1.6.6
+ - xcms 3.10.2
+ - blob 1.2.1
+ - fst 0.9.4
+ - DBModelR
+ - stringr 1.4.0
+ - galaxy 21.01
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/XSeekerPreparator.R	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,919 @@
+
+
+## Name and version of this tool.
+TOOL_NAME <- "XSeekerPreparator"
+VERSION <- "1.1.2"
+
+## Branch name appended to the enriched rdata format version.
+OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
+
+## Enriched rdata format version, laid out as major.minor.patch-branch_name.
+ENRICHED_RDATA_VERSION <- sprintf("%s-%s", "1.1.2", OUTPUT_SPECIFIC_TOOL)
+## Documentation text describing the content of an enriched rdata.
+## The four %s placeholders of the template are filled, in order, with
+## ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION and ENRICHED_RDATA_VERSION.
+ENRICHED_RDATA_DOC <- sprintf("
+Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
+This doc was generated by the tool: %s - Version %s
+To show the different variables contained in this rdata, type:
+ - `load('this_rdata.rdata', rdata_env <- new.env())`
+ - `names(rdata_env)`
+
+Sections
+######
+
+
+This tools helpers
+------
+    The version number is somewhat special because the evolution of the
+    rdata's format is non-linear.
+    There may be different branches, each evolving separatly.
+    To reflect these branches's diversions, there may be a prepended
+    branch name following this format:
+        major.minor.patch-branch_name
+    Like this, we can process rdata with the same tool, and output
+    rdata formated differently, for each tool.
+
+
+  - enriched_rdata:
+    - Description: flag created by that tool to tell it was enriched.
+    - Retrieval method: enriched_rdata <- TRUE
+
+  - enriched_rdata_version:
+    - Description: A flag created by that tool to tell which version of
+        this tool has enriched the rdata.
+    - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION)
+
+  - enriched_rdata_doc:
+    - Description: Contains the documentation string.
+
+Data from original mzxml file
+------
+  - tic:
+    - Description: Those are the tic values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@tic
+    - xcms version: 2.0
+
+  - mz:
+    - Description: Those are the m/z values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$mz
+    - xcms version: 2.0
+
+  - scanindex:
+    - Description: Those are the scanindex values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scanindex
+    - xcms version: 2.0
+
+  - scantime:
+    - Description: Those are the scantime values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scantime
+    - xcms version: 2.0
+
+  - intensity:
+    - Description: Those are the intensity values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$intensity
+    - xcms version: 2.0
+
+  - polarity:
+    - Description: Those are the polarity values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]])
+    - xcms version: 2.0
+
+Data taken from incoming rdata
+------
+  - variableMetadata:
+    - Description: Unmodified copy of variableMetadata from incoming rdata.
+    - Retrieval method: rdata_file$variableMetadata
+
+  - process_params:
+    - Description: Those are the processing parameters values from the
+        curent rdata. They have been simplified to allow easy access like:
+        for (params in process_params) {
+            if (params[[\"xfunction\"]] == \"annotatediff\") {
+                process_peak_picking_params(params)
+            }
+        }
+    - Retrieval method:
+        ## just he same list, but simplified
+        process_params <- list()
+        for (list_name in names(rdata_file$listOFlistArguments)) {
+            param_list <- list()
+            for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) {
+                param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]]
+            }
+            process_params[[length(process_params)+1]] <- param_list
+        }
+", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION)
+
+
+
+## Load the models (database schema definition) designated by `path`.
+##
+## `path` can be:
+##   - a git repository: starts with "git@" (or "git__at__", as mangled by
+##     galaxy) or ends with ".git";
+##   - a url starting with "http";
+##   - anything else is source()d as a local R file.
+## Returns whatever the selected loader returns (for a local file: the value
+## of the last expression of the sourced file).
+## Stops when `path` is NULL.
+get_models <- function(path) {
+    if (is.null(path)) {
+        stop("No models to define the database schema")
+    }
+    message(sprintf("Loading models from %s", path))
+    ## galaxy mangles the "@" to a "__at__"
+    if (startsWith(path, "git__at__")) {
+        path <- sub("^git__at__", "git@", path, perl=TRUE)
+    }
+    ## The ".git" suffix is tested before the "http" prefix, so that
+    ## "https://host/repo.git" is cloned and not downloaded.
+    ## (The previous test used length(path), which is 1 for a single string,
+    ## and therefore never looked at the end of the path.)
+    if (startsWith(path, "git@") || endsWith(path, ".git")) {
+        return (get_models_from_git(path))
+    }
+    if (startsWith(path, "http")) {
+        return (get_models_from_url(path))
+    }
+    return (source(path)$value)
+}
+
+## Clone the git repository at `url` and load the models it contains.
+##
+## The repository is cloned into a fresh directory created under the session
+## temporary directory; the first file named `target_file` (case-insensitive,
+## as matched by search_tree) is source()d and the value of its last
+## expression is returned.
+## When `rm` is TRUE the clone is deleted before returning, even on error.
+## Stops if the clone fails or if no file named `target_file` is found.
+get_models_from_git <- function (url, target_file="models.R", rm=TRUE) {
+    ## A dedicated path: tempdir() itself is shared by the whole R session
+    ## and must neither be cloned into nor deleted.
+    tmp <- tempfile("models_repo_")
+    if (rm) {
+        on.exit(unlink(tmp, recursive=TRUE), add=TRUE)
+    }
+    message(sprintf("Cloning %s", url))
+    ## system2 builds a shell command line: quote the user-provided values.
+    status <- system2("git", c("clone", shQuote(url), shQuote(tmp)))
+    if (!identical(as.integer(status), 0L)) {
+        stop(sprintf("Could not clone \"%s\"", url))
+    }
+    result <- search_tree(tmp, target_file)
+    if (!is.null(result)) {
+        return (source(result)$value)
+    }
+    stop(sprintf(
+        "Could not find any file named \"%s\" in this repo",
+        target_file
+    ))
+}
+
+## Download the R file at `url` and load the models it defines.
+##
+## The file is saved as `target_file` in a fresh directory created under the
+## session temporary directory, then source()d; the value of its last
+## expression is returned.
+## When `rm` is TRUE the download directory is deleted before returning.
+## Stops if download.file reports a non-zero status.
+get_models_from_url <- function (url, target_file="models.R", rm=TRUE) {
+    ## A dedicated directory: deleting tempdir() itself would break every
+    ## later tempfile()/tempdir() user in the same R session.
+    tmp <- tempfile("models_download_")
+    dir.create(tmp)
+    if (rm) {
+        on.exit(unlink(tmp, recursive=TRUE), add=TRUE)
+    }
+    message(sprintf("Downloading %s", url))
+    result <- file.path(tmp, target_file)
+    if (download.file(url, destfile=result) == 0) {
+        return (source(result)$value)
+    }
+    stop("Could not download any file at this adress.")
+}
+
+## Recursively look for a file named `target` under the directory `path`.
+##
+## The comparison of file names is case-insensitive.
+## Returns the path of the first match (built with file.path from `path`),
+## or NULL when there is no match.
+search_tree <- function(path, target) {
+    target <- tolower(target)
+    for (file in list.files(path)) {
+        ## list.files returns bare names: the directory test must be done
+        ## on the full path, with dir.exists (base R has no `is.dir`).
+        full_path <- file.path(path, file)
+        if (dir.exists(full_path)) {
+            result <- search_tree(full_path, target)
+            if (!is.null(result)) {
+                return (result)
+            }
+        } else if (tolower(file) == target) {
+            return (full_path)
+        }
+    }
+    return (NULL)
+}
+
+## Call orm$recreate_database, then tag the database version as "created".
+## Returns the result of set_database_version.
+create_database <- function(orm) {
+    orm$recreate_database(no_exists=FALSE)
+    set_database_version(orm=orm, version="created")
+}
+
+## Insert the built-in list of adducts into the database through `orm`.
+##
+## Each entry of `adducts` is a positional list whose fields are, in order:
+## name, multi, charge, mass, oidscore, quasi, ips, formula_add, formula_ded
+## (this is the order in which the setters are called below).
+## A single model instance is reused: it is filled, saved, then cleared
+## (with its id unset) before handling the next adduct.
+insert_adducts <- function(orm) {
+    message("Creating adducts...")
+    adducts <- list(
+        list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"),
+        list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"),
+        list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"),
+        list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"),
+        list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
+        list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
+        list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"),
+        list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
+        list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
+        list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"),
+        list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
+        list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
+        list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"),
+        list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"),
+        list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"),
+        list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"),
+        list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"),
+        list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"),
+        list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"),
+        list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"),
+        list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"),
+        list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"),
+        list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"),
+        list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
+        list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"),
+        list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
+        list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"),
+        list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"),
+        list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"),
+        list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
+        list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
+        list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"),
+        list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"),
+        list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
+        list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
+        list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"),
+        list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"),
+        list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"),
+        list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
+        list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
+        list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"),
+        list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"),
+        list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"),
+        list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"),
+        list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
+        list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
+        list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"),
+        list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
+        list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
+        list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"),
+        list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
+        list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
+        list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"),
+        list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"),
+        list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"),
+        list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
+        list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
+        list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"),
+        list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
+        list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
+        list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"),
+        list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
+        list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
+        list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"),
+        list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"),
+        list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
+        list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
+        list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"),
+        list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"),
+        list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"),
+        list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
+        list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
+        list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"),
+        list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
+        list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
+        list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"),
+        list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"),
+        list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
+        list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
+        list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"),
+        list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"),
+        list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"),
+        list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"),
+        list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"),
+        list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
+        list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
+        list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"),
+        list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"),
+        list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"),
+        list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"),
+        list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"),
+        list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"),
+        list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"),
+        list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"),
+        list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"),
+        list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"),
+        list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"),
+        list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"),
+        list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"),
+        list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"),
+        list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"),
+        list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"),
+        list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"),
+        list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"),
+        list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"),
+        list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0")
+    )
+    dummy_adduct <- orm$adduct()
+    for (adduct in adducts) {
+        ## `i` is a cursor in the positional list: each `i <- i+1` moves to
+        ## the next field while it is being read.
+        i <- 0
+        dummy_adduct$set_name(adduct[[i <- i+1]])
+        dummy_adduct$set_multi(adduct[[i <- i+1]])
+        dummy_adduct$set_charge(adduct[[i <- i+1]])
+        dummy_adduct$set_mass(adduct[[i <- i+1]])
+        dummy_adduct$set_oidscore(adduct[[i <- i+1]])
+        dummy_adduct$set_quasi(adduct[[i <- i+1]])
+        dummy_adduct$set_ips(adduct[[i <- i+1]])
+        dummy_adduct$set_formula_add(adduct[[i <- i+1]])
+        dummy_adduct$set_formula_ded(adduct[[i <- i+1]])
+        dummy_adduct$save()
+        ## reset the instance so that the next save() targets a new record
+        ## NOTE(review): presumably what unset_id=TRUE is for - confirm in DBModelR
+        dummy_adduct$clear(unset_id=TRUE)
+    }
+    message("Adducts created")
+}
+
+## Execute the SQL statements stored in the file `path`, then tag the
+## database version as "enriched".
+##
+## The lines of the file are joined with spaces and split on ";"; each
+## non-blank fragment is passed to orm$execute.
+## NOTE(review): a ";" inside an SQL string literal would also split the
+## statement - the file is assumed not to contain any.
+## When `archetype` is TRUE nothing is done and NULL is returned.
+## Otherwise returns the result of set_database_version.
+insert_base_data <- function(orm, path, archetype=FALSE) {
+    if (archetype) {
+        ## not implemented yet
+        return (NULL)
+    }
+    statements <- strsplit(paste(readLines(path), collapse=" "), ";")[[1]]
+    for (sql in statements) {
+        ## blank lines after the last ";" leave a whitespace-only fragment,
+        ## which is not a statement and must not be executed.
+        if (!nzchar(trimws(sql))) {
+            next
+        }
+        orm$execute(sql)
+    }
+    set_database_version(orm, "enriched")
+}
+
+## Read the tab-separated file `compounds_path` and bulk-insert its
+## compounds through `orm`.
+##
+## The table is first normalized by translate_compounds; the function stops
+## if the expected attributes cannot be found.
+## Returns the result of the bulk save.
+insert_compounds <- function(orm, compounds_path) {
+    raw_table <- read.csv(file=compounds_path, sep="\t")
+    compounds <- translate_compounds(raw_table)
+    if (is.null(compounds)) {
+        stop("Could not find asked compound's attributes in csv file.")
+    }
+    fields <- c("mz", "name", "common_name", "formula")
+    ## one model instance is reused as a template for every row
+    template <- orm$compound()
+    records <- lapply(seq_len(nrow(compounds)), function(row_no) {
+        template$set_mz(compounds[row_no, "mz"])
+        template$set_name(compounds[row_no, "name"])
+        template$set_common_name(compounds[row_no, "common_name"])
+        template$set_formula(compounds[row_no, "formula"])
+        record <- as.list(template, fields)
+        template$clear(unset_id=TRUE)
+        record
+    })
+    template$save(bulk=records)
+}
+
+## Convert a compounds table to the column names used by this tool.
+##
+## If the column names are exactly those of a known format, the dedicated
+## translator is applied. Otherwise a translation table is guessed from the
+## column names with guess_translator.
+## Returns the translated table, or NULL when no translation could be guessed.
+translate_compounds <- function(compounds) {
+    known_formats <- list(
+        list(
+            headers=c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey"),
+            translate=hmdb_header_translator
+        )
+    )
+    columns <- colnames(compounds)
+    for (known in known_formats) {
+        if (identical(columns, known$headers)) {
+            return (known$translate(compounds))
+        }
+    }
+    translator <- guess_translator(columns)
+    if (is.null(translator)) {
+        return (NULL)
+    }
+    return (csv_header_translator(translator, compounds))
+}
+
+## Guess which columns of `header` hold the attributes this tool needs.
+##
+## For each of "mz", "name", "common_name" and "formula", the first column
+## whose name matches is kept. A column name matches when it equals the
+## attribute name after lower-casing, after replacing "-" or " " by "_",
+## or after converting CamelCase to snake_case.
+## Returns a named list (attribute -> column name), or NULL if at least one
+## attribute has no matching column.
+guess_translator <- function(header) {
+    ## HMDB_ID and inchi_key are not asked for (yet).
+    ## (The previous list(...) literal ended with a dangling comma, which
+    ## made every call fail with an "argument is empty" error.)
+    asked_cols <- c("mz", "name", "common_name", "formula")
+    result <- list()
+    for (asked_col in asked_cols) {
+        for (col in header) {
+            twisted <- tolower(col)
+            if (twisted == asked_col
+                || gsub("-", "_", twisted) == asked_col
+                || gsub(" ", "_", twisted) == asked_col
+                || tolower(gsub("(.)([A-Z])", "\\1_\\2", col)) == asked_col
+            ) {
+                result[[asked_col]] <- col
+                ## first match wins: stop scanning the header
+                break
+            }
+        }
+    }
+    if (!all(asked_cols %in% names(result))) {
+        return (NULL)
+    }
+    return (result)
+}
+
+## Translate a table that uses the HMDB column names.
+## Returns the result of csv_header_translator for the HMDB mapping below.
+hmdb_header_translator <- function(compounds) {
+    ## attribute name -> HMDB column name
+    hmdb_columns <- list(
+        HMDB_ID="HMDB_ID",
+        mz="MzBank",
+        name="MetName",
+        common_name="MetName",
+        formula="ChemFormula",
+        inchi_key="INChIkey"
+    )
+    return (csv_header_translator(hmdb_columns, compounds))
+}
+
+## Build a table whose columns are named after `translation_table`.
+##
+## `translation_table` is a named list: each name is the column to create,
+## each value is the column of `csv` to copy into it.
+## The "mz" column of the result is converted to numeric.
+## Returns a data.frame with one row per row of `csv`. Its first column is
+## a row index kept under its historical name ("X1.nrow.csv.") so that the
+## shape of the result is unchanged.
+csv_header_translator <- function(translation_table, csv) {
+    header_names <- names(translation_table)
+    ## seq_len: 1:nrow(csv) would give c(1, 0), i.e. two rows, for an empty csv
+    result <- data.frame(seq_len(nrow(csv)))
+    colnames(result) <- "X1.nrow.csv."
+    for (i in seq_along(header_names)) {
+        result[, header_names[[i]]] <- csv[, translation_table[[i]]]
+    }
+    mz <- result[, "mz"]
+    if (is.factor(mz)) {
+        ## as.numeric on a factor returns the level codes, not the values
+        mz <- as.character(mz)
+    }
+    result[, "mz"] <- as.numeric(mz)
+    return (result)
+}
+
+## Store `version` under the "database_version" tag of the XSeeker tagging
+## table. Returns the result of orm$set_tag.
+set_database_version <- function(orm, version) {
+    version_tag <- "database_version"
+    tagging_table <- "XSeeker_tagging_table"
+    orm$set_tag(version, tag_name=version_tag, tag_table_name=tagging_table)
+}
+
+## Process the samples of `rdata` and store them through `orm`.
+##
+## - options$samples: when not NULL, only the samples whose name is listed
+##   there are processed;
+## - options$`not-show-percent`: when NULL or FALSE, progress display is
+##   enabled.
+## If gather_mzml_files returns a directory, it is deleted when this
+## function exits, whether processing succeeded or not.
+## Any error raised while processing is logged, then raised again.
+## Returns NULL invisibly.
+process_rdata <- function(orm, rdata, options) {
+    mzml_tmp_dir <- gather_mzml_files(rdata)
+    if (!is.null(mzml_tmp_dir)) {
+        on.exit(unlink(mzml_tmp_dir, recursive=TRUE), add=TRUE)
+    }
+    samples <- names(rdata$singlefile)
+    if (!is.null(options$samples)) {
+        ## keep the available samples that were asked for
+        ## (the operands of %in% were swapped, which indexed `samples`
+        ## with a mask computed on the wrong vector)
+        samples <- samples[samples %in% options$samples]
+    }
+    show_percent <- (
+        is.null(options$`not-show-percent`)
+        || options$`not-show-percent` == FALSE
+    )
+    tryCatch({
+        process_sample_list(
+            orm, rdata, samples,
+            show_percent=show_percent
+        )
+    }, error=function(e) {
+        ## log the text only: message(e) would signal the error condition
+        ## again instead of just printing it
+        message(conditionMessage(e))
+        stop(e)
+    })
+    invisible(NULL)
+}
+
+## Make sure the raw files of `rdata` are available on disk.
+##
+## When rdata$singlefile is NULL, rdata$zipfile is extracted into a fresh
+## directory and rdata$singlefile is set to the extracted paths, named after
+## the files (without directory nor extension).
+## NOTE(review): this assignment is only visible to the caller if `rdata`
+## is an environment - confirm what callers pass.
+## Returns the extraction directory (to be deleted by the caller), or NULL
+## when nothing was extracted.
+gather_mzml_files <- function(rdata) {
+    if (is.null(rdata$singlefile)) {
+        message("Extracting mxml files")
+        ## A dedicated directory: the caller deletes the returned path
+        ## recursively, which must never be the session tempdir() itself.
+        tmp <- tempfile("mzml_")
+        dir.create(tmp)
+        rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp)
+        names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile))
+        message("Extracted")
+        return (tmp)
+    } else {
+        message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; ")))
+    }
+    return (NULL)
+}
+
+## Store in the database the samples of an rdata whose name is in
+## `sample_names`.
+##
+## Steps:
+##   - find the column of variableMetadata that identifies the file;
+##   - simplify rdata$listOFlistArguments into a plain list of parameters;
+##   - save a compressed summary of the xcmsSet (smol_xcms_set);
+##   - for each file index found in variableMetadata, read the raw file
+##     with xcms::xcmsRaw and call add_sample_to_database;
+##   - call complete_features.
+## `show_percent` is forwarded to the feature creation through `context`.
+## Stops when no file grouping variable is found.
+## Returns NULL.
+process_sample_list <- function(orm, radta, sample_names, show_percent) {
+    ## The second parameter is spelled `radta` in the signature (kept as is
+    ## for compatibility). Alias it so that the body works on the argument
+    ## it was given rather than on a global `rdata` object.
+    rdata <- radta
+    var_meta <- rdata$variableMetadata
+    file_grouping_var <- find_grouping_var(var_meta)
+    message("Processing samples.")
+    ## checked before being formatted: sprintf of NULL yields an empty message
+    if (is.null(file_grouping_var)) {
+        stop("Malformed variableMetada.")
+    }
+    message(sprintf("File grouping variable: %s", file_grouping_var))
+
+    process_arg_list <- rdata$listOFlistArguments
+    process_params <- list()
+    for (list_name in names(process_arg_list)) {
+        param_list <- list()
+        for (param_name in names(process_arg_list[[list_name]])) {
+            param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
+        }
+        process_params[[length(process_params)+1]] <- param_list
+    }
+    message("Parameters from previous processes extracted.")
+
+    align_group <- rep(0, nrow(var_meta))
+    var_meta <- cbind(var_meta, align_group)
+    xcms_set <- rdata$xa@xcmsSet
+    context <- new.env()
+    context$clusters <- list()
+    context$groupidx <- xcms_set@groupidx
+    context$peaks <- xcms_set@peaks
+    context$show_percent <- show_percent
+
+    indices <- as.numeric(unique(var_meta[, file_grouping_var]))
+    smol_xcms_set <- orm$smol_xcms_set()
+    mz_tab_info <- new.env()
+    g <- xcms::groups(xcms_set)
+    mz_tab_info$group_length <- nrow(g)
+    mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
+    mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
+    mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
+    mz_tab_info$rtmed <- g[,"rtmed"]
+    mz_tab_info$mzmed <- g[,"mzmed"]
+    mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
+    blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100))
+    smol_xcms_set$set_raw(blogified)$save()
+    for (no in indices) {
+        ## test the index before using it as a subscript
+        if (is.na(no)) {
+            next
+        }
+        sample_name <- names(rdata$singlefile)[[no]]
+        sample_path <- rdata$singlefile[[no]]
+        if (is.null(sample_path) || !(sample_name %in% sample_names)) {
+            next
+        }
+        ms_file <- xcms::xcmsRaw(sample_path)
+        ## everything stored in `env` ends up serialized with the sample
+        env <- new.env()
+        env$variableMetadata <- var_meta[var_meta[, file_grouping_var]==no,]
+        env$tic <- ms_file@tic
+        env$mz <- ms_file@env$mz
+        env$scanindex <- ms_file@scanindex
+        env$scantime <- ms_file@scantime
+        env$intensity <- ms_file@env$intensity
+        env$polarity <- as.character(ms_file@polarity[[1]])
+        env$sample_name <- sample_name
+        env$dataset_path <- sample_path
+        env$process_params <- process_params
+        env$enriched_rdata <- TRUE
+        env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
+        env$tool_name <- TOOL_NAME
+        env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
+        context$sample_no <- no
+        add_sample_to_database(orm, env, context, smol_xcms_set)
+    }
+    message("Features enrichment")
+    complete_features(orm, context)
+    message("Features enrichment done.")
+    return (NULL)
+}
+
+## Find which column of `var_meta` identifies the file of each row.
+##
+## The candidates "." and "Bio" are tried in this order.
+## Returns the name of the first candidate that is a column of `var_meta`,
+## or NULL when none is.
+find_grouping_var <- function(var_meta) {
+    for (grouping_var in c(".", "Bio")) {
+        ## look in the argument, not in a global `rdata` object
+        if (!is.null(var_meta[[grouping_var]])) {
+            return (grouping_var)
+        }
+    }
+    return (NULL)
+}
+
+## Save one sample, then load its features and its process parameters.
+##
+## `env` holds the data of the sample (name, path, polarity, ...); the whole
+## environment is serialized, compressed and stored in the sample's raw field.
+## Returns the saved sample.
+add_sample_to_database <- function(orm, env, context, smol_xcms_set) {
+    message(sprintf("Processing sample %s", env$sample_name))
+    ## every setter result is re-assigned, exactly as the chained form did
+    sample <- orm$sample()
+    sample <- sample$set_name(env$sample_name)
+    sample <- sample$set_path(env$dataset_path)
+    sample <- sample$set_kind("enriched_rdata")
+    ## a missing or empty polarity is stored as an empty string
+    polarity <- env$polarity
+    if (is.null(polarity) || identical(polarity, character(0))) {
+        polarity <- ""
+    }
+    sample <- sample$set_polarity(polarity)
+    sample <- sample$set_smol_xcms_set(smol_xcms_set)
+    compressed_env <- fst::compress_fst(
+        serialize(env, NULL),
+        compression=100
+    )
+    sample <- sample$set_raw(blob::blob(compressed_env))
+    sample <- sample$save()
+    load_variable_metadata(orm, sample, env$variableMetadata, context)
+    load_process_params(orm, sample, env$process_params)
+    message(sprintf("Sample %s inserted.", env$sample_name))
+    return (sample)
+}
+
+
+## Extracts the features of one sample from its variable metadata.
+##
+## The first free featureID, clusterID, pc_group and align_group are looked
+## up with get_next_id and handed to create_features.
+## Always returns NULL.
+load_variable_metadata <- function(orm, sample, var_meta, context) {
+    known_clusters <- orm$cluster()$all()
+
+    first_free <- list(
+        feature=get_next_id(orm$feature()$all(), "featureID"),
+        cluster=get_next_id(known_clusters, "clusterID"),
+        pc_group=get_next_id(known_clusters, "pc_group"),
+        align_group=get_next_id(known_clusters, "align_group")
+    )
+
+    message("Extracting features")
+    create_features(
+        orm, sample, var_meta, context,
+        first_free$feature, first_free$cluster,
+        first_free$pc_group, first_free$align_group
+    )
+    message("Extracting features done.")
+    return (NULL)
+}
+
+## Computes the next free numeric identifier for `attribute`.
+##
+## @param models a model collection exposing `max(attribute)`.
+## @param attribute name of the attribute holding the identifier.
+## @return 1 when there is no usable maximum (NULL, zero-length, NA, NaN,
+##   Inf or -Inf), otherwise the current maximum plus one.
+get_next_id <- function(models, attribute) {
+    current_max <- models$max(attribute)
+    ## a bare `== Inf` test yields NA or logical(0) for NA/NULL maxima and
+    ## makes `if` fail, so check length and finiteness explicitly
+    if (length(current_max) != 1 || !is.finite(current_max)) {
+        return (1)
+    }
+    return (current_max + 1)
+}
+
+## Builds one feature per row of `var_meta`, attaches each of them to a
+## cluster and bulk-saves them.
+##
+## @param orm the ORM used to create feature models.
+## @param sample the sample model the clusters are attached to.
+## @param var_meta variable metadata rows of the sample, one per feature.
+## @param context shared context; reads `show_percent`, `peaks`, `groupidx`
+##   and `sample_no`, writes `clusterID`, and `clusters` is filled by
+##   create_associated_cluster.
+## @param next_feature_id featureID given to the first feature; incremented
+##   for each row.
+## @param next_cluster_id offset added to the cluster number read from the
+##   isotope annotation.
+## @param next_pc_group offset forwarded unchanged for every row.
+## @param next_align_group align group of the first row; incremented for
+##   each row.
+## @return `context$clusters`.
+create_features <- function(
+    orm, sample, var_meta, context,
+    next_feature_id, next_cluster_id,
+    next_pc_group, next_align_group
+) {
+    field_names <- as.list(names(orm$feature()$fields__))
+    field_names[field_names=="id"] <- NULL
+
+    total <- nrow(var_meta)
+    ## every row yields exactly one entry, so the list can be preallocated
+    features <- vector("list", total)
+    ## a single model object is reused: filled, dumped to a list, cleared
+    dummy_feature <- orm$feature()
+
+    if (show_percent <- context$show_percent) {
+        percent <- -1
+    }
+    for (row in seq_len(total)) {
+        if (show_percent) {
+            ## display the real progress, whatever the number of rows
+            current_percent <- floor(row / total * 100)
+            if (current_percent > percent) {
+                percent <- current_percent
+                message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
+            }
+        }
+
+        curent_var_meta <- var_meta[row, ]
+
+        ## NOTE(review): this assumes row i of var_meta matches
+        ## context$groupidx[[i]]; the caller passes a per-sample subset of
+        ## the variable metadata - TODO confirm the indices still line up.
+        ## drop=FALSE keeps a one-peak group two-dimensional, otherwise the
+        ## column selection below fails on a plain vector.
+        peak_list <- context$peaks[context$groupidx[[row]], , drop=FALSE]
+        sample_peak_list <- peak_list[
+            peak_list[, "sample"] == context$sample_no, , drop=FALSE
+        ]
+        if (!is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
+            if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+                dummy_feature$set_int_o(int_o)
+            }
+            if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+                dummy_feature$set_int_b(int_b)
+            }
+            if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+                dummy_feature$set_max_o(max_o)
+            }
+        }
+
+        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
+
+        dummy_feature$set_featureID(next_feature_id)
+        next_feature_id <- next_feature_id + 1
+        ## the raw annotation may start with "[<number>]": the number gives
+        ## the cluster, the remainder is the isotope annotation itself
+        fake_iso <- dummy_feature$get_iso()
+        iso <- extract_iso(fake_iso)
+        clusterID <- extract_clusterID(fake_iso, next_cluster_id)
+        context$clusterID <- clusterID
+        dummy_feature$set_iso(iso)
+        create_associated_cluster(
+            sample, dummy_feature, clusterID,
+            context, curent_var_meta, next_pc_group,
+            next_align_group
+        )
+        next_align_group <- next_align_group + 1
+        features[[row]] <- as.list(dummy_feature, field_names)
+        dummy_feature$clear()
+    }
+    message("")## +\n for previous message
+    message("Saving features")
+    dummy_feature$save(bulk=features)
+    message("Saved.")
+    return (context$clusters)
+}
+
+## Reduces one column of a peak table to a single value.
+##
+## @param peak_list peak table supporting `[, column]` indexing.
+## @param var_name name of the column to read.
+## @param selector function applied to the unnamed column (default: max).
+## @return the result of `selector` on the column values.
+extract_peak_var <- function(peak_list, var_name, selector=max) {
+    column_values <- unname(peak_list[, var_name])
+    return (selector(column_values))
+}
+
+## Copies the known columns of one variable metadata row into a feature.
+##
+## Each of mz, mzmin, mzmax, rt, rtmin, rtmax and isotopes is passed to the
+## matching feature setter, unless the column is absent (NULL) or NA.
+##
+## @param feature the feature model to fill.
+## @param var_meta one row of variable metadata.
+## @return `feature`.
+set_feature_fields_from_var_meta <- function(feature, var_meta) {
+    ## `setter` is only evaluated, and called, when the value is usable
+    copy_field <- function(column, setter) {
+        value <- var_meta[[column]]
+        if (!is.null(value) && !is.na(value)) {
+            setter(value)
+        }
+    }
+    copy_field("mz", feature$set_mz)
+    copy_field("mzmin", feature$set_mz_min)
+    copy_field("mzmax", feature$set_mz_max)
+    copy_field("rt", feature$set_rt)
+    copy_field("rtmin", feature$set_rt_min)
+    copy_field("rtmax", feature$set_rt_max)
+    copy_field("isotopes", feature$set_iso)
+    return (feature)
+}
+
+## Strips a leading "[<digits>]" prefix from an isotope annotation.
+## Annotations without such a prefix are returned unchanged.
+extract_iso <- function(weird_data) {
+    has_number_prefix <- grepl("^\\[\\d+\\]", weird_data)[[1]]
+    if (!has_number_prefix) {
+        return (weird_data)
+    }
+    return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE))
+}
+
+## Computes a cluster identifier from an isotope annotation.
+##
+## The number found in a leading "[<digits>]" prefix (0 when there is no
+## such prefix) is added to `next_cluster_id`.
+extract_clusterID <- function(weird_data, next_cluster_id){
+    cluster_number <- 0
+    if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
+        bracketed_number <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
+        cluster_number <- as.numeric(
+            stringr::str_extract(bracketed_number, "\\d+")
+        )
+    }
+    return (cluster_number + next_cluster_id)
+}
+
+## Attaches `feature` to the cluster registered under `grouping_variable`
+## in `context$clusters`, creating and registering that cluster first when
+## it does not exist yet. The cluster is saved in both cases.
+##
+## NOTE(review): `orm` is not a parameter; it is resolved from an enclosing
+## scope (presumably the script-level `orm`) - confirm this is intended.
+##
+## @return `feature`.
+create_associated_cluster <- function(
+    sample, feature, grouping_variable,
+    context, curent_var_meta, next_pc_group, next_align_group
+) {
+    row_pc_group <- as.numeric(curent_var_meta[["pcgroup"]])
+    row_adduct <- as.character(curent_var_meta[["adduct"]])
+    row_annotation <- curent_var_meta[["isotopes"]]
+    cluster_key <- as.character(grouping_variable)
+
+    cluster <- context$clusters[[cluster_key]]
+    if (!is.null(cluster)) {
+        ## a known cluster only receives a clusterID if it still has none
+        if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
+            cluster$set_clusterID(context$clusterID)
+        }
+    } else {
+        cluster <- orm$cluster(
+            pc_group=row_pc_group + next_pc_group,
+            adduct=row_adduct,
+            align_group=next_align_group,
+            clusterID=context$clusterID,
+            annotation=row_annotation
+        )$set_sample(sample)
+        context$clusters[[cluster_key]] <- cluster
+    }
+    cluster$save()
+    feature$set_cluster(cluster)
+    return (feature)
+}
+
+## Completes every cluster of `context$clusters` once all features exist.
+##
+## For each cluster that has features:
+##   - the mean "rt" of its features is stored on the cluster;
+##   - each feature's abundance is set to its int_o expressed as a
+##     percentage of the int_o of the cluster's "[M]" feature.
+## Clusters without a usable "[M]" intensity (no such feature, NA or 0)
+## keep their features' abundance untouched. When several "[M]" features
+## are usable, the first one is the reference.
+##
+## @param orm the ORM used to load features.
+## @param context shared context holding `clusters`.
+## @return NULL.
+complete_features <- function(orm, context) {
+    for (cluster in context$clusters) {
+        features <- orm$feature()$load_by(cluster_id=cluster$get_id())
+        if (!features$any()) {
+            next
+        }
+        if (!is.null(rt <- features$mean("rt"))) {
+            cluster$set_mean_rt(rt)$save()
+        }
+        features_df <- as.data.frame(features)
+        central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ]
+        central_feature_into <- central_feature[["int_o"]]
+        ## keep usable intensities only: comparing a zero-length, NA or
+        ## multi-valued vector inside `if` is an error
+        central_feature_into <- central_feature_into[
+            !is.na(central_feature_into) & central_feature_into != 0
+        ]
+        if (length(central_feature_into) == 0) {
+            next
+        }
+        reference_into <- central_feature_into[[1]]
+        for (feature in as.vector(features)) {
+            feature$set_abundance(
+                feature$get_int_o() / reference_into * 100
+            )$save()
+        }
+    }
+    return (NULL)
+}
+
+## Loads the process parameters of a sample.
+##
+## Only entries whose "xfunction" is "annotatediff" are handled (through
+## load_process_params_peak_picking); every other entry is ignored.
+##
+## @return `sample`.
+load_process_params <- function(orm, sample, params) {
+    for (param_list in params) {
+        step_name <- param_list[["xfunction"]]
+        if (!is.null(step_name) && step_name == "annotatediff") {
+            load_process_params_peak_picking(orm, sample, param_list)
+        }
+    }
+    return (sample)
+}
+
+## Attaches peak picking parameters to `sample`.
+##
+## The rdata parameters ppm, maxcharge and maxiso are stored under the
+## database names ppm, maxCharge and maxIso.
+##
+## @return the result of add_sample_process_parameters.
+load_process_params_peak_picking <- function(orm, sample, peak_picking_params) {
+    ## rdata parameter name -> database field name
+    rdata_to_database_names <- list(
+        ppm="ppm",
+        maxcharge="maxCharge",
+        maxiso="maxIso"
+    )
+    saved_sample <- add_sample_process_parameters(
+        params=peak_picking_params,
+        params_translation=rdata_to_database_names,
+        param_model_generator=orm$peak_picking_parameters,
+        sample_param_setter=sample$set_peak_picking_parameters
+    )
+    return (saved_sample)
+}
+
+## Links a sample to a parameters model, reusing an existing one if any.
+##
+## @param params named list of parameters read from the rdata.
+## @param params_translation named list mapping rdata parameter names to
+##   database field names; parameters absent from `params` are skipped.
+## @param param_model_generator function creating a parameters model; the
+##   model it returns exposes `load_by` and `save`.
+## @param sample_param_setter function attaching a parameters model to the
+##   sample; its result exposes `save`.
+## @return the result of saving the sample after the parameters were set.
+add_sample_process_parameters <- function(
+    params,
+    params_translation,
+    param_model_generator,
+    sample_param_setter
+) {
+    field_values <- list()
+    for (rdata_name in names(params_translation)) {
+        value <- params[[rdata_name]]
+        if (!is.null(value)) {
+            field_values[[params_translation[[rdata_name]]]] <- value
+        }
+    }
+    ## look for a stored model with the same values before creating one
+    existing_models <- do.call(param_model_generator()$load_by, field_values)
+    if (!existing_models$any()) {
+        chosen_model <- do.call(param_model_generator, field_values)
+        chosen_model$save()
+    } else {
+        chosen_model <- existing_models$first()
+    }
+    return (sample_param_setter(chosen_model)$save())
+}
+
+
+library(optparse)
+
+## Command line options of the tool, given to optparse::OptionParser.
+## Options declared without a `default` are NULL when absent from the
+## command line; the script relies on that to detect which were given.
+option_list <- list(
+    ## flag: print the tool name and version, then quit
+    optparse::make_option(
+        c("-v", "--version"),
+        action="store_true",
+        help="Display this tool's version and exits"
+    ),
+    ## path of the rdata file that gets load()ed and processed
+    optparse::make_option(
+        c("-i", "--input"),
+        type="character",
+        help="The rdata path to import in XSeeker"
+    ),
+    optparse::make_option(
+        c("-s", "--samples"),
+        type="character",
+        help="Samples to visualise in XSeeker"
+    ),
+    ## passed to insert_base_data with archetype=TRUE
+    optparse::make_option(
+        c("-B", "--archetype"),
+        type="character",
+        help="The name of the base database"
+    ),
+    ## passed to insert_base_data
+    optparse::make_option(
+        c("-b", "--database"),
+        type="character",
+        help="The base database's path"
+    ),
+    ## passed to insert_compounds
+    optparse::make_option(
+        c("-c", "--compounds-csv"),
+        type="character",
+        help="The csv containing compounds"
+    ),
+    ## passed to get_models
+    optparse::make_option(
+        c("-m", "--models"),
+        type="character",
+        help="The path or url (must begin with http[s]:// or git@) to the database's models"
+    ),
+    ## used as the SQLite dbname of the ORM connection
+    optparse::make_option(
+        c("-o", "--output"),
+        type="character",
+        help="The path where to output sqlite"
+    ),
+    ## flag, FALSE unless given
+    optparse::make_option(
+        c("-P", "--not-show-percent"),
+        action="store_true",
+        help="Flag not to show the percents",
+        default=FALSE
+    )
+)
+
+## On any uncaught error: print the traceback, then end the script with a
+## non-zero exit status. Without the explicit quit, a non-interactive
+## session with a custom error handler goes on with the next top-level
+## expression and would finish on quit(status=err_code), i.e. status 0.
+options(error=function() {
+    traceback(3)
+    if (!interactive()) {
+        quit(save="no", status=1, runLast=FALSE)
+    }
+})
+
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
+args <- parse_args(parser, positional_arguments=0)
+
+err_code <- 0
+
+## --version: only display the tool name and version
+if (!is.null(args$options$version)) {
+    message(sprintf("%s %s", TOOL_NAME, VERSION))
+    quit()
+}
+
+## create the output SQLite database from the models
+models <- get_models(args$options$models)
+orm <- DBModelR::ORM(
+    connection_params=list(dbname=args$options$output),
+    dbms="SQLite"
+)
+
+invisible(orm$models(models))
+invisible(create_database(orm))
+
+message("Database model created")
+
+insert_adducts(orm)
+
+## optional base data; the message is only shown when data was inserted
+if (!is.null(args$options$database)) {
+    insert_base_data(orm, args$options$database)
+    message(sprintf("Base data inserted using %s.", args$options$database))
+}
+
+if (!is.null(args$options$archetype)) {
+    insert_base_data(orm, args$options$archetype, archetype=TRUE)
+}
+if (!is.null(args$options$`compounds-csv`)) {
+    insert_compounds(orm, args$options$`compounds-csv`)
+}
+
+# if (!is.null(args$options$rdata)) {
+#     load_rdata_in_base(args$options$rdata, args$options$samples, args$options$`not-show-percent`)
+# }
+
+
+## fail with a clear message rather than load()'s "bad 'file' argument"
+if (is.null(args$options$input)) {
+    stop("No input rdata given: use -i/--input.")
+}
+
+## the rdata content is loaded in its own environment, kept in the
+## script-level variable `rdata`
+rdata <- new.env()
+load(args$options$input, envir=rdata)
+
+process_rdata(orm, rdata, args$options)
+
+quit(status=err_code)
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/SERUM_v2019Jan17.tabular	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,253 @@
+HMDB_ID	MzBank	[M+H]+	[M-H]-	MetName	ChemFormula	INChIkey
+HMDB0000001	169.085126611	170.092403011	168.077850211	1-Methylhistidine	C7H11N3O2	BRMWTNUJHUMWMS-LURJTMIESA-N
+HMDB0000002	74.08439833	75.09167473	73.07712193	1,3-Diaminopropane	C3H10N2	XFNJVJPLKCPIBV-UHFFFAOYSA-N
+HMDB0000005	102.031694058	103.038970458	101.024417658	"2-Ketobutyric acid"	C4H6O3	TYEYBOSBBBHJIV-UHFFFAOYSA-N
+HMDB0000008	104.047344122	105.054620522	103.040067722	"2-Hydroxybutyric acid"	C4H8O3	AFENDNXGAFYKQO-UHFFFAOYSA-N
+HMDB0000010	300.172544634	301.179821034	299.165268234	2-Methoxyestrone	C19H24O3	WHEUWNKSCXYKBU-QPWUGHHJSA-N
+HMDB0000011	104.047344122	105.054620522	103.040067722	"(R)-3-Hydroxybutyric acid"	C4H8O3	WHBMMWSBFZVSSR-GSVOUGTGSA-N
+HMDB0000012	228.074621504	229.081897904	227.067345104	Deoxyuridine	C9H12N2O5	MXHRCPNRJAMMIM-SHYZEUOFSA-N
+HMDB0000014	227.090605919	228.097882319	226.083329519	Deoxycytidine	C9H13N3O4	CKTSBUTUHBMZGZ-SHYZEUOFSA-N
+HMDB0000015	346.214409448	347.221685848	345.207133048	Cortexolone	C21H30O4	WHBHBVVOGNECLV-OBQKJFGGSA-N
+HMDB0000016	330.219494826	331.226771226	329.212218426	Deoxycorticosterone	C21H30O3	ZESRJSPZRDMNHY-YFWFAHHUSA-N
+HMDB0000017	183.053157781	184.060434181	182.045881381	"4-Pyridoxic acid"	C8H9NO4	HXACOUQIXZGNBF-UHFFFAOYSA-N
+HMDB0000019	116.047344122	117.054620522	115.040067722	"alpha-Ketoisovaleric acid"	C5H8O3	QHKABHOOEWYVLI-UHFFFAOYSA-N
+HMDB0000020	152.047344122	153.054620522	151.040067722	"p-Hydroxyphenylacetic acid"	C8H8O3	XQXPVVBIMDBYFF-UHFFFAOYSA-N
+HMDB0000021	306.970536611	307.977813011	305.963260211	Iodotyrosine	C9H10INO3	UQTZMGFTRHFAAM-ZETCQYMHSA-N
+HMDB0000022	167.094628665	168.101905065	166.087352265	3-Methoxytyramine	C9H13NO2	DIVQKHQLANKJQO-UHFFFAOYSA-N
+HMDB0000023	104.047344122	105.054620522	103.040067722	"(S)-3-Hydroxyisobutyric acid"	C4H8O3	DBXBTMSZEOQQDU-VKHMYHEASA-N
+HMDB0000026	132.053492132	133.060768532	131.046215732	"Ureidopropionic acid"	C4H8N2O3	JSJWCHRYRHKBBW-UHFFFAOYSA-N
+HMDB0000027	241.117489371	242.124765771	240.110212971	Tetrahydrobiopterin	C9H15N5O3	FNKQXYHWGSIFBK-UHFFFAOYSA-N
+HMDB0000030	244.088163078	245.095439478	243.080886678	Biotin	C10H16N2O3S	YBJHBAHKTGYVGT-ZKWXMUAHSA-N
+HMDB0000031	290.224580204	291.231856604	289.217303804	Androsterone	C19H30O2	QGXBDMJGAMFCBF-HLUDHZFRSA-N
+HMDB0000032	384.33921603	385.34649243	383.33193963	7-Dehydrocholesterol	C27H44O	UCTLRSWJYQTBFZ-DDPQNLDTSA-N
+HMDB0000033	226.106590334	227.113866734	225.099313934	Carnosine	C9H14N4O3	CQOVPNPJLQNMDC-ZETCQYMHSA-N
+HMDB0000034	135.054495185	136.061771585	134.047218785	Adenine	C5H5N5	GFFGJBXGBJISGV-UHFFFAOYSA-N
+HMDB0000036	515.291673489	516.298949889	514.284397089	"Taurocholic acid"	C26H45NO7S	WBWWGRHZICKQGZ-HZAMXZRMSA-N
+HMDB0000037	360.193674006	361.200950406	359.186397606	Aldosterone	C21H28O5	PQSUYGKTWSAVDQ-SRPWZAMTSA-N
+HMDB0000038	239.101839307	240.109115707	238.094562907	Dihydrobiopterin	C9H13N5O3	FEMXZDUTFRTWPE-UHFFFAOYSA-N
+HMDB0000039	88.0524295	89.0597059	87.0451531	"Butyric acid"	C4H8O2	FERIUCNNQQJTOY-UHFFFAOYSA-N
+HMDB0000042	60.021129372	61.028405772	59.013852972	"Acetic acid"	C2H4O2	QTBSBXVTEAMEQO-UHFFFAOYSA-N
+HMDB0000043	117.078978601	118.086255001	116.071702201	Betaine	C5H11NO2	KWIUHFFTVRNATP-UHFFFAOYSA-N
+HMDB0000044	176.032087988	177.039364388	175.024811588	"Ascorbic acid"	C6H8O6	CIWBSHSKHKDKBQ-JLAZNSOCSA-N
+HMDB0000045	347.063084339	348.070360739	346.055807939	"Adenosine monophosphate"	C10H14N5O7P	UDMBCSSLTHHNCD-KQYNXXCUSA-N
+HMDB0000050	267.096753929	268.104030329	266.089477529	Adenosine	C10H13N5O4	OIRDTQYFTABQOQ-KQYNXXCUSA-N
+HMDB0000051	17.026549101	18.033825501	16.019272701	Ammonia	H3N	QGZKDVFQNNGYKY-UHFFFAOYSA-N
+HMDB0000052	290.122634328	291.129910728	289.115357928	"Argininosuccinic acid"	C10H18N4O6	KDZOASGQNOPSCU-WDSKDSINSA-N
+HMDB0000053	286.193280076	287.200556476	285.186003676	Androstenedione	C19H26O2	AEMFNILZOJDQLW-QAGGRKNESA-N
+HMDB0000054	584.263484904	585.270761304	583.256208504	Bilirubin	C33H36N4O6	BPYKTIZUTYGOLE-IFADSCNNSA-N
+HMDB0000056	89.047678473	90.054954873	88.040402073	beta-Alanine	C3H7NO2	UCMIRNVEIXFBKS-UHFFFAOYSA-N
+HMDB0000058	329.052519653	330.059796053	328.045243253	"Cyclic AMP"	C10H12N5O6P	IVOMOUWHDPKRLL-KQYNXXCUSA-N
+HMDB0000060	102.031694058	103.038970458	101.024417658	"Acetoacetic acid"	C4H6O3	WDJHALXBUFZDSR-UHFFFAOYSA-N
+HMDB0000062	161.105193351	162.112469751	160.097916951	L-Carnitine	C7H15NO3	PHIQHXFUZVPYII-ZCFIWIBFSA-N
+HMDB0000063	362.20932407	363.21660047	361.20204767	Cortisol	C21H30O5	JYGXADMDTFJGBT-VWUMJDOOSA-N
+HMDB0000064	131.069476547	132.076752947	130.062200147	Creatine	C4H9N3O2	CVSVTCORWBXHQV-UHFFFAOYSA-N
+HMDB0000067	386.354866094	387.362142494	385.347589694	Cholesterol	C27H46O	HVYWMOMLDIMFJA-DPAQBDIFSA-N
+HMDB0000068	183.089543287	184.096819687	182.082266887	Epinephrine	C9H13NO3	UCTWMZQNUQWSLP-VIFPVBQESA-N
+HMDB0000070	129.078978601	130.086255001	128.071702201	"Pipecolic acid"	C6H11NO2	HXEACLLIILLPRG-UHFFFAOYSA-N
+HMDB0000071	252.085854882	253.093131282	251.078578482	Deoxyinosine	C10H12N4O4	VGONTNSXDCQUGY-RRKCRQDMSA-N
+HMDB0000073	153.078978601	154.086255001	152.071702201	Dopamine	C8H11NO2	VYFYYTLLBUKUHU-UHFFFAOYSA-N
+HMDB0000076	114.042927446	115.050203846	113.035651046	Dihydrouracil	C4H6N2O2	OIVLITBTBDPEFK-UHFFFAOYSA-N
+HMDB0000077	288.20893014	289.21620654	287.20165374	Dehydroepiandrosterone	C19H28O2	FMGSKLZLMKYGDP-USOAJAOKSA-N
+HMDB0000078	178.041212886	179.048489286	177.033936486	Cysteinylglycine	C5H10N2O3S	ZUKPVRWZDMRIEO-VKHMYHEASA-N
+HMDB0000079	128.05857751	129.06585391	127.05130111	Dihydrothymine	C5H8N2O2	NBAKTGXDIBVZOO-UHFFFAOYSA-N
+HMDB0000085	267.096753929	268.104030329	266.089477529	Deoxyguanosine	C10H13N5O4	YKBGVTZYEHREMT-KVQBGUIXSA-N
+HMDB0000086	257.102823889	258.110100289	256.095547489	Glycerophosphocholine	C8H20NO6P	SUHOQUVVVLNYQR-QMMMGPOBSA-N
+HMDB0000087	45.057849229	46.065125629	44.050572829	Dimethylamine	C2H7N	ROSDSFDQCJNGOL-UHFFFAOYSA-N
+HMDB0000089	243.085520541	244.092796941	242.078244141	Cytidine	C9H13N3O5	UHDGCWIWMRVCDJ-XVFCMESISA-N
+HMDB0000092	103.063328537	104.070604937	102.056052137	Dimethylglycine	C4H9NO2	FFDGPVCHZBVARC-UHFFFAOYSA-N
+HMDB0000094	192.02700261	193.03427901	191.01972621	"Citric acid"	C6H8O7	KRKNYBCHXYNGOX-UHFFFAOYSA-N
+HMDB0000097	104.107539075	105.114815475	103.100262675	Choline	C5H14NO	OEYIOHPDSNJKLS-UHFFFAOYSA-N
+HMDB0000098	150.05282343	151.06009983	149.04554703	D-Xylose	C5H10O5	SRBFZHDQGSBBOR-IOVATXLUSA-N
+HMDB0000099	222.067427636	223.074704036	221.060151236	L-Cystathionine	C7H14N2O4S	ILRYLPWNYFXEMH-WHFBIAKZSA-N
+HMDB0000101	251.101839307	252.109115707	250.094562907	Deoxyadenosine	C10H13N5O3	OLXZPDWKRNYJJZ-RRKCRQDMSA-N
+HMDB0000107	182.07903818	183.08631458	181.07176178	Galactitol	C6H14O6	FBPFZTCFMRRESA-GUCUJZIJSA-N
+HMDB0000108	46.041864814	47.049141214	45.034588414	Ethanol	C2H6O	LFQSCWFLJHTTHZ-UHFFFAOYSA-N
+HMDB0000112	103.063328537	104.070604937	102.056052137	"gamma-Aminobutyric acid"	C4H9NO2	BTCSSZJGUNDROE-UHFFFAOYSA-N
+HMDB0000115	76.016043994	77.023320394	75.008767594	"Glycolic acid"	C2H4O3	AEMRFAOFKBGASW-UHFFFAOYSA-N
+HMDB0000118	182.057908808	183.065185208	181.050632408	"Homovanillic acid"	C9H10O4	QRMZSPFSDQBLIX-UHFFFAOYSA-N
+HMDB0000119	74.00039393	75.00767033	72.99311753	"Glyoxylic acid"	C2H2O3	HHLFWLYXYJOTON-UHFFFAOYSA-N
+HMDB0000121	441.139681375	442.146957775	440.132404975	"Folic acid"	C19H19N7O6	OVBPIULPVIDEAO-LBPRGKRZSA-N
+HMDB0000122	180.063388116	181.070664516	179.056111716	D-Glucose	C6H12O6	WQZGKKKJIJFFOK-GASJEMHNSA-N
+HMDB0000123	75.032028409	76.039304809	74.024752009	Glycine	C2H5NO2	DHMQDGOQFOQNFH-UHFFFAOYSA-N
+HMDB0000124	260.029718526	261.036994926	259.022442126	"Fructose 6-phosphate"	C6H13O9P	GSXOAOHZAIYLCY-HSUXUTPPSA-N
+HMDB0000125	307.083805981	308.091082381	306.076529581	Glutathione	C10H17N3O6S	RWSXRVCMGQZWBV-WDSKDSINSA-N
+HMDB0000126	172.013674532	173.020950932	171.006398132	"Glycerol 3-phosphate"	C3H9O6P	AWUCVROLDVIAJX-GSVOUGTGSA-N
+HMDB0000127	194.042652674	195.049929074	193.035376274	"D-Glucuronic acid"	C6H10O7	AEMOLEFTQBMNLQ-WAXACMCWSA-N
+HMDB0000128	117.053826483	118.061102883	116.046550083	"Guanidoacetic acid"	C3H7N3O2	BPMFZUMJYQTVII-UHFFFAOYSA-N
+HMDB0000130	168.042258744	169.049535144	167.034982344	"Homogentisic acid"	C8H8O4	IGMNYECMUMZDDF-UHFFFAOYSA-N
+HMDB0000131	92.047344122	93.054620522	91.040067722	Glycerol	C3H8O3	PEDCQBHIVMGVHV-UHFFFAOYSA-N
+HMDB0000133	283.091668551	284.098944951	282.084392151	Guanosine	C10H13N5O5	NYHBQMYGNKIUIF-UUOKFMHZSA-N
+HMDB0000134	116.010958616	117.018235016	115.003682216	"Fumaric acid"	C4H4O4	VZCYOOQTPOCHFL-OWOJBTEDSA-N
+HMDB0000138	465.309038113	466.316314513	464.301761713	"Glycocholic acid"	C26H43NO6	RFDAIACWWDREDC-MZMBZMQMSA-N
+HMDB0000139	106.02660868	107.03388508	105.01933228	"Glyceric acid"	C3H6O4	RBNPOMFGQQGHHO-UWTATZPHSA-N
+HMDB0000140	811.690118957	812.697395357	810.682842557	Glucosylceramide	C48H93NO8	POQRWMRXUOPCLD-XNWFPASESA-N
+HMDB0000142	46.005479308	47.012755708	44.998202908	"Formic acid"	CH2O2	BDAGIHXWWSANSR-UHFFFAOYSA-N
+HMDB0000143	180.063388116	181.070664516	179.056111716	D-Galactose	C6H12O6	WQZGKKKJIJFFOK-PHYPRBDBSA-N
+HMDB0000145	270.161979948	271.169256348	269.154703548	Estrone	C18H22O2	DNXHEGUUPJUMQT-CBZIJGRNSA-N
+HMDB0000148	147.053157781	148.060434181	146.045881381	"L-Glutamic acid"	C5H9NO4	WHUUTDBJXJRKMK-VKHMYHEASA-N
+HMDB0000149	61.052763851	62.060040251	60.045487451	Ethanolamine	C2H7NO	HZAXFHJVJLSVMW-UHFFFAOYSA-N
+HMDB0000150	178.047738052	179.055014452	177.040461652	Gluconolactone	C6H10O6	PHOQVHQSTUBQQK-SQOUGZDYSA-N
+HMDB0000151	272.177630012	273.184906412	271.170353612	Estradiol	C18H24O2	VOXZDWNPVJITMN-ZBRFXRBCSA-N
+HMDB0000152	154.026608673	155.033885073	153.019332273	"Gentisic acid"	C7H6O4	WXTMDXOMEHJXQO-UHFFFAOYSA-N
+HMDB0000153	288.172544634	289.179821034	287.165268234	Estriol	C18H24O3	PROQIPRRNZUXQM-ZXXIGWHRSA-N
+HMDB0000156	134.021523302	135.028799702	133.014246902	"L-Malic acid"	C4H6O5	BJEPYKJPYRNKOW-REOHCLBHSA-N
+HMDB0000157	136.03851077	137.04578717	135.03123437	Hypoxanthine	C5H4N4O	FDGQSTZJBFJUBT-UHFFFAOYSA-N
+HMDB0000158	181.073893223	182.081169623	180.066616823	L-Tyrosine	C9H11NO3	OUYCCCASQSFEME-QMMMGPOBSA-N
+HMDB0000159	165.078978601	166.086255001	164.071702201	L-Phenylalanine	C9H11NO2	COLNVLDHVKWLRT-QMMMGPOBSA-N
+HMDB0000161	89.047678473	90.054954873	88.040402073	L-Alanine	C3H7NO2	QNAYBMKLOCPYGJ-REOHCLBHSA-N
+HMDB0000162	115.063328537	116.070604937	114.056052137	L-Proline	C5H9NO2	ONIBWKKTOPOVIA-BYPYZUCNSA-N
+HMDB0000163	342.116211546	343.123487946	341.108935146	D-Maltose	C12H22O11	GUBGYTABKSRVRQ-DKBJLJRDSA-N
+HMDB0000164	31.042199165	32.049475565	30.034922765	Methylamine	CH5N	BAVYZALUXZFZLV-UHFFFAOYSA-N
+HMDB0000167	119.058243159	120.065519559	118.050966759	L-Threonine	C4H9NO3	AYFVYJQAPQTCCC-GBXIJSLDSA-N
+HMDB0000168	132.053492132	133.060768532	131.046215732	L-Asparagine	C4H8N2O3	DCXYFEDJOCDNAF-REOHCLBHSA-N
+HMDB0000169	180.063388116	181.070664516	179.056111716	D-Mannose	C6H12O6	WQZGKKKJIJFFOK-QTVWNMPRSA-N
+HMDB0000172	131.094628665	132.101905065	130.087352265	L-Isoleucine	C6H13NO2	AGPKZVBTJJNPAG-WHFBIAKZSA-N
+HMDB0000174	164.068473494	165.075749894	163.061197094	L-Fucose	C6H12O5	SHZGCJCMOBCMKK-DHVFOXMCSA-N
+HMDB0000175	348.047099924	349.054376324	347.039823524	"Inosinic acid"	C10H13N4O8P	GRSZFWQUAKGDAV-KQYNXXCUSA-N
+HMDB0000176	116.010958616	117.018235016	115.003682216	"Maleic acid"	C4H4O4	VZCYOOQTPOCHFL-UPHRSURJSA-N
+HMDB0000177	155.069476547	156.076752947	154.062200147	L-Histidine	C6H9N3O2	HNDVDQJCIGZPNO-YFKPBYRVSA-N
+HMDB0000181	197.068807845	198.076084245	196.061531445	L-Dopa	C9H11NO4	WTDRDQBEARUVNC-LURJTMIESA-N
+HMDB0000182	146.105527702	147.112804102	145.098251302	L-Lysine	C6H14N2O2	KDXKERNSBIXSRK-YFKPBYRVSA-N
+HMDB0000186	342.116211546	343.123487946	341.108935146	Alpha-Lactose	C12H22O11	GUBGYTABKSRVRQ-XLOQQCSPSA-N
+HMDB0000187	105.042593095	106.049869495	104.035316695	L-Serine	C3H7NO3	MTCFGRXMJLQNBG-REOHCLBHSA-N
+HMDB0000190	90.031694058	91.038970458	89.024417658	"L-Lactic acid"	C3H6O3	JVTAAEKCZFNVCJ-REOHCLBHSA-N
+HMDB0000191	133.037507717	134.044784117	132.030231317	"L-Aspartic acid"	C4H7NO4	CKLJMWTZIZZHCS-REOHCLBHSA-N
+HMDB0000192	240.023848262	241.031124662	239.016571862	L-Cystine	C6H12N2O4S2	LEVWYRKDKASIDU-IMJSIDKUSA-N
+HMDB0000193	192.02700261	193.03427901	191.01972621	"Isocitric acid"	C6H8O7	ODBLHEXUDAPZAU-UHFFFAOYSA-N
+HMDB0000194	240.122240398	241.129516798	239.114963998	Anserine	C10H16N4O3	MYYIAHXIVFADCU-QMMMGPOBSA-N
+HMDB0000195	268.080769514	269.088045914	267.073493114	Inosine	C10H12N4O5	UGQMRVRMYYASKQ-KQYNXXCUSA-N
+HMDB0000197	175.063328537	176.070604937	174.056052137	"Indoleacetic acid"	C10H9NO2	SEOVTRFCIGRIMH-UHFFFAOYSA-N
+HMDB0000201	203.115758031	204.123034431	202.108481631	L-Acetylcarnitine	C9H17NO4	RDHQFKQIGNGIED-MRVPVSSYSA-N
+HMDB0000202	118.02660868	119.03388508	117.01933228	"Methylmalonic acid"	C4H6O4	ZIYVHBGGAOATLY-UHFFFAOYSA-N
+HMDB0000205	164.047344122	165.054620522	163.040067722	"Phenylpyruvic acid"	C9H8O3	BTNMPGBKDVTSJY-UHFFFAOYSA-N
+HMDB0000206	188.116092388	189.123368788	187.108815988	N6-Acetyl-L-lysine	C8H16N2O3	DTERQYGMUDWYAZ-ZETCQYMHSA-N
+HMDB0000207	282.255880332	283.263156732	281.248603932	"Oleic acid"	C18H34O2	ZQPPMHVWECSIRJ-KTKRTIGZSA-N
+HMDB0000208	146.021523302	147.028799702	145.014246902	"Oxoglutaric acid"	C5H6O5	KPGXRSRHYNQIFN-UHFFFAOYSA-N
+HMDB0000209	136.0524295	137.0597059	135.0451531	"Phenylacetic acid"	C8H8O2	WLJVXDMOQOGPHL-UHFFFAOYSA-N
+HMDB0000210	219.110672659	220.117949059	218.103396259	"Pantothenic acid"	C9H17NO5	GHOKWGTUZJEAQD-ZETCQYMHSA-N
+HMDB0000211	180.063388116	181.070664516	179.056111716	myo-Inositol	C6H12O6	CDAISMWEOUEBRE-GPIVLXJGSA-N
+HMDB0000214	132.089877638	133.097154038	131.082601238	Ornithine	C5H12N2O2	AHLPHDHHMVZTML-BYPYZUCNSA-N
+HMDB0000215	221.089937217	222.097213617	220.082660817	N-Acetyl-D-glucosamine	C8H15NO6	OVRNDRQMDRJTHS-RTRLPJTCSA-N
+HMDB0000216	169.073893223	170.081169623	168.066616823	Norepinephrine	C8H11NO3	SFLSHLFXELFNJZ-QMMMGPOBSA-N
+HMDB0000217	744.083277073	745.090553473	743.076000673	NADP	C21H29N7O17P3	XJLXINKUBYWONI-NNYOXOHSSA-O
+HMDB0000220	256.240230268	257.247506668	255.232953868	"Palmitic acid"	C16H32O2	IPCSVZSSVZVIGE-UHFFFAOYSA-N
+HMDB0000221	745.091102105	746.098378505	744.083825705	NADPH	C21H30N7O17P3	ACFIXJIJDZMPPO-NCHANQSKSA-N
+HMDB0000222	399.334858933	400.342135333	398.327582533	L-Palmitoylcarnitine	C23H45NO4	XOMRRQXKHMYMOC-OAQYLSRUSA-N
+HMDB0000224	141.019094261	142.026370661	140.011817861	O-Phosphoethanolamine	C2H8NO4P	SUHOOTKUPISOBE-UHFFFAOYSA-N
+HMDB0000225	160.037173366	161.044449766	159.029896966	"Oxoadipic acid"	C6H8O5	FGSBNBBHOZHUBO-UHFFFAOYSA-N
+HMDB0000226	156.017106626	157.024383026	155.009830226	"Orotic acid"	C5H4N2O4	PXQPEWDEAKTCGB-UHFFFAOYSA-N
+HMDB0000227	148.073558872	149.080835272	147.066282472	"Mevalonic acid"	C6H12O4	KJTLQQUUPVSXIM-UHFFFAOYSA-N
+HMDB0000228	94.041864814	95.049141214	93.034588414	Phenol	C6H6O	ISWSIDIOOBJBQZ-UHFFFAOYSA-N
+HMDB0000229	334.056601978	335.063878378	333.049325578	"Nicotinamide ribotide"	C11H15N2O8P	DAYLJWODMCOQEW-TURQNECASA-N
+HMDB0000230	309.105981211	310.113257611	308.098704811	"N-Acetylneuraminic acid"	C11H19NO9	SQVRNKJHWKZAKO-PFQGKNLYSA-N
+HMDB0000232	167.021857653	168.029134053	166.014581253	"Quinolinic acid"	C7H5NO4	GJAWHXHKYYXBSV-UHFFFAOYSA-N
+HMDB0000234	288.20893014	289.21620654	287.20165374	Testosterone	C19H28O2	MUMGGOZAMZWBJJ-DYKIIFRCSA-N
+HMDB0000235	265.112306876	266.119583276	264.105030476	Thiamine	C12H17N4OS	JZRWCGZRTZMZEH-UHFFFAOYSA-N
+HMDB0000237	74.036779436	75.044055836	73.029503036	"Propionic acid"	C3H6O2	XBDQKXXYIPTUBI-UHFFFAOYSA-N
+HMDB0000239	169.073893223	170.081169623	168.066616823	Pyridoxine	C8H11NO3	LXNHXLLTXMVWPM-UHFFFAOYSA-N
+HMDB0000240	81.97246462	82.97974102	80.96518822	Sulfite	H2O3S	LSNNMFCWUKXFEE-UHFFFAOYSA-N
+HMDB0000241	562.258005596	563.265281996	561.250729196	"Protoporphyrin IX"	C34H34N4O4	KSFOVUSSGSKXFI-UJJXFSCMSA-N
+HMDB0000243	88.016043994	89.023320394	87.008767594	"Pyruvic acid"	C3H4O3	LCTONWCANYUPML-UHFFFAOYSA-N
+HMDB0000244	376.138284392	377.145560792	375.131007992	Riboflavin	C17H20N4O6	AUNGANRZJHBGPY-SCRDCRAPSA-N
+HMDB0000245	226.095356946	227.102633346	225.088080546	Porphobilinogen	C10H14N2O4	QSHWIQZFGQKFMA-UHFFFAOYSA-N
+HMDB0000246	72.057514878	73.064791278	71.050238478	Tetrahydrofuran	C4H8O	WYURNTSHIVDZCO-UHFFFAOYSA-N
+HMDB0000247	182.07903818	183.08631458	181.07176178	Sorbitol	C6H14O6	FBPFZTCFMRRESA-JGWLITMVSA-N
+HMDB0000248	776.686681525	777.693957925	775.679405125	Thyroxine	C15H11I4NO4	XUIIKFGFIJCVMT-LBPRGKRZSA-N
+HMDB0000250	177.943225506	178.950501906	176.935949106	Pyrophosphate	H4O7P2	XPPKVPWEQAFLFU-UHFFFAOYSA-N
+HMDB0000251	125.014663785	126.021940185	124.007387385	Taurine	C2H7NO3S	XOAAWQZATWQOTB-UHFFFAOYSA-N
+HMDB0000252	299.282429433	300.289705833	298.275153033	Sphingosine	C18H37NO2	WWUZIQQURGPMPG-CCEZHUSRSA-N
+HMDB0000253	316.240230268	317.247506668	315.232953868	Pregnenolone	C21H32O2	ORNBQBCIOKFOEO-QGVNFLHTSA-N
+HMDB0000254	118.02660868	119.03388508	117.01933228	"Succinic acid"	C4H6O4	KDYFGRWQOYBRFD-UHFFFAOYSA-N
+HMDB0000256	410.3912516	411.398528	409.3839752	Squalene	C30H50	YYGNTYWPHWGJRM-FLHYQJCXSA-N
+HMDB0000257	113.94453531	114.95181171	112.93725891	Thiosulfate	H2O3S2	DHCDFWKWKRSZHF-UHFFFAOYSA-N
+HMDB0000258	342.116211546	343.123487946	341.108935146	Sucrose	C12H22O11	CZMRCDWAGMRECN-UGDNZRGBSA-N
+HMDB0000259	176.094963016	177.102239416	175.087686616	Serotonin	C10H12N2O	QZAYGJVTTNCVMB-UHFFFAOYSA-N
+HMDB0000262	126.042927446	127.050203846	125.035651046	Thymine	C5H6N2O2	RWQNBRDOKXIBIV-UHFFFAOYSA-N
+HMDB0000263	167.982374404	168.989650804	166.975098004	"Phosphoenolpyruvic acid"	C3H5O6P	DTBNBXWJWCWCIK-UHFFFAOYSA-N
+HMDB0000265	650.790038137	651.797314537	649.782761737	Liothyronine	C15H12I3NO4	AUYYCJSJGJYCDS-LBPRGKRZSA-N
+HMDB0000267	129.042593095	130.049869495	128.035316695	"Pyroglutamic acid"	C5H7NO3	ODHCTXKNWHHXJC-VKHMYHEASA-N
+HMDB0000268	350.245709576	351.252985976	349.238433176	Tetrahydrocorticosterone	C21H34O4	RHQQHZQUAMFINJ-DSCSGEDNSA-N
+HMDB0000269	301.298079497	302.305355897	300.290803097	Sphinganine	C18H39NO2	OTKJDMGTUTTYMP-ZWKOTPCHSA-N
+HMDB0000271	89.047678473	90.054954873	88.040402073	Sarcosine	C3H7NO2	FSYKKLYZXJSNPZ-UHFFFAOYSA-N
+HMDB0000272	185.008923505	186.016199905	184.001647105	Phosphoserine	C3H8NO6P	BZQFBWGGLXLEPQ-REOHCLBHSA-N
+HMDB0000273	242.090271568	243.097547968	241.082995168	Thymidine	C10H14N2O5	IQFYYKKMVGJFEH-XLPZGREQSA-N
+HMDB0000277	379.248759843	380.256036243	378.241483443	"Sphingosine 1-phosphate"	C18H38NO5P	DUYSYHSSBDVJSM-KRWOKUGFSA-N
+HMDB0000279	276.132136382	277.139412782	275.124859982	Saccharopine	C11H20N2O6	ZDGJAHTZVHVLOT-YUMQZZPRSA-N
+HMDB0000280	389.95181466	390.95909106	388.94453826	"Phosphoribosyl pyrophosphate"	C5H13O14P3	PQGCEDQWHSBAJP-TXICZTDVSA-N
+HMDB0000283	150.05282343	151.06009983	149.04554703	D-Ribose	C5H10O5	HMFHBZSHGGEWLO-SOOFDHNKSA-N
+HMDB0000286	566.055020376	567.062296776	565.047743976	"Uridine diphosphate glucose"	C15H24N2O17P2	HSCJRCZFDFQWRP-LPTOLDDLSA-N
+HMDB0000288	324.035866536	325.043142936	323.028590136	"Uridine 5'-monophosphate"	C9H13N2O9P	DJJCXFVJDGTHFX-XVFCMESISA-N
+HMDB0000289	168.028340014	169.035616414	167.021063614	"Uric acid"	C5H4N4O3	LEHOTFFKMJEONL-UHFFFAOYSA-N
+HMDB0000291	198.05282343	199.06009983	197.04554703	"Vanillylmandelic acid"	C9H10O5	CGQCWMIAEPEHNQ-QMMMGPOBSA-N
+HMDB0000292	152.033425392	153.040701792	151.026148992	Xanthine	C5H4N4O2	LRFVTYWOQMYALW-UHFFFAOYSA-N
+HMDB0000294	60.03236276	61.03963916	59.02508636	Urea	CH4N2O	XSQUKJJJFZCRTK-UHFFFAOYSA-N
+HMDB0000295	404.002196946	405.009473346	402.994920546	"Uridine 5'-diphosphate"	C9H14N2O12P2	XCCTYIAWTASOJW-XVFCMESISA-N
+HMDB0000296	244.069536126	245.076812526	243.062259726	Uridine	C9H12N2O6	DRTQHJPVMGBUCF-XVFCMESISA-N
+HMDB0000299	284.075684136	285.082960536	283.068407736	Xanthosine	C10H12N4O6	UBORTCNDUKBEOP-UUOKFMHZSA-N
+HMDB0000300	112.027277382	113.034553782	111.020000982	Uracil	C4H4N2O2	ISAKRJDGNUQOIC-UHFFFAOYSA-N
+HMDB0000301	138.042927446	139.050203846	137.035651046	"Urocanic acid"	C6H6N2O2	LOIYMIARKYCTBW-OWOJBTEDSA-N
+HMDB0000303	160.100048394	161.107324794	159.092771994	Tryptamine	C10H12N2	APJYDQYYACXCRM-UHFFFAOYSA-N
+HMDB0000305	286.229665582	287.236941982	285.222389182	"Vitamin A"	C20H30O	FPIPGXGPPPQFEQ-OVSJKPMPSA-N
+HMDB0000306	137.084063979	138.091340379	136.076787579	Tyramine	C8H11NO	DZGWFCGJZKJUFP-UHFFFAOYSA-N
+HMDB0000308	374.282095082	375.289371482	373.274818682	"3b-Hydroxy-5-cholenoic acid"	C24H38O3	HIAJCGFYHIANNA-QIZZZRFXSA-N
+HMDB0000315	332.23514489	333.24242129	331.22786849	16-a-Hydroxypregnenolone	C21H32O3	ZAKJZPQDUPCXSD-YRWKUUEZSA-N
+HMDB0000317	132.07864425	133.08592065	131.07136785	"2-Hydroxy-3-methylpentanoic acid"	C6H12O3	RILPIWOPNGRASR-RFZPGFLSSA-N
+HMDB0000318	170.057908808	171.065185208	169.050632408	3,4-Dihydroxyphenylglycol	C8H10O4	MTVWFVDWRVYDOR-UHFFFAOYSA-N
+HMDB0000319	362.20932407	363.21660047	361.20204767	18-Hydroxycorticosterone	C21H30O5	HFSXHZZDNDGLQN-ZVIOFETBSA-N
+HMDB0000321	162.05282343	163.06009983	161.04554703	"2-Hydroxyadipic acid"	C6H10O5	OTTXIFWBPRRYOG-UHFFFAOYSA-N
+HMDB0000325	190.084123558	191.091399958	189.076847158	"3-Hydroxysuberic acid"	C8H14O5	ARJZZFJXSNJKGR-UHFFFAOYSA-N
+HMDB0000326	408.28757439	409.29485079	407.28029799	"1b,3a,12a-Trihydroxy-5b-cholanoic acid"	C24H40O5	DAKYVYUAVGJDRK-LFMRMFNLSA-N
+HMDB0000332	376.188588628	377.195865028	375.181312228	18-Oxocortisol	C21H28O6	XUQWWIFROYJHCU-FJNAKSJRSA-N
+HMDB0000335	286.15689457	287.16417097	285.14961817	16a-Hydroxyestrone	C18H22O3	WPOCIZJTELRQMF-QFXBJFAPSA-N
+HMDB0000336	104.047344122	105.054620522	103.040067722	"(R)-3-Hydroxyisobutyric acid"	C4H8O3	DBXBTMSZEOQQDU-GSVOUGTGSA-N
+HMDB0000337	120.042258744	121.049535144	119.034982344	"(S)-3,4-Dihydroxybutyric acid"	C4H8O4	DZAIOXUZHHTJKN-UHFFFAOYSA-N
+HMDB0000343	286.15689457	287.16417097	285.14961817	2-Hydroxyestrone	C18H22O3	SWINWPBPEKHUOD-JPVZDGGYSA-N
+HMDB0000345	162.05282343	163.06009983	161.04554703	"3-Hydroxyadipic acid"	C6H10O5	YVOMYDHIQVMMTA-UHFFFAOYSA-N
+HMDB0000347	288.172544634	289.179821034	287.165268234	16b-Hydroxyestradiol	C18H24O3	PROQIPRRNZUXQM-ZMSHIADSSA-N
+HMDB0000350	218.115423686	219.122700086	217.108147286	"3-Hydroxysebacic acid"	C10H18O5	OQYZCCKCJQWHIE-UHFFFAOYSA-N
+HMDB0000352	304.203844762	305.211121162	303.196568362	16a-Hydroxydehydroisoandrosterone	C19H28O3	QQIVKFZWLZJXJT-DNKQKWOHSA-N
+HMDB0000357	104.047344122	105.054620522	103.040067722	"3-Hydroxybutyric acid"	C4H8O3	WHBMMWSBFZVSSR-UHFFFAOYSA-N
+HMDB0000359	434.339609961	435.346886361	433.332333561	"3alpha,7alpha-Dihydroxycoprostanic acid"	C27H46O4	ITZYGDKGRKKBSN-HKFUITGCSA-N
+HMDB0000360	120.042258744	121.049535144	119.034982344	"2,4-Dihydroxybutanoic acid"	C4H8O4	UFYGCFHQAXXBCF-UHFFFAOYSA-N
+HMDB0000362	185.99293909	187.00021549	184.98566269	"2-Phosphoglyceric acid"	C3H7O7P	GXIURPTVHJPJLF-UHFFFAOYSA-N
+HMDB0000363	332.23514489	333.24242129	331.22786849	17a-Hydroxypregnenolone	C21H32O3	JERGUCIJOXJXHF-TVWVXWENSA-N
+HMDB0000365	290.224580204	291.231856604	289.217303804	Epiandrosterone	C19H30O2	QGXBDMJGAMFCBF-QRIARFFBSA-N
+HMDB0000369	292.240230268	293.247506668	291.232953868	3b,17b-Dihydroxyetiocholane	C19H32O2	CBMYJHIOYJEBSB-WTVXNACZSA-N
+HMDB0000374	330.219494826	331.226771226	329.212218426	17-Hydroxyprogesterone	C21H30O3	DBPWSSGDRRHUNT-CEGNMAFCSA-N
+HMDB0000375	166.062994186	167.070270586	165.055717786	"3-(3-Hydroxyphenyl)propanoic acid"	C9H10O3	QVWAEZJXDYOKEH-UHFFFAOYSA-N
+HMDB0000379	206.042652674	207.049929074	205.035376274	"2-Methylcitric acid"	C7H10O7	YNOXCRMFGMSKIJ-UHFFFAOYSA-N
+HMDB0000380	302.188194698	303.195471098	301.180918298	"2-Hydroxyestradiol-3-methyl ether"	C19H26O3	MMKYSUOJWFKECQ-SSTWWWIQSA-N
+HMDB0000387	216.172544634	217.179821034	215.165268234	"3-Hydroxydodecanoic acid"	C12H24O3	MUCMKTPAZLSKTL-UHFFFAOYSA-N
+HMDB0000394	274.178023942	275.185300342	273.170747542	"3-Hydroxytetradecanedioic acid"	C14H26O5	CEDZIURHISELSQ-UHFFFAOYSA-N
+HMDB0000396	118.062994186	119.070270586	117.055717786	"2-Ethylhydracrylic acid"	C5H10O3	ZMZQVAUJTDKQGE-UHFFFAOYSA-N
+HMDB0000397	154.026608673	155.033885073	153.019332273	"2-Pyrocatechuic acid"	C7H6O4	GLDQAMYCGOIJDV-UHFFFAOYSA-N
+HMDB0000405	302.188194698	303.195471098	301.180918298	2-Methoxyestradiol	C19H26O3	CQOQDQWUFQDJMK-SSTWWWIQSA-N
+HMDB0000407	118.062994186	119.070270586	117.055717786	"2-Hydroxy-3-methylbutyric acid"	C5H10O3	NGEWQZIDQIYUNV-UHFFFAOYSA-N
+HMDB0000413	246.146723814	247.154000214	245.139447414	"3-Hydroxydodecanedioic acid"	C12H22O5	FYVQCLGZFXHEGL-UHFFFAOYSA-N
+HMDB0000416	412.191959446	413.199235846	411.184683046	"17-Hydroxypregnenolone sulfate"	C21H32O6S	OMOKWYAQVYBHMG-QUPIPBJSSA-N
+HMDB0000418	378.204238692	379.211515092	377.196962292	18-Hydroxycortisol	C21H30O6	HESFZGWRDUVOMS-FJNAKSJRSA-N
+HMDB0000422	146.057908808	147.065185208	145.050632408	"2-Methylglutaric acid"	C6H10O4	AQYCMVICBNBXNA-UHFFFAOYSA-N
+HMDB0000423	182.057908808	183.065185208	181.050632408	"3,4-Dihydroxyhydrocinnamic acid"	C9H10O4	DZAUWHJDUNRCTF-UHFFFAOYSA-N
+HMDB0000424	218.115423686	219.122700086	217.108147286	"2-Hydroxydecanedioic acid"	C10H18O5	LPIOYESQKJFWPQ-UHFFFAOYSA-N
+HMDB0000426	148.037173366	149.044449766	147.029896966	"Citramalic acid"	C5H8O5	XFTRTWQBIOMVPK-UHFFFAOYSA-N
+HMDB0000428	148.037173366	149.044449766	147.029896966	"3-Hydroxyglutaric acid"	C5H8O5	ZQHYXNSQOIDNTL-UHFFFAOYSA-N
+HMDB0000429	272.177630012	273.184906412	271.170353612	17a-Estradiol	C18H24O2	VOXZDWNPVJITMN-SFFUCWETSA-N
+HMDB0000430	416.329045274	417.336321674	415.321768874	"24,25-Dihydroxyvitamin D"	C27H44O3	FCKJYANJHNLEEP-OIMXRAFZSA-N
+HMDB0000434	196.073558872	197.080835272	195.066282472	"Homoveratric acid"	C10H12O4	WUAXWQRULBZETB-UHFFFAOYSA-N
+HMDB0000439	169.037507717	170.044784117	168.030231317	2-Furoylglycine	C7H7NO4	KSPQDMRTZZYQLM-UHFFFAOYSA-N
+HMDB0000440	152.047344122	153.054620522	151.040067722	"3-Hydroxyphenylacetic acid"	C8H8O3	FVMDYYGIDFPZAX-UHFFFAOYSA-N
+HMDB0000442	104.047344122	105.054620522	103.040067722	"(S)-3-Hydroxybutyric acid"	C4H8O3	WHBMMWSBFZVSSR-VKHMYHEASA-N
+HMDB0000448	146.057908808	147.065185208	145.050632408	"Adipic acid"	C6H10O4	WNLRTRBMVRJNCN-UHFFFAOYSA-N
+HMDB0000449	350.245709576	351.252985976	349.238433176	5a-Tetrahydrocorticosterone	C21H34O4	RHQQHZQUAMFINJ-NZTKVECHSA-N
+HMDB0000450	162.100442324	163.107718724	161.093165924	5-Hydroxylysine	C6H14N2O3	YSMODUONRAFBET-UHNVWZDZSA-N
+HMDB0000452	103.063328537	104.070604937	102.056052137	"L-alpha-Aminobutyric acid"	C4H9NO2	QWCKQJZIFLGMSD-VKHMYHEASA-N
+HMDB0000459	157.073893223	158.081169623	156.066616823	3-Methylcrotonylglycine	C7H11NO3	PFWQSHXPNKRLIV-UHFFFAOYSA-N
+HMDB0000462	158.043990078	159.051266478	157.036713678	Allantoin	C4H6N4O3	POJWUDADGALRAB-UHFFFAOYSA-N
+HMDB0000464	39.962591155	40.969867555	38.955314755	Calcium	Ca	BHPQYMZQTOCNFJ-UHFFFAOYSA-N
+HMDB0000468	237.086189243	238.093465643	236.078912843	Biopterin	C9H11N5O3	LHQIJBMDNUYRAM-DZSWIPIPSA-N
+HMDB0000469	142.037842068	143.045118468	141.030565668	5-Hydroxymethyluracil	C5H6N2O3	JDBGXEHEIRGOBU-UHFFFAOYSA-N
+HMDB0000472	220.08479226	221.09206866	219.07751586	5-Hydroxy-L-tryptophan	C11H12N2O3	LDCYZAJDBXYCGN-VIFPVBQESA-N
+HMDB0000473	163.085795313	164.093071713	162.078518913	6-Dimethylaminopurine	C7H9N5	BVIAOQMSVZHOJM-UHFFFAOYSA-N
+HMDB0000474	72.057514878	73.064791278	71.050238478	Butanone	C4H8O	ZWEHNKRNPOVVGH-UHFFFAOYSA-N
+HMDB0000479	169.085126611	170.092403011	168.077850211	3-Methylhistidine	C7H11N3O2	JDHILDINMRGULE-LURJTMIESA-N
+HMDB0000481	268.080769514	269.088045914	267.073493114	"Allopurinol riboside"	C10H12N4O5	KFQUAMTWOJHPEJ-DAGMQNCNSA-N
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/models.R	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,183 @@
+
+## Probe DBModelR by building a throwaway model definition ("yui").
+## The result is discarded; any error raised by the call (for instance
+## because the package is missing) is replaced by an installation hint.
+tryCatch({
+    DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER"))
+}, error=function(e) {
+    stop("Please, install DBModelR before you source this file.")
+})
+
+list(
+    adduct=DBModelR::ModelDefinition(
+        table="adduct",
+        fields=list(
+            name="TEXT",
+            mass="FLOAT",
+            charge="INTEGER",
+            multi="INTEGER",
+            formula_add="TEXT",
+            formula_ded="TEXT",
+            sign="TEXT",
+            oidscore="INTEGER",
+            quasi="INTEGER",
+            ips="FLOAT"
+        )
+    ),
+    cluster=DBModelR::ModelDefinition(
+        table="cluster",
+        fields=list(
+            clusterID="INTEGER",
+            formula="TEXT",
+            annotation="TEXT",
+            coeff="FLOAT",
+            r_squared="FLOAT",
+            charge="INTEGER",
+            mean_rt="FLOAT",
+            score="FLOAT",
+            deviation="FLOAT",
+            status="TEXT",
+            adduct="TEXT",
+            curent_group="INTEGER",
+            pc_group="INTEGER",
+            align_group="INTEGER",
+            xcms_group="INTEGER"
+        ),
+        one=list("sample", "compound")
+    ),
+    compound=DBModelR::ModelDefinition(
+        table="compound",
+        fields=list(
+            name="TEXT",
+            common_name="TEXT",
+            formula="TEXT",
+            charge="INTEGER",
+            date="TEXT",
+            mz="FLOAT"
+        )
+    ),
+    feature=DBModelR::ModelDefinition(
+        table="feature",
+        fields=list(
+            featureID="INTEGER",
+            mz="FLOAT",
+            mz_min="FLOAT",
+            mz_max="FLOAT",
+            rt="FLOAT",
+            rt_min="FLOAT",
+            rt_max="FLOAT",
+            int_o="FLOAT",
+            int_b="FLOAT",
+            max_o="FLOAT",
+            iso="TEXT",
+            abundance="FLOAT"
+        ),
+        one=list("cluster")
+    ),
+    instrument=DBModelR::ModelDefinition(
+        table="instrument",
+        fields=list(
+            model="TEXT",
+            manufacturer="TEXT",
+            analyzer="TEXT",
+            detector_type="TEXT",
+            ion_source="TEXT"
+        )
+    ),
+    instrument_config=DBModelR::ModelDefinition(
+        table="instrument_config",
+        fields=list(
+            resolution="TEXT",
+            agc_target="TEXT",
+            maximum_IT="TEXT",
+            number_of_scan_range="TEXT",
+            scan_range="TEXT",
+            version="TEXT"
+        )
+    ),
+    project=DBModelR::ModelDefinition(
+        table="project",
+        fields=list(
+            name="TEXT",
+            comment="TEXT"
+        ),
+        one=list("sample")
+    ),
+    ## Model of the "sample" table.
+    ## NOTE(review): `one` presumably declares relations to other models;
+    ## "camera_parameters" and "pairing_parameters" are listed here while
+    ## their model definitions are commented out further down in this
+    ## list - confirm DBModelR tolerates relations to undefined models.
+    ## NOTE(review): "alignmenmt_parameters" is spelled the same way as
+    ## the model key defined at the end of this list; the two must be
+    ## kept in sync if the spelling is ever corrected.
+    sample=DBModelR::ModelDefinition(
+        table="sample",
+        fields=list(
+            name="TEXT",
+            path="TEXT",
+            polarity="TEXT",
+            kind="TEXT", ## rdata or mxml or enriched_rdata
+            raw="BLOB"
+        ),
+        one=list(
+            "peak_picking_parameters",
+            "pairing_parameters",
+            "alignmenmt_parameters",
+            "camera_parameters",
+            "instrument",
+            "instrument_config",
+            "software",
+            "smol_xcms_set"
+        )
+    ),
+    smol_xcms_set=DBModelR::ModelDefinition(
+        table="smol_xcms_set",
+        fields=list(
+            raw="BLOB"
+        )
+    ),
+    software=DBModelR::ModelDefinition(
+        table="software",
+        fields=list(
+            name="TEXT",
+            version="TEXT"
+        )
+    ),
+    # camera_parameters=DBModelR::ModelDefinition(
+    #     table="camera_parameters",
+    #     fields=list()
+    # ),
+    # pairing_parameters=DBModelR::ModelDefinition(
+    #     table="pairing_parameters",
+    #     fields=list()
+    # ),
+    peak_picking_parameters=DBModelR::ModelDefinition(
+        table="peak_picking_parameters",
+        fields=list(
+            ppm="FLOAT",
+            peakwidth="TEXT",
+            snthresh="TEXT",
+            prefilterStep="TEXT",
+            prefilterLevel="TEXT",
+            mzdiff="TEXT",
+            fitgauss="TEXT",
+            noise="TEXT",
+            mzCenterFun="TEXT",
+            integrate="INTEGER",
+            firstBaselineCheck="TEXT",
+            snthreshIsoROIs="TEXT",
+            maxCharge="INTEGER",
+            maxIso="INTEGER",
+            mzIntervalExtension="TEXT"
+        )
+    ),
+    alignmenmt_parameters=DBModelR::ModelDefinition(
+        table="alignmenmt_parameters",
+        fields=list(
+            binSize="TEXT",
+            centerSample="TEXT",
+            response="TEXT",
+            distFun="TEXT",
+            gapInit="TEXT",
+            gapExtend="TEXT",
+            factorDiag="TEXT",
+            factorGap="TEXT",
+            localAlignment="INTEGER",
+            initPenalty="TEXT",
+            bw="TEXT",
+            minFraction="TEXT",
+            minSamples="TEXT",
+            maxFeatures="TEXT"
+        )
+    )
+)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/format_versionning.MD	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,91 @@
+
+INTRODUCTION
+=====
+This file describes the format of the database generated by XSeeker
+Preprocessor. This format will evolve in the future to support the
+features users ask for. That's why there is a history of version numbers,
+describing what each one brought to the sqlite file, and why it was useful.
+
+The first version (the oldest one) is at the bottom of this file, and the
+modifications provided by the newest versions are at the top of the file.
+
+
+VERSION 1.1.2
+=====
+add missing mz_tab_info$group_length field to produce mzTab
+
+VERSION 1.1.1
+=====
+add missing mz_tab_info$dataset_path field to produce mzTab
+
+
+VERSION 1.1.0
+=====
+Summary:
+ - The field `mz_tab_info` was added in new table smol_xcms_set.
+
+smol_xcms_set table added
+-----
+This table contains a subset of the original ms file.
+
+mz_tab_info field added to smol_xcms_set
+-----
+This field contains five subfields:
+ - sampclass ;
+ - sampnames ;
+ - rtmed ;
+ - mzmed ;
+ - smallmolecule_abundance_assay .
+
+These fields were added after users asked to export data from XSeeker
+in mzTab files.
+XCMS has some functions to extract sampclass, sampnames, rtmed, mzmed
+and smallmolecule_abundance_assay from xcms set. Then, they are used
+in the mz tab creation process, but we didn't want to keep the whole
+xcmsset object. So we used the original code from XCMS and modified it
+a little bit, justifying the extraction of these new fields.
+
+
+VERSION 1.0.0
+=====
+
+
+DATABASE
+-----
+
+### SAMPLE
+
+#### RAW
+This structure is a pruned and enriched version of the original
+rdata, saved as a compressed env:  
+`blob::blob(fst::compress_fst(serialize(raw, NULL), compression=100))`
+
+The fields contained in the env are as follows:
+
+##### variableMetadata
+
+##### tic
+
+##### mz
+
+##### scanindex
+
+##### scantime
+
+##### intensity
+
+##### polarity
+
+##### sample_name
+
+##### dataset_path
+
+##### process_params
+
+##### enriched_rdata
+
+##### enriched_rdata_version
+
+##### tool_name
+
+##### enriched_rdata_doc
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/config/datatype_conf.xml	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,6 @@
+
+    <datatype extension="sql" type="galaxy.datatypes.text:SQL" mimetype="application/sql" display_in_upload="true"/>
+    <datatype extension="xseeker.sqlite" type="galaxy.datatypes.binary:XSeekerDatabase" mimetype="application/octet-stream" display_in_upload="true"/>
+
+    <sniffer type="galaxy.datatypes.text:SQL"/>
+    <sniffer type="galaxy.datatypes.binary:XSeekerDatabase"/>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/config/tool_conf.xml	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,6 @@
+<?xml version='1.0' encoding='utf-8'?>
+<toolbox monitor="true">
+  <section id="lc-msms" name="LC MSMS">
+    <tool file="LC-MSMS/XSeekerPreparator.xml" />
+  </section>
+</toolbox>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/lib/galaxy/datatypes/binary.py	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,52 @@
+
+
+class XSeekerDatabase(SQlite):
+    """Class describing an XSeeker Sqlite database """
+    MetadataElement(
+        name="xseeker_version",
+        default="1.0.0",
+        param=MetadataParameter,
+        desc="XSeeker Version",
+        readonly=True,
+        visible=True,
+        no_value="1.0.0"
+    )
+    file_ext = "xseeker.sqlite"
+    edam_format = "format_3622"
+    edam_data = "data_3498"
+
+    def set_meta(self, dataset, overwrite=True, **kwd):
+        super(XSeekerDatabase, self).set_meta(dataset, overwrite=overwrite, **kwd)
+        try:
+            conn = sqlite.connect(dataset.file_name)
+            c = conn.cursor()
+            tables_query = "SELECT database_version FROM XSeeker_tagging_table"
+            result = c.execute(tables_query).fetchall()
+            for version, in result:
+                dataset.metadata.xseeker_vesrion = version
+            # TODO: Can/should we detect even more attributes, such as use of PED file, what was input annotation type, etc.
+        except Exception as e:
+            log.warning('%s, set_meta Exception: %s', self, e)
+
+    def sniff(self, filename):
+        if super(XSeekerDatabase, self).sniff(filename):
+            table_names = [
+                "XSeeker_tagging_table"
+            ]
+            return self.sniff_table_names(filename, table_names)
+        return False
+
+    def set_peek(self, dataset, is_multi_byte=False):
+        if not dataset.dataset.purged:
+            dataset.peek = "XSeeker SQLite Database, version %s" % (dataset.metadata.xseeker_version or 'unknown')
+            dataset.blurb = nice_size(dataset.get_size())
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek(self, dataset):
+        try:
+            return dataset.peek
+        except Exception:
+            return "XSeeker SQLite Database, version %s" % (dataset.metadata.xseeker_version or 'unknown')
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/lib/galaxy/datatypes/text.py	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,35 @@
+
+
+@build_sniff_from_prefix
+class SQL(Text):
+    """Class describing an SQL script file"""
+    file_ext = "sql"
+
+    def set_peek(self, dataset, is_multi_byte=False):
+        """Set a fixed peek and a size blurb, or a purged notice when the file is gone."""
+        if not dataset.dataset.purged:
+            dataset.peek = "SQL file"
+            dataset.blurb = nice_size(dataset.get_size())
+        else:
+            dataset.peek = "file does not exist"
+            dataset.blurb = "file purged from disk"
+
+    def get_mime(self):
+        """Returns the mime type of the datatype"""
+        return "application/sql"
+
+    def sniff_prefix(self, file_prefix):
+        """
+        Uses some patterns usually encountered in SQL files to guess
+        its type.
+
+        Only the first 42 characters of the prefix are read (then
+        stripped), and the keywords are matched case-sensitively.
+        """
+        start = file_prefix.string_io().read(42).strip()
+        return any(
+            header in start
+            for header in (
+                "CREATE DATABASE",
+                "INSERT INTO",
+                "CREATE TABLE",
+                "BEGIN TRANSACTION"
+            )
+        )
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.R	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,919 @@
+
+
+## Name and version of this tool; both are interpolated into
+## ENRICHED_RDATA_DOC below.
+TOOL_NAME <- "XSeekerPreparator"
+VERSION <- "1.1.2"
+
+## Branch name appended to the enriched rdata version.
+OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
+
+## Enriched rdata version, formatted as "major.minor.patch-branch_name".
+## NOTE(review): the "1.1.2" literal duplicates VERSION - confirm whether
+## the two numbers are meant to evolve independently.
+ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
+ENRICHED_RDATA_DOC <- sprintf("
+Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
+This doc was generated by the tool: %s - Version %s
+To show the different variables contained in this rdata, type:
+ - `load('this_rdata.rdata', rdata_env <- new.env())`
+ - `names(rdata_env)`
+
+Sections
+######
+
+
+This tools helpers
+------
+    The version number is somewhat special because the evolution of the
+    rdata's format is non-linear.
+    There may be different branches, each evolving separatly.
+    To reflect these branches's diversions, there may be a prepended
+    branch name following this format:
+        major.minor.patch-branch_name
+    Like this, we can process rdata with the same tool, and output
+    rdata formated differently, for each tool.
+
+
+  - enriched_rdata:
+    - Description: flag created by that tool to tell it was enriched.
+    - Retrieval method: enriched_rdata <- TRUE
+
+  - enriched_rdata_version:
+    - Description: A flag created by that tool to tell which version of
+        this tool has enriched the rdata.
+    - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION)
+
+  - enriched_rdata_doc:
+    - Description: Contains the documentation string.
+
+Data from original mzxml file
+------
+  - tic:
+    - Description: Those are the tic values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@tic
+    - xcms version: 2.0
+
+  - mz:
+    - Description: Those are the m/z values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$mz
+    - xcms version: 2.0
+
+  - scanindex:
+    - Description: Those are the scanindex values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scanindex
+    - xcms version: 2.0
+
+  - scantime:
+    - Description: Those are the scantime values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scantime
+    - xcms version: 2.0
+
+  - intensity:
+    - Description: Those are the intensity values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$intensity
+    - xcms version: 2.0
+
+  - polarity:
+    - Description: Those are the polarity values from the original mzxml
+        file, extracted using xcms 2.
+    - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]])
+    - xcms version: 2.0
+
+Data taken from incoming rdata
+------
+  - variableMetadata:
+    - Description: Unmodified copy of variableMetadata from incoming rdata.
+    - Retrieval method: rdata_file$variableMetadata
+
+  - process_params:
+    - Description: Those are the processing parameters values from the
+        curent rdata. They have been simplified to allow easy access like:
+        for (params in process_params) {
+            if (params[[\"xfunction\"]] == \"annotatediff\") {
+                process_peak_picking_params(params)
+            }
+        }
+    - Retrieval method:
+        ## just he same list, but simplified
+        process_params <- list()
+        for (list_name in names(rdata_file$listOFlistArguments)) {
+            param_list <- list()
+            for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) {
+                param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]]
+            }
+            process_params[[length(process_params)+1]] <- param_list
+        }
+", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION)
+
+
+
+## Load the model definitions describing the database schema.
+##
+## `path` is dispatched as follows:
+##   - "git@..." (or galaxy's mangled "git__at__...") or anything ending
+##     with ".git": cloned through get_models_from_git;
+##   - anything else starting with "http": fetched through
+##     get_models_from_url;
+##   - otherwise: treated as a local R file and sourced.
+## Returns the value produced by the models file.
+## Stops when `path` is NULL.
+get_models <- function(path) {
+    if (is.null(path)) {
+        stop("No models to define the database schema")
+    } else {
+        message(sprintf("Loading models from %s", path))
+    }
+    ## galaxy mangles the "@" to a "__at__"
+    if (substr(path, 1, 9) == "git__at__") {
+        path <- sub("^git__at__", "git@", path, perl=TRUE)
+    }
+    ## The ".git" suffix is tested before the "http" prefix, so that
+    ## https://host/repo.git is cloned rather than downloaded.
+    ## endsWith() is used because length(path) is the number of strings
+    ## (1), not the number of characters.
+    if (startsWith(path, "git@") || endsWith(path, ".git")) {
+        return (get_models_from_git(path))
+    }
+    if (substr(path, 1, 4) == "http") {
+        return (get_models_from_url(path))
+    }
+    return (source(path)$value)
+}
+
+## Clone a git repository and source the first file named `target_file`
+## (case-insensitive) found in it.
+##
+## url:         repository url handed to `git clone`.
+## target_file: name of the models file to look for in the clone.
+## rm:          when TRUE, the clone is deleted before returning.
+## Returns the value produced by the models file.
+## Stops when the clone fails or when no matching file is found.
+get_models_from_git <- function (url, target_file="models.R", rm=TRUE) {
+    ## Clone into a fresh path: tempdir() already exists (git refuses
+    ## to clone into a non-empty directory) and removing it afterwards
+    ## would destroy the temporary directory of the whole R session.
+    tmp <- tempfile("models_repo_")
+    if (rm) {
+        on.exit(unlink(tmp, recursive=TRUE), add=TRUE)
+    }
+    message(sprintf("Cloning %s", url))
+    ## system2 pastes its arguments into a command line: quote them.
+    status <- system2("git", c("clone", shQuote(url), shQuote(tmp)))
+    if (status != 0) {
+        stop(sprintf(
+            "Could not clone \"%s\" (git exited with status %s)",
+            url,
+            status
+        ))
+    }
+    result <- search_tree(tmp, target_file)
+    if (!is.null(result)) {
+        return (source(result)$value)
+    }
+    stop(sprintf(
+        "Could not find any file named \"%s\" in this repo",
+        target_file
+    ))
+}
+
+## Download a models file and source it.
+##
+## url:         location of the models file.
+## target_file: name given to the downloaded file.
+## rm:          when TRUE, the downloaded file is deleted before returning.
+## Returns the value produced by the models file.
+## Stops when the download does not succeed.
+get_models_from_url <- function (url, target_file="models.R", rm=TRUE) {
+    ## Work in a dedicated directory: removing tempdir() itself would
+    ## destroy the temporary directory of the whole R session.
+    tmp <- tempfile("models_download_")
+    dir.create(tmp)
+    if (rm) {
+        on.exit(unlink(tmp, recursive=TRUE), add=TRUE)
+    }
+    message(sprintf("Downloading %s", url))
+    result <- file.path(tmp, target_file)
+    ## download.file returns 0 on success
+    if (download.file(url, destfile=result) == 0) {
+        return (source(result)$value)
+    }
+    stop("Could not download any file at this adress.")
+}
+
+## Recursively look for a file named `target` (case-insensitive) under
+## the directory `path`.
+##
+## Returns the path of the first match (built with file.path from
+## `path`), or NULL when nothing matches.
+search_tree <- function(path, target) {
+    target <- tolower(target)
+    for (file in list.files(path)) {
+        ## list.files returns bare names: the directory test must be
+        ## done on the full path, not relative to the working directory.
+        full_path <- file.path(path, file)
+        if (dir.exists(full_path)) {
+            result <- search_tree(full_path, target)
+            if (!is.null(result)) {
+                return (result)
+            }
+        } else if (tolower(file) == target) {
+            return (full_path)
+        }
+    }
+    return (NULL)
+}
+
+## Recreate the database through the orm (with no_exists=FALSE), then
+## tag it with the "created" database version.
+create_database <- function(orm) {
+    orm$recreate_database(no_exists=FALSE)
+    set_database_version(orm, "created")
+}
+
+## Save the hard-coded list of adducts through the orm.
+##
+## Each entry of `adducts` is a positional tuple, read in this order:
+##   name, multi, charge, mass, oidscore, quasi, ips,
+##   formula_add, formula_ded
+insert_adducts <- function(orm) {
+    message("Creating adducts...")
+    adducts <- list(
+        list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"),
+        list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"),
+        list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"),
+        list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"),
+        list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
+        list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
+        list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"),
+        list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
+        list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
+        list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"),
+        list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
+        list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
+        list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"),
+        list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"),
+        list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"),
+        list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"),
+        list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"),
+        list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"),
+        list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"),
+        list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"),
+        list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"),
+        list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"),
+        list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"),
+        list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
+        list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"),
+        list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
+        list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"),
+        list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"),
+        list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"),
+        list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
+        list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
+        list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"),
+        list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"),
+        list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
+        list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
+        list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"),
+        list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"),
+        list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"),
+        list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
+        list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
+        list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"),
+        list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"),
+        list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"),
+        list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"),
+        list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
+        list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
+        list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"),
+        list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
+        list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
+        list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"),
+        list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
+        list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
+        list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"),
+        list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"),
+        list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"),
+        list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
+        list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
+        list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"),
+        list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
+        list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
+        list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"),
+        list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
+        list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
+        list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"),
+        list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"),
+        list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
+        list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
+        list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"),
+        list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"),
+        list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"),
+        list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
+        list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
+        list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"),
+        list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
+        list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
+        list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"),
+        list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"),
+        list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
+        list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
+        list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"),
+        list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"),
+        list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"),
+        list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"),
+        list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"),
+        list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
+        list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
+        list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"),
+        list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"),
+        list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"),
+        list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"),
+        list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"),
+        list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"),
+        list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"),
+        list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"),
+        list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"),
+        list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"),
+        list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"),
+        list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"),
+        list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"),
+        list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"),
+        list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"),
+        list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"),
+        list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"),
+        list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"),
+        list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"),
+        list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0")
+    )
+    ## A single orm object is reused for every adduct.
+    dummy_adduct <- orm$adduct()
+    for (adduct in adducts) {
+        ## `i` is incremented inline, so the order of the setters below
+        ## defines which tuple position feeds which field.
+        i <- 0
+        dummy_adduct$set_name(adduct[[i <- i+1]])
+        dummy_adduct$set_multi(adduct[[i <- i+1]])
+        dummy_adduct$set_charge(adduct[[i <- i+1]])
+        dummy_adduct$set_mass(adduct[[i <- i+1]])
+        dummy_adduct$set_oidscore(adduct[[i <- i+1]])
+        dummy_adduct$set_quasi(adduct[[i <- i+1]])
+        dummy_adduct$set_ips(adduct[[i <- i+1]])
+        dummy_adduct$set_formula_add(adduct[[i <- i+1]])
+        dummy_adduct$set_formula_ded(adduct[[i <- i+1]])
+        dummy_adduct$save()
+        ## presumably resets the object (id included) so that the next
+        ## save() creates a new row - TODO confirm against DBModelR
+        dummy_adduct$clear(unset_id=TRUE)
+    }
+    message("Adducts created")
+}
+
+## Load a base database from a SQL dump, then tag the database "enriched".
+##
+## orm:       ORM handle; its execute() method receives each statement.
+## path:      text file holding SQL statements separated by ";".
+## archetype: when TRUE nothing is done (not implemented yet) and NULL is
+##            returned.
+##
+## The dump is split on every ";", so a ";" inside a string literal is not
+## supported.
+insert_base_data <- function(orm, path, archetype=FALSE) {
+    if (archetype) {
+        ## not implemented yet
+        return (NULL)
+    }
+    base_data <- readLines(path)
+    ## Lines are joined with "\n" rather than " " so that a "--" line comment
+    ## ends at its own line instead of swallowing the following statements.
+    statements <- strsplit(paste(base_data, collapse="\n"), ";")[[1]]
+    for (sql in statements) {
+        ## the text after the final ";" is usually blank: nothing to execute
+        if (!nzchar(trimws(sql))) {
+            next
+        }
+        orm$execute(sql)
+    }
+    set_database_version(orm, "enriched")
+}
+
+## Import the compounds of a tab-separated file into the database.
+##
+## orm:            ORM handle providing the compound model.
+## compounds_path: path of the tabular file to read.
+##
+## The header is normalised by translate_compounds(); the function stops when
+## the expected columns cannot be identified. All rows are saved in one bulk
+## call.
+insert_compounds <- function(orm, compounds_path) {
+    raw_table <- read.csv(file=compounds_path, sep="\t")
+    compounds <- translate_compounds(raw_table)
+    if (is.null(compounds)) {
+        stop("Could not find asked compound's attributes in csv file.")
+    }
+    exported_fields <- c("mz", "name", "common_name", "formula")
+    template <- orm$compound()
+    pending <- list()
+    for (row_no in seq_len(nrow(compounds))) {
+        template$set_mz(compounds[row_no, "mz"])
+        template$set_name(compounds[row_no, "name"])
+        template$set_common_name(compounds[row_no, "common_name"])
+        template$set_formula(compounds[row_no, "formula"])
+        ## snapshot the fields, then recycle the same model for the next row
+        pending[[length(pending) + 1]] <- as.list(template, exported_fields)
+        template$clear(unset_id=TRUE)
+    }
+    template$save(bulk=pending)
+}
+
+## Rename the columns of a compounds table to the names used by the database.
+##
+## A header that exactly matches a known layout is handled by the translator
+## registered for it; any other header goes through guess_translator().
+## Returns the translated data frame, or NULL when the header is not
+## understood.
+translate_compounds <- function(compounds) {
+    known_layouts <- list(
+        list(
+            headers=c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey"),
+            translate=hmdb_header_translator
+        )
+    )
+    actual_header <- colnames(compounds)
+    for (layout in known_layouts) {
+        if (identical(actual_header, layout$headers)) {
+            return (layout$translate(compounds))
+        }
+    }
+    translator <- guess_translator(actual_header)
+    if (is.null(translator)) {
+        return (NULL)
+    }
+    return (csv_header_translator(translator, compounds))
+}
+
+## Guess which column of `header` holds each attribute needed for a compound.
+##
+## header: character vector of column names.
+##
+## A column matches an attribute (mz, name, common_name, formula) when it is
+## equal to it after lower-casing, after replacing "-", " " or "." by "_", or
+## after converting camelCase to snake_case. The first matching column wins.
+##
+## Returns a named list (attribute -> original column name), or NULL when at
+## least one attribute has no matching column.
+guess_translator <- function(header) {
+    asked_cols <- c("mz", "name", "common_name", "formula")
+    result <- list()
+    for (asked_col in asked_cols) {
+        for (col in header) {
+            twisted <- tolower(col)
+            if (twisted == asked_col
+                || gsub("-", "_", twisted) == asked_col
+                || gsub(" ", "_", twisted) == asked_col
+                ## read.csv() turns spaces and hyphens of a header into dots
+                || gsub(".", "_", twisted, fixed=TRUE) == asked_col
+                || tolower(gsub("(.)([A-Z])", "\\1_\\2", col)) == asked_col
+            ) {
+                result[[asked_col]] <- col
+                break
+            }
+        }
+    }
+    if (length(result) != length(asked_cols)) {
+        return (NULL)
+    }
+    return (result)
+}
+
+## Translate a compounds table that uses the HMDB export column names.
+##
+## The "MetName" column feeds both the name and the common_name attributes.
+hmdb_header_translator <- function(compounds) {
+    hmdb_columns <- list(
+        HMDB_ID="HMDB_ID",
+        mz="MzBank",
+        name="MetName",
+        common_name="MetName",
+        formula="ChemFormula",
+        inchi_key="INChIkey"
+    )
+    return (csv_header_translator(hmdb_columns, compounds))
+}
+
+## Build a data frame whose columns are renamed copies of columns of `csv`.
+##
+## translation_table: named list, target column name -> source column name.
+## csv:               data frame holding the source columns.
+##
+## Returns a data frame with one column per entry of `translation_table` and
+## as many rows as `csv` (possibly zero); its "mz" column is numeric. Selecting
+## a source column that does not exist is an error.
+csv_header_translator <- function(translation_table, csv) {
+    header_names <- names(translation_table)
+    ## seq_len() copes with an empty table, where 1:nrow(csv) yields c(1, 0);
+    ## starting without columns avoids carrying a meaningless index column.
+    result <- data.frame(row.names=seq_len(nrow(csv)))
+    for (i in seq_along(header_names)) {
+        result[[header_names[[i]]]] <- csv[, translation_table[[i]]]
+    }
+    ## as.character() first: a factor would otherwise yield its level codes
+    result[["mz"]] <- as.numeric(as.character(result[["mz"]]))
+    return (result)
+}
+
+## Record `version` as the "database_version" tag of the database, in the
+## "XSeeker_tagging_table" tag table.
+set_database_version <- function(orm, version) {
+    version_tag <- "database_version"
+    tagging_table <- "XSeeker_tagging_table"
+    orm$set_tag(version, tag_name=version_tag, tag_table_name=tagging_table)
+}
+
+## Import the samples of an enriched rdata into the database.
+##
+## orm:     ORM handle.
+## rdata:   environment loaded from the rdata file; gather_mzml_files() may
+##          fill its `singlefile` entry.
+## options: parsed command line options; `samples` (optional) restricts the
+##          import to the listed sample names, `not-show-percent` disables
+##          the progress display.
+##
+## Files extracted to a temporary directory are removed even when the import
+## fails; the error is then raised again.
+process_rdata <- function(orm, rdata, options) {
+    mzml_tmp_dir <- gather_mzml_files(rdata)
+    samples <- names(rdata$singlefile)
+    if (!is.null(options$samples)) {
+        ## The option may carry several names joined with commas
+        ## (presumably sample names never contain one - TODO confirm).
+        requested <- trimws(unlist(strsplit(options$samples, ",", fixed=TRUE)))
+        ## keep the known samples that were asked for
+        samples <- samples[samples %in% requested]
+    }
+    show_percent <- (
+        is.null(options$`not-show-percent`)
+        || options$`not-show-percent` == FALSE
+    )
+    error <- tryCatch({
+        process_sample_list(
+            orm, rdata, samples,
+            show_percent=show_percent
+        )
+        NULL
+    }, error=function(e) {
+        message(e)
+        e
+    })
+    if (!is.null(mzml_tmp_dir)) {
+        unlink(mzml_tmp_dir, recursive=TRUE)
+    }
+    if (!is.null(error)) {
+        stop(error)
+    }
+}
+
+## Make sure `rdata$singlefile` lists the raw sample files.
+##
+## rdata: environment loaded from the rdata file. When it has no `singlefile`
+##        entry, the archive `rdata$zipfile` is extracted and `singlefile` is
+##        set (in place) to the extracted paths, named after the files
+##        without their extension.
+##
+## Returns the directory the archive was extracted to, so that the caller can
+## delete it, or NULL when nothing was extracted.
+gather_mzml_files <- function(rdata) {
+    if (is.null(rdata$singlefile)) {
+        if (is.null(rdata$zipfile)) {
+            stop("The rdata provides neither 'singlefile' nor 'zipfile'.")
+        }
+        message("Extracting mxml files")
+        ## Use a dedicated sub-directory: the caller removes the returned
+        ## directory recursively, which must not hit the session's tempdir().
+        tmp <- tempfile(pattern="mzml_")
+        dir.create(tmp)
+        rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp)
+        names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile))
+        message("Extracted")
+        return (tmp)
+    } else {
+        message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; ")))
+    }
+    return (NULL)
+}
+
+## Store the requested samples of an enriched rdata in the database.
+##
+## orm:          ORM handle.
+## radta:        environment holding variableMetadata, listOFlistArguments,
+##               xa and singlefile.
+## sample_names: names of the samples to import; other samples are skipped.
+## show_percent: whether feature extraction reports its progress.
+##
+## Returns NULL. Stops when variableMetadata has no file grouping column.
+process_sample_list <- function(orm, radta, sample_names, show_percent) {
+    ## The parameter is spelled "radta"; alias it so that the body works on
+    ## the argument instead of whatever global happens to be named "rdata".
+    rdata <- radta
+    file_grouping_var <- find_grouping_var(rdata$variableMetadata)
+    message("Processing samples.")
+    if(is.null(file_grouping_var)) {
+        stop("Malformed variableMetada.")
+    }
+    message(sprintf("File grouping variable: %s", file_grouping_var))
+
+    ## copy the arguments of the previous processing steps, list by list
+    process_arg_list <- rdata$listOFlistArguments
+    process_params <- list()
+    for (list_name in names(process_arg_list)) {
+        param_list <- list()
+        for (param_name in names(process_arg_list[[list_name]])) {
+            param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
+        }
+        process_params[[length(process_params)+1]] <- param_list
+    }
+    message("Parameters from previous processes extracted.")
+
+    var_meta <- rdata$variableMetadata
+    align_group <- rep(0, nrow(var_meta))
+    var_meta <- cbind(var_meta, align_group)
+    ## state shared by all samples while features and clusters are created
+    context <- new.env()
+    context$clusters <- list()
+    context$groupidx <- rdata$xa@xcmsSet@groupidx
+    context$peaks <- rdata$xa@xcmsSet@peaks
+    context$show_percent <- show_percent
+
+    indices <- as.numeric(unique(var_meta[, file_grouping_var]))
+    smol_xcms_set <- orm$smol_xcms_set()
+    ## reduced copy of the xcmsSet, stored compressed next to the samples
+    mz_tab_info <- new.env()
+    xcms_set <- rdata$xa@xcmsSet
+    g <- xcms::groups(xcms_set)
+    mz_tab_info$group_length <- nrow(g)
+    mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
+    mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
+    mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
+    mz_tab_info$rtmed <- g[,"rtmed"]
+    mz_tab_info$mzmed <- g[,"mzmed"]
+    mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
+    blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100))
+    smol_xcms_set$set_raw(blogified)$save()
+
+    sample_files <- rdata$singlefile
+    for (no in indices) {
+        ## validate the index before it is used to subset the file list
+        if (is.na(no) || no < 1 || no > length(sample_files)) {
+            next
+        }
+        sample_name <- names(sample_files)[[no]]
+        sample_path <- sample_files[[no]]
+        if (
+            is.null(sample_path)
+            || !(sample_name %in% sample_names)
+        ) {
+            next
+        }
+        ms_file <- xcms::xcmsRaw(sample_path)
+        env <- new.env()
+        env$variableMetadata <- var_meta[var_meta[, file_grouping_var]==no,]
+        env$tic <- ms_file@tic
+        env$mz <- ms_file@env$mz
+        env$scanindex <- ms_file@scanindex
+        env$scantime <- ms_file@scantime
+        env$intensity <- ms_file@env$intensity
+        env$polarity <- as.character(ms_file@polarity[[1]])
+        env$sample_name <- sample_name
+        env$dataset_path <- sample_path
+        env$process_params <- process_params
+        env$enriched_rdata <- TRUE
+        env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
+        env$tool_name <- TOOL_NAME
+        env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
+        context$sample_no <- no
+        add_sample_to_database(orm, env, context, smol_xcms_set)
+    }
+    message("Features enrichment")
+    complete_features(orm, context)
+    message("Features enrichment done.")
+    return (NULL)
+}
+
+## Find the column of `var_meta` that tells which file a row belongs to.
+##
+## var_meta: variable metadata table (data frame or list of columns).
+##
+## Returns "." or "Bio", whichever is found first in that order, or NULL when
+## the table has neither column.
+find_grouping_var <- function(var_meta) {
+    for (grouping_var in c(".", "Bio")) {
+        ## inspect the argument, not a global object
+        if (!is.null(var_meta[[grouping_var]])) {
+            return (grouping_var)
+        }
+    }
+    return (NULL)
+}
+
+## Save one sample, then load its features and processing parameters.
+##
+## orm:           ORM handle.
+## env:           environment describing the sample (sample_name,
+##                dataset_path, polarity, variableMetadata, process_params);
+##                it is serialised, compressed and stored with the sample.
+## context:       shared import state, forwarded to feature loading.
+## smol_xcms_set: model the sample is linked to.
+##
+## Returns the saved sample model.
+add_sample_to_database <- function(orm, env, context, smol_xcms_set) {
+    message(sprintf("Processing sample %s", env$sample_name))
+
+    ## a missing polarity is stored as an empty string
+    polarity <- env$polarity
+    if (is.null(polarity) || identical(polarity, character(0))) {
+        polarity <- ""
+    }
+
+    sample <- orm$sample()
+    sample <- sample$set_name(env$sample_name)
+    sample <- sample$set_path(env$dataset_path)
+    sample <- sample$set_kind("enriched_rdata")
+    sample <- sample$set_polarity(polarity)
+    sample <- sample$set_smol_xcms_set(smol_xcms_set)
+    compressed_env <- fst::compress_fst(serialize(env, NULL), compression=100)
+    sample <- sample$set_raw(blob::blob(compressed_env))
+    sample <- sample$save()
+
+    load_variable_metadata(orm, sample, env$variableMetadata, context)
+    load_process_params(orm, sample, env$process_params)
+    message(sprintf("Sample %s inserted.", env$sample_name))
+    return (sample)
+}
+
+
+## Create the features of one sample from its variable metadata.
+##
+## The identifiers handed to create_features() continue after the highest
+## values already stored for features and clusters. Returns NULL.
+load_variable_metadata <- function(orm, sample, var_meta, context) {
+    stored_clusters <- orm$cluster()$all()
+    stored_features <- orm$feature()$all()
+
+    first_feature_id <- get_next_id(stored_features, "featureID")
+    first_cluster_id <- get_next_id(stored_clusters, "clusterID")
+    first_pc_group <- get_next_id(stored_clusters, "pc_group")
+    first_align_group <- get_next_id(stored_clusters, "align_group")
+
+    message("Extracting features")
+    ## the clusters returned by create_features() are not needed here
+    create_features(
+        orm, sample, var_meta, context,
+        first_feature_id, first_cluster_id,
+        first_pc_group, first_align_group
+    )
+    message("Extracting features done.")
+    return (NULL)
+}
+
+## Return the next free value of a numeric identifier.
+##
+## models:    object whose max(attribute) method gives the highest value of
+##            `attribute` currently known.
+## attribute: name of the identifier.
+##
+## Returns 1 when there is no usable maximum (NULL, empty, NA or infinite),
+## otherwise the maximum plus one.
+get_next_id <- function(models, attribute) {
+    id <- models$max(attribute)
+    ## NULL/NA would make a bare comparison fail inside if()
+    if (is.null(id) || length(id) == 0 || is.na(id) || is.infinite(id)) {
+        return (1)
+    }
+    return (id + 1)
+}
+
+## Create one feature per row of `var_meta` and save them in bulk.
+##
+## orm:              ORM handle providing the feature model.
+## sample:           sample model the clusters are attached to.
+## var_meta:         variable metadata rows of the sample.
+## context:          shared import state (peaks, groupidx, sample_no,
+##                   show_percent, clusters); clusterID is updated per row.
+## next_feature_id:  first featureID to use; incremented per feature.
+## next_cluster_id:  offset added to the cluster number found in the isotope
+##                   annotation.
+## next_pc_group:    offset forwarded to cluster creation.
+## next_align_group: align group forwarded to cluster creation; incremented
+##                   per feature.
+##
+## Returns the clusters accumulated in `context`.
+##
+## NOTE(review): groupidx is indexed with the row position inside `var_meta`;
+## confirm both stay aligned when `var_meta` is a per-sample subset.
+create_features <- function(
+    orm, sample, var_meta, context,
+    next_feature_id, next_cluster_id,
+    next_pc_group, next_align_group
+) {
+    field_names <- as.list(names(orm$feature()$fields__))
+    field_names[field_names=="id"] <- NULL
+
+    features <- list()
+    dummy_feature <- orm$feature()
+
+    show_percent <- isTRUE(context$show_percent)
+    total <- nrow(var_meta)
+    percent <- -1
+    for (row in seq_len(total)) {
+        if (show_percent) {
+            ## report the real completion rate, whatever the number of rows
+            progress <- floor(row / total * 100)
+            if (progress > percent) {
+                percent <- progress
+                message("\r", sprintf("\r%d %%", as.integer(percent)), appendLF=FALSE)
+            }
+        }
+
+        curent_var_meta <- var_meta[row, ]
+
+        ## drop=FALSE keeps a matrix even when the group has a single peak
+        peak_list <- context$peaks[context$groupidx[[row]], , drop=FALSE]
+        sample_peak_list <- peak_list[peak_list[, "sample"] == context$sample_no, , drop=FALSE]
+        if (nrow(sample_peak_list) != 0) {
+            if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+                dummy_feature$set_int_o(int_o)
+            }
+            if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+                dummy_feature$set_int_b(int_b)
+            }
+            if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+                dummy_feature$set_max_o(max_o)
+            }
+        }
+
+        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
+
+        dummy_feature$set_featureID(next_feature_id)
+        next_feature_id <- next_feature_id + 1
+        ## the raw annotation carries both the cluster number and the isotope
+        fake_iso <- dummy_feature$get_iso()
+        iso <- extract_iso(fake_iso)
+        clusterID <- extract_clusterID(fake_iso, next_cluster_id)
+        context$clusterID <- clusterID
+        dummy_feature$set_iso(iso)
+        create_associated_cluster(
+            sample, dummy_feature, clusterID,
+            context, curent_var_meta, next_pc_group,
+            next_align_group
+        )
+        next_align_group <- next_align_group + 1
+        features[[length(features)+1]] <- as.list(dummy_feature, field_names)
+        dummy_feature$clear()
+    }
+    message("")## +\n for previous message
+    message("Saving features")
+    dummy_feature$save(bulk=features)
+    message("Saved.")
+    return (context$clusters)
+}
+
+## Summarise one column of a peak matrix.
+##
+## peak_list: matrix of peaks; var_name: column to read; selector: function
+## applied to the unnamed column values (max by default).
+extract_peak_var <- function(peak_list, var_name, selector=max) {
+    column_values <- unname(peak_list[, var_name])
+    return (selector(column_values))
+}
+
+## Copy the m/z, retention time and isotope values of a variable metadata
+## row onto a feature.
+##
+## Values that are absent (NULL) or NA are left untouched on the feature.
+## Returns the feature.
+set_feature_fields_from_var_meta <- function(feature, var_meta) {
+    is_usable <- function(value) {
+        !is.null(value) && !is.na(value)
+    }
+
+    mz <- var_meta[["mz"]]
+    if (is_usable(mz)) feature$set_mz(mz)
+
+    mz_min <- var_meta[["mzmin"]]
+    if (is_usable(mz_min)) feature$set_mz_min(mz_min)
+
+    mz_max <- var_meta[["mzmax"]]
+    if (is_usable(mz_max)) feature$set_mz_max(mz_max)
+
+    rt <- var_meta[["rt"]]
+    if (is_usable(rt)) feature$set_rt(rt)
+
+    rt_min <- var_meta[["rtmin"]]
+    if (is_usable(rt_min)) feature$set_rt_min(rt_min)
+
+    rt_max <- var_meta[["rtmax"]]
+    if (is_usable(rt_max)) feature$set_rt_max(rt_max)
+
+    iso <- var_meta[["isotopes"]]
+    if (is_usable(iso)) feature$set_iso(iso)
+
+    return (feature)
+}
+
+## Strip the leading "[<digits>]" cluster number from an isotope annotation.
+##
+## The input is returned unchanged when it does not start with such a prefix.
+extract_iso  <- function(weird_data) {
+    cluster_prefix <- "^\\[\\d+\\]"
+    has_prefix <- grepl(cluster_prefix, weird_data)[[1]]
+    if (!has_prefix) {
+        return (weird_data)
+    }
+    return (sub(cluster_prefix, "", weird_data, perl=TRUE))
+}
+
+## Compute a cluster identifier from an isotope annotation.
+##
+## The number found in a leading "[<digits>]" prefix (0 when there is no
+## such prefix) is added to `next_cluster_id`.
+extract_clusterID <- function(weird_data, next_cluster_id){
+    cluster_prefix <- "^\\[\\d+\\]"
+    local_number <- 0
+    if (grepl(cluster_prefix, weird_data)[[1]]) {
+        bracketed <- stringr::str_extract(weird_data, cluster_prefix)
+        local_number <- as.numeric(stringr::str_extract(bracketed, "\\d+"))
+    }
+    return (local_number + next_cluster_id)
+}
+
+## Link `feature` to the cluster registered under `grouping_variable`,
+## creating and registering that cluster in `context$clusters` on first use.
+##
+## An existing cluster whose clusterID is 0 receives `context$clusterID` when
+## the latter is not 0. The cluster is saved in every case. Returns the
+## feature.
+##
+## NOTE(review): `orm` is not a parameter of this function; it is resolved
+## from the environment the function is defined in.
+create_associated_cluster <- function(
+    sample, feature, grouping_variable,
+    context, curent_var_meta, next_pc_group, next_align_group
+) {
+    pc_group_offset <- as.numeric(curent_var_meta[["pcgroup"]])
+    adduct_label <- as.character(curent_var_meta[["adduct"]])
+    isotope_annotation <- curent_var_meta[["isotopes"]]
+    cluster_key <- as.character(grouping_variable)
+
+    cluster <- context$clusters[[cluster_key]]
+    if (is.null(cluster)) {
+        cluster <- orm$cluster(
+            pc_group=pc_group_offset + next_pc_group,
+            adduct=adduct_label,
+            align_group=next_align_group,
+            clusterID=context$clusterID,
+            annotation=isotope_annotation
+        )$set_sample(sample)
+        context$clusters[[cluster_key]] <- cluster
+    } else if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
+        cluster$set_clusterID(context$clusterID)
+    }
+    cluster$save()
+    feature$set_cluster(cluster)
+    return (feature)
+}
+
+## Complete clusters and features once every sample has been imported.
+##
+## For each cluster of `context$clusters` that has features, the mean
+## retention time is stored on the cluster, and each feature receives an
+## abundance: its int_o as a percentage of the int_o of the cluster's central
+## feature (the one whose iso starts with "[M]").
+##
+## Abundances are left untouched when the cluster has no central feature, or
+## when the central intensity is missing or 0. Returns NULL.
+complete_features <- function(orm, context) {
+    for (cluster in context$clusters) {
+        features <- orm$feature()$load_by(cluster_id=cluster$get_id())
+        if (!features$any()) {
+            next
+        }
+        if (!is.null(rt <- features$mean("rt"))) {
+            cluster$set_mean_rt(rt)$save()
+        }
+        features_df <- as.data.frame(features)
+        central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ]
+        central_feature_into <- central_feature[["int_o"]]
+        ## A missing intensity cannot be compared to 0 inside if(), and
+        ## several "[M]" rows would give a condition longer than one: keep
+        ## the first known intensity only.
+        central_feature_into <- central_feature_into[!is.na(central_feature_into)]
+        if (length(central_feature_into) == 0 || central_feature_into[[1]] == 0) {
+            next
+        }
+        central_feature_into <- central_feature_into[[1]]
+        for (feature in as.vector(features)) {
+            feature$set_abundance(
+                feature$get_int_o() / central_feature_into * 100
+            )$save()
+        }
+    }
+    return (NULL)
+}
+
+## Store the processing parameters of a sample.
+##
+## params: list of parameter lists; only those whose "xfunction" entry is
+## "annotatediff" are stored, the others are ignored. Returns the sample.
+load_process_params <- function(orm, sample, params) {
+    for (param_list in params) {
+        step_name <- param_list[["xfunction"]]
+        if (!is.null(step_name) && step_name == "annotatediff") {
+            load_process_params_peak_picking(orm, sample, param_list)
+        }
+    }
+    return (sample)
+}
+
+## Attach peak picking parameters to a sample.
+##
+## The rdata entries ppm, maxcharge and maxiso are stored under the model
+## fields ppm, maxCharge and maxIso.
+load_process_params_peak_picking <- function(orm, sample, peak_picking_params) {
+    rdata_to_model_names <- list(
+        ppm="ppm",
+        maxcharge="maxCharge",
+        maxiso="maxIso"
+    )
+    linked_sample <- add_sample_process_parameters(
+        params=peak_picking_params,
+        params_translation=rdata_to_model_names,
+        param_model_generator=orm$peak_picking_parameters,
+        sample_param_setter=sample$set_peak_picking_parameters
+    )
+    return (linked_sample)
+}
+
+## Link a sample to a parameter model, reusing an identical stored model.
+##
+## params:                parameter values read from the rdata.
+## params_translation:    named list, rdata parameter name -> model field.
+## param_model_generator: function creating a parameter model; called
+##                        without argument to query, with the fields to
+##                        create.
+## sample_param_setter:   sample method that receives the parameter model.
+##
+## Parameters missing from `params` are ignored. A new model is saved only
+## when no stored model has the same field values. Returns the result of
+## saving the sample.
+add_sample_process_parameters <- function(
+    params,
+    params_translation,
+    param_model_generator,
+    sample_param_setter
+) {
+    model_params <- list()
+    for (rdata_param_name in names(params_translation)) {
+        rdata_param <- params[[rdata_param_name]]
+        if (!is.null(rdata_param)) {
+            model_params[[params_translation[[rdata_param_name]]]] <- rdata_param
+        }
+    }
+
+    stored_models <- do.call(param_model_generator()$load_by, model_params)
+    if (stored_models$any()) {
+        params_model <- stored_models$first()
+    } else {
+        params_model <- do.call(param_model_generator, model_params)
+        params_model$save()
+    }
+    linked_sample <- sample_param_setter(params_model)
+    return (linked_sample$save())
+}
+
+
+library(optparse)
+
+## Command line interface of the tool.
+option_list <- list(
+    optparse::make_option(
+        c("-v", "--version"),
+        action="store_true",
+        help="Display this tool's version and exits"
+    ),
+    optparse::make_option(
+        c("-i", "--input"),
+        type="character",
+        help="The rdata path to import in XSeeker"
+    ),
+    optparse::make_option(
+        c("-s", "--samples"),
+        type="character",
+        help="Samples to visualise in XSeeker"
+    ),
+    optparse::make_option(
+        c("-B", "--archetype"),
+        type="character",
+        help="The name of the base database"
+    ),
+    optparse::make_option(
+        c("-b", "--database"),
+        type="character",
+        help="The base database's path"
+    ),
+    optparse::make_option(
+        c("-c", "--compounds-csv"),
+        type="character",
+        help="The csv containing compounds"
+    ),
+    optparse::make_option(
+        c("-m", "--models"),
+        type="character",
+        help="The path or url (must begin with http[s]:// or git@) to the database's models"
+    ),
+    optparse::make_option(
+        c("-o", "--output"),
+        type="character",
+        help="The path where to output sqlite"
+    ),
+    optparse::make_option(
+        c("-P", "--not-show-percent"),
+        action="store_true",
+        help="Flag not to show the percents",
+        default=FALSE
+    )
+)
+
+## Exit status of a run aborted by an error. 1 and 2 are not used because the
+## Galaxy wrapper reports them as warnings.
+FATAL_EXIT_STATUS <- 3
+
+## A custom error handler replaces the default "halt" of non-interactive
+## sessions: without the explicit quit() the script would carry on with the
+## next top-level statement and finally exit with status 0.
+options(error=function(){
+    traceback(3)
+    if (!interactive()) {
+        quit(save="no", status=FATAL_EXIT_STATUS, runLast=FALSE)
+    }
+})
+
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
+args <- parse_args(parser, positional_arguments=0)
+
+err_code <- 0
+
+if (!is.null(args$options$version)) {
+    message(sprintf("%s %s", TOOL_NAME, VERSION))
+    quit()
+}
+
+## fail before the output database is touched
+if (is.null(args$options$input)) {
+    stop("No rdata to import: the --input option is required.")
+}
+
+models <- get_models(args$options$models)
+orm <- DBModelR::ORM(
+    connection_params=list(dbname=args$options$output),
+    dbms="SQLite"
+)
+
+invisible(orm$models(models))
+invisible(create_database(orm))
+
+message("Database model created")
+
+insert_adducts(orm)
+
+if (!is.null(args$options$database)) {
+    insert_base_data(orm, args$options$database)
+    ## only announce base data that was really inserted
+    message(sprintf("Base data inserted using %s.", args$options$database))
+}
+
+if (!is.null(args$options$archetype)) {
+    insert_base_data(orm, args$options$archetype, archetype=TRUE)
+}
+if (!is.null(args$options$`compounds-csv`)) {
+    insert_compounds(orm, args$options$`compounds-csv`)
+}
+
+## the rdata is loaded in its own environment, which the import may complete
+load(args$options$input, rdata <- new.env())
+
+process_rdata(orm, rdata, args$options)
+
+quit(status=err_code)
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.xml	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,343 @@
+<tool id="xseeker_pwepawatow"
+      name="XSeeker Preparator"
+      version="1.0.0"
+      profile="20.04"
+>
+    <description>Prepare RData file from CAMERA to be visualized in XSeeker</description>
+    
+    <edam_operations>
+        <edam_operation>operation_1812</edam_operation>
+        <edam_operation>operation_0335</edam_operation>
+    </edam_operations>
+    
+    <requirements>
+        <!-- R_SCRIPT_PATH is the name the command lines of this tool start with. -->
+        <requirement type="set_environment">R_SCRIPT_PATH</requirement>
+        <!-- Packages the R script loads or calls with the "pkg::" prefix. -->
+        <requirement type="package" version="4.0.0">R</requirement>
+        <requirement type="package" version="1.6.6">optparse</requirement>
+        <requirement type="package" version="3.10.2">xcms</requirement>
+        <requirement type="package" version="1.2.1">blob</requirement>
+        <requirement type="package" version="0.9.4">fst</requirement>
+        <requirement type="package" version="1.4.0">stringr</requirement>
+        <!-- NOTE(review): no version is pinned for DBModelR. -->
+        <requirement type="package">DBModelR</requirement>
+    </requirements>
+    
+    <stdio>
+
+        <!-- Statuses 1 and 2 only signal samples without data. -->
+        <exit_code
+            range="1"
+            level="warning"
+            description="Selected samples have no data associated to them."
+        />
+
+        <exit_code
+            range="2"
+            level="warning"
+            description="Some samples have no data associated to them."
+        />
+
+        <!-- Every higher status is a failure of the script and must fail the job. -->
+        <exit_code
+            range="3:"
+            level="fatal"
+            description="XSeeker Preparator stopped on an error."
+        />
+
+    </stdio>
+
+    <!-- Asks the R script for its version (-v). -->
+    <version_command>
+        R_SCRIPT_PATH '$__tool_directory__/XSeekerPreparator.R' -v
+    </version_command>
+
+    <!--
+        Builds the command line of XSeekerPreparator.R from the tool inputs.
+        The progress display is always disabled (-P). The models option
+        receives the generated base_config file when the "default" model kind
+        is selected, the given URL otherwise.
+        NOTE(review): R_SCRIPT_PATH is written verbatim as the program name,
+        not as a shell variable; confirm that an executable of that name
+        exists in the job environment.
+    -->
+    <command>
+        <![CDATA[
+
+            R_SCRIPT_PATH '$__tool_directory__/XSeekerPreparator.R'
+
+                -P
+
+                --input '$input'
+                --output '$output'
+
+                #if $samples.selected
+                    --samples '${",".join(samples.selected)}'
+                #end if
+
+                #if $database.archetypes
+                    --archetype '${",".join($database.archetypes)}'
+                #end if
+
+                #if $database.base.kind == "tabular"
+                    --compounds-csv '${database.base.tabular}'
+                #else if $database.base.kind == "sql"
+                    --database '${database.base.sql}'
+                #end if
+
+                #if $database.models.kind == "default"
+                    --models '${base_config}'
+                #else
+                    --models '${database.models.url}'
+                #end if
+
+        ]]>
+
+    </command>
+
+    <inputs>
+        <!-- RData file to import. -->
+        <param
+            name="input"
+            type="data"
+            multiple="false"
+            label="Rdata to prepare"
+            optional="false"
+            format="rdata"
+        >
+        </param>
+        <!-- Optional restriction of the import to some samples. -->
+        <section name="samples" title="Samples Options" expanded="false">
+            <param
+                name="selected"
+                type="data"
+                multiple="true"
+                label="Samples to visualize"
+                optional="true"
+                format="mzml"
+            >
+            </param>
+        </section>
+
+        <section name="database" title="Database Options" expanded="false">
+            <param
+                name="archetypes"
+                type="select"
+                multiple="true"
+                label="Molecule family (for database's compounds enrichment)"
+            >
+                <option value="G" selected="true">General</option>
+                <option value="H">Halogenates</option>
+            </param>
+
+            <!-- Source of the compounds inserted in the database, if any. -->
+            <conditional name="base">
+                <param name="kind" type="select" label="File containing compound's type">
+                    <option value="none" selected="true">None (default)</option>
+                    <option value="tabular">tabular</option>
+                    <option value="sql">sql</option>
+                </param>
+                <!-- every option gets its own "when", even without parameter -->
+                <when value="none"/>
+                <when value="tabular">
+                    <param
+                        name="tabular"
+                        type="data"
+                        multiple="true"
+                        label="Tabular file containing compound to use in XSeeker"
+                        optional="true"
+                        format="tabular"
+                    >
+                    </param>
+                </when>
+                <when value="sql">
+                    <param
+                        name="sql"
+                        type="data"
+                        multiple="true"
+                        label="SQL file containing compound to use in XSeeker"
+                        optional="true"
+                        format="sql"
+                    >
+                    </param>
+                </when>
+            </conditional>
+
+            <!-- Where the definition of the database models comes from. -->
+            <conditional name="models">
+                <param name="kind" type="select" label="How is the database's model defined">
+                    <option value="default" selected="true">Default (regular XSeeker Database)</option>
+                    <option value="url">Download model file</option>
+                    <option value="git">Get versionned model file</option>
+                </param>
+                <when value="default"/>
+                <when value="url">
+                    <param name="url" type="text" format="url" label="File URL"/>
+                </when>
+                <when value="git">
+                    <param name="url" type="text" format="url" label="Repo URL"/>
+                </when>
+            </conditional>
+        </section>
+    </inputs>
+
+
+    <!-- Single output: the database file written at the path given to the output option. -->
+    <outputs>
+        <data format="xseeker.sqlite" name="output" />
+    </outputs>
+
+    <configfiles>
+        <!--
+            base_config: R source evaluating to the list of default DBModelR
+            model definitions. The command hands this file to the models
+            option when the "default" model kind is selected.
+        -->
+        <configfile name="base_config">
+tryCatch({
+    DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER"))
+}, error=function(e) {
+    stop("Please, install DBModelR before you source this file.")
+})
+
+list(
+    adduct=DBModelR::ModelDefinition(
+        table="adduct",
+        fields=list(
+            name="TEXT",
+            mass="FLOAT",
+            charge="INTEGER",
+            multi="INTEGER",
+            formula_add="TEXT",
+            formula_ded="TEXT",
+            sign="TEXT",
+            oidscore="INTEGER",
+            quasi="INTEGER",
+            ips="FLOAT"
+        )
+    ),
+    cluster=DBModelR::ModelDefinition(
+        table="cluster",
+        fields=list(
+            clusterID="INTEGER",
+            formula="TEXT",
+            annotation="TEXT",
+            coeff="FLOAT",
+            r_squared="FLOAT",
+            charge="INTEGER",
+            mean_rt="FLOAT",
+            score="FLOAT",
+            deviation="FLOAT",
+            status="TEXT",
+            adduct="TEXT",
+            curent_group="INTEGER",
+            pc_group="INTEGER",
+            align_group="INTEGER",
+            xcms_group="INTEGER"
+        ),
+        one=list("sample", "compound")
+    ),
+    compound=DBModelR::ModelDefinition(
+        table="compound",
+        fields=list(
+            name="TEXT",
+            common_name="TEXT",
+            formula="TEXT",
+            charge="INTEGER",
+            date="TEXT",
+            mz="FLOAT"
+        )
+    ),
+    feature=DBModelR::ModelDefinition(
+        table="feature",
+        fields=list(
+            featureID="INTEGER",
+            mz="FLOAT",
+            mz_min="FLOAT",
+            mz_max="FLOAT",
+            rt="FLOAT",
+            rt_min="FLOAT",
+            rt_max="FLOAT",
+            int_o="FLOAT",
+            int_b="FLOAT",
+            max_o="FLOAT",
+            iso="TEXT",
+            abundance="FLOAT"
+        ),
+        one=list("cluster")
+    ),
+    instrument=DBModelR::ModelDefinition(
+        table="instrument",
+        fields=list(
+            model="TEXT",
+            manufacturer="TEXT",
+            analyzer="TEXT",
+            detector_type="TEXT",
+            ion_source="TEXT"
+        )
+    ),
+    instrument_config=DBModelR::ModelDefinition(
+        table="instrument_config",
+        fields=list(
+            resolution="TEXT",
+            agc_target="TEXT",
+            maximum_IT="TEXT",
+            number_of_scan_range="TEXT",
+            scan_range="TEXT",
+            version="TEXT"
+        )
+    ),
+    project=DBModelR::ModelDefinition(
+        table="project",
+        fields=list(
+            name="TEXT",
+            comment="TEXT"
+        ),
+        one=list("sample")
+    ),
+    sample=DBModelR::ModelDefinition(
+        table="sample",
+        fields=list(
+            name="TEXT",
+            path="TEXT",
+            polarity="TEXT",
+            kind="TEXT", ## rdata or mxml or enriched_rdata
+            raw="BLOB"
+        ),
+        one=list(
+            "peak_picking_parameters",
+            "pairing_parameters",
+            "alignmenmt_parameters",
+            "camera_parameters",
+            "instrument",
+            "instrument_config",
+            "software",
+            "smol_xcms_set"
+        )
+    ),
+    smol_xcms_set=DBModelR::ModelDefinition(
+        table="smol_xcms_set",
+        fields=list(
+            raw="BLOB"
+        )
+    ),
+    software=DBModelR::ModelDefinition(
+        table="software",
+        fields=list(
+            name="TEXT",
+            version="TEXT"
+        )
+    ),
+    peak_picking_parameters=DBModelR::ModelDefinition(
+        table="peak_picking_parameters",
+        fields=list(
+            ppm="FLOAT",
+            peakwidth="TEXT",
+            snthresh="TEXT",
+            prefilterStep="TEXT",
+            prefilterLevel="TEXT",
+            mzdiff="TEXT",
+            fitgauss="TEXT",
+            noise="TEXT",
+            mzCenterFun="TEXT",
+            integrate="INTEGER",
+            firstBaselineCheck="TEXT",
+            snthreshIsoROIs="TEXT",
+            maxCharge="INTEGER",
+            maxIso="INTEGER",
+            mzIntervalExtension="TEXT"
+        )
+    ),
+    alignmenmt_parameters=DBModelR::ModelDefinition(
+        table="alignmenmt_parameters",
+        fields=list(
+            binSize="TEXT",
+            centerSample="TEXT",
+            response="TEXT",
+            distFun="TEXT",
+            gapInit="TEXT",
+            gapExtend="TEXT",
+            factorDiag="TEXT",
+            factorGap="TEXT",
+            localAlignment="INTEGER",
+            initPenalty="TEXT",
+            bw="TEXT",
+            minFraction="TEXT",
+            minSamples="TEXT",
+            maxFeatures="TEXT"
+        )
+    )
+)
+        </configfile>
+    </configfiles>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/recreate_full.R	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,35 @@
+#!/home/lain/R/bin/Rscript
+
+## Rewrite ../convert/yann.rdata so that it holds the objects expected from
+## an enriched rdata: zipfile, listOFlistArguments, diffrep,
+## variableMetadata, xa and singlefile.
+##
+## ZIP: when TRUE the samples are referenced through ../convert/yann.zip,
+## otherwise through the files found in ../convert/.
+
+ZIP <- FALSE
+
+## keep a copy of the rdata before it is overwritten
+file.copy("../convert/yann.rdata", "../convert/yann.rdata.old")
+load("../convert/yann.rdata", rdata <- new.env())
+
+listOFlistArguments <- rdata$listOFlistArguments
+diffrep <- rdata$diffrep
+variableMetadata <- rdata$variableMetadata
+xa <- rdata$xa
+if (ZIP) {
+    zipfile <- normalizePath("../convert/yann.zip", mustWork=TRUE)
+    singlefile <- rdata$singlefile
+} else {
+    ## Every file of the directory is a sample, except the rdata, the backup
+    ## made above and the archive used in ZIP mode.
+    not_samples <- c("yann.rdata", "yann.rdata.old", "yann.zip")
+    singles <- list.files("../convert/")
+    singles <- singles[!(singles %in% not_samples)]
+    singlefile <- list()
+    for (single in singles) {
+        singlefile[tools::file_path_sans_ext(single)] <- normalizePath(paste0("../convert/", single))
+    }
+    zipfile <- NULL
+    print(singlefile)
+}
+
+save(
+    zipfile,
+    listOFlistArguments,
+    diffrep,
+    variableMetadata,
+    xa,
+    singlefile,
+    file="../convert/yann.rdata"
+    ,version=2
+)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test.sh	Tue Nov 24 18:55:08 2020 +0000
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+currdir=`pwd`
+cd `dirname $(readlink -f $0)`
+
+ln -s ../data/ ./data
+
+~/R/bin/Rscript $(realpath ../XSeekerPreparator.R)  \
+    -i $(realpath ../data/full.rdata)               \
+    -m $(realpath ../data/models.R)                 \
+    -c $(realpath ../data/SERUM_v2019Jan17.tabular) \
+    -o $(realpath ../test.sqlite)                   \
+|| true
+
+
+
+rm -rf "./data"
+cd "${currdir}"