changeset 20:ce94e7a141bb draft default tip

" master branch Updating"
author lain
date Tue, 06 Dec 2022 10:18:10 +0000
parents 2937e72e5891
children
files README.md XSeekerPreparator.R XSeekerPreparator.xml format_versionning.MD galaxy/tools/LC-MSMS/XSeekerPreparator.R
diffstat 5 files changed, 267 insertions(+), 166 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Tue Oct 18 12:57:28 2022 +0000
+++ b/README.md	Tue Dec 06 10:18:10 2022 +0000
@@ -96,7 +96,7 @@
 --------
 
  - **@name**: XSeekerPreparator
- - **@version**: 1.3.0
+ - **@version**: 1.3.1
  - **@authors**: Lain Pavot
  - **@date creation**: 15/09/2020
 
--- a/XSeekerPreparator.R	Tue Oct 18 12:57:28 2022 +0000
+++ b/XSeekerPreparator.R	Tue Dec 06 10:18:10 2022 +0000
@@ -198,7 +198,7 @@
 search_tree <- function(path, target) {
     target <- tolower(target)
     for (file in list.files(path)) {
-        if (is.dir(file)) {
+        if (fs::is.dir(file)) {
             result <- search_tree(file.path(path, file), target)
             if (!is.null(result)) {
                 return(result)
@@ -400,12 +400,10 @@
 
 guess_translator <- function(header) {
     result <- list(
-        # HMDB_ID = NULL,
         mz = NULL,
         name = NULL,
         common_name = NULL,
-        formula = NULL,
-        # inchi_key = NULL
+        formula = NULL
     )
     asked_cols <- names(result)
     for (asked_col in asked_cols) {
@@ -471,12 +469,12 @@
         process_sample_list(
             orm, rdata, samples,
             show_percent = show_percent,
-            file_grouping_var = options$class
+            file_grouping_var = options$class,
+            options = options
         )
         NULL
     }, error = function(e) {
-        message(e)
-        e
+        return(e)
     })
     if (!is.null(mzml_tmp_dir)) {
         unlink(mzml_tmp_dir, recursive = TRUE)
@@ -484,6 +482,7 @@
     if (!is.null(error)) {
         stop(error)
     }
+    return(!is.null(error))
 }
 
 gather_mzml_files <- function(rdata) {
@@ -510,7 +509,8 @@
     rdata,
     sample_names,
     show_percent,
-    file_grouping_var = NULL
+    file_grouping_var = NULL,
+    options = list()
 ) {
     if (is.null(file_grouping_var)) {
         file_grouping_var <- find_grouping_var(rdata$variableMetadata)
@@ -587,19 +587,6 @@
 
     message("Parameters from previous processes extracted.")
 
-
-    indices <- as.numeric(unique(var_meta[, file_grouping_var]))
-    if (any(is.null(names(singlefile)[indices]))) {
-        stop(sprintf(
-            paste(
-                "Indices defined by grouping variable %s are not all present",
-                "in singlefile names (%s).\nCannot continue. Indices: %s"
-            ),
-            file_grouping_var,
-            paste(names(singlefile), collapse = ", "),
-            paste(indices, collapse = ", ")
-        ))
-    }
     smol_xcms_set <- orm$smol_xcms_set()
     mz_tab_info <- new.env()
     g <- xcms::groups(xcms_set)
@@ -623,7 +610,7 @@
     smol_xcms_set_id <- smol_xcms_set$get_id()
     rm(smol_xcms_set)
 
-    for (no in indices) {
+    for (no in seq_along(names(singlefile))) {
         sample_name <- names(singlefile)[[no]]
         sample_path <- singlefile[[no]]
         if (
@@ -760,7 +747,6 @@
     field_names <- as.list(names(orm$feature()$fields__))
     field_names[field_names == "id"] <- NULL
 
-    features <- list()
     dummy_feature <- orm$feature()
 
     if (show_percent <- context$show_percent) {
@@ -772,7 +758,10 @@
 
         rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
     }
-    cluster_row <- list()
+    # features <- list()
+    features <- as.list(rows) ## allocate all memory before processing
+    # cluster_row <- list()
+    cluster_row <- as.list(rows) ## allocate all memory before processing
     for (row in rows) {
         if (show_percent && (row / total) * 100 > percent) {
             percent <- percent + 1
@@ -843,7 +832,8 @@
             next_align_group
         )
         next_align_group <- next_align_group + 1
-        features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
+        features[[row]] <- as.list(dummy_feature, field_names)
+        # features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
         dummy_feature$clear()
     }
     rm(var_meta)
@@ -963,6 +953,7 @@
     }
     cluster$save()
     feature$set_cluster(cluster)
+    feature$save()
     return(cluster)
 }
 
@@ -1064,6 +1055,12 @@
         help = "Display this tool's version and exits"
     ),
     optparse::make_option(
+        c("-V", "--verbose"),
+        action = "store_true",
+        help = "Does more verbose outputs",
+        default = FALSE
+    ),
+    optparse::make_option(
         c("-i", "--input"),
         type = "character",
         help = "The rdata path to import in XSeeker"
@@ -1161,6 +1158,18 @@
 
 load(args$options$input, rdata <- new.env())
 
-process_rdata(orm, rdata, args$options)
+args$options$verbose <- (
+    if (args$options$verbose) {
+        message("Verbose outputs.")
+        \(...) {
+            message(sprintf(...))
+        }
+    } else {
+        \(...) {
+        }
+    }
+)
+
+err_code <- process_rdata(orm, rdata, args$options)
 
 quit(status = err_code)
--- a/XSeekerPreparator.xml	Tue Oct 18 12:57:28 2022 +0000
+++ b/XSeekerPreparator.xml	Tue Dec 06 10:18:10 2022 +0000
@@ -1,34 +1,26 @@
-<tool id="xseeker_preparator" name="XSeeker Preparator" version="1.3.0">
+<tool id="xseeker_preparator" name="XSeeker Preparator" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
     <description>prepares RData file from XCMS+CAMERA for XSeeker</description>
+    <macros>
+        <token name="@VERSION@">1.3.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
     <edam_operations>
         <edam_operation>operation_1812</edam_operation>
         <edam_operation>operation_0335</edam_operation>
     </edam_operations>
     <requirements>
-        <requirement type="package" >bioconductor-xcms</requirement>
-        <requirement type="package" version="1.48.0">bioconductor-camera</requirement>
-        <requirement type="package" >git</requirement>
-        <requirement type="package" >r-blob</requirement>
-        <requirement type="package" >r-dbi</requirement>
-        <requirement type="package" >r-fst</requirement>
-        <requirement type="package" >r-optparse</requirement>
-        <requirement type="package" >r-stringr</requirement>
-        <requirement type="package" >r-purrr</requirement>
-        <requirement type="package" >r-rsqlite</requirement>
-        <requirement type="package" >r-dbmodelr</requirement>
-        <!-- 
-        <requirement type="package" version="3.12.0">bioconductor-xcms</requirement>
+        <requirement type="package" version="4.1.3">r-base</requirement>
+        <requirement type="package" version="3.14.0">bioconductor-xcms</requirement>
         <requirement type="package" version="1.48.0">bioconductor-camera</requirement>
         <requirement type="package" version="2.29.2">git</requirement>
-        <requirement type="package" version="1.2.1">r-blob</requirement>
-        <requirement type="package" version="1.1.0">r-dbi</requirement>
-        <requirement type="package" version="0.9.4">r-fst</requirement>
+        <requirement type="package" version="1.2.3">r-blob</requirement>
+        <requirement type="package" version="1.1.3">r-dbi</requirement>
+        <requirement type="package" version="0.9.8">r-fst</requirement>
         <requirement type="package" version="1.6.6">r-optparse</requirement>
         <requirement type="package" version="1.4.0">r-stringr</requirement>
-        <requirement type="package" version="0.3.4">r-purrr</requirement>
-        <requirement type="package" version="2.2.1">r-rsqlite</requirement>
+        <requirement type="package" version="0.3.5">r-purrr</requirement>
+        <requirement type="package" version="2.2.18">r-rsqlite</requirement>
         <requirement type="package" version="0.2.0">r-dbmodelr</requirement>
-         -->
     </requirements>
     <stdio>
         <exit_code 
@@ -67,104 +59,14 @@
                 #else
                     --models '${database.models.url}'
                 #end if
-                #if $class_column
-                    --class '${class_column}'
+                #if $class
+                    --class '${class}'
                 #end if
         ]]>
     </command>
-    <inputs>
-        <param 
-            name="input"
-            type="data"
-            multiple="false"
-            label="Rdata to prepare"
-            optional="false"
-            format="rdata"
-        >
-        </param>
-        <param
-            name="class_column"
-            type="text"
-            value=""
-            label="Column class name"
-            help="
-                The name of the column containing the classes - 
-                leave empty to let xsprep guess
-            "
-            optional="true"
-        >
-        </param>
-        <section name="samples" title="Samples Options" expanded="false">
-            <param 
-                name="selected"
-                type="data"
-                multiple="true"
-                label="Samples to visualize"
-                optional="true"
-                format="mzml"
-            >
-            </param>
-        </section>
-        <section name="database" title="Database Options" expanded="false">
-            <param
-                name="archetypes"
-                type="select"
-                multiple="true"
-                label="Molecule family (for database's compounds enrichment)"
-            >
-                <option value="G" selected="true">General</option>
-                <option value="H">Halogenates</option>
-            </param>
-            <conditional name="base">
-                <param name="kind" type="select" label="File containing compound's type">
-                    <option value="none" selected="true">None (deafult)</option>
-                    <option value="tabular">tabular</option>
-                    <option value="sql">sql</option>
-                </param>
-                <when value="tabular">
-                    <param
-                        name="tabular"
-                        type="data"
-                        multiple="true"
-                        label="Tabular file containing compound to use in XSeeker"
-                        optional="true"
-                        format="tabular"
-                    >
-                    </param>
-                </when>
-                <when value="sql">
-                    <param
-                        name="sql"
-                        type="data"
-                        multiple="true"
-                        label="SQL file containing compound to use in XSeeker"
-                        optional="true"
-                        format="sql"
-                    >
-                    </param>
-                </when>
-            </conditional>
-            <conditional name="models">
-                <param name="kind" type="select" label="How is the database's model defined">
-                    <option value="default" selected="true">Default (regular XSeeker Database)</option>
-                    <option value="url">Download model file</option>
-                    <option value="git">Get versionned model file</option>
-                </param>
-                <when value="url">
-                    <param name="url" type="text" format="url" label="File URL"/>
-                </when>
-                <when value="git">
-                    <param name="url" type="text" format="url" label="Repo URL"/>
-                </when>
-            </conditional>
-        </section>
-    </inputs>
-    <outputs>
-        <data format="sqlite" name="output" />
-        <!-- <data format="xseeker.sqlite" name="output" /> -->
-    </outputs>
     <configfiles>
         <configfile name="base_config">
+<![CDATA[
 tryCatch({
     DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER"))
 }, error=function(e) {
@@ -340,6 +242,179 @@
         )
     )
 )
+            ]]>
         </configfile>
     </configfiles>
+    <inputs>
+        <param 
+            argument="input"
+            type="data"
+            multiple="false"
+            optional="false"
+            format="rdata"
+            label="Rdata to prepare"
+            help="
+                This rdata must be produced by a xcms+camera processing
+                and original files must still be in the history.
+            "
+        >
+        </param>
+        <param
+            argument="class"
+            type="text"
+            value=""
+            label="Column class name"
+            help="
+                The name of the column containing the classes - 
+                leave empty to let XSeeker Preparator guess
+            "
+            optional="true"
+        >
+        </param>
+        <section name="samples" title="Samples Options" expanded="false">
+            <param 
+                name="selected"
+                type="data"
+                multiple="true"
+                label="Samples to visualize"
+                optional="true"
+                format="mzml"
+            >
+            </param>
+        </section>
+        <section name="database" title="Database Options" expanded="false">
+            <param
+                name="archetypes"
+                type="select"
+                multiple="true"
+                label="Molecule family (for database's compounds enrichment)"
+            >
+                <option value="G" selected="true">General</option>
+                <option value="H">Halogenates</option>
+            </param>
+            <conditional name="base">
+                <param name="kind" type="select" label="File containing compound's type">
+                    <option value="none" selected="true">None (default)</option>
+                    <option value="tabular">tabular</option>
+                    <option value="sql">sql</option>
+                </param>
+                <when value="none" />
+                <when value="sql" />
+                <when value="tabular">
+                    <param
+                        name="tabular"
+                        type="data"
+                        multiple="true"
+                        label="Tabular file containing compound to use in XSeeker"
+                        optional="true"
+                        format="tabular"
+                    >
+                    </param>
+                </when>
+                <when value="sql">
+                    <param
+                        name="sql"
+                        type="data"
+                        multiple="true"
+                        label="SQL file containing compound to use in XSeeker"
+                        optional="true"
+                        format="sql"
+                    >
+                    </param>
+                </when>
+            </conditional>
+            <conditional name="models">
+                <param name="kind" type="select" label="How is the database's model defined">
+                    <option value="default" selected="true">Default (regular XSeeker Database)</option>
+                    <option value="url">Download model file</option>
+                    <option value="git">Get versionned model file</option>
+                </param>
+                <when value="default" />
+                <when value="url">
+                    <param name="url" type="text" format="url" label="File URL"/>
+                </when>
+                <when value="git">
+                    <param name="url" type="text" format="url" label="Repo URL"/>
+                </when>
+            </conditional>
+        </section>
+    </inputs>
+    <outputs>
+        <data format="sqlite" name="output" />
+        <!-- <data format="xseeker.sqlite" name="output" /> -->
+    </outputs>
+    <help>
+
+.. class:: infomark
+
+**Authors** Lain Pavot (lain.pavot@inrae.fr)
+
+--------------------------------------
+
+==================
+XSeeker Preparator
+==================
+
+-----------
+Description
+-----------
+
+A preparation tool to precalculate and reorganize data from XCMS+CAMERA for
+XSeeker displayer tool to display them faster (from 30 min/some hours to some seconds).
+
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+================ ======================== =========== ===============
+  Name             output file             format       parameter   
+================ ======================== =========== ===============
+  CAMERA           rdata.camera.quick      rdata                    
+================ ======================== =========== ===============
+
+
+**Downstream tools**
+
+================ ======================== =========== ===============
+  Name             output file              format      parameter   
+================ ======================== =========== ===============
+  XSeeker          sqlite                   sqlite3         NA      
+================ ======================== =========== ===============
+
+
+
+-----------
+Input files
+-----------
+
+Takes an rdata file output by CAMERA (after XCMS) as input. The rdata must have
+been produced from mzml files, and those mzml files must still be present
+in the history when running XSeeker Preparator:
+the rdata contains only some data and the paths to the original mzml files.
+These mzml files contain a lot of useful information needed to prepare
+the data for XSeeker. These files are not given as input, but are still
+needed.
+
+----------
+Parameters
+----------
+Column class name:
+ - the name of the column that defines classes in your sample metadata.
+Sample options:
+ - provide mzml file names to process. Other files defined in the rdata will
+   not be processed, and will not be available in xseeker.
+Database Options:
+ - provide the molecular family you want to annotate, and a compound file.
+ - The database's model can be redefined - for developers only.
+
+------------
+Output files
+------------
+The output is an SQLite3 file: a database file that organizes data so that
+it is easily stored, filtered, modified and retrieved.
+    </help>
+    <citations></citations>
 </tool>
--- a/format_versionning.MD	Tue Oct 18 12:57:28 2022 +0000
+++ b/format_versionning.MD	Tue Dec 06 10:18:10 2022 +0000
@@ -9,6 +9,14 @@
 The first version (the older one) is at the bottom of this file, and the
 modifications provided by the newest versions are on top of the file.
 
+Also, the changelog is merged into this file.
+
+
+VERSION 1.3.1
+=====
+Bugfix in file processing: some files were not processed due to
+a misunderstanding of the meaning of a field in the rdata.
+
 
 VERSION 1.3.0
 =====
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.R	Tue Oct 18 12:57:28 2022 +0000
+++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.R	Tue Dec 06 10:18:10 2022 +0000
@@ -198,7 +198,7 @@
 search_tree <- function(path, target) {
     target <- tolower(target)
     for (file in list.files(path)) {
-        if (is.dir(file)) {
+        if (fs::is.dir(file)) {
             result <- search_tree(file.path(path, file), target)
             if (!is.null(result)) {
                 return(result)
@@ -400,12 +400,10 @@
 
 guess_translator <- function(header) {
     result <- list(
-        # HMDB_ID = NULL,
         mz = NULL,
         name = NULL,
         common_name = NULL,
-        formula = NULL,
-        # inchi_key = NULL
+        formula = NULL
     )
     asked_cols <- names(result)
     for (asked_col in asked_cols) {
@@ -471,12 +469,12 @@
         process_sample_list(
             orm, rdata, samples,
             show_percent = show_percent,
-            file_grouping_var = options$class
+            file_grouping_var = options$class,
+            options = options
         )
         NULL
     }, error = function(e) {
-        message(e)
-        e
+        return(e)
     })
     if (!is.null(mzml_tmp_dir)) {
         unlink(mzml_tmp_dir, recursive = TRUE)
@@ -484,6 +482,7 @@
     if (!is.null(error)) {
         stop(error)
     }
+    return(!is.null(error))
 }
 
 gather_mzml_files <- function(rdata) {
@@ -510,7 +509,8 @@
     rdata,
     sample_names,
     show_percent,
-    file_grouping_var = NULL
+    file_grouping_var = NULL,
+    options = list()
 ) {
     if (is.null(file_grouping_var)) {
         file_grouping_var <- find_grouping_var(rdata$variableMetadata)
@@ -587,19 +587,6 @@
 
     message("Parameters from previous processes extracted.")
 
-
-    indices <- as.numeric(unique(var_meta[, file_grouping_var]))
-    if (any(is.null(names(singlefile)[indices]))) {
-        stop(sprintf(
-            paste(
-                "Indices defined by grouping variable %s are not all present",
-                "in singlefile names (%s).\nCannot continue. Indices: %s"
-            ),
-            file_grouping_var,
-            paste(names(singlefile), collapse = ", "),
-            paste(indices, collapse = ", ")
-        ))
-    }
     smol_xcms_set <- orm$smol_xcms_set()
     mz_tab_info <- new.env()
     g <- xcms::groups(xcms_set)
@@ -623,7 +610,7 @@
     smol_xcms_set_id <- smol_xcms_set$get_id()
     rm(smol_xcms_set)
 
-    for (no in indices) {
+    for (no in seq_along(names(singlefile))) {
         sample_name <- names(singlefile)[[no]]
         sample_path <- singlefile[[no]]
         if (
@@ -760,7 +747,6 @@
     field_names <- as.list(names(orm$feature()$fields__))
     field_names[field_names == "id"] <- NULL
 
-    features <- list()
     dummy_feature <- orm$feature()
 
     if (show_percent <- context$show_percent) {
@@ -772,7 +758,10 @@
 
         rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
     }
-    cluster_row <- list()
+    # features <- list()
+    features <- as.list(rows) ## allocate all memory before processing
+    # cluster_row <- list()
+    cluster_row <- as.list(rows) ## allocate all memory before processing
     for (row in rows) {
         if (show_percent && (row / total) * 100 > percent) {
             percent <- percent + 1
@@ -843,7 +832,8 @@
             next_align_group
         )
         next_align_group <- next_align_group + 1
-        features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
+        features[[row]] <- as.list(dummy_feature, field_names)
+        # features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
         dummy_feature$clear()
     }
     rm(var_meta)
@@ -963,6 +953,7 @@
     }
     cluster$save()
     feature$set_cluster(cluster)
+    feature$save()
     return(cluster)
 }
 
@@ -1064,6 +1055,12 @@
         help = "Display this tool's version and exits"
     ),
     optparse::make_option(
+        c("-V", "--verbose"),
+        action = "store_true",
+        help = "Does more verbose outputs",
+        default = FALSE
+    ),
+    optparse::make_option(
         c("-i", "--input"),
         type = "character",
         help = "The rdata path to import in XSeeker"
@@ -1161,6 +1158,18 @@
 
 load(args$options$input, rdata <- new.env())
 
-process_rdata(orm, rdata, args$options)
+args$options$verbose <- (
+    if (args$options$verbose) {
+        message("Verbose outputs.")
+        \(...) {
+            message(sprintf(...))
+        }
+    } else {
+        \(...) {
+        }
+    }
+)
+
+err_code <- process_rdata(orm, rdata, args$options)
 
 quit(status = err_code)