Mercurial > repos > recetox > recetox_aplcms_compute_clusters

--- a/help.xml	Mon Apr 03 14:56:58 2023 +0000
+++ b/help.xml	Wed May 24 14:48:47 2023 +0000
@@ -110,7 +110,6 @@
 =================================

 Group features whose `mz` and `rt` values lie within the given tolerances into clusters, creating larger features from raw data points.
-Custom tolerances for `mz` and `rt` are computed based on the given parameters.
 The tool takes a collection of all detected features and computes the clusters over a global feature table, adding the `sample_id` and `cluster` columns to the table.

 Example Output
@@ -133,7 +132,7 @@
 recetox-aplcms - correct time
 =============================

-Apply spline-based retention time correction to a feature table given the template table and the computed `mz` and `rt` tolerances.
+Apply spline-based retention time correction to a feature table given the template table and the `mz` and `rt` tolerances.

 Example Output
 --------------
--- a/macros.xml	Mon Apr 03 14:56:58 2023 +0000
+++ b/macros.xml	Wed May 24 14:48:47 2023 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.10.3</token>
+    <token name="@TOOL_VERSION@">0.11.0</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">r-recetox-aplcms</requirement>
@@ -93,32 +93,11 @@
         </section>
     </xml>

-    <xml name="compute_clusters_params">
-        <conditional name="tolerances_input_method">
-            <param name="input_method" type="select" display="radio" label="Tolerances input method"
-                   help="Tolerances can be entered directly or loaded from a file.">
-                <option value="direct" selected="true">direct</option>
-                <option value="file">file</option>
-            </param>
-            <when value="direct">
-                <param name="mz_tol_relative" type="float" optional="true" label="Relative m/z tolerance"
-                       help="Relative m/z tolerance to use for grouping features.
-                       If not provided, it is calculated from the data using kernel density estimation." />
-                <param name="rt_tol_relative" type="float" optional="true" label="Relative rt tolerance [unit corresponds to the retention time]"
-                       help="Relative retention time tolerance to use for grouping features.
-                       If not provided, it is calculated from the data using kernel density estimation." />
-            </when>
-            <when value="file">
-                <param label="Input tolerances values" name="input_tolerances" type="data" format="parquet"
-                       help="Table containing tolerance values." />
-            </when>
-        </conditional>
-        <param name="mz_tol_absolute" type="float" label="Minimal absolute m/z tolerance [Da]" value="1e-05"
-               help="During the clustering, an m/z tolerance is computed based on the data and the specified relative tolerance.
-               This parameter allows the specification of a minimal value of this tolerance." />
-        <param name="mz_max_diff" type="float" label="Maximal m/z difference [Da]" value="0.01"
-               help="Maximum allowed difference between feature m/z values to belong to the same cluster." />
-
+    <xml name="tolerances">
+       <param name="mz_tol_relative_ppm" type="float" value="10" label="Relative m/z tolerance [ppm]"
+              help="Relative m/z tolerance to use for grouping features." />
+       <param name="rt_tol" type="float" value="5" label="Retention time tolerance [unit corresponds to the retention time]"
+              help="Retention time tolerance to use for grouping features." />
     </xml>

     <xml name="recover_weaker_params">
--- a/recetox_aplcms_compute_clusters.xml	Mon Apr 03 14:56:58 2023 +0000
+++ b/recetox_aplcms_compute_clusters.xml	Wed May 24 14:48:47 2023 +0000
@@ -22,42 +22,21 @@
               feature_tables <- feature_tables[ordering]
               sample_names <- sample_names[ordering]

-              #if $tolerances_input_method.input_method == "file"
-              tolerances <- load_data_from_parquet_file('$input_tolerances')
-              #end if
-
-              clusters <- compute_clusters(
+              clusters <- compute_clusters_simple(
                   feature_tables = feature_tables,
-                  #if $tolerances_input_method.input_method == "file"
-                  mz_tol_relative = get_mz_tol(tolerances),
-                  rt_tol_relative = get_rt_tol(tolerances),
-                  #else
-                  #if $tolerances_input_method.mz_tol_relative:
-                  mz_tol_relative = $tolerances_input_method.mz_tol_relative,
-                  #else:
-                  mz_tol_relative = NA,
-                  #end if
-                  #if $tolerances_input_method.rt_tol_relative:
-                  rt_tol_relative = $tolerances_input_method.rt_tol_relative,
-                  #else:
-                  rt_tol_relative = NA,
-                  #end if
-                  #end if
-                  mz_max_diff = 10 * $mz_max_diff,
-                  mz_tol_absolute = $mz_tol_absolute,
                   sample_names = sample_names,
-                  do.plot = FALSE
+                  mz_tol_ppm = $mz_tol_relative_ppm,
+                  rt_tol = $rt_tol
               )

               save_parquet_collection(clusters, sample_names, "clustered")
-              save_tolerances(clusters, '$tolerances')
          ]]></configfile>
     </configfiles>

     <inputs>
         <param name="files" type="data_collection" collection_type="list" format="parquet" label="Input data"
                help="List of tables containing features." />
-        <expand macro="compute_clusters_params"/>
+        <expand macro="tolerances"/>
     </inputs>

     <outputs>
@@ -65,7 +44,6 @@
                     label="${tool.name} feature_tables on ${on_string}">
             <discover_datasets pattern="__designation__" directory="clustered" format="parquet" />
         </collection>
-        <data name="tolerances" format="parquet" label="${tool.name} on ${on_string} (updated tolerances)" />
     </outputs>

     <tests>
Binary file test-data/remove_noise_docker.parquet has changed
--- a/utils.R	Mon Apr 03 14:56:58 2023 +0000
+++ b/utils.R	Wed May 24 14:48:47 2023 +0000
@@ -39,11 +39,11 @@
     return(features)
 }

-save_parquet_collection <- function(table, sample_names, subdir) {
+save_parquet_collection <- function(feature_tables, sample_names, subdir) {
     dir.create(subdir)
-    for (i in seq_len(length(table$feature_tables))) {
+    for (i in seq_len(length(feature_tables))) {
       filename <- file.path(subdir, paste0(subdir, "_", sample_names[i], ".parquet"))
-      feature_table <- as.data.frame(table$feature_tables[[i]])
+      feature_table <- as.data.frame(feature_tables[[i]])
       feature_table <- save_sample_name(feature_table, sample_names[i])
       arrow::write_parquet(feature_table, filename)
     }
@@ -59,14 +59,6 @@
     arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file)
 }

-get_mz_tol <- function(tolerances) {
-    return(tolerances$mz_tolerance)
-}
-
-get_rt_tol <- function(tolerances) {
-    return(tolerances$rt_tolerance)
-}
-
 save_aligned_features <- function(aligned_features, metadata_file, rt_file, intensity_file) {
     save_data_as_parquet_file(aligned_features$metadata, metadata_file)
     save_data_as_parquet_file(aligned_features$rt, rt_file)