Mercurial > repos > recetox > recetox_aplcms_align_features

diff recetox_aplcms_align_features.xml @ 2:abe783e0daca draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 506df2aef355b3791567283e1a175914f06b405a
author: recetox
date: Mon, 13 Feb 2023 10:26:59 +0000
parents: b07fd3d7ffd0
children: 1e2a13bcb5a7
--- a/recetox_aplcms_align_features.xml	Thu Jun 16 10:26:58 2022 +0000
+++ b/recetox_aplcms_align_features.xml	Mon Feb 13 10:26:59 2023 +0000
@@ -1,91 +1,63 @@
-<tool id="recetox_aplcms_align_features" name="RECETOX apLCMS - align features" version="@TOOL_VERSION@+galaxy1">
-    <description>align features from LC/MS spectra across samples</description>
+<tool id="recetox_aplcms_align_features" name="recetox-aplcms - align features" version="@TOOL_VERSION@+galaxy0">
+    <description>align peaks across samples</description>
     <macros>
         <import>macros.xml</import>
-        <import>macros_split.xml</import>
+        <import>help.xml</import>
     </macros>
     <expand macro="creator"/>
+    <expand macro="requirements"/>
 
-    <expand macro="requirements"/>
     <command detect_errors="aggressive"><![CDATA[
-        sh ${symlink_inputs} &&
         Rscript -e 'source("${__tool_directory__}/utils.R")' -e 'source("${run_script}")'
     ]]></command>
     <configfiles>
-        <configfile name="symlink_inputs">
-            #for $infile in $ms_files
-                ln -s '${infile}' '${infile.element_identifier}'
-            #end for
-             #for $infile in $corrected_files
-                ln -s '${infile}' '${infile.element_identifier}'
-            #end for
-        </configfile>
         <configfile name="run_script"><![CDATA[
-            #set filenames_str = str("', '").join([str($f.element_identifier) for $f in $ms_files])
-            files_list <- sort_samples_by_acquisition_number(c('$filenames_str'))
-            sample_names <- get_sample_name(files_list)
+             #set filenames = str("', '").join([str($f) for $f in $files])
+             feature_tables <- load_parquet_collection(c('$filenames'))
+             sample_names <- unlist(lapply(feature_tables, load_sample_name))
+
+             validate_sample_names(sample_names)
+
+             ordering <- order(sample_names)
+             feature_tables <- feature_tables[ordering]
+             sample_names <- sample_names[ordering]
 
-            #set corrected_files = str("', '").join([str($f.element_identifier) for $f in $corrected_files])
-            corrected_features <- load_features(c('$corrected_files'))
+             tolerances <- load_data_from_parquet_file('$input_tolerances')
 
-            aligned <- align_features(
-                sample_names = sample_names,
-                features = corrected_features,
-                min.exp = $min_exp,
-                mz.tol = $peak_alignment.align_mz_tol,
-                chr.tol = $peak_alignment.align_chr_tol,
-                find.tol.max.d = 10 * $mz_tol,
-                max.align.mz.diff = $peak_alignment.max_align_mz_diff,
-                do.plot = FALSE
-            )
+             aligned_features <- create_aligned_feature_table(
+                  features_table = dplyr::bind_rows(feature_tables),
+                  min_occurrence = $min_occurrence,
+                  sample_names = sample_names,
+                  mz_tol_relative = get_mz_tol(tolerances),
+                  rt_tol_relative = get_rt_tol(tolerances)
+             )
 
-            save_aligned_features(aligned, "$rt_cross_table", "$int_cross_table", "$tolerances")
+             save_aligned_features(aligned_features, '$metadata_file', '$rt_file', '$intensity_file')
         ]]></configfile>
     </configfiles>
 
     <inputs>
-        <param name="ms_files" type="data_collection" collection_type="list" format="mzdata,mzml,mzxml,netcdf"
-               label="Input data collection" help="Mass spectrometry file for peak extraction." />
-        <param name="corrected_files" type="data_collection" collection_type="list" format="parquet"
-               label="Input corrected feature samples collection"
-               help="Mass spectrometry files containing corrected feature samples." />
-        <expand macro="mz_tol_macro"/>
-        <param name="min_exp" type="integer" min="1" value="2" label="min_exp"
-               help="If a feature is to be included in the final feature table, it must be present in at least this number of spectra." />
-        <expand macro="peak_alignment"/>
+        <param name="files" type="data_collection" collection_type="list" format="parquet"
+               label="Clustered features" help="List of tables containing clustered features." />
+        <param label="Input tolerances values" name="input_tolerances" type="data" format="parquet"
+               help="Table containing tolerance values." />
+        <param name="min_occurrence" type="integer" min="2" value="2" label="min_occurrence"
+               help="A feature has to show up in at least this number of profiles to be included in the final result." />
     </inputs>
 
     <outputs>
-        <data name="tolerances" format="parquet" label="${tool.name} on ${on_string} (tolerances)" />
-        <data name="rt_cross_table" format="parquet" label="${tool.name} on ${on_string} (rt cross table)" />
-        <data name="int_cross_table" format="parquet" label="${tool.name} on ${on_string} (int cross table)" />
+        <data name="metadata_file" format="parquet" label="${tool.name} on ${on_string} (metadata table)"/>
+        <data name="rt_file" format="parquet" label="${tool.name} on ${on_string} (rt table)"/>
+        <data name="intensity_file" format="parquet" label="${tool.name} on ${on_string} (intensity table)"/>
     </outputs>
 
     <tests>
-        <test>
-            <param name="ms_files">
-                <collection type="list">
-                    <element name="mbr_test0.mzml" value="mbr_test0.mzml"/>
-                    <element name="mbr_test1.mzml" value="mbr_test1.mzml"/>
-                    <element name="mbr_test2.mzml" value="mbr_test2.mzml"/>
-                </collection>
-            </param>
-            <param name="corrected_files">
-                <collection type="list">
-                    <element name="corrected_features_0.parquet" value="corrected_expected/corrected_0.parquet"/>
-                    <element name="corrected_features_1.parquet" value="corrected_expected/corrected_1.parquet"/>
-                    <element name="corrected_features_2.parquet" value="corrected_expected/corrected_2.parquet"/>
-                </collection>
-            </param>
-            <output name="tolerances" file="tolerances.parquet" ftype="parquet"/>
-            <output name="rt_cross_table" file="rt_cross_table.parquet" ftype="parquet"/>
-            <output name="int_cross_table" file="int_cross_table.parquet" ftype="parquet"/>
-        </test>
+
     </tests>
 
     <help>
         <![CDATA[
-            This is a tool which runs apLCMS alignment of features.
+            @ALIGN_FEATURES_HELP@
 
             @GENERAL_HELP@
         ]]>
author	recetox
date	Mon, 13 Feb 2023 10:26:59 +0000
parents	b07fd3d7ffd0
children	1e2a13bcb5a7