Mercurial > repos > ufz > dfpl_predict

diff dfpl_predict.xml @ 0:8956e949d466 draft default tip
planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/dfpl commit 66c6acfeff5441c36fba97787ddc5ee3d6a4a6ec
author: ufz
date: Thu, 19 Dec 2024 12:51:08 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dfpl_predict.xml	Thu Dec 19 12:51:08 2024 +0000
@@ -0,0 +1,116 @@
+<tool id="dfpl_predict" name="deepFPlearn predict" version="@TOOL_VERSION@+galaxy0" profile="23.0">
+    <description>association of molecular structures to biological targets</description>
+    <!-- Organisation credited as the creator of this wrapper. -->
+    <creator>
+        <organization url="https://www.ufz.de/index.php?en=45348"
+                      name="Helmholtz Centre for Environmental Research - UFZ, Research Data Management"/>
+    </creator>
+    <!-- macros.xml is the only import, so it has to supply the "requirements" macro expanded below. -->
+    <macros><import>macros.xml</import></macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+    ## Stage the weights under fixed names, build config.json from the Galaxy inputs file, run dfpl, publish the CSV.
+    ## encoder_weights.h5 is linked only when compression is enabled: the dataset parameter exists only in that branch.
+    set -o pipefail;
+    ln -s '$fnn_weights' model_weights.h5 &&
+    #if str($autoencoder.compressFeatures) == 'true'
+        ln -s '$autoencoder.autoencoder_weights' encoder_weights.h5 &&
+    #end if
+    cat '$inputs' | python '$__tool_directory__/json_flatten.py' | python '$__tool_directory__/json_predict.py' > config.json &&
+    dfpl predict --configFile config.json && cp predictions.csv '$outputFile'
+    ]]></command>
+    <configfiles> <!-- parameter file that the command reads via '$inputs' and pipes into json_flatten.py -->
+        <inputs name="inputs" data_style="paths"/>
+    </configfiles>
+    <inputs>
+        <section title="Input Data" name="input-data" expanded="true">
+            <param label="Input File" argument="--inputFile"
+                   type="data" format="csv"
+                   help="The file containing the data for the prediction in (unquoted) comma-separated CSV format.
+                       The column named 'smiles' or 'fp' contains the field to be predicted.
+                       Please adjust the type that should be predicted (fp or smiles) appropriately.
+                       An optional column 'id' is used to assign the outcomes to the original identifiers.
+                       If this column is missing, the results are numbered in the order of their appearance in the input file.
+                       A header is expected and respective column names are used."/>
+            <param label="Chemical Representation" argument="--type"
+                   type="select" optional="true"
+                   help="Type of the chemical representation">
+                <option value="fp" selected="true">fp</option>
+                <option value="smiles">smiles</option>
+            </param>
+            <param label="Fingerprint Type" argument="--fpType"
+                   type="select" optional="true"
+                   help="The type of fingerprint to be generated/used in input file">
+                <option value="topological" selected="true">topological</option>
+                <option value="MACCS">MACCS</option>
+            </param>
+        </section>
+        <!-- The weights dataset and the autoencoder type are only defined in the "true" branch of this conditional. -->
+        <conditional name="autoencoder">
+            <param label="Compress Fingerprints with Autoencoder" argument="--compressFeatures"
+                   type="select"
+                   help="Compress the fingerprints using a trained autoencoder (requires a weights file)">
+                <option value="true">Compress fingerprints</option>
+                <option value="false">Use raw fingerprints</option>
+            </param>
+            <when value="true">
+                <param label="Autoencoder Weights" name="autoencoder_weights"
+                       type="data" format="h5" optional="false"
+                       help="The autoencoder weights as generated by ``dfpl train``"/>
+                <param label="Autoencoder Type" argument="--aeType"
+                       type="select" optional="true"
+                       help="Autoencoder type, variational or deterministic">
+                    <option value="variational">Variational</option>
+                    <option value="deterministic" selected="true">Deterministic</option>
+                </param>
+            </when>
+            <when value="false"/>
+        </conditional>
+        <param label="Model Weights" name="fnn_weights"
+               type="data" format="h5" optional="false"
+               help="The model weights as generated by ``dfpl train``"/>
+    </inputs>
+    <outputs>
+        <!-- Receives predictions.csv, which the command copies to this dataset once dfpl has succeeded. -->
+        <data format="csv" name="outputFile"
+              label="${tool.name} on ${on_string}: predicted values"/>
+    </outputs>
+    <tests>
+        <!-- TODO add test with https://zenodo.org/records/14409985 .. https://github.com/galaxyproject/galaxy/issues/19346 -->
+        <test> <!-- SMILES input compressed with a deterministic autoencoder; both weight files are given remote locations. -->
+            <section name="input-data">
+                <param name="inputFile" value="S_dataset.csv"/>
+                <param name="type" value="smiles"/>
+                <param name="fpType" value="topological"/>
+            </section>
+            <conditional name="autoencoder">
+                <param name="compressFeatures" value="true"/>
+                <param name="autoencoder_weights" value="encoder_weights.h5"
+                       location="https://zenodo.org/api/records/14514397/files/encoder_weights.h5/content"/>
+                <param name="aeType" value="deterministic"/>
+            </conditional>
+            <param name="fnn_weights" value="model_weights.h5"
+                   location="https://zenodo.org/api/records/14514397/files/model_weights.h5/content"/>
+            <output name="outputFile">
+                <assert_contents>
+                    <has_n_lines n="7249"/>
+                    <has_n_columns n="10" sep=","/>
+                    <has_line n="1" line=",smiles,AR,ER,GR,Aromatase,TR,PPARg,ED,predicted"/>
+                </assert_contents>
+            </output>
+            <assert_stdout><has_text text="Prediction successful"/></assert_stdout>
+        </test>
+    </tests>
+    <help><![CDATA[
+    This tool is the predict mode of `deepFPlearn <https://github.com/yigbt/deepFPlearn>`_.
+    It's equivalent to running ``dfpl predict`` from the command line.
+
+    The predict mode uses a model that was trained with ``dfpl train`` to predict
+    the association of molecular structures to a biological target.
+
+    The input file should be a CSV file with a header.
+
+    The tool will output the given CSV file with an additional column containing the predicted values.
+    ]]></help>
+    <expand macro="citations"/> <!-- citation entries are expected to be defined by the imported macros.xml -->
+</tool>