Mercurial > repos > ufz > dfpl_predict
diff dfpl_predict.xml @ 0:8956e949d466 draft default tip
planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/dfpl commit 66c6acfeff5441c36fba97787ddc5ee3d6a4a6ec
author | ufz |
---|---|
date | Thu, 19 Dec 2024 12:51:08 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dfpl_predict.xml Thu Dec 19 12:51:08 2024 +0000 @@ -0,0 +1,116 @@
<!--
    Galaxy wrapper for the predict mode of deepFPlearn ("dfpl predict").

    Inputs:  a CSV file with the molecules, the trained model weights and,
             when fingerprint compression is enabled, the autoencoder weights.
    Output:  the predictions.csv file written by dfpl, exposed as "outputFile".
-->
<tool id="dfpl_predict" name="deepFPlearn predict" version="@TOOL_VERSION@+galaxy0" profile="23.0">
    <description>association of molecular structures to biological targets</description>
    <creator>
        <organization name="Helmholtz Centre for Environmental Research - UFZ, Research Data Management"
                      url="https://www.ufz.de/index.php?en=45348"/>
    </creator>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command outline:
          1. link the weight datasets into the working directory under fixed names;
          2. pipe the Galaxy inputs JSON through json_flatten.py and json_predict.py
             and store the result as config.json;
          3. run dfpl predict with that config file;
          4. copy predictions.csv to the Galaxy output dataset.

        The autoencoder weights parameter only exists in the "true" branch of the
        "autoencoder" conditional. It is therefore linked only in that branch and
        referenced by its fully qualified name; referencing it unconditionally makes
        the template fail when "Use raw fingerprints" is selected.

        NOTE(review): the fixed link names are presumably the ones json_predict.py
        writes into config.json; confirm against that script.
    -->
    <command detect_errors="exit_code"><![CDATA[
        set -o pipefail;
        ln -s '$fnn_weights' model_weights.h5 &&
        #if str($autoencoder.compressFeatures) == "true"
            ln -s '$autoencoder.autoencoder_weights' encoder_weights.h5 &&
        #end if
        cat '$inputs'
        | python '$__tool_directory__/json_flatten.py'
        | python '$__tool_directory__/json_predict.py'
        > config.json &&
        dfpl predict --configFile config.json &&
        cp predictions.csv '$outputFile'
    ]]></command>
    <!-- JSON dump of all tool parameters; datasets are represented by their file paths. -->
    <configfiles>
        <inputs name="inputs" data_style="paths"/>
    </configfiles>
    <inputs>
        <section title="Input Data" name="input-data" expanded="true">
            <param label="Input File" argument="--inputFile"
                   type="data" format="csv"
                   help="The file containing the data for the prediction in (unquoted) comma-separated CSV format.
                         The column named 'smiles' or 'fp' contains the field to be predicted.
                         Please adjust the type that should be predicted (fp or smile) appropriately.
                         An optional column 'id' is used to assign the outcomes to the original identifiers.
                         If this column is missing, the results are numbered in the order of their appearance in the input file.
                         A header is expected and respective column names are used"/>
            <param label="Chemical Representation" argument="--type"
                   type="select" optional="true"
                   help="Type of the chemical representation">
                <option value="fp" selected="true">fp</option>
                <option value="smiles">smiles</option>
            </param>
            <param label="Fingerprint Type" argument="--fpType"
                   type="select" optional="true"
                   help="The type of fingerprint to be generated/used in input file">
                <option value="topological" selected="true">topological</option>
                <option value="MACCS">MACCS</option>
            </param>
        </section>
        <!-- The autoencoder weights and type are only requested when compression is enabled. -->
        <conditional name="autoencoder">
            <param label="Compress Fingerprints with Autoencoder" argument="--compressFeatures"
                   type="select"
                   help="Compress the fingerprints using a trained autoencoder (requires a weights file)">
                <option value="true">Compress fingerprints</option>
                <option value="false">Use raw fingerprints</option>
            </param>
            <when value="true">
                <param label="Autoencoder Weights" name="autoencoder_weights"
                       type="data" format="h5" optional="false"
                       help="The autoencoder weights as generated by ``dfpl train``"/>
                <param label="Autoencoder Type" argument="--aeType"
                       type="select" optional="true"
                       help="Autoencoder type, variational or deterministic">
                    <option value="variational">Variational</option>
                    <option value="deterministic" selected="true">Deterministic</option>
                </param>
            </when>
            <when value="false"/>
        </conditional>
        <param label="Model Weights" name="fnn_weights"
               type="data" format="h5" optional="false"
               help="The model weights as generated by ``dfpl train``"/>
    </inputs>
    <outputs>
        <data name="outputFile"
              label="${tool.name} on ${on_string}: predicted values"
              format="csv"/>
    </outputs>
    <tests>
        <!-- TODO add test with https://zenodo.org/records/14409985 ..
             https://github.com/galaxyproject/galaxy/issues/19346 -->
        <!-- Prediction from SMILES with a deterministic autoencoder; weights are fetched from Zenodo. -->
        <test>
            <section name="input-data">
                <param name="inputFile" value="S_dataset.csv"/>
                <param name="type" value="smiles"/>
                <param name="fpType" value="topological"/>
            </section>
            <conditional name="autoencoder">
                <param name="compressFeatures" value="true"/>
                <param name="autoencoder_weights" value="encoder_weights.h5" location="https://zenodo.org/api/records/14514397/files/encoder_weights.h5/content"/>
                <param name="aeType" value="deterministic"/>
            </conditional>
            <param name="fnn_weights" value="model_weights.h5" location="https://zenodo.org/api/records/14514397/files/model_weights.h5/content"/>
            <output name="outputFile">
                <assert_contents>
                    <has_n_lines n="7249"/>
                    <has_n_columns n="10" sep=","/>
                    <has_line n="1" line=",smiles,AR,ER,GR,Aromatase,TR,PPARg,ED,predicted"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text text="Prediction successful"/>
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[
        This tool is the predict mode of `DeepFPLearn <https://github.com/yigbt/deepFPlearn>`_.
        It's equivalent to running ``dfpl predict`` from the command line.

        The predict mode uses a model that was trained with ``dfpl train`` to predict
        the association of molecular structures to a biological target.

        The input file should be a CSV file with a header.

        The tool will output the given CSV file with an additional column containing the predicted values.
    ]]></help>
    <expand macro="citations"/>
</tool>