Mercurial > repos > ufz > dfpl_predict
view dfpl_predict.xml @ 0:8956e949d466 draft default tip
planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/dfpl commit 66c6acfeff5441c36fba97787ddc5ee3d6a4a6ec
author | ufz |
---|---|
date | Thu, 19 Dec 2024 12:51:08 +0000 |
parents | |
children |
line wrap: on
line source
<tool id="dfpl_predict" name="deepFPlearn predict" version="@TOOL_VERSION@+galaxy0" profile="23.0"> <description>association of molecular structures to biological targets</description> <creator> <organization name="Helmholtz Centre for Environmental Research - UFZ, Research Data Management" url ="https://www.ufz.de/index.php?en=45348"/> </creator> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ set -o pipefail; ln -s '$fnn_weights' model_weights.h5 && ln -s '$autoencoder_weights' encoder_weights.h5 && cat '$inputs' | python '$__tool_directory__/json_flatten.py' | python '$__tool_directory__/json_predict.py' > config.json && dfpl predict --configFile config.json && cp predictions.csv '$outputFile' ]]></command> <configfiles> <inputs name="inputs" data_style="paths"/> </configfiles> <inputs> <section title="Input Data" name="input-data" expanded="true"> <param label="Input File" argument="--inputFile" type="data" format="csv" help="The file containing the data for the prediction in (unquoted) comma-separated CSV format. The column named 'smiles' or 'fp' contains the field to be predicted. Please adjust the type that should be predicted (fp or smile) appropriately. An optional column 'id' is used to assign the outcomes to the original identifiers. If this column is missing, the results are numbered in the order of their appearance in the input file. A header is expected and respective column names are used"/> <param label="Chemical Representation" argument="--type" type="select" optional="true" help="Type of the chemical representation"> <option value="fp" selected="true">fp</option> <option value="smiles">smiles</option> </param> <param label="Fingerprint Type" argument="--fpType" type="select" optional="true" help="The type of fingerprint to be generated/used in input file"> <option value="topological" selected="true">topological</option> <option value="MACCS">MACCS</option> </param> </section> <conditional name="autoencoder"> <param label="Compress Fingerprints with Autoencoder" argument="--compressFeatures" type="select" help="Compress the fingerprints using a trained autoencoder (requires a weights file)"> <option value="true">Compress fingerprints</option> <option value="false">Use raw fingerprints</option> </param> <when value="true"> <param label="Autoencoder Weights" name="autoencoder_weights" type="data" format="h5" optional="false" help="The autoencoder weights as generated by ``dfpl train``"/> <param label="Autoencoder Type" argument="--aeType" type="select" optional="true" help="Autoencoder type, variational or deterministic"> <option value="variational">Variational</option> <option value="deterministic" selected="true">Deterministic</option> </param> </when> <when value="false"> </when> </conditional> <param label="Model Weights" name="fnn_weights" type="data" format="h5" optional="false" help="The model weights as generated by ``dfpl train``"/> </inputs> <outputs> <data name="outputFile" label="${tool.name} on ${on_string}: predicted values" format="csv" /> </outputs> <tests> <!-- TODO add test with https://zenodo.org/records/14409985 .. https://github.com/galaxyproject/galaxy/issues/19346 --> <test> <section name="input-data"> <param name="inputFile" value="S_dataset.csv"/> <param name="type" value="smiles"/> <param name="fpType" value="topological"/> </section> <conditional name="autoencoder"> <param name="compressFeatures" value="true"/> <param name="autoencoder_weights" value="encoder_weights.h5" location="https://zenodo.org/api/records/14514397/files/encoder_weights.h5/content"/> <param name="aeType" value="deterministic"/> </conditional> <param name="fnn_weights" value="model_weights.h5" location="https://zenodo.org/api/records/14514397/files/model_weights.h5/content"/> <output name="outputFile"> <assert_contents> <has_n_lines n="7249"/> <has_n_columns n="10" sep=","/> <has_line n="1" line=",smiles,AR,ER,GR,Aromatase,TR,PPARg,ED,predicted"/> </assert_contents> </output> <assert_stdout> <has_text text="Prediction successful"/> </assert_stdout> </test> </tests> <help><![CDATA[ This tool is the predict mode of `DeepFPLearn <https://github.com/yigbt/deepFPlearn>`_. It's equivalent to running ``dfpl predict`` from the command line. The predict mode uses a model that was trained with ``dfpl train`` to predict the association of molecular structures to a biological target. The input file should be a CSV file with a header. The tool will output the given CSV file with an additional column containing the predicted values. ]]></help> <expand macro="citations"/> </tool>