# HG changeset patch # User ufz # Date 1734612681 0 # Node ID e0bb949eac4593adff5b315c94edc416f8f85911 planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/dfpl commit 66c6acfeff5441c36fba97787ddc5ee3d6a4a6ec diff -r 000000000000 -r e0bb949eac45 dfpl_train.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dfpl_train.xml Thu Dec 19 12:51:21 2024 +0000 @@ -0,0 +1,301 @@ + + model to predict association of molecular structures to biological targets + + macros.xml + + + config.json && + mkdir -p 'autoencoder' && + mkdir -p 'model' && + dfpl train --configFile config.json && + cp 'autoencoder/encoder_weights.h5' '$output_autoencoder_weights' && + cp 'model/${model_configuration.target}/model_weights.h5' '$output_model_weights' + ]]> + + + + +
+ + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + +
+ + + + + + + +
+
+ + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + +
+
+ `_. + It's equivalent to running ``dfpl train`` from the command line. + + The train mode is used to train models to predict the association of molecular structures to biological targets. + The encoding of the molecules is done based on molecular fingerprints. + + The training data contains three targets and you may train models for each with this tool. + + The tool will generate the following outputs: + + - the trained models as a ``.zip`` file including + + - the weights of the trained FNN, if selected + + - the weights of the trained autoencoder, if selected + + - the training histories as tabular data (``.csv``) + + - the training histories as a plot (``.svg``) + ]]> + +
diff -r 000000000000 -r e0bb949eac45 json_flatten.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/json_flatten.py Thu Dec 19 12:51:21 2024 +0000 @@ -0,0 +1,21 @@ +import json +from sys import stdin + +d = json.load(stdin) + + +def flatten(o: dict): + d_flat = {} + for key, value in o.items(): + if type(value) is dict: + value = flatten(value) + for k, v in value.items(): + d_flat[k] = v + else: + d_flat[key] = value + return d_flat + + +d = flatten(d) + +print(json.dumps(d)) diff -r 000000000000 -r e0bb949eac45 json_predict.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/json_predict.py Thu Dec 19 12:51:21 2024 +0000 @@ -0,0 +1,22 @@ +import json +from sys import stdin + +d = json.load(stdin) + +# The directory where the full model of the fnn is loaded from. +d["fnnModelDir"] = "" # 'dfpl predict' looks for "model_weights.h5" in this directory +del d["fnn_weights"] + +d["compressFeatures"] = bool(d["compressFeatures"] == "true") + +# The encoder file where it is loaded from, to compress the fingerprints. +d["ecModelDir"] = "" +d["ecWeightsFile"] = "encoder_weights.h5" + +# Output csv file name which will contain one prediction per input line. +# Default: prefix of input file name. +d["outputFile"] = "predictions.csv" + +d["py/object"] = "dfpl.options.Options" + +print(json.dumps(d)) diff -r 000000000000 -r e0bb949eac45 json_train.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/json_train.py Thu Dec 19 12:51:21 2024 +0000 @@ -0,0 +1,14 @@ +import json +from sys import stdin + +d = json.load(stdin) + +d["py/object"] = "dfpl.options.Options" +d["outputDir"] = "./model/" +d["ecModelDir"] = "./autoencoder/" + +#