# HG changeset patch
# User ufz
# Date 1734612681 0
# Node ID e0bb949eac4593adff5b315c94edc416f8f85911
planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/dfpl commit 66c6acfeff5441c36fba97787ddc5ee3d6a4a6ec
diff -r 000000000000 -r e0bb949eac45 dfpl_train.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dfpl_train.xml Thu Dec 19 12:51:21 2024 +0000
@@ -0,0 +1,301 @@
+
+ model to predict association of molecular structures to biological targets
+
+ macros.xml
+
+
+ config.json &&
+ mkdir -p 'autoencoder' &&
+ mkdir -p 'model' &&
+ dfpl train --configFile config.json &&
+ cp 'autoencoder/encoder_weights.h5' '$output_autoencoder_weights' &&
+ cp 'model/${model_configuration.target}/model_weights.h5' '$output_model_weights'
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_.
+ It's equivalent to running ``dfpl train`` from the command line.
+
+ The train mode trains models that predict the association of molecular structures with biological targets.
+ The molecules are encoded using molecular fingerprints.
+
+ The training data contains three targets, and this tool can train a separate model for each of them.
+
+ The tool will generate the following outputs:
+
+ - the trained models as a ``.zip`` file including
+
+ - the weights of the trained FNN, if selected
+
+ - the weights of the trained autoencoder, if selected
+
+ - the training histories as tabular data (``.csv``)
+
+ - the training histories as a plot (``.svg``)
+ ]]>
+
+
diff -r 000000000000 -r e0bb949eac45 json_flatten.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/json_flatten.py Thu Dec 19 12:51:21 2024 +0000
@@ -0,0 +1,25 @@
+"""Flatten a nested JSON object read from stdin and print it to stdout."""
+import json
+from sys import stdin
+
+
+def flatten(o: dict):
+    """Collapse nested dicts in ``o`` into a single-level dict.
+
+    Non-dict values keep their own key. For a dict value the parent key is
+    dropped and the child entries are hoisted to the top level (recursively).
+    When two entries end up with the same key, the one visited last wins.
+    """
+    d_flat = {}
+    for key, value in o.items():
+        # isinstance() also matches dict subclasses, not only plain dict.
+        if isinstance(value, dict):
+            d_flat.update(flatten(value))
+        else:
+            d_flat[key] = value
+    return d_flat
+
+
+if __name__ == "__main__":
+    # Guarded so that importing this module does not block reading stdin.
+    print(json.dumps(flatten(json.load(stdin))))
diff -r 000000000000 -r e0bb949eac45 json_predict.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/json_predict.py Thu Dec 19 12:51:21 2024 +0000
@@ -0,0 +1,26 @@
+import json
+from sys import stdin
+
+options = json.load(stdin)
+
+# Directory from which the full FNN model is loaded; 'dfpl predict' looks for
+# "model_weights.h5" in this directory.
+options["fnnModelDir"] = ""
+del options["fnn_weights"]
+
+# The incoming value is compared against the string "true" to obtain a bool.
+options["compressFeatures"] = options["compressFeatures"] == "true"
+
+options.update(
+    {
+        # Directory and file name of the encoder that compresses the fingerprints.
+        "ecModelDir": "",
+        "ecWeightsFile": "encoder_weights.h5",
+        # Output csv file name; the file will contain one prediction per
+        # input line. Default: prefix of the input file name.
+        "outputFile": "predictions.csv",
+        "py/object": "dfpl.options.Options",
+    }
+)
+
+print(json.dumps(options))
diff -r 000000000000 -r e0bb949eac45 json_train.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/json_train.py Thu Dec 19 12:51:21 2024 +0000
@@ -0,0 +1,14 @@
+import json
+from sys import stdin
+
+d = json.load(stdin)
+
+d["py/object"] = "dfpl.options.Options"
+d["outputDir"] = "./model/"
+d["ecModelDir"] = "./autoencoder/"
+
+#