Mercurial > repos > goeckslab > ludwig_train
annotate ludwig_render_config.py @ 1:4d12452c5361 draft
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit a341ff5627ef7a39489a7f377d96017fb3f42efb
| author | goeckslab | 
|---|---|
| date | Thu, 13 Mar 2025 16:43:12 +0000 | 
| parents | f0be10937f5c | 
| children | 650639a4a75f | 
| rev | line source | 
|---|---|
| 0 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 1 import json | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 2 import logging | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 3 import sys | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 4 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 5 from ludwig.constants import ( | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 6 COMBINER, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 7 HYPEROPT, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 8 INPUT_FEATURES, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 9 MODEL_TYPE, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 10 OUTPUT_FEATURES, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 11 PROC_COLUMN, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 12 TRAINER, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 13 ) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 14 from ludwig.schema.model_types.utils import merge_with_defaults | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 15 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 16 import yaml | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 17 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 18 logging.basicConfig(level=logging.DEBUG) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 19 LOG = logging.getLogger(__name__) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 20 inputs = sys.argv[1] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 21 with open(inputs, 'r') as handler: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 22 params = json.load(handler) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 23 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 24 config = {} | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 25 # input features | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 26 config[INPUT_FEATURES] = [] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 27 for ftr in params[INPUT_FEATURES]['input_feature']: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 28 config[INPUT_FEATURES].append(ftr['input_feature_selector']) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 29 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 30 # output features | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 31 config[OUTPUT_FEATURES] = [] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 32 for ftr in params[OUTPUT_FEATURES]['output_feature']: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 33 config[OUTPUT_FEATURES].append(ftr['output_feature_selector']) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 34 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 35 # combiner | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 36 config[COMBINER] = params[COMBINER] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 37 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 38 # training | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 39 config[TRAINER] = params[TRAINER][TRAINER] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 40 config[MODEL_TYPE] = config[TRAINER].pop(MODEL_TYPE) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 41 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 42 # hyperopt | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 43 if params[HYPEROPT]['do_hyperopt'] == 'true': | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 44 config[HYPEROPT] = params[HYPEROPT][HYPEROPT] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 45 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 46 with open('./pre_config.yml', 'w') as f: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 47 yaml.safe_dump(config, f, allow_unicode=True, default_flow_style=False) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 48 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 49 output = sys.argv[2] | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 50 output_config = merge_with_defaults(config) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 51 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 52 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 53 def clean_proc_column(config: dict) -> None: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 54 for ftr in config[INPUT_FEATURES]: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 55 ftr.pop(PROC_COLUMN, None) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 56 for ftr in config[OUTPUT_FEATURES]: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 57 ftr.pop(PROC_COLUMN, None) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 58 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 59 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 60 clean_proc_column(output_config) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 61 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 62 with open(output, "w") as f: | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 63 yaml.safe_dump(output_config, f, sort_keys=False) | 
