Mercurial > repos > goeckslab > ludwig_train
annotate ludwig_experiment.py @ 2:3f587f0e5a6d draft
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 008bc5a8ee2c80f70fc224a23b5d2cd0c8ef1810
| author | goeckslab | 
|---|---|
| date | Fri, 14 Mar 2025 16:49:34 +0000 | 
| parents | 4d12452c5361 | 
| children | 650639a4a75f | 
| rev | line source | 
|---|---|
| 
0
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
1 import json | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
2 import logging | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
3 import os | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
4 import pickle | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
5 import sys | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
6 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
7 from ludwig.experiment import cli | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
8 from ludwig.globals import ( | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
9 DESCRIPTION_FILE_NAME, | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
10 PREDICTIONS_PARQUET_FILE_NAME, | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
11 TEST_STATISTICS_FILE_NAME, | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
12 TRAIN_SET_METADATA_FILE_NAME | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
13 ) | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
14 from ludwig.utils.data_utils import get_split_path | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
15 from ludwig.visualize import get_visualizations_registry | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
16 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
17 from model_unpickler import SafeUnpickler | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
18 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
19 import pandas as pd | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
20 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
21 from utils import ( | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
22 encode_image_to_base64, | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
23 get_html_closing, | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
24 get_html_template | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
25 ) | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
26 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
27 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
logging.basicConfig(level=logging.DEBUG)

LOG = logging.getLogger(__name__)

# Replace the Unpickler class exposed by the pickle module with the
# project's SafeUnpickler (imported from model_unpickler).
pickle.Unpickler = SafeUnpickler

# visualization
# Find the value given for --output_directory on the command line. Both
# "--output_directory PATH" and "--output_directory=PATH" are accepted; a
# trailing flag with no value is ignored instead of raising IndexError.
_OUTPUT_DIRECTORY_FLAG = "--output_directory"
output_directory = None
for ix, arg in enumerate(sys.argv):
    if arg == _OUTPUT_DIRECTORY_FLAG:
        if ix + 1 < len(sys.argv):
            output_directory = sys.argv[ix + 1]
        break
    if arg.startswith(_OUTPUT_DIRECTORY_FLAG + "="):
        output_directory = arg.split("=", 1)[1]
        break

if output_directory is None:
    # Without this fallback os.path.join(None, ...) below raises TypeError
    # as soon as the module is loaded.
    # NOTE(review): "results" is assumed to match the default of Ludwig's
    # own --output_directory option -- confirm against the Ludwig CLI.
    output_directory = "results"

viz_output_directory = os.path.join(output_directory, "visualizations")
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
42 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
43 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
def get_output_feature_name(experiment_dir, output_feature=0):
    """Helper function to extract an output feature name and dataset path.

    Reads the description file (``DESCRIPTION_FILE_NAME``) located in
    ``experiment_dir``.

    :param experiment_dir: Path to the experiment directory
    :param output_feature: position of the output feature in the
        ``config`` -> ``output_features`` list of the description file
    :return: tuple ``(output_feature_name, dataset_path)``. Both items are
        ``None`` when the description file does not exist;
        ``dataset_path`` alone is ``None`` when the description has no
        ``"dataset"`` entry.
    :raises KeyError, IndexError: when the description file exists but
        lacks the requested output feature
    """
    description_file = os.path.join(experiment_dir, DESCRIPTION_FILE_NAME)
    if not os.path.exists(description_file):
        return None, None

    with open(description_file, "rb") as f:
        content = json.load(f)

    output_feature_name = \
        content["config"]["output_features"][output_feature]["name"]
    # The dataset entry is optional: callers already treat a falsy path as
    # "no dataset available", so a missing key must not raise here.
    dataset_path = content.get("dataset")
    return output_feature_name, dataset_path
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
61 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
62 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
def check_file(file_path):
    """Check if the path exists; return None if it doesn't.

    :param file_path: path to look up; ``None`` is treated as a missing
        path instead of raising ``TypeError``
    :return: ``file_path`` unchanged when ``os.path.exists`` reports it,
        otherwise ``None``
    """
    if file_path is None:
        return None
    return file_path if os.path.exists(file_path) else None
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
66 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
67 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
def make_visualizations(ludwig_output_directory_name):
    """Run the listed Ludwig visualizations for one experiment run.

    Input files are looked up under
    ``output_directory/ludwig_output_directory_name``; any that are missing
    are passed to the visualization functions as empty lists or ``None``.
    Plots are requested as PNG files in ``viz_output_directory``. A
    visualization that is not registered or that raises is logged and
    skipped so the remaining ones still run.

    :param ludwig_output_directory_name: name of the experiment run
        directory inside the module-level ``output_directory``
    :return: None
    """
    ludwig_output_directory = os.path.join(
        output_directory,
        ludwig_output_directory_name,
    )
    visualizations = [
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "confidence_thresholding_data_vs_acc_subset",
        "confidence_thresholding_data_vs_acc_subset_per_class",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
        "learning_curves",
    ]

    # Check existence of required files
    training_statistics = check_file(os.path.join(
        ludwig_output_directory,
        "training_statistics.json",
    ))
    test_statistics = check_file(os.path.join(
        ludwig_output_directory,
        TEST_STATISTICS_FILE_NAME,
    ))
    ground_truth_metadata = check_file(os.path.join(
        ludwig_output_directory,
        "model",
        TRAIN_SET_METADATA_FILE_NAME,
    ))
    probabilities = check_file(os.path.join(
        ludwig_output_directory,
        PREDICTIONS_PARQUET_FILE_NAME,
    ))

    output_feature, dataset_path = get_output_feature_name(
        ludwig_output_directory)
    ground_truth = None
    split_file = None
    if dataset_path:
        ground_truth = check_file(dataset_path)
        split_file = check_file(get_split_path(dataset_path))

    # Fall back to the first key of the test statistics when the
    # description file did not yield an output feature name.
    if (not output_feature) and test_statistics:
        with open(test_statistics, "rb") as f:
            content = json.load(f)
        # Default of None keeps an empty statistics file from raising
        # StopIteration; the call below then passes "" as the name.
        output_feature = next(iter(content), None)

    # Build the registry once instead of once per visualization.
    registry = get_visualizations_registry()
    for viz in visualizations:
        try:
            # Looked up inside the try so that a name unknown to the
            # registry is logged and skipped like any other failure.
            viz_func = registry[viz]
            viz_func(
                training_statistics=[training_statistics]
                if training_statistics else [],
                test_statistics=[test_statistics] if test_statistics else [],
                probabilities=[probabilities] if probabilities else [],
                top_n_classes=[0],
                output_feature_name=output_feature if output_feature else "",
                ground_truth_split=2,
                top_k=3,
                ground_truth_metadata=ground_truth_metadata,
                ground_truth=ground_truth,
                split_file=split_file,
                output_directory=viz_output_directory,
                normalize=False,
                file_format="png",
            )
        except Exception as e:
            # Best effort: not every visualization applies to every
            # experiment, so failures are reported and the loop continues.
            LOG.info(f"Visualization: {viz}")
            LOG.info(f"Error: {e}")
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
145 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
146 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
def convert_parquet_to_csv(ludwig_output_directory_name):
    """Write a CSV copy of the run's ``predictions.parquet`` file.

    The Parquet file is looked up in
    ``<output_directory>/<ludwig_output_directory_name>`` and the CSV is
    written beside it as ``predictions_parquet.csv``. Any failure is logged
    and swallowed, so the conversion is best-effort and never raises.
    """
    run_directory = os.path.join(
        output_directory, ludwig_output_directory_name)
    source_path = os.path.join(run_directory, "predictions.parquet")
    target_path = os.path.join(run_directory, "predictions_parquet.csv")

    try:
        # Read and re-serialize in one step; the row index is not exported.
        pd.read_parquet(source_path).to_csv(target_path, index=False)
        LOG.info(f"Converted Parquet to CSV: {target_path}")
    except Exception as e:
        LOG.error(f"Error converting Parquet to CSV: {e}")
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
162 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
163 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
def generate_html_report(title, ludwig_output_directory_name):
    """Build a standalone HTML report embedding the visualization images.

    Every ``.png``/``.jpg`` file found directly in ``viz_output_directory``
    is inlined as a base64 data URI, in sorted file-name order, under a
    "Visualizations" heading. The heading is only emitted when at least one
    image is embedded. The page is written to
    ``<output_directory>/<report_name>_report.html``, where ``report_name``
    is ``title`` lower-cased with spaces replaced by underscores.

    Args:
        title: Page heading; also used to derive the report file name.
        ludwig_output_directory_name: Currently unused by this function;
            kept so existing callers keep working.
    """
    # Local import: only this function needs it.
    from html import escape

    # NOTE: a "Test Statistics" section (json_to_html_table over the
    # TEST_STATISTICS_FILE_NAME JSON of the run directory) used to be
    # rendered here and is currently disabled.

    # A missing visualization directory yields a report without plots
    # instead of aborting report generation.
    if os.path.isdir(viz_output_directory):
        candidates = sorted(os.listdir(viz_output_directory))
    else:
        LOG.warning(
            f"Visualization directory not found: {viz_output_directory}")
        candidates = []

    # Convert visualizations to HTML
    plot_sections = []
    for plot_file in candidates:
        # Pick the MIME type from the extension so JPEG files are not
        # labelled as PNG in the data URI.
        if plot_file.endswith(".png"):
            mime_type = "image/png"
        elif plot_file.endswith(".jpg"):
            mime_type = "image/jpeg"
        else:
            continue

        plot_path = os.path.join(viz_output_directory, plot_file)
        if not os.path.isfile(plot_path):
            continue

        encoded_image = encode_image_to_base64(plot_path)
        heading = escape(os.path.splitext(plot_file)[0])
        plot_sections.append(
            '<div class="plot">'
            f'<h3>{heading}</h3>'
            f'<img src="data:{mime_type};base64,'
            f'{encoded_image}" alt="{escape(plot_file)}">'
            '</div>'
        )

    plots_html = ""
    if plot_sections:
        plots_html = "<h2>Visualizations</h2>" + "".join(plot_sections)

    # Generate the full HTML content
    html_content = "\n".join([
        get_html_template(),
        f"<h1>{escape(title)}</h1>",
        plots_html,
        get_html_closing(),
    ])

    # Save the HTML report
    report_name = title.lower().replace(" ", "_")
    report_path = os.path.join(output_directory, f"{report_name}_report.html")
    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write(html_content)

    LOG.info(f"HTML report generated at: {report_path}")
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
213 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
214 | 
| 
 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 
goeckslab 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    # Run the Ludwig experiment with the arguments given to this script.
    cli(sys.argv[1:])

    # Sub-directory of the output directory that the post-processing steps
    # read from (presumably the run directory created by the experiment
    # CLI; verify against Ludwig's naming).
    ludwig_output_directory_name = "experiment_run"

    # Post-processing, in order: plots, CSV copy of predictions, report.
    for post_step in (make_visualizations, convert_parquet_to_csv):
        post_step(ludwig_output_directory_name)
    generate_html_report("Ludwig Experiment", ludwig_output_directory_name)
