Mercurial > repos > goeckslab > ludwig_train
annotate ludwig_experiment.py @ 0:f0be10937f5c draft
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
| author | goeckslab | 
|---|---|
| date | Tue, 07 Jan 2025 22:44:09 +0000 | 
| parents | |
| children | 4d12452c5361 | 
| rev | line source | 
|---|---|
| 0 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 1 import json | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 2 import logging | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 3 import os | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 4 import pickle | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 5 import sys | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 6 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 7 from jinja_report import generate_report | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 8 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 9 from ludwig.experiment import cli | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 10 from ludwig.globals import ( | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 11 DESCRIPTION_FILE_NAME, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 12 PREDICTIONS_PARQUET_FILE_NAME, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 13 TEST_STATISTICS_FILE_NAME, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 14 TRAIN_SET_METADATA_FILE_NAME | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 15 ) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 16 from ludwig.utils.data_utils import get_split_path | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 17 from ludwig.visualize import get_visualizations_registry | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 18 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 19 from model_unpickler import SafeUnpickler | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 20 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 21 import pandas as pd | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 22 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 23 from utils import ( | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 24 encode_image_to_base64, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 25 get_html_closing, | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 26 get_html_template | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 27 ) | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 28 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 29 import yaml | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 30 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 31 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
# Configure the root logger so that DEBUG-level records (and above) are
# emitted by this script.
logging.basicConfig(level=logging.DEBUG)

LOG = logging.getLogger(__name__)

# Rebind the Unpickler class exposed by the ``pickle`` module to the
# project's SafeUnpickler (imported from ``model_unpickler``).
# NOTE(review): this rebinding is only seen by code that looks up
# ``pickle.Unpickler`` by attribute; ``pickle.load`` / ``pickle.loads`` use
# the C implementation and do not consult this attribute. Confirm that the
# load paths that matter here actually go through ``pickle.Unpickler``.
pickle.Unpickler = SafeUnpickler
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 37 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
# visualization
def _find_output_directory(argv, default="results"):
    """Return the value given for ``--output_directory`` in *argv*.

    Both ``--output_directory PATH`` and ``--output_directory=PATH`` are
    recognised; the first occurrence wins.

    :param argv: list of command-line arguments to scan
    :param default: value returned when the flag is absent or has no value
    :return: the output directory as a string
    """
    flag = "--output_directory"
    for ix, arg in enumerate(argv):
        if arg == flag:
            # A trailing flag without a value falls back to the default
            # instead of raising IndexError.
            return argv[ix + 1] if ix + 1 < len(argv) else default
        if arg.startswith(flag + "="):
            return arg[len(flag) + 1:] or default
    return default


# The flag is read directly from sys.argv at import time so the path is
# available as a module-level value to the functions below.
# NOTE(review): "results" is believed to match Ludwig's own default for
# --output_directory -- confirm against the installed Ludwig version.
output_directory = _find_output_directory(sys.argv)

viz_output_directory = os.path.join(output_directory, "visualizations")
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 46 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 47 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
def get_output_feature_name(experiment_dir, output_feature=0):
    """Look up an output feature name and the dataset path of an experiment.

    The values are read from the JSON description file
    (``DESCRIPTION_FILE_NAME``) located in *experiment_dir*.

    :param experiment_dir: Path to the experiment directory
    :param output_feature: position of the output feature in the
        ``config.output_features`` list of the description file
    :return: tuple ``(output_feature_name, dataset_path)``. Both are ``None``
        when the description file does not exist; either one is ``None``
        when the corresponding entry is missing from the description file.
    """
    description_file = os.path.join(experiment_dir, DESCRIPTION_FILE_NAME)
    if not os.path.exists(description_file):
        return None, None

    with open(description_file, "rb") as f:
        content = json.load(f)

    # Missing entries are reported as None rather than raising: the caller
    # already treats a falsy name or dataset path as "not available".
    try:
        output_feature_name = \
            content["config"]["output_features"][output_feature]["name"]
    except (KeyError, IndexError):
        output_feature_name = None
    dataset_path = content.get("dataset")
    return output_feature_name, dataset_path
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 65 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 66 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
def check_file(file_path):
    """Return *file_path* if it exists on disk, otherwise ``None``.

    :param file_path: path to test; ``None`` is accepted and treated as a
        missing file
    :return: *file_path* unchanged when it exists, else ``None``
    """
    # os.path.exists(None) raises TypeError, so handle it explicitly.
    if file_path is None:
        return None
    return file_path if os.path.exists(file_path) else None
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 70 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 71 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
def make_visualizations(ludwig_output_directory_name):
    """Run the listed Ludwig visualizations for one experiment run.

    Input artifacts are looked up under
    ``<output_directory>/<ludwig_output_directory_name>``; every
    visualization is asked for PNG output in ``viz_output_directory``.
    Each visualization is attempted independently: a failure is logged and
    the remaining ones still run.

    :param ludwig_output_directory_name: name of the run directory inside
        the module-level ``output_directory``
    """
    ludwig_output_directory = os.path.join(
        output_directory,
        ludwig_output_directory_name,
    )
    visualizations = [
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "confidence_thresholding_data_vs_acc_subset",
        "confidence_thresholding_data_vs_acc_subset_per_class",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
        "learning_curves",
    ]

    # Check existence of required files; each value is a path, or None when
    # the file is absent.
    training_statistics = check_file(os.path.join(
        ludwig_output_directory,
        "training_statistics.json",
    ))
    test_statistics = check_file(os.path.join(
        ludwig_output_directory,
        TEST_STATISTICS_FILE_NAME,
    ))
    ground_truth_metadata = check_file(os.path.join(
        ludwig_output_directory,
        "model",
        TRAIN_SET_METADATA_FILE_NAME,
    ))
    probabilities = check_file(os.path.join(
        ludwig_output_directory,
        PREDICTIONS_PARQUET_FILE_NAME,
    ))

    output_feature, dataset_path = get_output_feature_name(
        ludwig_output_directory)
    ground_truth = None
    split_file = None
    if dataset_path:
        ground_truth = check_file(dataset_path)
        split_file = check_file(get_split_path(dataset_path))

    # Fall back to the first top-level key of the test statistics when the
    # description file did not yield an output feature name.
    if (not output_feature) and test_statistics:
        with open(test_statistics, "rb") as f:
            content = json.load(f)
        # An empty statistics file leaves the name unset instead of raising
        # StopIteration.
        output_feature = next(iter(content), None)

    # The registry does not depend on the loop variable; fetch it once.
    registry = get_visualizations_registry()
    for viz in visualizations:
        try:
            # The lookup is inside the try so that a name missing from the
            # registry is logged and skipped like any other failure.
            viz_func = registry[viz]
            viz_func(
                training_statistics=[training_statistics]
                if training_statistics else [],
                test_statistics=[test_statistics] if test_statistics else [],
                probabilities=[probabilities] if probabilities else [],
                top_n_classes=[0],
                output_feature_name=output_feature if output_feature else "",
                # NOTE(review): 2 presumably selects the test split --
                # confirm against the Ludwig visualization API.
                ground_truth_split=2,
                top_k=3,
                ground_truth_metadata=ground_truth_metadata,
                ground_truth=ground_truth,
                split_file=split_file,
                output_directory=viz_output_directory,
                normalize=False,
                file_format="png",
            )
        except Exception as e:
            # Best effort: not every visualization can be produced for every
            # run, so report the failure and continue with the next one.
            LOG.info("Visualization: %s", viz)
            LOG.info("Error: %s", e)
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 149 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 150 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 151 # report | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
def render_report(
    title: str,
    ludwig_output_directory_name: str,
    show_visualization: bool = True
):
    """Write ``report_config.yml`` and render ``smart_report.html``.

    Both files are created inside the module-level ``output_directory``.
    The visualization list is built from the entries of the module-level
    ``viz_output_directory``; the raw-output list is built from the
    ``.json`` and ``.parquet`` files found in
    ``output_directory/ludwig_output_directory_name``.

    Args:
        title: Title stored under the ``"title"`` key of the report config.
        ludwig_output_directory_name: Name of the sub-directory of
            ``output_directory`` whose files are listed as raw outputs.
        show_visualization: When true, add a ``"visualizations"`` entry
            listing every file of ``viz_output_directory``.

    Raises:
        OSError: If one of the directories cannot be listed or the config
            file cannot be written.
    """
    ludwig_output_directory = os.path.join(
        output_directory,
        ludwig_output_directory_name,
    )
    report_config = {
        "title": title,
    }

    if show_visualization:
        visualizations = []
        for fl in sorted(os.listdir(viz_output_directory)):
            # rpartition never raises, whereas the previous rindex(".")
            # aborted the whole report with ValueError as soon as the
            # directory held an entry without a dot in its name.
            _, dot, extension = fl.rpartition(".")
            if not dot:
                viz_type = "unclassified"
            elif extension == "png":
                viz_type = "image"
            else:
                viz_type = extension
            visualizations.append(
                {
                    "src": f"visualizations/{fl}",
                    "type": viz_type,
                }
            )
        report_config["visualizations"] = visualizations

    # Only JSON and Parquet files are exposed as raw outputs.
    report_config["raw outputs"] = [
        {
            "src": f"{fl}",
            "type": "json" if fl.endswith(".json") else "unclassified",
        }
        for fl in sorted(os.listdir(ludwig_output_directory))
        if fl.endswith((".json", ".parquet"))
    ]

    with open(os.path.join(output_directory, "report_config.yml"), 'w') as fh:
        yaml.safe_dump(report_config, fh)

    report_path = os.path.join(output_directory, "smart_report.html")
    generate_report.main(
        report_config,
        schema={"html_height": 800},
        outfile=report_path,
    )
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 189 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 190 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
def convert_parquet_to_csv(ludwig_output_directory_name):
    """Export ``predictions.parquet`` of a run as ``predictions_parquet.csv``.

    Both files live in ``output_directory/ludwig_output_directory_name``
    (``output_directory`` is a module-level name). The conversion is
    best-effort: any failure is logged as an error and not re-raised.
    """
    run_directory = os.path.join(
        output_directory, ludwig_output_directory_name)
    source_path = os.path.join(run_directory, "predictions.parquet")
    csv_path = os.path.join(run_directory, "predictions_parquet.csv")

    try:
        # Read and write in one chain; the row index is not exported.
        pd.read_parquet(source_path).to_csv(csv_path, index=False)
        LOG.info(f"Converted Parquet to CSV: {csv_path}")
    except Exception as err:
        LOG.error(f"Error converting Parquet to CSV: {err}")
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 206 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 207 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
def generate_html_report(title, ludwig_output_directory_name):
    """Build a self-contained HTML report with the plots embedded inline.

    Every ``.png`` / ``.jpg`` file found in the module-level
    ``viz_output_directory`` is embedded as a base64 ``data:`` URI. The
    report is written to ``output_directory`` as ``<name>_report.html``,
    where ``<name>`` is ``title`` lower-cased with spaces replaced by
    underscores.

    Args:
        title: Heading of the report; also used to derive the file name.
        ludwig_output_directory_name: Not used by this function; kept so
            existing callers keep working.

    Raises:
        OSError: If ``viz_output_directory`` cannot be listed or the report
            file cannot be written.
    """
    # List the directory once so the heading check and the loop below
    # work from the same snapshot.
    entries = sorted(os.listdir(viz_output_directory))

    plot_sections = []
    for plot_file in entries:
        # The data URI must advertise the real image type; JPEG files were
        # previously labelled as image/png.
        if plot_file.endswith(".png"):
            mime_type = "image/png"
        elif plot_file.endswith(".jpg"):
            mime_type = "image/jpeg"
        else:
            continue

        plot_path = os.path.join(viz_output_directory, plot_file)
        if not os.path.isfile(plot_path):
            continue

        encoded_image = encode_image_to_base64(plot_path)
        plot_sections.append(
            f'<div class="plot">'
            f'<h3>{os.path.splitext(plot_file)[0]}</h3>'
            f'<img src="data:{mime_type};base64,'
            f'{encoded_image}" alt="{plot_file}">'
            f'</div>'
        )

    # As before, the heading is emitted whenever the directory has entries.
    plots_html = ""
    if entries:
        plots_html = "<h2>Visualizations</h2>" + "".join(plot_sections)

    # Generate the full HTML content
    html_content = f"""
    {get_html_template()}
    <h1>{title}</h1>
    {plots_html}
    {get_html_closing()}
    """

    # Save the HTML report
    report_name = title.lower().replace(" ", "_")
    report_path = os.path.join(output_directory, f"{report_name}_report.html")
    # Explicit encoding so the output does not depend on the platform locale.
    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write(html_content)

    LOG.info(f"HTML report generated at: {report_path}")
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 257 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
changeset | 258 | 
| 
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
 goeckslab parents: diff
if __name__ == "__main__":
    # Script entry point: run cli() on the command-line arguments, then
    # post-process the run directory.
    # NOTE(review): cli is not defined in the visible part of the file;
    # presumably it is the Ludwig experiment command — confirm.
    cli(sys.argv[1:])

    # Directory name handed to every post-processing step below.
    ludwig_output_directory_name = "experiment_run"

    make_visualizations(ludwig_output_directory_name)
    # render_report() is currently disabled; generate_html_report() is
    # called instead.
    # title = "Ludwig Experiment"
    # render_report(title, ludwig_output_directory_name)
    convert_parquet_to_csv(ludwig_output_directory_name)
    generate_html_report("Ludwig Experiment", ludwig_output_directory_name)
