diff optimise_hyperparameters.py @ 5:4f7e6612906b draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
author:    bgruening
date:      Fri, 06 May 2022 09:05:18 +0000
parents:   afec8c595124
children:  (none)
--- a/optimise_hyperparameters.py	Tue Jul 07 03:25:49 2020 -0400
+++ b/optimise_hyperparameters.py	Fri May 06 09:05:18 2022 +0000
@@ -3,24 +3,29 @@
 """
 import numpy as np
-from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
-
-from keras.models import Sequential
-from keras.layers import Dense, GRU, Dropout
-from keras.layers.embeddings import Embedding
-from keras.layers.core import SpatialDropout1D
-from keras.optimizers import RMSprop
-from keras.callbacks import EarlyStopping
-
 import utils
+from hyperopt import fmin, hp, STATUS_OK, tpe, Trials
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.layers import Dense, Dropout, Embedding, GRU, SpatialDropout1D
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import RMSprop
 
 
 class HyperparameterOptimisation:
-
     def __init__(self):
         """
         Init method.
         """
 
-    def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, tool_tr_samples, class_weights):
+    def train_model(
+        self,
+        config,
+        reverse_dictionary,
+        train_data,
+        train_labels,
+        test_data,
+        test_labels,
+        tool_tr_samples,
+        class_weights,
+    ):
         """
         Train a model and report accuracy
         """
@@ -40,52 +45,101 @@
         # get dimensions
         dimensions = len(reverse_dictionary) + 1
         best_model_params = dict()
-        early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=1e-1, restore_best_weights=True)
+        early_stopping = EarlyStopping(
+            monitor="val_loss",
+            mode="min",
+            verbose=1,
+            min_delta=1e-1,
+            restore_best_weights=True,
+        )
         # specify the search space for finding the best combination of parameters using Bayesian optimisation
         params = {
-            "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1),
+            "embedding_size": hp.quniform(
+                "embedding_size", l_embedding_size[0], l_embedding_size[1], 1
+            ),
             "units": hp.quniform("units", l_units[0], l_units[1], 1),
-            "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1),
-            "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])),
+            "batch_size": hp.quniform(
+                "batch_size", l_batch_size[0], l_batch_size[1], 1
+            ),
+            "learning_rate": hp.loguniform(
+                "learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])
+            ),
             "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]),
-            "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]),
-            "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1])
+            "spatial_dropout": hp.uniform(
+                "spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]
+            ),
+            "recurrent_dropout": hp.uniform(
+                "recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1]
+            ),
         }
 
         def create_model(params):
             model = Sequential()
-            model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True))
+            model.add(
+                Embedding(dimensions, int(params["embedding_size"]), mask_zero=True)
+            )
             model.add(SpatialDropout1D(params["spatial_dropout"]))
-            model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation="elu"))
+            model.add(
+                GRU(
+                    int(params["units"]),
+                    dropout=params["dropout"],
+                    recurrent_dropout=params["recurrent_dropout"],
+                    return_sequences=True,
+                    activation="elu",
+                )
+            )
             model.add(Dropout(params["dropout"]))
-            model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation="elu"))
+            model.add(
+                GRU(
+                    int(params["units"]),
+                    dropout=params["dropout"],
+                    recurrent_dropout=params["recurrent_dropout"],
+                    return_sequences=False,
+                    activation="elu",
+                )
+            )
             model.add(Dropout(params["dropout"]))
             model.add(Dense(2 * dimensions, activation="sigmoid"))
             optimizer_rms = RMSprop(lr=params["learning_rate"])
             batch_size = int(params["batch_size"])
-            model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms)
+            model.compile(
+                loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms
+            )
             print(model.summary())
-            model_fit = model.fit_generator(
+            model_fit = model.fit(
                 utils.balanced_sample_generator(
                     train_data,
                     train_labels,
                     batch_size,
                     tool_tr_samples,
-                    reverse_dictionary
+                    reverse_dictionary,
                 ),
                 steps_per_epoch=len(train_data) // batch_size,
                 epochs=optimize_n_epochs,
                 callbacks=[early_stopping],
                 validation_data=(test_data, test_labels),
                 verbose=2,
-                shuffle=True
+                shuffle=True,
             )
-            return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK, 'model': model}
+            return {
+                "loss": model_fit.history["val_loss"][-1],
+                "status": STATUS_OK,
+                "model": model,
+            }
+
         # minimize the objective function using the set of parameters above
         trials = Trials()
-        learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"]))
-        best_model = trials.results[np.argmin([r['loss'] for r in trials.results])]['model']
+        learned_params = fmin(
+            create_model,
+            params,
+            trials=trials,
+            algo=tpe.suggest,
+            max_evals=int(config["max_evals"]),
+        )
+        best_model = trials.results[np.argmin([r["loss"] for r in trials.results])][
+            "model"
+        ]
         # set the best params with respective values
         for item in learned_params:
             item_val = learned_params[item]
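
The heart of this file is hyperopt's Bayesian optimisation loop: create_model is the objective function, params is the search space, and fmin with tpe.suggest drives the search. Below is a minimal, self-contained sketch of that pattern; the toy quadratic objective and the numeric ranges are illustrative stand-ins for the tool's model and its configured limits.

import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

search_space = {
    # quniform samples floats on a unit grid, which is why integer
    # hyperparameters are cast with int() inside the objective above.
    "units": hp.quniform("units", 32, 512, 1),
    # loguniform spreads samples evenly across orders of magnitude,
    # a good fit for learning rates.
    "learning_rate": hp.loguniform("learning_rate", np.log(1e-4), np.log(1e-1)),
    "dropout": hp.uniform("dropout", 0.0, 0.5),
}


def objective(params):
    units = int(params["units"])  # grid-sampled float -> int
    # Stand-in for "train the model, read the final validation loss".
    val_loss = 1e-6 * (units - 128) ** 2 + params["dropout"]
    # Extra keys are kept verbatim in trials.results; storing the trained
    # model here is how the diff recovers the best network afterwards.
    return {"loss": val_loss, "status": STATUS_OK, "model": None}


trials = Trials()
learned_params = fmin(
    objective, search_space, algo=tpe.suggest, max_evals=25, trials=trials
)
# fmin returns only the winning parameter values; the full result dict
# (and anything stored in it) is fetched via the smallest recorded loss.
best = trials.results[int(np.argmin([r["loss"] for r in trials.results]))]
print(learned_params, best["loss"])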
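
The commit also ports the script from standalone Keras to tensorflow.keras and swaps the deprecated fit_generator() for fit(), which in TF 2.x accepts Python generators directly. Here is a runnable sketch of that migration, assuming TF 2.x; the tiny network and random data are placeholders, not the tool's GRU model. (RMSprop's lr argument, which the commit keeps, is likewise a deprecated alias of learning_rate in tensorflow.keras.)

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

x_val = np.random.rand(16, 4)
y_val = np.random.rand(16, 1)


def batch_generator(batch_size=8):
    # Endless generator paced by steps_per_epoch, playing the role of
    # utils.balanced_sample_generator in the diff.
    while True:
        yield np.random.rand(batch_size, 4), np.random.rand(batch_size, 1)


model = Sequential([Dense(8, activation="relu", input_shape=(4,)), Dense(1)])
model.compile(loss="mse", optimizer="rmsprop")
history = model.fit(  # was: model.fit_generator(...)
    batch_generator(),
    steps_per_epoch=4,
    epochs=2,
    validation_data=(x_val, y_val),
    verbose=2,
)
print(history.history["val_loss"])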
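
Finally, the model is compiled with utils.weighted_loss(class_weights), a loss factory whose body is not part of this diff. A hypothetical sketch of such a closure, assuming per-class weighted binary cross-entropy over the model's multi-label sigmoid output; the repository's actual implementation may differ.

import tensorflow as tf


def weighted_loss(class_weights):
    # Hypothetical implementation; class_weights is assumed to map each
    # class index to a float weight, covering every output unit.
    weights = tf.constant(
        [class_weights[i] for i in sorted(class_weights)], dtype=tf.float32
    )

    def loss(y_true, y_pred):
        # Per-class binary cross-entropy, scaled by the class weights.
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)
        bce = -(
            y_true * tf.math.log(y_pred)
            + (1.0 - y_true) * tf.math.log(1.0 - y_pred)
        )
        return tf.reduce_mean(weights * bce, axis=-1)

    return loss

Compiling with, say, model.compile(loss=weighted_loss({0: 1.0, 1: 2.5}), optimizer="rmsprop") would then penalise errors on class 1 more heavily, which is the point of passing class_weights into the objective.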