Mercurial > repos > bgruening > create_tool_recommendation_model
diff optimise_hyperparameters.py @ 3:5b3c08710e47 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
author | bgruening |
---|---|
date | Sat, 09 May 2020 05:38:23 -0400 |
parents | 76251d1ccdcc |
children | afec8c595124 |
line wrap: on
line diff
--- a/optimise_hyperparameters.py Fri Oct 11 18:24:54 2019 -0400 +++ b/optimise_hyperparameters.py Sat May 09 05:38:23 2020 -0400 @@ -17,18 +17,13 @@ class HyperparameterOptimisation: - @classmethod def __init__(self): """ Init method. """ - @classmethod - def train_model(self, config, reverse_dictionary, train_data, train_labels, class_weights): + def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, l_tool_tr_samples, class_weights): """ Train a model and report accuracy """ - l_recurrent_activations = config["activation_recurrent"].split(",") - l_output_activations = config["activation_output"].split(",") - # convert items to integer l_batch_size = list(map(int, config["batch_size"].split(","))) l_embedding_size = list(map(int, config["embedding_size"].split(","))) @@ -41,20 +36,17 @@ l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(","))) optimize_n_epochs = int(config["optimize_n_epochs"]) - validation_split = float(config["validation_share"]) # get dimensions dimensions = len(reverse_dictionary) + 1 best_model_params = dict() - early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=1e-4) + early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=1e-1, restore_best_weights=True) # specify the search space for finding the best combination of parameters using Bayesian optimisation params = { "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1), "units": hp.quniform("units", l_units[0], l_units[1], 1), "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1), - "activation_recurrent": hp.choice("activation_recurrent", l_recurrent_activations), - "activation_output": hp.choice("activation_output", l_output_activations), "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])), "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]), "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]), @@ -65,36 +57,36 @@ model = Sequential() model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True)) model.add(SpatialDropout1D(params["spatial_dropout"])) - model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation=params["activation_recurrent"])) + model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation="elu")) + model.add(Dropout(params["dropout"])) + model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation="elu")) model.add(Dropout(params["dropout"])) - model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation=params["activation_recurrent"])) - model.add(Dropout(params["dropout"])) - model.add(Dense(dimensions, activation=params["activation_output"])) + model.add(Dense(2 * dimensions, activation="sigmoid")) optimizer_rms = RMSprop(lr=params["learning_rate"]) + batch_size = int(params["batch_size"]) model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms) - model_fit = model.fit( - train_data, - train_labels, - batch_size=int(params["batch_size"]), + print(model.summary()) + model_fit = model.fit_generator( + utils.balanced_sample_generator( + train_data, + train_labels, + batch_size, + l_tool_tr_samples + ), + steps_per_epoch=len(train_data) // batch_size, epochs=optimize_n_epochs, - shuffle="batch", + callbacks=[early_stopping], + validation_data=(test_data, test_labels), verbose=2, - validation_split=validation_split, - callbacks=[early_stopping] + shuffle=True ) return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK, 'model': model} # minimize the objective function using the set of parameters above trials = Trials() learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"])) best_model = trials.results[np.argmin([r['loss'] for r in trials.results])]['model'] - # set the best params with respective values for item in learned_params: item_val = learned_params[item] - if item == 'activation_output': - best_model_params[item] = l_output_activations[item_val] - elif item == 'activation_recurrent': - best_model_params[item] = l_recurrent_activations[item_val] - else: - best_model_params[item] = item_val + best_model_params[item] = item_val return best_model_params, best_model