optimise_hyperparameters.py @ 0:9bf25dbe00ad draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
author bgruening
date Wed, 28 Aug 2019 07:19:38 -0400
children 76251d1ccdcc
1 """
2 Find the optimal combination of hyperparameters
3 """
4
5 import numpy as np
6 from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
7
8 from keras.models import Sequential
9 from keras.layers import Dense, GRU, Dropout
10 from keras.layers.embeddings import Embedding
11 from keras.layers.core import SpatialDropout1D
12 from keras.optimizers import RMSprop
13 from keras.callbacks import EarlyStopping
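# note: the keras.layers.embeddings / keras.layers.core import paths above,
# and RMSprop's `lr` argument further down, follow the standalone Keras 2.x
# API in use at the time of this commit; later tf.keras releases moved them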

import utils


class HyperparameterOptimisation:

    def __init__(self):
        """ Init method. """

    def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, class_weights):
        """
        Search the hyperparameter space and return the best combination found
        (test_data and test_labels are accepted but unused here; validation
        comes from a split of the training data)
        """
        l_recurrent_activations = config["activation_recurrent"].split(",")
        l_output_activations = config["activation_output"].split(",")

        # convert items to integer
        l_batch_size = list(map(int, config["batch_size"].split(",")))
        l_embedding_size = list(map(int, config["embedding_size"].split(",")))
        l_units = list(map(int, config["units"].split(",")))

        # convert items to float
        l_learning_rate = list(map(float, config["learning_rate"].split(",")))
        l_dropout = list(map(float, config["dropout"].split(",")))
        l_spatial_dropout = list(map(float, config["spatial_dropout"].split(",")))
        l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(",")))

        optimize_n_epochs = int(config["optimize_n_epochs"])
        validation_split = float(config["validation_share"])

        # get dimensions
        dimensions = len(reverse_dictionary) + 1
        best_model_params = dict()
        early_stopping = EarlyStopping(monitor='val_loss', mode='min', min_delta=1e-4, verbose=1, patience=1)

        # specify the search space for finding the best combination of parameters using Bayesian optimisation
        params = {
            "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1),
            "units": hp.quniform("units", l_units[0], l_units[1], 1),
            "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1),
            "activation_recurrent": hp.choice("activation_recurrent", l_recurrent_activations),
            "activation_output": hp.choice("activation_output", l_output_activations),
            "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])),
            "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]),
            "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]),
            "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1])
        }
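        # note: hp.quniform samples float values even with a step of 1, which is
        # why the integer-valued parameters are cast back with int() below, and
        # hp.loguniform draws the learning rate uniformly on a log scale between
        # the two configured bounds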

        def create_model(params):
            # objective function: build and train a two-layer GRU network with
            # the sampled hyperparameters
            model = Sequential()
            model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True))
            model.add(SpatialDropout1D(params["spatial_dropout"]))
            model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation=params["activation_recurrent"]))
            model.add(Dropout(params["dropout"]))
            model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation=params["activation_recurrent"]))
            model.add(Dropout(params["dropout"]))
            model.add(Dense(dimensions, activation=params["activation_output"]))
            optimizer_rms = RMSprop(lr=params["learning_rate"])
            model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms)
            model_fit = model.fit(
                train_data,
                train_labels,
                batch_size=int(params["batch_size"]),
                epochs=optimize_n_epochs,
                shuffle="batch",
                verbose=2,
                validation_split=validation_split,
                callbacks=[early_stopping]
            )
            # hyperopt minimises the returned loss: the validation loss of the
            # last trained epoch
            return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK}

        # minimize the objective function using the set of parameters above
        trials = Trials()
        learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"]))
        print(learned_params)
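        # note: for hp.choice parameters, fmin returns the index of the chosen
        # option rather than the option itself, hence the lookup into the
        # activation lists below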
        # set the best params with respective values
        for item in learned_params:
            item_val = learned_params[item]
            if item == 'activation_output':
                best_model_params[item] = l_output_activations[item_val]
            elif item == 'activation_recurrent':
                best_model_params[item] = l_recurrent_activations[item_val]
            else:
                best_model_params[item] = item_val
        return best_model_params
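

# a minimal usage sketch (not part of this changeset): the calling tool is
# expected to pass a config of comma-separated ranges together with the encoded
# data matrices prepared elsewhere in the repository; all values below are
# illustrative assumptions, not the tool's actual defaults
if __name__ == "__main__":
    config = {
        "activation_recurrent": "elu,relu",
        "activation_output": "sigmoid,softmax",
        "batch_size": "32,256",
        "embedding_size": "32,512",
        "units": "32,512",
        "learning_rate": "0.0001,0.1",
        "dropout": "0.0,0.5",
        "spatial_dropout": "0.0,0.5",
        "recurrent_dropout": "0.0,0.5",
        "optimize_n_epochs": "5",
        "validation_share": "0.2",
        "max_evals": "20"
    }
    hyper_opt = HyperparameterOptimisation()
    # reverse_dictionary, the train/test matrices and class_weights would come
    # from the tool's data-preparation step; the call is shown commented out
    # because those objects are not constructed in this file
    # best_params = hyper_opt.train_model(config, reverse_dictionary, train_data,
    #                                     train_labels, test_data, test_labels, class_weights)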