Mercurial > repos > bgruening > create_tool_recommendation_model
Comparison view of optimise_hyperparameters.py @ revision 0:9bf25dbe00ad (draft)
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
author:   bgruening
date:     Wed, 28 Aug 2019 07:19:38 -0400
parents:  (none)
children: 76251d1ccdcc
Comparison legend: equal | deleted | inserted | replaced
Changeset: -1:000000000000 -> 0:9bf25dbe00ad
1 """ | |
2 Find the optimal combination of hyperparameters | |
3 """ | |
4 | |
5 import numpy as np | |
6 from hyperopt import fmin, tpe, hp, STATUS_OK, Trials | |
7 | |
8 from keras.models import Sequential | |
9 from keras.layers import Dense, GRU, Dropout | |
10 from keras.layers.embeddings import Embedding | |
11 from keras.layers.core import SpatialDropout1D | |
12 from keras.optimizers import RMSprop | |
13 from keras.callbacks import EarlyStopping | |
14 | |
15 import utils | |
16 | |
17 | |
class HyperparameterOptimisation:
    """Find the best combination of model hyperparameters.

    Uses hyperopt's Tree-structured Parzen Estimator (TPE) to minimise the
    validation loss of a two-layer GRU sequence model over the search space
    described by ``config``.
    """

    @classmethod
    def __init__(self):
        """Init method (no instance state; decorator kept for backward compatibility)."""

    @classmethod
    def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, class_weights):
        """
        Run Bayesian hyperparameter optimisation and return the best parameters.

        Parameters
        ----------
        config : dict
            Configuration whose range-valued entries are comma-separated
            "low,high" strings (e.g. ``config["units"] == "32,512"``) and whose
            choice-valued entries are comma-separated alternatives.
        reverse_dictionary : dict
            Index -> tool-name mapping; its size (+1 for padding) fixes the
            vocabulary dimension of the embedding and output layers.
        train_data, train_labels : array-like
            Training sequences and their (multi-)labels.
        test_data, test_labels : array-like
            Held-out data; currently unused here (validation comes from
            ``validation_split``) but kept for interface compatibility.
        class_weights : dict
            Per-class weights consumed by ``utils.weighted_loss``.

        Returns
        -------
        dict
            Best value for each hyperparameter, with choice parameters mapped
            back from hyperopt's index to the actual string value and
            integer-valued parameters cast to ``int``.
        """
        l_recurrent_activations = config["activation_recurrent"].split(",")
        l_output_activations = config["activation_output"].split(",")

        # convert range items to integer
        l_batch_size = list(map(int, config["batch_size"].split(",")))
        l_embedding_size = list(map(int, config["embedding_size"].split(",")))
        l_units = list(map(int, config["units"].split(",")))

        # convert range items to float
        l_learning_rate = list(map(float, config["learning_rate"].split(",")))
        l_dropout = list(map(float, config["dropout"].split(",")))
        l_spatial_dropout = list(map(float, config["spatial_dropout"].split(",")))
        l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(",")))

        optimize_n_epochs = int(config["optimize_n_epochs"])
        validation_split = float(config["validation_share"])

        # vocabulary size (+1 so index 0 can act as the padding/mask token)
        dimensions = len(reverse_dictionary) + 1
        best_model_params = dict()

        # specify the search space for finding the best combination of parameters using Bayesian optimisation
        params = {
            "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1),
            "units": hp.quniform("units", l_units[0], l_units[1], 1),
            "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1),
            "activation_recurrent": hp.choice("activation_recurrent", l_recurrent_activations),
            "activation_output": hp.choice("activation_output", l_output_activations),
            # log-uniform so the search is uniform over orders of magnitude
            "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])),
            "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]),
            "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]),
            "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1])
        }

        def create_model(params):
            """Objective function: build, train and score one candidate model."""
            # NOTE: a fresh EarlyStopping is created per trial. The callback is
            # stateful (tracks wait/best across fit calls), so sharing a single
            # instance across trials — as the original code did — leaks state
            # from one trial into the next and can stop later trials too early.
            early_stopping = EarlyStopping(monitor='val_loss', mode='min', min_delta=1e-4, verbose=1, patience=1)
            model = Sequential()
            model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True))
            model.add(SpatialDropout1D(params["spatial_dropout"]))
            model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation=params["activation_recurrent"]))
            model.add(Dropout(params["dropout"]))
            model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation=params["activation_recurrent"]))
            model.add(Dropout(params["dropout"]))
            model.add(Dense(dimensions, activation=params["activation_output"]))
            optimizer_rms = RMSprop(lr=params["learning_rate"])
            model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms)
            model_fit = model.fit(
                train_data,
                train_labels,
                batch_size=int(params["batch_size"]),
                epochs=optimize_n_epochs,
                shuffle="batch",
                verbose=2,
                validation_split=validation_split,
                callbacks=[early_stopping]
            )
            # hyperopt minimises this value: the final validation loss
            return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK}

        # minimize the objective function using the set of parameters above
        trials = Trials()
        learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"]))
        print(learned_params)

        # map hyperopt's raw results back to usable values:
        # - hp.choice returns the index of the chosen alternative
        # - hp.quniform returns floats (e.g. 512.0) even for integer params
        integer_params = ("embedding_size", "units", "batch_size")
        for item in learned_params:
            item_val = learned_params[item]
            if item == 'activation_output':
                best_model_params[item] = l_output_activations[item_val]
            elif item == 'activation_recurrent':
                best_model_params[item] = l_recurrent_activations[item_val]
            elif item in integer_params:
                best_model_params[item] = int(item_val)
            else:
                best_model_params[item] = item_val
        return best_model_params