comparison optimise_hyperparameters.py @ 3:5b3c08710e47 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
author bgruening
date Sat, 09 May 2020 05:38:23 -0400
parents 76251d1ccdcc
children afec8c595124
comparison
equal deleted inserted replaced
2:76251d1ccdcc 3:5b3c08710e47
15 import utils 15 import utils
16 16
17 17
18 class HyperparameterOptimisation: 18 class HyperparameterOptimisation:
19 19
20 @classmethod
21 def __init__(self): 20 def __init__(self):
22 """ Init method. """ 21 """ Init method. """
23 22
24 @classmethod 23 def train_model(self, config, reverse_dictionary, train_data, train_labels, test_data, test_labels, l_tool_tr_samples, class_weights):
25 def train_model(self, config, reverse_dictionary, train_data, train_labels, class_weights):
26 """ 24 """
27 Train a model and report accuracy 25 Train a model and report accuracy
28 """ 26 """
29 l_recurrent_activations = config["activation_recurrent"].split(",")
30 l_output_activations = config["activation_output"].split(",")
31
32 # convert items to integer 27 # convert items to integer
33 l_batch_size = list(map(int, config["batch_size"].split(","))) 28 l_batch_size = list(map(int, config["batch_size"].split(",")))
34 l_embedding_size = list(map(int, config["embedding_size"].split(","))) 29 l_embedding_size = list(map(int, config["embedding_size"].split(",")))
35 l_units = list(map(int, config["units"].split(","))) 30 l_units = list(map(int, config["units"].split(",")))
36 31
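37 # convert items to float 32 # convert items to float
38 l_learning_rate = list(map(float, config["learning_rate"].split(","))) 33 l_learning_rate = list(map(float, config["learning_rate"].split(",")))
39 l_dropout = list(map(float, config["dropout"].split(","))) 34 l_dropout = list(map(float, config["dropout"].split(",")))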
40 l_spatial_dropout = list(map(float, config["spatial_dropout"].split(","))) 35 l_spatial_dropout = list(map(float, config["spatial_dropout"].split(",")))
41 l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(","))) 36 l_recurrent_dropout = list(map(float, config["recurrent_dropout"].split(",")))
42 37
43 optimize_n_epochs = int(config["optimize_n_epochs"]) 38 optimize_n_epochs = int(config["optimize_n_epochs"])
44 validation_split = float(config["validation_share"])
45 39
46 # get dimensions 40 # get dimensions
47 dimensions = len(reverse_dictionary) + 1 41 dimensions = len(reverse_dictionary) + 1
48 best_model_params = dict() 42 best_model_params = dict()
49 early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=1e-4) 43 early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, min_delta=1e-1, restore_best_weights=True)
50 44
51 # specify the search space for finding the best combination of parameters using Bayesian optimisation 45 # specify the search space for finding the best combination of parameters using Bayesian optimisation
52 params = { 46 params = {
53 "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1), 47 "embedding_size": hp.quniform("embedding_size", l_embedding_size[0], l_embedding_size[1], 1),
54 "units": hp.quniform("units", l_units[0], l_units[1], 1), 48 "units": hp.quniform("units", l_units[0], l_units[1], 1),
55 "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1), 49 "batch_size": hp.quniform("batch_size", l_batch_size[0], l_batch_size[1], 1),
56 "activation_recurrent": hp.choice("activation_recurrent", l_recurrent_activations),
57 "activation_output": hp.choice("activation_output", l_output_activations),
58 "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])), 50 "learning_rate": hp.loguniform("learning_rate", np.log(l_learning_rate[0]), np.log(l_learning_rate[1])),
59 "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]), 51 "dropout": hp.uniform("dropout", l_dropout[0], l_dropout[1]),
60 "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]), 52 "spatial_dropout": hp.uniform("spatial_dropout", l_spatial_dropout[0], l_spatial_dropout[1]),
61 "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1]) 53 "recurrent_dropout": hp.uniform("recurrent_dropout", l_recurrent_dropout[0], l_recurrent_dropout[1])
62 } 54 }
63 55
64 def create_model(params): 56 def create_model(params):
65 model = Sequential() 57 model = Sequential()
66 model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True)) 58 model.add(Embedding(dimensions, int(params["embedding_size"]), mask_zero=True))
67 model.add(SpatialDropout1D(params["spatial_dropout"])) 59 model.add(SpatialDropout1D(params["spatial_dropout"]))
68 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation=params["activation_recurrent"])) 60 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=True, activation="elu"))
69 model.add(Dropout(params["dropout"])) 61 model.add(Dropout(params["dropout"]))
70 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation=params["activation_recurrent"])) 62 model.add(GRU(int(params["units"]), dropout=params["dropout"], recurrent_dropout=params["recurrent_dropout"], return_sequences=False, activation="elu"))
71 model.add(Dropout(params["dropout"])) 63 model.add(Dropout(params["dropout"]))
72 model.add(Dense(dimensions, activation=params["activation_output"])) 64 model.add(Dense(2 * dimensions, activation="sigmoid"))
73 optimizer_rms = RMSprop(lr=params["learning_rate"]) 65 optimizer_rms = RMSprop(lr=params["learning_rate"])
66 batch_size = int(params["batch_size"])
74 model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms) 67 model.compile(loss=utils.weighted_loss(class_weights), optimizer=optimizer_rms)
75 model_fit = model.fit( 68 print(model.summary())
76 train_data, 69 model_fit = model.fit_generator(
77 train_labels, 70 utils.balanced_sample_generator(
78 batch_size=int(params["batch_size"]), 71 train_data,
72 train_labels,
73 batch_size,
74 l_tool_tr_samples
75 ),
76 steps_per_epoch=len(train_data) // batch_size,
79 epochs=optimize_n_epochs, 77 epochs=optimize_n_epochs,
80 shuffle="batch", 78 callbacks=[early_stopping],
79 validation_data=(test_data, test_labels),
81 verbose=2, 80 verbose=2,
82 validation_split=validation_split, 81 shuffle=True
83 callbacks=[early_stopping]
84 ) 82 )
85 return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK, 'model': model} 83 return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK, 'model': model}
86 # minimize the objective function using the set of parameters above 84 # minimize the objective function using the set of parameters above
87 trials = Trials() 85 trials = Trials()
88 learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"])) 86 learned_params = fmin(create_model, params, trials=trials, algo=tpe.suggest, max_evals=int(config["max_evals"]))
89 best_model = trials.results[np.argmin([r['loss'] for r in trials.results])]['model'] 87 best_model = trials.results[np.argmin([r['loss'] for r in trials.results])]['model']
90
91 # set the best params with respective values 88 # set the best params with respective values
92 for item in learned_params: 89 for item in learned_params:
93 item_val = learned_params[item] 90 item_val = learned_params[item]
94 if item == 'activation_output': 91 best_model_params[item] = item_val
95 best_model_params[item] = l_output_activations[item_val]
96 elif item == 'activation_recurrent':
97 best_model_params[item] = l_recurrent_activations[item_val]
98 else:
99 best_model_params[item] = item_val
100 return best_model_params, best_model 92 return best_model_params, best_model