comparison keras_deep_learning.py @ 37:e76f6dfea5c9 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
author bgruening
date Sat, 01 May 2021 01:16:08 +0000
parents 420a4bf99244
children
comparison
equal deleted inserted replaced
36:420a4bf99244 37:e76f6dfea5c9
8 import pandas as pd 8 import pandas as pd
9 import six 9 import six
10 from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr 10 from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr
11 from keras.models import Model, Sequential 11 from keras.models import Model, Sequential
12 12
13
14 safe_eval = SafeEval() 13 safe_eval = SafeEval()
15 14
16 15
17 def _handle_shape(literal): 16 def _handle_shape(literal):
18 """Eval integer or list/tuple of integers from string 17 """
18 Eval integer or list/tuple of integers from string
19 19
20 Parameters: 20 Parameters:
21 ----------- 21 -----------
22 literal : str. 22 literal : str.
23 """ 23 """
30 print(e) 30 print(e)
31 return literal 31 return literal
32 32
33 33
34 def _handle_regularizer(literal): 34 def _handle_regularizer(literal):
35 """Construct regularizer from string literal 35 """
36 Construct regularizer from string literal
36 37
37 Parameters 38 Parameters
38 ---------- 39 ----------
39 literal : str. E.g. '(0.1, 0)' 40 literal : str. E.g. '(0.1, 0)'
40 """ 41 """
46 47
47 if not l1 and not l2: 48 if not l1 and not l2:
48 return None 49 return None
49 50
50 if l1 is None: 51 if l1 is None:
51 l1 = 0. 52 l1 = 0.0
52 if l2 is None: 53 if l2 is None:
53 l2 = 0. 54 l2 = 0.0
54 55
55 return keras.regularizers.l1_l2(l1=l1, l2=l2) 56 return keras.regularizers.l1_l2(l1=l1, l2=l2)
56 57
57 58
58 def _handle_constraint(config): 59 def _handle_constraint(config):
59 """Construct constraint from galaxy tool parameters. 60 """
61 Construct constraint from galaxy tool parameters.
60 Suppose correct dictionary format 62 Suppose correct dictionary format
61 63
62 Parameters 64 Parameters
63 ---------- 65 ----------
64 config : dict. E.g. 66 config : dict. E.g.
70 }, 72 },
71 "constraint_type": 73 "constraint_type":
72 "MinMaxNorm" 74 "MinMaxNorm"
73 } 75 }
74 """ 76 """
75 constraint_type = config['constraint_type'] 77 constraint_type = config["constraint_type"]
76 if constraint_type in ('None', ''): 78 if constraint_type in ("None", ""):
77 return None 79 return None
78 80
79 klass = getattr(keras.constraints, constraint_type) 81 klass = getattr(keras.constraints, constraint_type)
80 options = config.get('constraint_options', {}) 82 options = config.get("constraint_options", {})
81 if 'axis' in options: 83 if "axis" in options:
82 options['axis'] = literal_eval(options['axis']) 84 options["axis"] = literal_eval(options["axis"])
83 85
84 return klass(**options) 86 return klass(**options)
85 87
86 88
87 def _handle_lambda(literal): 89 def _handle_lambda(literal):
88 return None 90 return None
89 91
90 92
91 def _handle_layer_parameters(params): 93 def _handle_layer_parameters(params):
92 """Access to handle all kinds of parameters 94 """
95 Access to handle all kinds of parameters
93 """ 96 """
94 for key, value in six.iteritems(params): 97 for key, value in six.iteritems(params):
95 if value in ('None', ''): 98 if value in ("None", ""):
96 params[key] = None 99 params[key] = None
97 continue 100 continue
98 101
99 if type(value) in [int, float, bool]\ 102 if type(value) in [int, float, bool] or (
100 or (type(value) is str and value.isalpha()): 103 type(value) is str and value.isalpha()
104 ):
101 continue 105 continue
102 106
103 if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', 107 if (
104 'target_shape', 'dims', 'kernel_size', 'strides', 108 key
105 'dilation_rate', 'output_padding', 'cropping', 'size', 109 in [
106 'padding', 'pool_size', 'axis', 'shared_axes'] \ 110 "input_shape",
107 and isinstance(value, str): 111 "noise_shape",
112 "shape",
113 "batch_shape",
114 "target_shape",
115 "dims",
116 "kernel_size",
117 "strides",
118 "dilation_rate",
119 "output_padding",
120 "cropping",
121 "size",
122 "padding",
123 "pool_size",
124 "axis",
125 "shared_axes",
126 ]
127 and isinstance(value, str)
128 ):
108 params[key] = _handle_shape(value) 129 params[key] = _handle_shape(value)
109 130
110 elif key.endswith('_regularizer') and isinstance(value, dict): 131 elif key.endswith("_regularizer") and isinstance(value, dict):
111 params[key] = _handle_regularizer(value) 132 params[key] = _handle_regularizer(value)
112 133
113 elif key.endswith('_constraint') and isinstance(value, dict): 134 elif key.endswith("_constraint") and isinstance(value, dict):
114 params[key] = _handle_constraint(value) 135 params[key] = _handle_constraint(value)
115 136
116 elif key == 'function': # No support for lambda/function eval 137 elif key == "function": # No support for lambda/function eval
117 params.pop(key) 138 params.pop(key)
118 139
119 return params 140 return params
120 141
121 142
def get_sequential_model(config):
    """
    Construct keras Sequential model from Galaxy tool parameters

    Parameters:
    -----------
    config : dictionary, galaxy tool parameters loaded by JSON
        Reads "input_shape" (parsed by `_handle_shape`) and "layers", a
        list whose items each hold a "layer_selection" dict. That dict
        must contain "layer_type" (a class name looked up on
        `keras.layers`) and may contain "kwargs", a string evaluated as
        `dict(<kwargs>)` through `safe_eval`; its entries override the
        other options. The "layer_selection" dicts are consumed: keys are
        popped and values are converted in place.

    Returns
    -------
    The populated `Sequential` model.
    """
    model = Sequential()
    input_shape = _handle_shape(config["input_shape"])

    for position, layer in enumerate(config["layers"]):
        options = layer["layer_selection"]
        layer_type = options.pop("layer_type")
        klass = getattr(keras.layers, layer_type)
        kwargs = options.pop("kwargs", "")

        # parameters needs special care
        options = _handle_layer_parameters(options)

        if kwargs:
            options.update(safe_eval("dict(" + kwargs + ")"))

        # add input_shape to the first layer only. The loop index is used
        # instead of peeking at the private `model._layers` attribute,
        # which is not part of the public Keras API.
        if position == 0 and input_shape is not None:
            options["input_shape"] = input_shape

        model.add(klass(**options))

    return model
152 174
153 175
154 def get_functional_model(config): 176 def get_functional_model(config):
155 """Construct keras functional model from Galaxy tool parameters 177 """
178 Construct keras functional model from Galaxy tool parameters
156 179
157 Parameters 180 Parameters
158 ----------- 181 -----------
159 config : dictionary, galaxy tool parameters loaded by JSON 182 config : dictionary, galaxy tool parameters loaded by JSON
160 """ 183 """
161 layers = config['layers'] 184 layers = config["layers"]
162 all_layers = [] 185 all_layers = []
163 for layer in layers: 186 for layer in layers:
164 options = layer['layer_selection'] 187 options = layer["layer_selection"]
165 layer_type = options.pop('layer_type') 188 layer_type = options.pop("layer_type")
166 klass = getattr(keras.layers, layer_type) 189 klass = getattr(keras.layers, layer_type)
167 inbound_nodes = options.pop('inbound_nodes', None) 190 inbound_nodes = options.pop("inbound_nodes", None)
168 kwargs = options.pop('kwargs', '') 191 kwargs = options.pop("kwargs", "")
169 192
170 # parameters needs special care 193 # parameters needs special care
171 options = _handle_layer_parameters(options) 194 options = _handle_layer_parameters(options)
172 195
173 if kwargs: 196 if kwargs:
174 kwargs = safe_eval('dict(' + kwargs + ')') 197 kwargs = safe_eval("dict(" + kwargs + ")")
175 options.update(kwargs) 198 options.update(kwargs)
176 199
177 # merge layers 200 # merge layers
178 if 'merging_layers' in options: 201 if "merging_layers" in options:
179 idxs = literal_eval(options.pop('merging_layers')) 202 idxs = literal_eval(options.pop("merging_layers"))
180 merging_layers = [all_layers[i - 1] for i in idxs] 203 merging_layers = [all_layers[i - 1] for i in idxs]
181 new_layer = klass(**options)(merging_layers) 204 new_layer = klass(**options)(merging_layers)
182 # non-input layers 205 # non-input layers
183 elif inbound_nodes is not None: 206 elif inbound_nodes is not None:
184 new_layer = klass(**options)(all_layers[inbound_nodes - 1]) 207 new_layer = klass(**options)(all_layers[inbound_nodes - 1])
186 else: 209 else:
187 new_layer = klass(**options) 210 new_layer = klass(**options)
188 211
189 all_layers.append(new_layer) 212 all_layers.append(new_layer)
190 213
191 input_indexes = _handle_shape(config['input_layers']) 214 input_indexes = _handle_shape(config["input_layers"])
192 input_layers = [all_layers[i - 1] for i in input_indexes] 215 input_layers = [all_layers[i - 1] for i in input_indexes]
193 216
194 output_indexes = _handle_shape(config['output_layers']) 217 output_indexes = _handle_shape(config["output_layers"])
195 output_layers = [all_layers[i - 1] for i in output_indexes] 218 output_layers = [all_layers[i - 1] for i in output_indexes]
196 219
197 return Model(inputs=input_layers, outputs=output_layers) 220 return Model(inputs=input_layers, outputs=output_layers)
198 221
199 222
def get_batch_generator(config):
    """
    Construct keras online data generator from Galaxy tool parameters

    Parameters
    -----------
    config : dictionary, galaxy tool parameters loaded by JSON
        "generator_type" is popped from it. Unless that value is "none",
        placeholder path entries are added and the remaining items are
        passed as keyword arguments to the generator class.

    Returns
    -------
    None when "generator_type" is "none"; otherwise an instance of the
    class of that name resolved from `galaxy_ml.preprocessors`.
    """
    generator_type = config.pop("generator_type")
    if generator_type == "none":
        return None

    klass = try_get_attr("galaxy_ml.preprocessors", generator_type)

    # The real paths are not known here, so every path-like argument is
    # filled with a placeholder string.
    if generator_type == "GenomicIntervalBatchGenerator":
        placeholder_keys = (
            "ref_genome_path",
            "intervals_path",
            "target_path",
            "features",
        )
    else:
        placeholder_keys = ("fasta_path",)

    for name in placeholder_keys:
        config[name] = "to_be_determined"

    return klass(**config)
222 246
223 247
def config_keras_model(inputs, outfile):
    """
    config keras model layers and output JSON

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_config`
        tool. `inputs["model_selection"]` supplies "model_type" and is
        itself passed on to the model builder.
    outfile : str
        Path to galaxy dataset containing keras model JSON.
    """
    is_sequential = inputs["model_selection"]["model_type"] == "sequential"
    selection = inputs["model_selection"]

    # "sequential" selects the Sequential builder; any other value falls
    # back to the functional builder.
    build = get_sequential_model if is_sequential else get_functional_model
    serialized = build(selection).to_json()

    # Re-parse the JSON string so the file is written with indentation.
    with open(outfile, "w") as handle:
        json.dump(json.loads(serialized), handle, indent=2)
247 272
248 273
249 def build_keras_model(inputs, outfile, model_json, infile_weights=None, 274 def build_keras_model(
250 batch_mode=False, outfile_params=None): 275 inputs,
251 """ for `keras_model_builder` tool 276 outfile,
277 model_json,
278 infile_weights=None,
279 batch_mode=False,
280 outfile_params=None,
281 ):
282 """
283 for `keras_model_builder` tool
252 284
253 Parameters 285 Parameters
254 ---------- 286 ----------
255 inputs : dict 287 inputs : dict
256 loaded galaxy tool parameters from `keras_model_builder` tool. 288 loaded galaxy tool parameters from `keras_model_builder` tool.
263 batch_mode : bool, default=False 295 batch_mode : bool, default=False
264 Whether to build online batch classifier. 296 Whether to build online batch classifier.
265 outfile_params : str, default=None 297 outfile_params : str, default=None
266 File path to search parameters output. 298 File path to search parameters output.
267 """ 299 """
268 with open(model_json, 'r') as f: 300 with open(model_json, "r") as f:
269 json_model = json.load(f) 301 json_model = json.load(f)
270 302
271 config = json_model['config'] 303 config = json_model["config"]
272 304
273 options = {} 305 options = {}
274 306
275 if json_model['class_name'] == 'Sequential': 307 if json_model["class_name"] == "Sequential":
276 options['model_type'] = 'sequential' 308 options["model_type"] = "sequential"
277 klass = Sequential 309 klass = Sequential
278 elif json_model['class_name'] == 'Model': 310 elif json_model["class_name"] == "Model":
279 options['model_type'] = 'functional' 311 options["model_type"] = "functional"
280 klass = Model 312 klass = Model
281 else: 313 else:
282 raise ValueError("Unknow Keras model class: %s" 314 raise ValueError("Unknow Keras model class: %s" % json_model["class_name"])
283 % json_model['class_name'])
284 315
285 # load prefitted model 316 # load prefitted model
286 if inputs['mode_selection']['mode_type'] == 'prefitted': 317 if inputs["mode_selection"]["mode_type"] == "prefitted":
287 estimator = klass.from_config(config) 318 estimator = klass.from_config(config)
288 estimator.load_weights(infile_weights) 319 estimator.load_weights(infile_weights)
289 # build train model 320 # build train model
290 else: 321 else:
291 cls_name = inputs['mode_selection']['learning_type'] 322 cls_name = inputs["mode_selection"]["learning_type"]
292 klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name) 323 klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name)
293 324
294 options['loss'] = (inputs['mode_selection'] 325 options["loss"] = inputs["mode_selection"]["compile_params"]["loss"]
295 ['compile_params']['loss']) 326 options["optimizer"] = (
296 options['optimizer'] =\ 327 inputs["mode_selection"]["compile_params"]["optimizer_selection"][
297 (inputs['mode_selection']['compile_params'] 328 "optimizer_type"
298 ['optimizer_selection']['optimizer_type']).lower() 329 ]
299 330 ).lower()
300 options.update((inputs['mode_selection']['compile_params'] 331
301 ['optimizer_selection']['optimizer_options'])) 332 options.update(
302 333 (
303 train_metrics = inputs['mode_selection']['compile_params']['metrics'] 334 inputs["mode_selection"]["compile_params"]["optimizer_selection"][
304 if train_metrics[-1] == 'none': 335 "optimizer_options"
336 ]
337 )
338 )
339
340 train_metrics = inputs["mode_selection"]["compile_params"]["metrics"]
341 if train_metrics[-1] == "none":
305 train_metrics = train_metrics[:-1] 342 train_metrics = train_metrics[:-1]
306 options['metrics'] = train_metrics 343 options["metrics"] = train_metrics
307 344
308 options.update(inputs['mode_selection']['fit_params']) 345 options.update(inputs["mode_selection"]["fit_params"])
309 options['seed'] = inputs['mode_selection']['random_seed'] 346 options["seed"] = inputs["mode_selection"]["random_seed"]
310 347
311 if batch_mode: 348 if batch_mode:
312 generator = get_batch_generator(inputs['mode_selection'] 349 generator = get_batch_generator(
313 ['generator_selection']) 350 inputs["mode_selection"]["generator_selection"]
314 options['data_batch_generator'] = generator 351 )
315 options['prediction_steps'] = \ 352 options["data_batch_generator"] = generator
316 inputs['mode_selection']['prediction_steps'] 353 options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"]
317 options['class_positive_factor'] = \ 354 options["class_positive_factor"] = inputs["mode_selection"][
318 inputs['mode_selection']['class_positive_factor'] 355 "class_positive_factor"
356 ]
319 estimator = klass(config, **options) 357 estimator = klass(config, **options)
320 if outfile_params: 358 if outfile_params:
321 hyper_params = get_search_params(estimator) 359 hyper_params = get_search_params(estimator)
322 # TODO: remove this after making `verbose` tunable 360 # TODO: remove this after making `verbose` tunable
323 for h_param in hyper_params: 361 for h_param in hyper_params:
324 if h_param[1].endswith('verbose'): 362 if h_param[1].endswith("verbose"):
325 h_param[0] = '@' 363 h_param[0] = "@"
326 df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value']) 364 df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"])
327 df.to_csv(outfile_params, sep='\t', index=False) 365 df.to_csv(outfile_params, sep="\t", index=False)
328 366
329 print(repr(estimator)) 367 print(repr(estimator))
330 # save model by pickle 368 # save model by pickle
331 with open(outfile, 'wb') as f: 369 with open(outfile, "wb") as f:
332 pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) 370 pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
333 371
334 372
335 if __name__ == '__main__': 373 if __name__ == "__main__":
336 warnings.simplefilter('ignore') 374 warnings.simplefilter("ignore")
337 375
338 aparser = argparse.ArgumentParser() 376 aparser = argparse.ArgumentParser()
339 aparser.add_argument("-i", "--inputs", dest="inputs", required=True) 377 aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
340 aparser.add_argument("-m", "--model_json", dest="model_json") 378 aparser.add_argument("-m", "--model_json", dest="model_json")
341 aparser.add_argument("-t", "--tool_id", dest="tool_id") 379 aparser.add_argument("-t", "--tool_id", dest="tool_id")
343 aparser.add_argument("-o", "--outfile", dest="outfile") 381 aparser.add_argument("-o", "--outfile", dest="outfile")
344 aparser.add_argument("-p", "--outfile_params", dest="outfile_params") 382 aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
345 args = aparser.parse_args() 383 args = aparser.parse_args()
346 384
347 input_json_path = args.inputs 385 input_json_path = args.inputs
348 with open(input_json_path, 'r') as param_handler: 386 with open(input_json_path, "r") as param_handler:
349 inputs = json.load(param_handler) 387 inputs = json.load(param_handler)
350 388
351 tool_id = args.tool_id 389 tool_id = args.tool_id
352 outfile = args.outfile 390 outfile = args.outfile
353 outfile_params = args.outfile_params 391 outfile_params = args.outfile_params
354 model_json = args.model_json 392 model_json = args.model_json
355 infile_weights = args.infile_weights 393 infile_weights = args.infile_weights
356 394
357 # for keras_model_config tool 395 # for keras_model_config tool
358 if tool_id == 'keras_model_config': 396 if tool_id == "keras_model_config":
359 config_keras_model(inputs, outfile) 397 config_keras_model(inputs, outfile)
360 398
361 # for keras_model_builder tool 399 # for keras_model_builder tool
362 else: 400 else:
363 batch_mode = False 401 batch_mode = False
364 if tool_id == 'keras_batch_models': 402 if tool_id == "keras_batch_models":
365 batch_mode = True 403 batch_mode = True
366 404
367 build_keras_model(inputs=inputs, 405 build_keras_model(
368 model_json=model_json, 406 inputs=inputs,
369 infile_weights=infile_weights, 407 model_json=model_json,
370 batch_mode=batch_mode, 408 infile_weights=infile_weights,
371 outfile=outfile, 409 batch_mode=batch_mode,
372 outfile_params=outfile_params) 410 outfile=outfile,
411 outfile_params=outfile_params,
412 )