comparison keras_deep_learning.py @ 19:28d51b976c29 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:21:31 -0400
parents
children 203b2ade8097
comparison
equal deleted inserted replaced
18:3badea963c3b 19:28d51b976c29
1 import argparse
2 import json
3 import keras
4 import pandas as pd
5 import pickle
6 import six
7 import warnings
8
9 from ast import literal_eval
10 from keras.models import Sequential, Model
11 from galaxy_ml.utils import try_get_attr, get_search_params
12
13
14 def _handle_shape(literal):
15 """Eval integer or list/tuple of integers from string
16
17 Parameters:
18 -----------
19 literal : str.
20 """
21 literal = literal.strip()
22 if not literal:
23 return None
24 try:
25 return literal_eval(literal)
26 except NameError as e:
27 print(e)
28 return literal
29
30
31 def _handle_regularizer(literal):
32 """Construct regularizer from string literal
33
34 Parameters
35 ----------
36 literal : str. E.g. '(0.1, 0)'
37 """
38 literal = literal.strip()
39 if not literal:
40 return None
41
42 l1, l2 = literal_eval(literal)
43
44 if not l1 and not l2:
45 return None
46
47 if l1 is None:
48 l1 = 0.
49 if l2 is None:
50 l2 = 0.
51
52 return keras.regularizers.l1_l2(l1=l1, l2=l2)
53
54
55 def _handle_constraint(config):
56 """Construct constraint from galaxy tool parameters.
57 Suppose correct dictionary format
58
59 Parameters
60 ----------
61 config : dict. E.g.
62 "bias_constraint":
63 {"constraint_options":
64 {"max_value":1.0,
65 "min_value":0.0,
66 "axis":"[0, 1, 2]"
67 },
68 "constraint_type":
69 "MinMaxNorm"
70 }
71 """
72 constraint_type = config['constraint_type']
73 if constraint_type == 'None':
74 return None
75
76 klass = getattr(keras.constraints, constraint_type)
77 options = config.get('constraint_options', {})
78 if 'axis' in options:
79 options['axis'] = literal_eval(options['axis'])
80
81 return klass(**options)
82
83
84 def _handle_lambda(literal):
85 return None
86
87
88 def _handle_layer_parameters(params):
89 """Access to handle all kinds of parameters
90 """
91 for key, value in six.iteritems(params):
92 if value == 'None':
93 params[key] = None
94 continue
95
96 if type(value) in [int, float, bool]\
97 or (type(value) is str and value.isalpha()):
98 continue
99
100 if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape',
101 'target_shape', 'dims', 'kernel_size', 'strides',
102 'dilation_rate', 'output_padding', 'cropping', 'size',
103 'padding', 'pool_size', 'axis', 'shared_axes']:
104 params[key] = _handle_shape(value)
105
106 elif key.endswith('_regularizer'):
107 params[key] = _handle_regularizer(value)
108
109 elif key.endswith('_constraint'):
110 params[key] = _handle_constraint(value)
111
112 elif key == 'function': # No support for lambda/function eval
113 params.pop(key)
114
115 return params
116
117
def get_sequential_model(config):
    """Construct keras Sequential model from Galaxy tool parameters

    Parameters:
    -----------
    config : dictionary, galaxy tool parameters loaded by JSON
        Reads `config['input_shape']` (text) and `config['layers']`
        (list of dicts, each with a 'layer_selection' entry).

    Returns
    -------
    A `Sequential` model with one layer added per entry of
    `config['layers']`, in order.
    """
    model = Sequential()
    input_shape = _handle_shape(config['input_shape'])
    layers = config['layers']
    for position, layer in enumerate(layers):
        options = layer['layer_selection']
        layer_type = options.pop('layer_type')
        klass = getattr(keras.layers, layer_type)
        other_options = options.pop('layer_options', {})
        options.update(other_options)

        # parameters needs special care
        options = _handle_layer_parameters(options)

        # add input_shape to the first layer only; the loop index is
        # used rather than the model's private `_layers` attribute,
        # which is not part of the public keras API
        if position == 0 and input_shape is not None:
            options['input_shape'] = input_shape

        model.add(klass(**options))

    return model
145
146
def get_functional_model(config):
    """Build a keras functional `Model` from Galaxy tool parameters

    Parameters
    -----------
    config : dictionary, galaxy tool parameters loaded by JSON
        Reads `config['layers']`, `config['input_layers']` and
        `config['output_layers']`. Layers are referred to by 1-based
        position in `config['layers']`.
    """
    built = []

    def _select(key):
        # resolve the 1-based indexes stored under `key`
        return [built[i - 1] for i in _handle_shape(config[key])]

    for layer_cfg in config['layers']:
        kwargs = layer_cfg['layer_selection']
        type_name = kwargs.pop('layer_type')
        layer_cls = getattr(keras.layers, type_name)
        parent = kwargs.pop('inbound_nodes', None)
        kwargs.update(kwargs.pop('layer_options', {}))

        # parameters needs special care
        kwargs = _handle_layer_parameters(kwargs)

        if 'merging_layers' in kwargs:
            # merge layer: called on several earlier layers at once
            positions = literal_eval(kwargs.pop('merging_layers'))
            sources = [built[p - 1] for p in positions]
            node = layer_cls(**kwargs)(sources)
        elif parent is not None:
            # regular layer: called on one earlier layer
            node = layer_cls(**kwargs)(built[parent - 1])
        else:
            # input layer: nothing to call it on
            node = layer_cls(**kwargs)

        built.append(node)

    input_layers = _select('input_layers')
    output_layers = _select('output_layers')

    return Model(inputs=input_layers, outputs=output_layers)
187
188
def get_batch_generator(config):
    """Build a keras online data generator from Galaxy tool parameters

    Parameters
    -----------
    config : dictionary, galaxy tool parameters loaded by JSON
        'generator_type' is removed from it and placeholder path
        entries are added before it is passed on as keyword arguments.
    """
    generator_type = config.pop('generator_type')
    generator_cls = try_get_attr('galaxy_ml.preprocessors', generator_type)

    # these arguments are filled with a placeholder value here
    if generator_type == 'GenomicIntervalBatchGenerator':
        pending = ('ref_genome_path', 'intervals_path',
                   'target_path', 'features')
    else:
        pending = ('fasta_path',)

    for name in pending:
        config[name] = 'to_be_determined'

    return generator_cls(**config)
208
209
def config_keras_model(inputs, outfile):
    """ Build the keras model described by the tool parameters and
    write it out as JSON

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_config`
        tool.
    outfile : str
        Path to galaxy dataset containing keras model JSON.
    """
    selection = inputs['model_selection']

    # anything other than 'sequential' is built as a functional model
    builder = (get_sequential_model
               if selection['model_type'] == 'sequential'
               else get_functional_model)

    serialized = builder(selection).to_json()

    with open(outfile, 'w') as handle:
        handle.write(serialized)
233
234
def build_keras_model(inputs, outfile, model_json, infile_weights=None,
                      batch_mode=False, outfile_params=None):
    """ for `keras_model_builder` tool

    Loads a keras model description from JSON, turns it into an
    estimator and pickles the result.

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output.

    Raises
    ------
    ValueError
        If the JSON 'class_name' is neither 'Sequential' nor 'Model'.
    """
    with open(model_json, 'r') as handle:
        model_spec = json.load(handle)

    config = model_spec['config']
    class_name = model_spec['class_name']

    options = {}

    if class_name == 'Sequential':
        options['model_type'] = 'sequential'
        klass = Sequential
    elif class_name == 'Model':
        options['model_type'] = 'functional'
        klass = Model
    else:
        raise ValueError("Unknow Keras model class: %s" % class_name)

    mode = inputs['mode_selection']

    if mode['mode_type'] == 'prefitted':
        # load prefitted model
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    else:
        # build train model
        cls_name = mode['learning_type']
        klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name)

        compile_params = mode['compile_params']
        options['loss'] = compile_params['loss']

        optimizer = compile_params['optimizer_selection']
        options['optimizer'] = optimizer['optimizer_type'].lower()
        options.update(optimizer['optimizer_options'])

        # a trailing 'none' entry is dropped from the metric list
        train_metrics = compile_params['metrics'].split(',')
        if train_metrics[-1] == 'none':
            del train_metrics[-1]
        options['metrics'] = train_metrics

        options.update(mode['fit_params'])
        options['seed'] = mode['random_seed']

        if batch_mode:
            generator = get_batch_generator(mode['generator_selection'])
            options['data_batch_generator'] = generator
            options['prediction_steps'] = mode['prediction_steps']
            options['class_positive_factor'] = mode['class_positive_factor']

        estimator = klass(config, **options)

        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            for h_param in hyper_params:
                if h_param[1].endswith('verbose'):
                    h_param[0] = '@'
            table = pd.DataFrame(hyper_params,
                                 columns=['', 'Parameter', 'Value'])
            table.to_csv(outfile_params, sep='\t', index=False)

    print(repr(estimator))

    # save model by pickle
    with open(outfile, 'wb') as sink:
        pickle.dump(estimator, sink, pickle.HIGHEST_PROTOCOL)
320
321
if __name__ == '__main__':
    # command line entry point shared by the keras galaxy tools
    warnings.simplefilter('ignore')

    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    # the remaining options are all optional and share one shape
    for flag, dest in (("-m", "model_json"),
                       ("-t", "tool_id"),
                       ("-w", "infile_weights"),
                       ("-o", "outfile"),
                       ("-p", "outfile_params")):
        aparser.add_argument(flag, "--" + dest, dest=dest)
    args = aparser.parse_args()

    with open(args.inputs, 'r') as param_handler:
        inputs = json.load(param_handler)

    if args.tool_id == 'keras_model_config':
        # for keras_model_config tool
        config_keras_model(inputs, args.outfile)
    else:
        # for keras_model_builder tool; batch mode is switched on
        # only for the keras_batch_models tool
        build_keras_model(inputs=inputs,
                          model_json=args.model_json,
                          infile_weights=args.infile_weights,
                          batch_mode=args.tool_id == 'keras_batch_models',
                          outfile=args.outfile,
                          outfile_params=args.outfile_params)