keras_deep_learning.py @ 10:775b004b7920 (draft)

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty

| author   | bgruening                       |
| -------- | ------------------------------- |
| date     | Fri, 09 Aug 2019 07:18:27 -0400 |
| parents  |                                 |
| children | 653be9c354ec                    |
import argparse
import json
import keras
import pandas as pd
import pickle
import six
import warnings

from ast import literal_eval
from keras.models import Sequential, Model
from galaxy_ml.utils import try_get_attr, get_search_params


def _handle_shape(literal):
    """Eval integer or list/tuple of integers from string

    Parameters
    ----------
    literal : str.
    """
    literal = literal.strip()
    if not literal:
        return None
    try:
        return literal_eval(literal)
    except (ValueError, SyntaxError, NameError) as e:
        # literal_eval raises ValueError/SyntaxError (not NameError) for
        # non-literal strings; fall back to returning the raw string.
        print(e)
        return literal
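
# Illustrative behaviour of _handle_shape (editor-added examples, not part of
# the original tool): "(100, 4)" -> (100, 4), "32" -> 32, "" -> None, and a
# string that is not a Python literal is printed and returned unchanged.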


def _handle_regularizer(literal):
    """Construct regularizer from string literal

    Parameters
    ----------
    literal : str. E.g. '(0.1, 0)'
    """
    literal = literal.strip()
    if not literal:
        return None

    l1, l2 = literal_eval(literal)

    if not l1 and not l2:
        return None

    if l1 is None:
        l1 = 0.
    if l2 is None:
        l2 = 0.

    return keras.regularizers.l1_l2(l1=l1, l2=l2)
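
# Illustrative (editor-added) examples: _handle_regularizer("(0.1, 0)") returns
# keras.regularizers.l1_l2(l1=0.1, l2=0.0), while "(0, 0)" and "" return None.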


def _handle_constraint(config):
    """Construct constraint from galaxy tool parameters.
    Assumes the dictionary format is correct.

    Parameters
    ----------
    config : dict. E.g.
        "bias_constraint":
            {"constraint_options":
                {"max_value": 1.0,
                 "min_value": 0.0,
                 "axis": "[0, 1, 2]"
                },
             "constraint_type":
                "MinMaxNorm"
            }
    """
    constraint_type = config['constraint_type']
    if constraint_type == 'None':
        return None

    klass = getattr(keras.constraints, constraint_type)
    options = config.get('constraint_options', {})
    if 'axis' in options:
        options['axis'] = literal_eval(options['axis'])

    return klass(**options)
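
# Illustrative (editor-added) example: a config of
#     {"constraint_type": "MaxNorm",
#      "constraint_options": {"max_value": 2.0, "axis": "0"}}
# yields keras.constraints.MaxNorm(max_value=2.0, axis=0), while
# {"constraint_type": "None"} yields None.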


def _handle_lambda(literal):
    # Lambda/function strings are not evaluated; always return None.
    return None


def _handle_layer_parameters(params):
    """Handle all kinds of layer parameter values
    """
    # iterate over a copy of the items so keys can be popped safely below
    for key, value in list(six.iteritems(params)):
        if value == 'None':
            params[key] = None
            continue

        if type(value) in [int, float, bool]\
                or (type(value) is str and value.isalpha()):
            continue

        if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape',
                   'target_shape', 'dims', 'kernel_size', 'strides',
                   'dilation_rate', 'output_padding', 'cropping', 'size',
                   'padding', 'pool_size', 'axis', 'shared_axes']:
            params[key] = _handle_shape(value)

        elif key.endswith('_regularizer'):
            params[key] = _handle_regularizer(value)

        elif key.endswith('_constraint'):
            params[key] = _handle_constraint(value)

        elif key == 'function':  # No support for lambda/function eval
            params.pop(key)

    return params
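
# Illustrative (editor-added) example: {'units': 32, 'activation': 'relu',
# 'kernel_regularizer': '(0.01, 0)', 'kernel_constraint': 'None'} becomes
# {'units': 32, 'activation': 'relu',
#  'kernel_regularizer': <keras L1L2 regularizer>, 'kernel_constraint': None}.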


def get_sequential_model(config):
    """Construct keras Sequential model from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded from JSON
    """
    model = Sequential()
    input_shape = _handle_shape(config['input_shape'])
    layers = config['layers']
    for layer in layers:
        options = layer['layer_selection']
        layer_type = options.pop('layer_type')
        klass = getattr(keras.layers, layer_type)
        other_options = options.pop('layer_options', {})
        options.update(other_options)

        # parameters need special care
        options = _handle_layer_parameters(options)

        # add input_shape to the first layer only
        if not getattr(model, '_layers') and input_shape is not None:
            options['input_shape'] = input_shape

        model.add(klass(**options))

    return model
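
# Editor-added sketch of the config structure expected by get_sequential_model
# (key names inferred from the code above; the values are only an example):
#     {"input_shape": "(784,)",
#      "layers": [
#          {"layer_selection": {"layer_type": "Dense",
#                               "layer_options": {"units": 32,
#                                                 "activation": "relu"}}},
#          {"layer_selection": {"layer_type": "Dense",
#                               "layer_options": {"units": 10,
#                                                 "activation": "softmax"}}}]}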


def get_functional_model(config):
    """Construct keras functional model from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded from JSON
    """
    layers = config['layers']
    all_layers = []
    for layer in layers:
        options = layer['layer_selection']
        layer_type = options.pop('layer_type')
        klass = getattr(keras.layers, layer_type)
        inbound_nodes = options.pop('inbound_nodes', None)
        other_options = options.pop('layer_options', {})
        options.update(other_options)

        # parameters need special care
        options = _handle_layer_parameters(options)
        # merge layers
        if 'merging_layers' in options:
            idxs = literal_eval(options.pop('merging_layers'))
            merging_layers = [all_layers[i-1] for i in idxs]
            new_layer = klass(**options)(merging_layers)
        # non-input layers
        elif inbound_nodes is not None:
            new_layer = klass(**options)(all_layers[inbound_nodes-1])
        # input layers
        else:
            new_layer = klass(**options)

        all_layers.append(new_layer)

    input_indexes = _handle_shape(config['input_layers'])
    input_layers = [all_layers[i-1] for i in input_indexes]

    output_indexes = _handle_shape(config['output_layers'])
    output_layers = [all_layers[i-1] for i in output_indexes]

    return Model(inputs=input_layers, outputs=output_layers)
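
# Editor-added note: layer indexes in this functional-model config are
# one-based. For example, a layer whose layer_selection carries
# "inbound_nodes": 1 connects to the first layer defined, and
# config["input_layers"] = "[1]" with config["output_layers"] = "[3]" wires
# the first layer as model input and the third as model output.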


def get_batch_generator(config):
    """Construct keras online data generator from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded from JSON
    """
    generator_type = config.pop('generator_type')
    klass = try_get_attr('galaxy_ml.preprocessors', generator_type)

    if generator_type == 'GenomicIntervalBatchGenerator':
        config['ref_genome_path'] = 'to_be_determined'
        config['intervals_path'] = 'to_be_determined'
        config['target_path'] = 'to_be_determined'
        config['features'] = 'to_be_determined'
    else:
        config['fasta_path'] = 'to_be_determined'

    return klass(**config)


def config_keras_model(inputs, outfile):
    """Configure keras model layers and output the model JSON

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_config`
        tool.
    outfile : str
        Path to galaxy dataset containing keras model JSON.
    """
    model_type = inputs['model_selection']['model_type']
    layers_config = inputs['model_selection']

    if model_type == 'sequential':
        model = get_sequential_model(layers_config)
    else:
        model = get_functional_model(layers_config)

    json_string = model.to_json()

    with open(outfile, 'w') as f:
        f.write(json_string)
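
# Editor-added note: inputs['model_selection'] doubles as the layers config,
# so it is expected to carry 'model_type' plus the keys consumed by
# get_sequential_model / get_functional_model ('layers' and either
# 'input_shape' or 'input_layers'/'output_layers').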


def build_keras_model(inputs, outfile, model_json, infile_weights=None,
                      batch_mode=False, outfile_params=None):
    """Build and pickle a model for the `keras_model_builder` tool

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output.
    """
    with open(model_json, 'r') as f:
        json_model = json.load(f)

    config = json_model['config']

    options = {}

    if json_model['class_name'] == 'Sequential':
        options['model_type'] = 'sequential'
        klass = Sequential
    elif json_model['class_name'] == 'Model':
        options['model_type'] = 'functional'
        klass = Model
    else:
        raise ValueError("Unknown Keras model class: %s"
                         % json_model['class_name'])

    # load prefitted model
    if inputs['mode_selection']['mode_type'] == 'prefitted':
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    # build train model
    else:
        cls_name = inputs['mode_selection']['learning_type']
        klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name)

        options['loss'] = (inputs['mode_selection']
                           ['compile_params']['loss'])
        options['optimizer'] = \
            (inputs['mode_selection']['compile_params']
             ['optimizer_selection']['optimizer_type']).lower()

        options.update((inputs['mode_selection']['compile_params']
                        ['optimizer_selection']['optimizer_options']))

        train_metrics = (inputs['mode_selection']['compile_params']
                         ['metrics']).split(',')
        if train_metrics[-1] == 'none':
            train_metrics = train_metrics[:-1]
        options['metrics'] = train_metrics

        options.update(inputs['mode_selection']['fit_params'])
        options['seed'] = inputs['mode_selection']['random_seed']

        if batch_mode:
            generator = get_batch_generator(inputs['mode_selection']
                                            ['generator_selection'])
            options['data_batch_generator'] = generator
            options['prediction_steps'] = \
                inputs['mode_selection']['prediction_steps']
            options['class_positive_factor'] = \
                inputs['mode_selection']['class_positive_factor']
        estimator = klass(config, **options)
        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            for h_param in hyper_params:
                if h_param[1].endswith('verbose'):
                    h_param[0] = '@'
            df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value'])
            df.to_csv(outfile_params, sep='\t', index=False)

    print(repr(estimator))
    # save model by pickle
    with open(outfile, 'wb') as f:
        pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
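
# Editor-added usage sketch (hypothetical path; unpickling requires the same
# galaxy_ml and keras versions that produced the file):
#     with open(outfile, 'rb') as f:
#         estimator = pickle.load(f)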


if __name__ == '__main__':
    warnings.simplefilter('ignore')

    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    aparser.add_argument("-m", "--model_json", dest="model_json")
    aparser.add_argument("-t", "--tool_id", dest="tool_id")
    aparser.add_argument("-w", "--infile_weights", dest="infile_weights")
    aparser.add_argument("-o", "--outfile", dest="outfile")
    aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
    args = aparser.parse_args()

    input_json_path = args.inputs
    with open(input_json_path, 'r') as param_handler:
        inputs = json.load(param_handler)

    tool_id = args.tool_id
    outfile = args.outfile
    outfile_params = args.outfile_params
    model_json = args.model_json
    infile_weights = args.infile_weights

    # for keras_model_config tool
    if tool_id == 'keras_model_config':
        config_keras_model(inputs, outfile)

    # for keras_model_builder tool
    else:
        batch_mode = False
        if tool_id == 'keras_batch_models':
            batch_mode = True

        build_keras_model(inputs=inputs,
                          model_json=model_json,
                          infile_weights=infile_weights,
                          batch_mode=batch_mode,
                          outfile=outfile,
                          outfile_params=outfile_params)
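
# Editor-added example invocations (hypothetical file names, derived from the
# argparse options above):
#     python keras_deep_learning.py -i inputs.json -t keras_model_config \
#         -o model.json
#     python keras_deep_learning.py -i inputs.json -t keras_model_builder \
#         -m model.json -o keras_model.pkl -p search_params.tsv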