diff keras_deep_learning.py @ 11:caf7d2b71a48 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
author bgruening
date Sat, 01 May 2021 01:47:26 +0000
parents a9e0b963b7bb
children 2eb5c017958d
line wrap: on
line diff
--- a/keras_deep_learning.py	Tue Apr 13 22:04:06 2021 +0000
+++ b/keras_deep_learning.py	Sat May 01 01:47:26 2021 +0000
@@ -10,12 +10,12 @@
 from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr
 from keras.models import Model, Sequential
 
-
 safe_eval = SafeEval()
 
 
 def _handle_shape(literal):
-    """Eval integer or list/tuple of integers from string
+    """
+    Eval integer or list/tuple of integers from string
 
     Parameters:
     -----------
@@ -32,7 +32,8 @@
 
 
 def _handle_regularizer(literal):
-    """Construct regularizer from string literal
+    """
+    Construct regularizer from string literal
 
     Parameters
     ----------
@@ -48,15 +49,16 @@
         return None
 
     if l1 is None:
-        l1 = 0.
+        l1 = 0.0
     if l2 is None:
-        l2 = 0.
+        l2 = 0.0
 
     return keras.regularizers.l1_l2(l1=l1, l2=l2)
 
 
 def _handle_constraint(config):
-    """Construct constraint from galaxy tool parameters.
+    """
+    Construct constraint from galaxy tool parameters.
     Suppose correct dictionary format
 
     Parameters
@@ -72,14 +74,14 @@
                 "MinMaxNorm"
             }
     """
-    constraint_type = config['constraint_type']
-    if constraint_type in ('None', ''):
+    constraint_type = config["constraint_type"]
+    if constraint_type in ("None", ""):
         return None
 
     klass = getattr(keras.constraints, constraint_type)
-    options = config.get('constraint_options', {})
-    if 'axis' in options:
-        options['axis'] = literal_eval(options['axis'])
+    options = config.get("constraint_options", {})
+    if "axis" in options:
+        options["axis"] = literal_eval(options["axis"])
 
     return klass(**options)
 
@@ -89,62 +91,82 @@
 
 
 def _handle_layer_parameters(params):
-    """Access to handle all kinds of parameters
+    """
+    Access to handle all kinds of parameters
     """
     for key, value in six.iteritems(params):
-        if value in ('None', ''):
+        if value in ("None", ""):
             params[key] = None
             continue
 
-        if type(value) in [int, float, bool]\
-                or (type(value) is str and value.isalpha()):
+        if type(value) in [int, float, bool] or (
+            type(value) is str and value.isalpha()
+        ):
             continue
 
-        if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape',
-                   'target_shape', 'dims', 'kernel_size', 'strides',
-                   'dilation_rate', 'output_padding', 'cropping', 'size',
-                   'padding', 'pool_size', 'axis', 'shared_axes'] \
-                and isinstance(value, str):
+        if (
+            key
+            in [
+                "input_shape",
+                "noise_shape",
+                "shape",
+                "batch_shape",
+                "target_shape",
+                "dims",
+                "kernel_size",
+                "strides",
+                "dilation_rate",
+                "output_padding",
+                "cropping",
+                "size",
+                "padding",
+                "pool_size",
+                "axis",
+                "shared_axes",
+            ]
+            and isinstance(value, str)
+        ):
             params[key] = _handle_shape(value)
 
-        elif key.endswith('_regularizer') and isinstance(value, dict):
+        elif key.endswith("_regularizer") and isinstance(value, dict):
             params[key] = _handle_regularizer(value)
 
-        elif key.endswith('_constraint') and isinstance(value, dict):
+        elif key.endswith("_constraint") and isinstance(value, dict):
             params[key] = _handle_constraint(value)
 
-        elif key == 'function':  # No support for lambda/function eval
+        elif key == "function":  # No support for lambda/function eval
             params.pop(key)
 
     return params
 
 
 def get_sequential_model(config):
-    """Construct keras Sequential model from Galaxy tool parameters
+    """
+    Construct keras Sequential model from Galaxy tool parameters
 
     Parameters:
     -----------
     config : dictionary, galaxy tool parameters loaded by JSON
     """
     model = Sequential()
-    input_shape = _handle_shape(config['input_shape'])
-    layers = config['layers']
+    input_shape = _handle_shape(config["input_shape"])
+    layers = config["layers"]
     for layer in layers:
-        options = layer['layer_selection']
-        layer_type = options.pop('layer_type')
+        options = layer["layer_selection"]
+        layer_type = options.pop("layer_type")
         klass = getattr(keras.layers, layer_type)
-        kwargs = options.pop('kwargs', '')
+        kwargs = options.pop("kwargs", "")
 
         # parameters needs special care
         options = _handle_layer_parameters(options)
 
         if kwargs:
-            kwargs = safe_eval('dict(' + kwargs + ')')
+            kwargs = safe_eval("dict(" + kwargs + ")")
             options.update(kwargs)
 
         # add input_shape to the first layer only
-        if not getattr(model, '_layers') and input_shape is not None:
-            options['input_shape'] = input_shape
+        if not getattr(model, "_layers") and input_shape is not None:
+            options["input_shape"] = input_shape
 
         model.add(klass(**options))
 
@@ -152,31 +174,32 @@
 
 
 def get_functional_model(config):
-    """Construct keras functional model from Galaxy tool parameters
+    """
+    Construct keras functional model from Galaxy tool parameters
 
     Parameters
     -----------
     config : dictionary, galaxy tool parameters loaded by JSON
     """
-    layers = config['layers']
+    layers = config["layers"]
     all_layers = []
     for layer in layers:
-        options = layer['layer_selection']
-        layer_type = options.pop('layer_type')
+        options = layer["layer_selection"]
+        layer_type = options.pop("layer_type")
         klass = getattr(keras.layers, layer_type)
-        inbound_nodes = options.pop('inbound_nodes', None)
-        kwargs = options.pop('kwargs', '')
+        inbound_nodes = options.pop("inbound_nodes", None)
+        kwargs = options.pop("kwargs", "")
 
         # parameters needs special care
         options = _handle_layer_parameters(options)
 
         if kwargs:
-            kwargs = safe_eval('dict(' + kwargs + ')')
+            kwargs = safe_eval("dict(" + kwargs + ")")
             options.update(kwargs)
 
         # merge layers
-        if 'merging_layers' in options:
-            idxs = literal_eval(options.pop('merging_layers'))
+        if "merging_layers" in options:
+            idxs = literal_eval(options.pop("merging_layers"))
             merging_layers = [all_layers[i - 1] for i in idxs]
             new_layer = klass(**options)(merging_layers)
         # non-input layers
@@ -188,41 +211,43 @@
 
         all_layers.append(new_layer)
 
-    input_indexes = _handle_shape(config['input_layers'])
+    input_indexes = _handle_shape(config["input_layers"])
     input_layers = [all_layers[i - 1] for i in input_indexes]
 
-    output_indexes = _handle_shape(config['output_layers'])
+    output_indexes = _handle_shape(config["output_layers"])
     output_layers = [all_layers[i - 1] for i in output_indexes]
 
     return Model(inputs=input_layers, outputs=output_layers)
 
 
 def get_batch_generator(config):
-    """Construct keras online data generator from Galaxy tool parameters
+    """
+    Construct keras online data generator from Galaxy tool parameters
 
     Parameters
     -----------
     config : dictionary, galaxy tool parameters loaded by JSON
     """
-    generator_type = config.pop('generator_type')
-    if generator_type == 'none':
+    generator_type = config.pop("generator_type")
+    if generator_type == "none":
         return None
 
-    klass = try_get_attr('galaxy_ml.preprocessors', generator_type)
+    klass = try_get_attr("galaxy_ml.preprocessors", generator_type)
 
-    if generator_type == 'GenomicIntervalBatchGenerator':
-        config['ref_genome_path'] = 'to_be_determined'
-        config['intervals_path'] = 'to_be_determined'
-        config['target_path'] = 'to_be_determined'
-        config['features'] = 'to_be_determined'
+    if generator_type == "GenomicIntervalBatchGenerator":
+        config["ref_genome_path"] = "to_be_determined"
+        config["intervals_path"] = "to_be_determined"
+        config["target_path"] = "to_be_determined"
+        config["features"] = "to_be_determined"
     else:
-        config['fasta_path'] = 'to_be_determined'
+        config["fasta_path"] = "to_be_determined"
 
     return klass(**config)
 
 
 def config_keras_model(inputs, outfile):
-    """ config keras model layers and output JSON
+    """
+    config keras model layers and output JSON
 
     Parameters
     ----------
@@ -232,23 +257,30 @@
     outfile : str
         Path to galaxy dataset containing keras model JSON.
     """
-    model_type = inputs['model_selection']['model_type']
-    layers_config = inputs['model_selection']
+    model_type = inputs["model_selection"]["model_type"]
+    layers_config = inputs["model_selection"]
 
-    if model_type == 'sequential':
+    if model_type == "sequential":
         model = get_sequential_model(layers_config)
     else:
         model = get_functional_model(layers_config)
 
     json_string = model.to_json()
 
-    with open(outfile, 'w') as f:
+    with open(outfile, "w") as f:
         json.dump(json.loads(json_string), f, indent=2)
 
 
-def build_keras_model(inputs, outfile, model_json, infile_weights=None,
-                      batch_mode=False, outfile_params=None):
-    """ for `keras_model_builder` tool
+def build_keras_model(
+    inputs,
+    outfile,
+    model_json,
+    infile_weights=None,
+    batch_mode=False,
+    outfile_params=None,
+):
+    """
+    for `keras_model_builder` tool
 
     Parameters
     ----------
@@ -265,75 +297,81 @@
     outfile_params : str, default=None
         File path to search parameters output.
     """
-    with open(model_json, 'r') as f:
+    with open(model_json, "r") as f:
         json_model = json.load(f)
 
-    config = json_model['config']
+    config = json_model["config"]
 
     options = {}
 
-    if json_model['class_name'] == 'Sequential':
-        options['model_type'] = 'sequential'
+    if json_model["class_name"] == "Sequential":
+        options["model_type"] = "sequential"
         klass = Sequential
-    elif json_model['class_name'] == 'Model':
-        options['model_type'] = 'functional'
+    elif json_model["class_name"] == "Model":
+        options["model_type"] = "functional"
         klass = Model
     else:
-        raise ValueError("Unknow Keras model class: %s"
-                         % json_model['class_name'])
+        raise ValueError("Unknow Keras model class: %s" % json_model["class_name"])
 
     # load prefitted model
-    if inputs['mode_selection']['mode_type'] == 'prefitted':
+    if inputs["mode_selection"]["mode_type"] == "prefitted":
         estimator = klass.from_config(config)
         estimator.load_weights(infile_weights)
     # build train model
     else:
-        cls_name = inputs['mode_selection']['learning_type']
-        klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name)
+        cls_name = inputs["mode_selection"]["learning_type"]
+        klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name)
 
-        options['loss'] = (inputs['mode_selection']
-                           ['compile_params']['loss'])
-        options['optimizer'] =\
-            (inputs['mode_selection']['compile_params']
-             ['optimizer_selection']['optimizer_type']).lower()
+        options["loss"] = inputs["mode_selection"]["compile_params"]["loss"]
+        options["optimizer"] = (
+            inputs["mode_selection"]["compile_params"]["optimizer_selection"][
+                "optimizer_type"
+            ]
+        ).lower()
 
-        options.update((inputs['mode_selection']['compile_params']
-                        ['optimizer_selection']['optimizer_options']))
+        options.update(
+            (
+                inputs["mode_selection"]["compile_params"]["optimizer_selection"][
+                    "optimizer_options"
+                ]
+            )
+        )
 
-        train_metrics = inputs['mode_selection']['compile_params']['metrics']
-        if train_metrics[-1] == 'none':
+        train_metrics = inputs["mode_selection"]["compile_params"]["metrics"]
+        if train_metrics[-1] == "none":
             train_metrics = train_metrics[:-1]
-        options['metrics'] = train_metrics
+        options["metrics"] = train_metrics
 
-        options.update(inputs['mode_selection']['fit_params'])
-        options['seed'] = inputs['mode_selection']['random_seed']
+        options.update(inputs["mode_selection"]["fit_params"])
+        options["seed"] = inputs["mode_selection"]["random_seed"]
 
         if batch_mode:
-            generator = get_batch_generator(inputs['mode_selection']
-                                            ['generator_selection'])
-            options['data_batch_generator'] = generator
-            options['prediction_steps'] = \
-                inputs['mode_selection']['prediction_steps']
-            options['class_positive_factor'] = \
-                inputs['mode_selection']['class_positive_factor']
+            generator = get_batch_generator(
+                inputs["mode_selection"]["generator_selection"]
+            )
+            options["data_batch_generator"] = generator
+            options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"]
+            options["class_positive_factor"] = inputs["mode_selection"][
+                "class_positive_factor"
+            ]
         estimator = klass(config, **options)
         if outfile_params:
             hyper_params = get_search_params(estimator)
             # TODO: remove this after making `verbose` tunable
             for h_param in hyper_params:
-                if h_param[1].endswith('verbose'):
-                    h_param[0] = '@'
-            df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value'])
-            df.to_csv(outfile_params, sep='\t', index=False)
+                if h_param[1].endswith("verbose"):
+                    h_param[0] = "@"
+            df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"])
+            df.to_csv(outfile_params, sep="\t", index=False)
 
     print(repr(estimator))
     # save model by pickle
-    with open(outfile, 'wb') as f:
+    with open(outfile, "wb") as f:
         pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
 
 
-if __name__ == '__main__':
-    warnings.simplefilter('ignore')
+if __name__ == "__main__":
+    warnings.simplefilter("ignore")
 
     aparser = argparse.ArgumentParser()
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
@@ -345,7 +383,7 @@
     args = aparser.parse_args()
 
     input_json_path = args.inputs
-    with open(input_json_path, 'r') as param_handler:
+    with open(input_json_path, "r") as param_handler:
         inputs = json.load(param_handler)
 
     tool_id = args.tool_id
@@ -355,18 +393,20 @@
     infile_weights = args.infile_weights
 
     # for keras_model_config tool
-    if tool_id == 'keras_model_config':
+    if tool_id == "keras_model_config":
         config_keras_model(inputs, outfile)
 
     # for keras_model_builder tool
     else:
         batch_mode = False
-        if tool_id == 'keras_batch_models':
+        if tool_id == "keras_batch_models":
             batch_mode = True
 
-        build_keras_model(inputs=inputs,
-                          model_json=model_json,
-                          infile_weights=infile_weights,
-                          batch_mode=batch_mode,
-                          outfile=outfile,
-                          outfile_params=outfile_params)
+        build_keras_model(
+            inputs=inputs,
+            model_json=model_json,
+            infile_weights=infile_weights,
+            batch_mode=batch_mode,
+            outfile=outfile,
+            outfile_params=outfile_params,
+        )