Galaxy |

Changeset 11:7d7379dfef8b (2019-09-13)

Previous changeset 10:153f237ddb36 (2019-08-09) Next changeset 12:73d2ef652879 (2019-10-02)

Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"

modified:
keras_deep_learning.py
keras_macros.xml
main_macros.xml
model_prediction.py
search_model_validation.py
test-data/keras02.json
test-data/keras_batch_model01
test-data/keras_batch_model02
test-data/keras_batch_model03
test-data/pipeline14
test-data/pipeline16
train_test_eval.py

added:
ml_visualization_ex.py
test-data/grid_scores_.tabular
test-data/ml_vis01.html
test-data/ml_vis02.html
test-data/ml_vis03.html
test-data/ml_vis04.html
test-data/ml_vis05.html
test-data/ml_vis05.png
test-data/y_score.tabular
test-data/y_true.tabular

diff -r 153f237ddb36 -r 7d7379dfef8b keras_deep_learning.py
--- a/keras_deep_learning.py Fri Aug 09 07:08:07 2019 -0400
+++ b/keras_deep_learning.py Fri Sep 13 12:12:23 2019 -0400

[

@@ -8,7 +8,10 @@

from ast import literal_eval
from keras.models import Sequential, Model
-from galaxy_ml.utils import try_get_attr, get_search_params
+from galaxy_ml.utils import try_get_attr, get_search_params, SafeEval
+
+
+safe_eval = SafeEval()

def _handle_shape(literal):
@@ -100,13 +103,14 @@
         if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape',
                    'target_shape', 'dims', 'kernel_size', 'strides',
                    'dilation_rate', 'output_padding', 'cropping', 'size',
-                   'padding', 'pool_size', 'axis', 'shared_axes']:
+                   'padding', 'pool_size', 'axis', 'shared_axes'] \
+                and isinstance(value, str):
             params[key] = _handle_shape(value)

-        elif key.endswith('_regularizer'):
+        elif key.endswith('_regularizer') and isinstance(value, dict):
             params[key] = _handle_regularizer(value)

-        elif key.endswith('_constraint'):
+        elif key.endswith('_constraint') and isinstance(value, dict):
             params[key] = _handle_constraint(value)

         elif key == 'function':  # No support for lambda/function eval
@@ -129,12 +133,15 @@
         options = layer['layer_selection']
         layer_type = options.pop('layer_type')
         klass = getattr(keras.layers, layer_type)
-        other_options = options.pop('layer_options', {})
-        options.update(other_options)
+        kwargs = options.pop('kwargs', '')

         # parameters needs special care
         options = _handle_layer_parameters(options)

+        if kwargs:
+            kwargs = safe_eval('dict(' + kwargs + ')')
+            options.update(kwargs)
+
         # add input_shape to the first layer only
         if not getattr(model, '_layers') and input_shape is not None:
             options['input_shape'] = input_shape
@@ -158,11 +165,15 @@
         layer_type = options.pop('layer_type')
         klass = getattr(keras.layers, layer_type)
         inbound_nodes = options.pop('inbound_nodes', None)
-        other_options = options.pop('layer_options', {})
-        options.update(other_options)
+        kwargs = options.pop('kwargs', '')

         # parameters needs special care
         options = _handle_layer_parameters(options)
+
+        if kwargs:
+            kwargs = safe_eval('dict(' + kwargs + ')')
+            options.update(kwargs)
+
         # merge layers
         if 'merging_layers' in options:
             idxs = literal_eval(options.pop('merging_layers'))

diff -r 153f237ddb36 -r 7d7379dfef8b keras_macros.xml
--- a/keras_macros.xml Fri Aug 09 07:08:07 2019 -0400
+++ b/keras_macros.xml Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -1,5 +1,5 @@\n <macros>\n- <token name="@KERAS_VERSION@">0.4.0</token>\n+ <token name="@KERAS_VERSION@">0.4.2</token>\n \n <xml name="macro_stdio">\n <stdio>\n@@ -24,34 +24,34 @@\n </expand>\n </xml>\n \n- <xml name="keras_activations">\n+ <xml name="keras_activations" token_none="true" token_tanh="false">\n <param argument="activation" type="select" label="Activation function">\n- <option value="linear" selected="true">None / linear (default)</option>\n+ <option value="linear" selected="@NONE@">None / linear (default)</option>\n <option value="softmax">softmax</option>\n <option value="elu">elu</option>\n <option value="selu">selu</option>\n <option value="softplus">softplus</option>\n <option value="softsign">softsign</option>\n <option value="relu">relu</option>\n- <option value="tanh">tanh</option>\n+ <option value="tanh" selected="@TANH@">tanh</option>\n <option value="sigmoid">sigmoid</option>\n <option value="hard_sigmoid">hard_sigmoid</option>\n <option value="exponential">tanh</option>\n </param>\n </xml>\n \n- <xml name="keras_initializers" token_argument="kernel_initializer" token_default_kernel="false" token_default_bias="false" token_default_embeddings="false">\n+ <xml name="keras_initializers" token_argument="kernel_initializer" token_default_glorot_uniform="false" token_default_zeros="false" token_default_random_uniform="false" token_default_ones="false">\n <param argument="@ARGUMENT@" type="select" label="@ARGUMENT@">\n- <option value="zeros" selected="@DEFAULT_BIAS@">zero / zeros / Zeros</option>\n- <option value="ones">one / ones / Ones</option>\n+ <option value="zeros" selected="@DEFAULT_ZEROS@">zero / zeros / Zeros</option>\n+ <option value="ones" selected="@DEFAULT_ONES@">one / ones / Ones</option>\n <option value="constant">constant / Constant</option>\n <option value="random_normal">normal / random_normal / RandomNormal</option>\n- <option value="random_uniform" selected="@DEFAULT_EMBEDDINGS@">uniform / random_uniform / 
RandomUniform</option>\n+ <option value="random_uniform" selected="@DEFAULT_RANDOM_UNIFORM@">uniform / random_uniform / RandomUniform</option>\n <option value="truncated_normal">truncated_normal / TruncatedNormal</option>\n <option value="orthogonal">orthogonal / Orthogonal</option>\n <option value="identity">identity / Identity</option>\n <option value="glorot_normal">glorot_normal</option>\n- <option value="glorot_uniform" selected="@DEFAULT_KERNEL@">glorot_uniform</option>\n+ <option value="glorot_uniform" selected="@DEFAULT_GLOROT_UNIFORM@">glorot_uniform</option>\n <option value="he_normal">he_normal</option>\n <option value="he_uniform">he_uniform</option>\n <option value="lecun_normal">lecun_normal</option>\n@@ -109,133 +109,120 @@\n </xml>\n \n <xml name="keras_layer_types_core">\n- <option value="Dense">Dense</option>\n- <option value="Activation">Activation</option>\n- <option value="Dropout">Dropout</option>\n- <option value="Flatten">Flatten</option>\n- <option value="Reshape">Reshape</option>\n- <option value="Permute">Permute</option>\n- <option value="RepeatVector">RepeatVector</option>\n+ <option value="Dense">Core -- Dense</option>\n+ <option value="Activation">Core -- Activation</option>\n+ <option value="Dropout">Core -- Dropout</option>\n+ <option value="Flatten">Core -- Flatten</option>\n+ <option value="Reshape">Core -- Reshape</option>\n+ <option value="Permute">Core -- Permute</option>\n+ <option value="RepeatVector">Core -- RepeatVector</option>\n \n- <option value="ActivityRegularization">ActivityRegularization</option>\n- <option value="Masking">Masking</option>\n- <option value="SpatialDropout1D">SpatialDropout1D</option>\n- <option value="SpatialDropout2D">SpatialDropout2D</option>\n- <option value="SpatialDro'..b'r_CuDNNLSTM">\n+ <param argument="units" type="integer" value="" min="1" help="Positive integer, dimensionality of the output space."/>\n+ <expand macro="simple_kwargs" help="For example: kernel_initializer=\'glorot_uniform\', 
recurrent_initializer=\'orthogonal\', bias_initializer=\'zeros\', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, return_sequences=False, return_state=False, stateful=False. Leave blank for default."/>\n+ <yield/>\n+ </xml>\n+\n+\n \n \n <xml name="layer_Embedding">\n <param argument="input_dim" type="integer" value="" min="0" help="int > 0. Size of the vocabulary, i.e. maximum integer index + 1."/>\n <param argument="output_dim" type="integer" value="" min="0" help="int >= 0. Dimension of the dense embedding."/>\n- <section name="layer_options" title="Layer Advanced Options" expanded="false">\n- <expand macro="keras_initializers" argument="embeddings_initializer" default_embeddings="true"/>\n- <expand macro="keras_regularizers" argument="embeddings_regularizer"/>\n- <expand macro="keras_regularizers" argument="activity_regularizer"/>\n- <expand macro="keras_constraints" argument="embeddings_constraint"/>\n- <param argument="mask_zero" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="false"/>\n- <param argument="input_length" type="integer" value="" optional="true" min="0" help="Length of input sequences. Required if connecting Flatten then Dense layers upstream"/>\n- </section>\n+ <expand macro="simple_kwargs" help="For example: embeddings_initializer=\'uniform\', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None. 
Leave blank for default."/>\n <yield/>\n </xml>\n \n@@ -901,7 +637,7 @@\n </xml>\n \n <xml name="layer_PReLU">\n- <expand macro="keras_initializers" argument="alpha_initializer" default_bias="true"/>\n+ <expand macro="keras_initializers" argument="alpha_initializer" default_zeros="true"/>\n <expand macro="keras_regularizers" argument="alpha_regularizer"/>\n <expand macro="keras_constraints" argument="alpha_constraint"/>\n <param argument="shared_axes" type="text" value="" help="the axes along which to share learnable parameters for the activation function. E.g. [1, 2]">\n@@ -939,13 +675,34 @@\n \n \n \n+ <xml name="layer_BatchNormalization">\n+ <expand macro="simple_kwargs" help="For example: axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer=\'zeros\', gamma_initializer=\'ones\', moving_mean_initializer=\'zeros\', moving_variance_initializer=\'ones\', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None. Leave blank for default."/>\n+ </xml>\n+\n \n \n+ <xml name="layer_GaussianNoise">\n+ <param argument="stddev" type="float" value="" help="float, standard deviation of the noise distribution."/>\n+ </xml>\n+\n+ <xml name="layer_GaussianDropout">\n+ <param argument="rate" type="float" value="" help="drop probability, (as with `Dropout`). The multiplicative noise will have standard deviation `sqrt(rate / (1 - rate))`"/>\n+ </xml>\n+\n+ <xml name="layer_AlphaDropout">\n+ <expand macro="layer_Dropout"/>\n+ </xml>\n+\n <xml name="inbound_nodes_index">\n <param name="inbound_nodes" type="integer" value="" label="Type the index number of input layer"\n help="Find the index number at the left top corner of layer configuration block"/>\n </xml>\n \n+ \n+\n+ <xml name="simple_kwargs" token_help="Leave blank for default.">\n+ <param argument="kwargs" type="text" value="" label="Type in key words arguments if different from the default" help="@HELP@"/>\n+ </xml>\n \n \n \n'

diff -r 153f237ddb36 -r 7d7379dfef8b main_macros.xml
--- a/main_macros.xml Fri Aug 09 07:08:07 2019 -0400
+++ b/main_macros.xml Fri Sep 13 12:12:23 2019 -0400

@@ -1,12 +1,12 @@
<macros>
-  <token name="@VERSION@">1.0.7.10</token>
+  <token name="@VERSION@">1.0.7.12</token>

   <token name="@ENSEMBLE_VERSION@">0.2.0</token>

   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="3.6">python</requirement>
-          <requirement type="package" version="0.7.10">Galaxy-ML</requirement>
+          <requirement type="package" version="0.7.12">Galaxy-ML</requirement>
           <yield/>
       </requirements>
   </xml>
@@ -1379,7 +1379,7 @@
       <expand macro="model_validation_common_options"/>
       
       <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>
-      <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>
+      <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/>
       <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>
       <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/>
   </xml>

diff -r 153f237ddb36 -r 7d7379dfef8b ml_visualization_ex.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ml_visualization_ex.py Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -0,0 +1,305 @@\n+import argparse\n+import json\n+import numpy as np\n+import pandas as pd\n+import plotly\n+import plotly.graph_objs as go\n+import warnings\n+\n+from keras.models import model_from_json\n+from keras.utils import plot_model\n+from sklearn.feature_selection.base import SelectorMixin\n+from sklearn.metrics import precision_recall_curve, average_precision_score\n+from sklearn.metrics import roc_curve, auc\n+from sklearn.pipeline import Pipeline\n+from galaxy_ml.utils import load_model, read_columns, SafeEval\n+\n+\n+safe_eval = SafeEval()\n+\n+\n+def main(inputs, infile_estimator=None, infile1=None,\n+ infile2=None, outfile_result=None,\n+ outfile_object=None, groups=None,\n+ ref_seq=None, intervals=None,\n+ targets=None, fasta_path=None,\n+ model_config=None):\n+ """\n+ Parameter\n+ ---------\n+ inputs : str\n+ File path to galaxy tool parameter\n+\n+ infile_estimator : str, default is None\n+ File path to estimator\n+\n+ infile1 : str, default is None\n+ File path to dataset containing features or true labels.\n+\n+ infile2 : str, default is None\n+ File path to dataset containing target values or predicted\n+ probabilities.\n+\n+ outfile_result : str, default is None\n+ File path to save the results, either cv_results or test result\n+\n+ outfile_object : str, default is None\n+ File path to save searchCV object\n+\n+ groups : str, default is None\n+ File path to dataset containing groups labels\n+\n+ ref_seq : str, default is None\n+ File path to dataset containing genome sequence file\n+\n+ intervals : str, default is None\n+ File path to dataset containing interval file\n+\n+ targets : str, default is None\n+ File path to dataset compressed target bed file\n+\n+ fasta_path : str, default is None\n+ File path to dataset containing fasta file\n+\n+ model_config : str, default is None\n+ File path to dataset containing JSON config for neural networks\n+ """\n+ warnings.simplefilter(\'ignore\')\n+\n+ with open(inputs, \'r\') as param_handler:\n+ 
params = json.load(param_handler)\n+\n+ title = params[\'plotting_selection\'][\'title\'].strip()\n+ plot_type = params[\'plotting_selection\'][\'plot_type\']\n+ if plot_type == \'feature_importances\':\n+ with open(infile_estimator, \'rb\') as estimator_handler:\n+ estimator = load_model(estimator_handler)\n+\n+ column_option = (params[\'plotting_selection\']\n+ [\'column_selector_options\']\n+ [\'selected_column_selector_option\'])\n+ if column_option in [\'by_index_number\', \'all_but_by_index_number\',\n+ \'by_header_name\', \'all_but_by_header_name\']:\n+ c = (params[\'plotting_selection\']\n+ [\'column_selector_options\'][\'col1\'])\n+ else:\n+ c = None\n+\n+ _, input_df = read_columns(infile1, c=c,\n+ c_option=column_option,\n+ return_df=True,\n+ sep=\'\\t\', header=\'infer\',\n+ parse_dates=True)\n+\n+ feature_names = input_df.columns.values\n+\n+ if isinstance(estimator, Pipeline):\n+ for st in estimator.steps[:-1]:\n+ if isinstance(st[-1], SelectorMixin):\n+ mask = st[-1].get_support()\n+ feature_names = feature_names[mask]\n+ estimator = estimator.steps[-1][-1]\n+\n+ if hasattr(estimator, \'coef_\'):\n+ coefs = estimator.coef_\n+ else:\n+ coefs = getattr(estimator, \'feature_importances_\', None)\n+ if coefs is None:\n+ raise RuntimeError(\'The classifier does not expose \'\n+ \'"coef_" or "feature_importances_" \'\n+ \'attributes\')\n+\n+ threshold = params[\'plotting_selection\'][\'threshold\']\n+ if '..b'o.Scatter(x=[0, 1], y=[0, 1], \n+ mode=\'lines\', \n+ line=dict(color=\'black\', dash=\'dash\'),\n+ showlegend=False)\n+ data.append(trace)\n+\n+ layout = go.Layout(\n+ title=title or "Receiver operating characteristic curve",\n+ xaxis=dict(title=\'False Positive Rate\'),\n+ yaxis=dict(title=\'True Positive Rate\')\n+ )\n+\n+ fig = go.Figure(data=data, layout=layout)\n+\n+ elif plot_type == \'rfecv_gridscores\':\n+ input_df = pd.read_csv(infile1, sep=\'\\t\', header=\'infer\')\n+ scores = input_df.iloc[:, 0]\n+ steps = 
params[\'plotting_selection\'][\'steps\'].strip()\n+ steps = safe_eval(steps)\n+\n+ data = go.Scatter(\n+ x=list(range(len(scores))),\n+ y=scores,\n+ text=[str(_) for _ in steps] if steps else None,\n+ mode=\'lines\'\n+ )\n+ layout = go.Layout(\n+ xaxis=dict(title="Number of features selected"),\n+ yaxis=dict(title="Cross validation score"),\n+ title=title or None\n+ )\n+\n+ fig = go.Figure(data=[data], layout=layout)\n+\n+ elif plot_type == \'learning_curve\':\n+ input_df = pd.read_csv(infile1, sep=\'\\t\', header=\'infer\')\n+ plot_std_err = params[\'plotting_selection\'][\'plot_std_err\']\n+ data1 = go.Scatter(\n+ x=input_df[\'train_sizes_abs\'],\n+ y=input_df[\'mean_train_scores\'],\n+ error_y=dict(\n+ array=input_df[\'std_train_scores\']\n+ ) if plot_std_err else None,\n+ mode=\'lines\',\n+ name="Train Scores",\n+ )\n+ data2 = go.Scatter(\n+ x=input_df[\'train_sizes_abs\'],\n+ y=input_df[\'mean_test_scores\'],\n+ error_y=dict(\n+ array=input_df[\'std_test_scores\']\n+ ) if plot_std_err else None,\n+ mode=\'lines\',\n+ name="Test Scores",\n+ )\n+ layout = dict(\n+ xaxis=dict(\n+ title=\'No. 
of samples\'\n+ ),\n+ yaxis=dict(\n+ title=\'Performance Score\'\n+ ),\n+ title=title or \'Learning Curve\'\n+ )\n+ fig = go.Figure(data=[data1, data2], layout=layout)\n+\n+ elif plot_type == \'keras_plot_model\':\n+ with open(model_config, \'r\') as f:\n+ model_str = f.read()\n+ model = model_from_json(model_str)\n+ plot_model(model, to_file="output.png")\n+ __import__(\'os\').rename(\'output.png\', \'output\')\n+\n+ return 0\n+\n+ plotly.offline.plot(fig, filename="output.html",\n+ auto_open=False)\n+ # to be discovered by `from_work_dir`\n+ __import__(\'os\').rename(\'output.html\', \'output\')\n+\n+\n+if __name__ == \'__main__\':\n+ aparser = argparse.ArgumentParser()\n+ aparser.add_argument("-i", "--inputs", dest="inputs", required=True)\n+ aparser.add_argument("-e", "--estimator", dest="infile_estimator")\n+ aparser.add_argument("-X", "--infile1", dest="infile1")\n+ aparser.add_argument("-y", "--infile2", dest="infile2")\n+ aparser.add_argument("-O", "--outfile_result", dest="outfile_result")\n+ aparser.add_argument("-o", "--outfile_object", dest="outfile_object")\n+ aparser.add_argument("-g", "--groups", dest="groups")\n+ aparser.add_argument("-r", "--ref_seq", dest="ref_seq")\n+ aparser.add_argument("-b", "--intervals", dest="intervals")\n+ aparser.add_argument("-t", "--targets", dest="targets")\n+ aparser.add_argument("-f", "--fasta_path", dest="fasta_path")\n+ aparser.add_argument("-c", "--model_config", dest="model_config")\n+ args = aparser.parse_args()\n+\n+ main(args.inputs, args.infile_estimator, args.infile1, args.infile2,\n+ args.outfile_result, outfile_object=args.outfile_object,\n+ groups=args.groups, ref_seq=args.ref_seq, intervals=args.intervals,\n+ targets=args.targets, fasta_path=args.fasta_path,\n+ model_config=args.model_config)\n'

diff -r 153f237ddb36 -r 7d7379dfef8b model_prediction.py
--- a/model_prediction.py Fri Aug 09 07:08:07 2019 -0400
+++ b/model_prediction.py Fri Sep 13 12:12:23 2019 -0400

[

@@ -2,11 +2,13 @@
import json
import numpy as np
import pandas as pd
+import tabix
import warnings

from scipy.io import mmread
from sklearn.pipeline import Pipeline

+from galaxy_ml.externals.selene_sdk.sequences import Genome
from galaxy_ml.utils import (load_model, read_columns,
                              get_module, try_get_attr)

@@ -138,53 +140,108 @@

         pred_data_generator.fit()

-        preds = estimator.model_.predict_generator(
-            pred_data_generator.flow(batch_size=32),
-            workers=N_JOBS,
-            use_multiprocessing=True)
+        variants = pred_data_generator.variants
+        # TODO : remove the following block after galaxy-ml v0.7.13
+        blacklist_tabix = getattr(pred_data_generator.reference_genome_,
+                                  '_blacklist_tabix', None)
+        clean_variants = []
+        if blacklist_tabix:
+            start_radius = pred_data_generator.start_radius_
+            end_radius = pred_data_generator.end_radius_
+
+            for chrom, pos, name, ref, alt, strand in variants:
+                center = pos + len(ref) // 2
+                start = center - start_radius
+                end = center + end_radius

-        if preds.min() < 0. or preds.max() > 1.:
-            warnings.warn('Network returning invalid probability values. '
-                          'The last layer might not normalize predictions '
-                          'into probabilities '
-                          '(like softmax or sigmoid would).')
+                if isinstance(pred_data_generator.reference_genome_, Genome):
+                    if "chr" not in chrom:
+                        chrom = "chr" + chrom
+                    if "MT" in chrom:
+                        chrom = chrom[:-1]
+                try:
+                    rows = blacklist_tabix.query(chrom, start, end)
+                    found = 0
+                    for row in rows:
+                        found = 1
+                        break
+                    if found:
+                        continue
+                except tabix.TabixError:
+                    pass

-        if params['method'] == 'predict_proba' and preds.shape[1] == 1:
-            # first column is probability of class 0 and second is of class 1
-            preds = np.hstack([1 - preds, preds])
+                clean_variants.append((chrom, pos, name, ref, alt, strand))
+        else:
+            clean_variants = variants
+
+        setattr(pred_data_generator, 'variants', clean_variants)
+
+        variants = np.array(clean_variants)
+        # predict 1600 samples at a time, then write them to file
+        gen_flow = pred_data_generator.flow(batch_size=1600)
+
+        file_writer = open(outfile_predict, 'w')
+        header_row = '\t'.join(['chrom', 'pos', 'name', 'ref',
+                                'alt', 'strand'])
+        file_writer.write(header_row)
+        header_done = False

-        elif params['method'] == 'predict':
-            if preds.shape[-1] > 1:
-                # if the last activation is `softmax`, the sum of all
-                # probibilities will 1, the classification is considered as
-                # multi-class problem, otherwise, we take it as multi-label.
-                act = getattr(estimator.model_.layers[-1], 'activation', None)
-                if act and act.__name__ == 'softmax':
-                    classes = preds.argmax(axis=-1)
+        steps_done = 0
+
+        # TODO: add multithreading support
+        try:
+            while steps_done < len(gen_flow):
+                index_array = next(gen_flow.index_generator)
+                batch_X = gen_flow._get_batches_of_transformed_samples(
+                    index_array)
+
+                if params['method'] == 'predict':
+                    batch_preds = estimator.predict(
+                        batch_X,
+                        # Pass `pred_data_generator` explicitly so that it
+                        # overrides any data_generator the model already
+                        # carries.
+                        data_generator=pred_data_generator)
                 else:
-                    preds = (preds > 0.5).astype('int32')
-            else:
-                classes = (preds > 0.5).astype('int32')
+                    batch_preds = estimator.predict_proba(
+                        batch_X,
+                        # Pass `pred_data_generator` explicitly so that it
+                        # overrides any data_generator the model already
+                        # carries.
+                        data_generator=pred_data_generator)
+
+                if batch_preds.ndim == 1:
+                    batch_preds = batch_preds[:, np.newaxis]
+
+                batch_meta = variants[index_array]
+                batch_out = np.column_stack([batch_meta, batch_preds])

-            preds = estimator.classes_[classes]
+                if not header_done:
+                    heads = np.arange(batch_preds.shape[-1]).astype(str)
+                    heads_str = '\t'.join(heads)
+                    file_writer.write("\t%s\n" % heads_str)
+                    header_done = True
+
+                for row in batch_out:
+                    row_str = '\t'.join(row)
+                    file_writer.write("%s\n" % row_str)
+
+                steps_done += 1
+
+        finally:
+            file_writer.close()
+            # TODO: make api `pred_data_generator.close()`
+            pred_data_generator.close()
+        return 0
     # end input

     # output
-    if input_type == 'variant_effect':   # TODO: save in batchs
-        rval = pd.DataFrame(preds)
-        meta = pd.DataFrame(
-            pred_data_generator.variants,
-            columns=['chrom', 'pos', 'name', 'ref', 'alt', 'strand'])
-
-        rval = pd.concat([meta, rval], axis=1)
-
-    elif len(preds.shape) == 1:
+    if len(preds.shape) == 1:
         rval = pd.DataFrame(preds, columns=['Predicted'])
     else:
         rval = pd.DataFrame(preds)

-    rval.to_csv(outfile_predict, sep='\t',
-                header=True, index=False)
+    rval.to_csv(outfile_predict, sep='\t', header=True, index=False)

if __name__ == '__main__':

diff -r 153f237ddb36 -r 7d7379dfef8b search_model_validation.py
--- a/search_model_validation.py Fri Aug 09 07:08:07 2019 -0400
+++ b/search_model_validation.py Fri Sep 13 12:12:23 2019 -0400

[

@@ -213,6 +213,16 @@
     with open(inputs, 'r') as param_handler:
         params = json.load(param_handler)

+    # check for conflicting parameters
+    if params['outer_split']['split_mode'] == 'nested_cv' \
+            and params['save'] != 'nope':
+        raise ValueError("Save best estimator is not possible for nested CV!")
+
+    if not (params['search_schemes']['options']['refit']) \
+            and params['save'] != 'nope':
+        raise ValueError("Save best estimator is not possible when refit "
+                         "is False!")
+
     params_builder = params['search_schemes']['search_params_builder']

     with open(infile_estimator, 'rb') as estimator_handler:
@@ -542,7 +552,6 @@
             del main_est.validation_data
             if getattr(main_est, 'data_generator_', None):
                 del main_est.data_generator_
-                del main_est.data_batch_generator

         with open(outfile_object, 'wb') as output_handler:
             pickle.dump(best_estimator_, output_handler,

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/grid_scores_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/grid_scores_.tabular Fri Sep 13 12:12:23 2019 -0400

@@ -0,0 +1,18 @@
+grid_scores_
+0.7634899597102532
+0.7953981831108754
+0.7937021172447345
+0.7951323776809974
+0.793206654688313
+0.8046265123256906
+0.7972524937034748
+0.8106427221191455
+0.8072746749161711
+0.8146665413082648
+0.8155998800333571
+0.8056801877422021
+0.8123573954396127
+0.8155472512482351
+0.8164562575257928
+0.8151250518677203
+0.8107710182153142

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/keras02.json
--- a/test-data/keras02.json Fri Aug 09 07:08:07 2019 -0400
+++ b/test-data/keras02.json Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -1,1 +1,1 @@\n-{"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "main_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 100], "dtype": "int32", "sparse": false, "name": "main_input"}, "inbound_nodes": []}, {"name": "embedding_1", "class_name": "Embedding", "config": {"name": "embedding_1", "trainable": true, "batch_input_shape": [null, 100], "dtype": "float32", "input_dim": 10000, "output_dim": 512, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 100}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "lstm_1", "class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 32, "activation": "linear", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}, "inbound_nodes": [[["embedding_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": 
{"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"name": "aux_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 5], "dtype": "float32", "sparse": false, "name": "aux_input"}, "inbound_nodes": []}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["dense_1", 0, 0, {}], ["aux_input", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "dense_3", "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "dense_4", "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, 
"kernel_re'..b'ackwards": false, "stateful": false, "unroll": false, "units": 32, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}, "inbound_nodes": [[["embedding_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"name": "aux_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 5], "dtype": "float32", "sparse": false, "name": "aux_input"}, "inbound_nodes": []}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["dense_1", 0, 0, {}], ["aux_input", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": 
{"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "dense_3", "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "dense_4", "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_3", 0, 0, {}]]]}, {"name": "dense_5", "class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_4", 0, 0, {}]]]}], "input_layers": [["main_input", 0, 0], ["aux_input", 0, 0]], "output_layers": [["dense_1", 0, 0], ["dense_5", 0, 0]]}, "keras_version": "2.2.4", "backend": "tensorflow"}\n\\ No 
newline at end of file\n'

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/keras_batch_model01

Binary file test-data/keras_batch_model01 has changed

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/keras_batch_model02

Binary file test-data/keras_batch_model02 has changed

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/keras_batch_model03

Binary file test-data/keras_batch_model03 has changed

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/ml_vis01.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ml_vis01.html Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -0,0 +1,14 @@\n+<html><head><meta charset="utf-8" /></head><body><script type="text/javascript">/**\n+* plotly.js v1.39.4\n+* Copyright 2012-2018, Plotly, Inc.\n+* All rights reserved.\n+* Licensed under the MIT license\n+*/\n+!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).Plotly=t()}}(function(){return function(){return function t(e,r,n){function i(o,s){if(!r[o]){if(!e[o]){var l="function"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var c=new Error("Cannot find module \'"+o+"\'");throw c.code="MODULE_NOT_FOUND",c}var u=r[o]={exports:{}};e[o][0].call(u.exports,function(t){var r=e[o][1][t];return i(r||t)},u,u.exports,t,e,r,n)}return r[o].exports}for(var a="function"==typeof require&&require,o=0;o<n.length;o++)i(n[o]);return i}}()({1:[function(t,e,r){"use strict";var n=t("../src/lib"),i={"X,X div":"direction:ltr;font-family:\'Open Sans\', verdana, arial, sans-serif;margin:0;padding:0;","X input,X button":"font-family:\'Open Sans\', verdana, arial, sans-serif;","X input:focus,X button:focus":"outline:none;","X a":"text-decoration:none;","X a:hover":"text-decoration:none;","X .crisp":"shape-rendering:crispEdges;","X .user-select-none":"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;","X svg":"overflow:hidden;","X svg a":"fill:#447adb;","X svg a:hover":"fill:#3c6dc5;","X .main-svg":"position:absolute;top:0;left:0;pointer-events:none;","X .main-svg .draglayer":"pointer-events:all;","X .cursor-default":"cursor:default;","X .cursor-pointer":"cursor:pointer;","X .cursor-crosshair":"cursor:crosshair;","X .cursor-move":"cursor:move;","X .cursor-col-resize":"cursor:col-resize;","X .cursor-row-resize":"cursor:row-resize;","X .cursor-ns-resize":"cursor:ns-resize;","X 
.cursor-ew-resize":"cursor:ew-resize;","X .cursor-sw-resize":"cursor:sw-resize;","X .cursor-s-resize":"cursor:s-resize;","X .cursor-se-resize":"cursor:se-resize;","X .cursor-w-resize":"cursor:w-resize;","X .cursor-e-resize":"cursor:e-resize;","X .cursor-nw-resize":"cursor:nw-resize;","X .cursor-n-resize":"cursor:n-resize;","X .cursor-ne-resize":"cursor:ne-resize;","X .cursor-grab":"cursor:-webkit-grab;cursor:grab;","X .modebar":"position:absolute;top:2px;right:2px;z-index:1001;background:rgba(255,255,255,0.7);","X .modebar--hover":"opacity:0;-webkit-transition:opacity 0.3s ease 0s;-moz-transition:opacity 0.3s ease 0s;-ms-transition:opacity 0.3s ease 0s;-o-transition:opacity 0.3s ease 0s;transition:opacity 0.3s ease 0s;","X:hover .modebar--hover":"opacity:1;","X .modebar-group":"float:left;display:inline-block;box-sizing:border-box;margin-left:8px;position:relative;vertical-align:middle;white-space:nowrap;","X .modebar-group:first-child":"margin-left:0px;","X .modebar-btn":"position:relative;font-size:16px;padding:3px 4px;cursor:pointer;line-height:normal;box-sizing:border-box;","X .modebar-btn svg":"position:relative;top:2px;","X .modebar-btn path":"fill:rgba(0,31,95,0.3);","X .modebar-btn.active path,X .modebar-btn:hover path":"fill:rgba(0,22,72,0.5);","X .modebar-btn.modebar-btn--logo":"padding:3px 1px;","X .modebar-btn.modebar-btn--logo path":"fill:#447adb !important;","X [data-title]:before,X [data-title]:after":"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;","X [data-title]:hover:before,X [data-title]:hover:after":"display:block;opacity:1;","X [data-title]:before":"content:\'\';position:absolute;background:transparent;border:6px solid transparent;z-index:1002;margin-top:-12px;border-bottom-color:#69738a;margin-right:-6px;","X 
[data-title]:after":"content:attr(data-title);backgr'..b' l(t,e){return function(r,i,a){n.keyedContainer(r,"transforms["+e+"].styles","target","value."+i).set(String(t),a)}}r.moduleType="transform",r.name="groupby",r.attributes={enabled:{valType:"boolean",dflt:!0,editType:"calc"},groups:{valType:"data_array",dflt:[],editType:"calc"},nameformat:{valType:"string",editType:"calc"},styles:{_isLinkedToArray:"style",target:{valType:"string",editType:"calc"},value:{valType:"any",dflt:{},editType:"calc",_compareAsJSON:!0},editType:"calc"},editType:"calc"},r.supplyDefaults=function(t,e,i){var a,o={};function s(e,i){return n.coerce(t,o,r.attributes,e,i)}if(!s("enabled"))return o;s("groups"),s("nameformat",i._dataLength>1?"%{group} (%{trace})":"%{group}");var l=t.styles,c=o.styles=[];if(l)for(a=0;a<l.length;a++){var u=c[a]={};n.coerce(l[a],c[a],r.attributes.styles,"target");var h=n.coerce(l[a],c[a],r.attributes.styles,"value");n.isPlainObject(h)?u.value=n.extendDeep({},h):h&&delete u.value}return o},r.transform=function(t,e){var r,n,i,a=[];for(n=0;n<t.length;n++)for(r=s(t[n],e),i=0;i<r.length;i++)a.push(r[i]);return a}},{"../lib":684,"../plot_api/plot_schema":721,"../plots/plots":795,"./helpers":1128}],1128:[function(t,e,r){"use strict";r.pointsAccessorFunction=function(t,e){for(var r,n,i=0;i<t.length&&(r=t[i])!==e;i++)r._indexToPoints&&!1!==r.enabled&&(n=r._indexToPoints);return n?function(t){return n[t]}:function(t){return[t]}}},{}],1129:[function(t,e,r){"use strict";var n=t("../lib"),i=t("../plots/cartesian/axes"),a=t("./helpers").pointsAccessorFunction;r.moduleType="transform",r.name="sort",r.attributes={enabled:{valType:"boolean",dflt:!0,editType:"calc"},target:{valType:"string",strict:!0,noBlank:!0,arrayOk:!0,dflt:"x",editType:"calc"},order:{valType:"enumerated",values:["ascending","descending"],dflt:"ascending",editType:"calc"},editType:"calc"},r.supplyDefaults=function(t){var e={};function i(i,a){return n.coerce(t,e,r.attributes,i,a)}return 
i("enabled")&&(i("target"),i("order")),e},r.calcTransform=function(t,e,r){if(r.enabled){var o=n.getTargetArray(e,r);if(o){var s=r.target,l=o.length;e._length&&(l=Math.min(l,e._length));var c,u,h=e._arrayAttrs,f=function(t,e,r,n){var i,a=new Array(n),o=new Array(n);for(i=0;i<n;i++)a[i]={v:e[i],i:i};for(a.sort(function(t,e){switch(t.order){case"ascending":return function(t,r){return e(t.v)-e(r.v)};case"descending":return function(t,r){return e(r.v)-e(t.v)}}}(t,r)),i=0;i<n;i++)o[i]=a[i].i;return o}(r,o,i.getDataToCoordFunc(t,e,s,o),l),p=a(e.transforms,r),d={};for(c=0;c<h.length;c++){var g=n.nestedProperty(e,h[c]),m=g.get(),v=new Array(l);for(u=0;u<l;u++)v[u]=m[f[u]];g.set(v)}for(u=0;u<l;u++)d[u]=p(f[u]);r._indexToPoints=d,e._length=l}}}},{"../lib":684,"../plots/cartesian/axes":732,"./helpers":1128}]},{},[21])(21)});</script><div id="fa7ac07f-0942-4484-8341-187a2f547557" style="height: 100%; width: 100%;" class="plotly-graph-div"></div><script type="text/javascript">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";\n+ Plotly.plot(\n+ \'fa7ac07f-0942-4484-8341-187a2f547557\',\n+ [{"x": ["average", "temp_1", "temp_2", "friend", "forecast_acc", "forecast_under", "forecast_noaa", "week_Fri", "week_Sun", "week_Wed"], "y": [0.2207130789756775, 0.2037351429462433, 0.15959252417087555, 0.1307300478219986, 0.0984719842672348, 0.06960950791835785, 0.06281833350658417, 0.02716468647122383, 0.02207130752503872, 0.005093378480523825], "type": "bar", "uid": "8e643be2-c63f-11e9-ae51-acbc32846fd5"}],\n+ {"title": "Feature Importances"},\n+ {"showLink": true, "linkText": "Export to plot.ly"}\n+ ).then(function () {return Plotly.addFrames(\'fa7ac07f-0942-4484-8341-187a2f547557\',{});}).then(function(){Plotly.animate(\'fa7ac07f-0942-4484-8341-187a2f547557\');})\n+ </script><script type="text/javascript">window.addEventListener("resize", 
function(){Plotly.Plots.resize(document.getElementById("fa7ac07f-0942-4484-8341-187a2f547557"));});</script></body></html>\n\\ No newline at end of file\n'

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/ml_vis02.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ml_vis02.html Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -0,0 +1,14 @@\n+<html><head><meta charset="utf-8" /></head><body><script type="text/javascript">/**\n+* plotly.js v1.39.4\n+* Copyright 2012-2018, Plotly, Inc.\n+* All rights reserved.\n+* Licensed under the MIT license\n+*/\n+!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).Plotly=t()}}(function(){return function(){return function t(e,r,n){function i(o,s){if(!r[o]){if(!e[o]){var l="function"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var c=new Error("Cannot find module \'"+o+"\'");throw c.code="MODULE_NOT_FOUND",c}var u=r[o]={exports:{}};e[o][0].call(u.exports,function(t){var r=e[o][1][t];return i(r||t)},u,u.exports,t,e,r,n)}return r[o].exports}for(var a="function"==typeof require&&require,o=0;o<n.length;o++)i(n[o]);return i}}()({1:[function(t,e,r){"use strict";var n=t("../src/lib"),i={"X,X div":"direction:ltr;font-family:\'Open Sans\', verdana, arial, sans-serif;margin:0;padding:0;","X input,X button":"font-family:\'Open Sans\', verdana, arial, sans-serif;","X input:focus,X button:focus":"outline:none;","X a":"text-decoration:none;","X a:hover":"text-decoration:none;","X .crisp":"shape-rendering:crispEdges;","X .user-select-none":"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;","X svg":"overflow:hidden;","X svg a":"fill:#447adb;","X svg a:hover":"fill:#3c6dc5;","X .main-svg":"position:absolute;top:0;left:0;pointer-events:none;","X .main-svg .draglayer":"pointer-events:all;","X .cursor-default":"cursor:default;","X .cursor-pointer":"cursor:pointer;","X .cursor-crosshair":"cursor:crosshair;","X .cursor-move":"cursor:move;","X .cursor-col-resize":"cursor:col-resize;","X .cursor-row-resize":"cursor:row-resize;","X .cursor-ns-resize":"cursor:ns-resize;","X 
.cursor-ew-resize":"cursor:ew-resize;","X .cursor-sw-resize":"cursor:sw-resize;","X .cursor-s-resize":"cursor:s-resize;","X .cursor-se-resize":"cursor:se-resize;","X .cursor-w-resize":"cursor:w-resize;","X .cursor-e-resize":"cursor:e-resize;","X .cursor-nw-resize":"cursor:nw-resize;","X .cursor-n-resize":"cursor:n-resize;","X .cursor-ne-resize":"cursor:ne-resize;","X .cursor-grab":"cursor:-webkit-grab;cursor:grab;","X .modebar":"position:absolute;top:2px;right:2px;z-index:1001;background:rgba(255,255,255,0.7);","X .modebar--hover":"opacity:0;-webkit-transition:opacity 0.3s ease 0s;-moz-transition:opacity 0.3s ease 0s;-ms-transition:opacity 0.3s ease 0s;-o-transition:opacity 0.3s ease 0s;transition:opacity 0.3s ease 0s;","X:hover .modebar--hover":"opacity:1;","X .modebar-group":"float:left;display:inline-block;box-sizing:border-box;margin-left:8px;position:relative;vertical-align:middle;white-space:nowrap;","X .modebar-group:first-child":"margin-left:0px;","X .modebar-btn":"position:relative;font-size:16px;padding:3px 4px;cursor:pointer;line-height:normal;box-sizing:border-box;","X .modebar-btn svg":"position:relative;top:2px;","X .modebar-btn path":"fill:rgba(0,31,95,0.3);","X .modebar-btn.active path,X .modebar-btn:hover path":"fill:rgba(0,22,72,0.5);","X .modebar-btn.modebar-btn--logo":"padding:3px 1px;","X .modebar-btn.modebar-btn--logo path":"fill:#447adb !important;","X [data-title]:before,X [data-title]:after":"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;","X [data-title]:hover:before,X [data-title]:hover:after":"display:block;opacity:1;","X [data-title]:before":"content:\'\';position:absolute;background:transparent;border:6px solid transparent;z-index:1002;margin-top:-12px;border-bottom-color:#69738a;margin-right:-6px;","X 
[data-title]:after":"content:attr(data-title);backgr'..b'es={enabled:{valType:"boolean",dflt:!0,editType:"calc"},groups:{valType:"data_array",dflt:[],editType:"calc"},nameformat:{valType:"string",editType:"calc"},styles:{_isLinkedToArray:"style",target:{valType:"string",editType:"calc"},value:{valType:"any",dflt:{},editType:"calc",_compareAsJSON:!0},editType:"calc"},editType:"calc"},r.supplyDefaults=function(t,e,i){var a,o={};function s(e,i){return n.coerce(t,o,r.attributes,e,i)}if(!s("enabled"))return o;s("groups"),s("nameformat",i._dataLength>1?"%{group} (%{trace})":"%{group}");var l=t.styles,c=o.styles=[];if(l)for(a=0;a<l.length;a++){var u=c[a]={};n.coerce(l[a],c[a],r.attributes.styles,"target");var h=n.coerce(l[a],c[a],r.attributes.styles,"value");n.isPlainObject(h)?u.value=n.extendDeep({},h):h&&delete u.value}return o},r.transform=function(t,e){var r,n,i,a=[];for(n=0;n<t.length;n++)for(r=s(t[n],e),i=0;i<r.length;i++)a.push(r[i]);return a}},{"../lib":684,"../plot_api/plot_schema":721,"../plots/plots":795,"./helpers":1128}],1128:[function(t,e,r){"use strict";r.pointsAccessorFunction=function(t,e){for(var r,n,i=0;i<t.length&&(r=t[i])!==e;i++)r._indexToPoints&&!1!==r.enabled&&(n=r._indexToPoints);return n?function(t){return n[t]}:function(t){return[t]}}},{}],1129:[function(t,e,r){"use strict";var n=t("../lib"),i=t("../plots/cartesian/axes"),a=t("./helpers").pointsAccessorFunction;r.moduleType="transform",r.name="sort",r.attributes={enabled:{valType:"boolean",dflt:!0,editType:"calc"},target:{valType:"string",strict:!0,noBlank:!0,arrayOk:!0,dflt:"x",editType:"calc"},order:{valType:"enumerated",values:["ascending","descending"],dflt:"ascending",editType:"calc"},editType:"calc"},r.supplyDefaults=function(t){var e={};function i(i,a){return n.coerce(t,e,r.attributes,i,a)}return i("enabled")&&(i("target"),i("order")),e},r.calcTransform=function(t,e,r){if(r.enabled){var o=n.getTargetArray(e,r);if(o){var s=r.target,l=o.length;e._length&&(l=Math.min(l,e._length));var 
c,u,h=e._arrayAttrs,f=function(t,e,r,n){var i,a=new Array(n),o=new Array(n);for(i=0;i<n;i++)a[i]={v:e[i],i:i};for(a.sort(function(t,e){switch(t.order){case"ascending":return function(t,r){return e(t.v)-e(r.v)};case"descending":return function(t,r){return e(r.v)-e(t.v)}}}(t,r)),i=0;i<n;i++)o[i]=a[i].i;return o}(r,o,i.getDataToCoordFunc(t,e,s,o),l),p=a(e.transforms,r),d={};for(c=0;c<h.length;c++){var g=n.nestedProperty(e,h[c]),m=g.get(),v=new Array(l);for(u=0;u<l;u++)v[u]=m[f[u]];g.set(v)}for(u=0;u<l;u++)d[u]=p(f[u]);r._indexToPoints=d,e._length=l}}}},{"../lib":684,"../plots/cartesian/axes":732,"./helpers":1128}]},{},[21])(21)});</script><div id="452e7600-7341-488f-8b38-c08c5bda7150" style="height: 100%; width: 100%;" class="plotly-graph-div"></div><script type="text/javascript">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";\n+ Plotly.plot(\n+ \'452e7600-7341-488f-8b38-c08c5bda7150\',\n+ [{"mode": "lines", "name": "Train Scores", "x": [17.0, 56.0, 95.0, 134.0, 174.0], "y": [0.9668700841937652, 0.973000860241936, 0.9728783377589098, 0.9739086338111184, 0.9726218628287784], "type": "scatter", "uid": "a1e79fe8-c63f-11e9-a0e4-acbc32846fd5"}, {"mode": "lines", "name": "Test Scores", "x": [17.0, 56.0, 95.0, 134.0, 174.0], "y": [0.7008862995946905, 0.7963376762427242, 0.814592845745573, 0.7985540571195479, 0.8152971572131146], "type": "scatter", "uid": "a1e8f0f4-c63f-11e9-85e7-acbc32846fd5"}],\n+ {"title": "Learning Curve", "xaxis": {"title": "No. 
of samples"}, "yaxis": {"title": "Performance Score"}},\n+ {"showLink": true, "linkText": "Export to plot.ly"}\n+ ).then(function () {return Plotly.addFrames(\'452e7600-7341-488f-8b38-c08c5bda7150\',{});}).then(function(){Plotly.animate(\'452e7600-7341-488f-8b38-c08c5bda7150\');})\n+ </script><script type="text/javascript">window.addEventListener("resize", function(){Plotly.Plots.resize(document.getElementById("452e7600-7341-488f-8b38-c08c5bda7150"));});</script></body></html>\n\\ No newline at end of file\n'

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/ml_vis03.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ml_vis03.html Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -0,0 +1,14 @@\n+<html><head><meta charset="utf-8" /></head><body><script type="text/javascript">/**\n+* plotly.js v1.39.4\n+* Copyright 2012-2018, Plotly, Inc.\n+* All rights reserved.\n+* Licensed under the MIT license\n+*/\n+!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).Plotly=t()}}(function(){return function(){return function t(e,r,n){function i(o,s){if(!r[o]){if(!e[o]){var l="function"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var c=new Error("Cannot find module \'"+o+"\'");throw c.code="MODULE_NOT_FOUND",c}var u=r[o]={exports:{}};e[o][0].call(u.exports,function(t){var r=e[o][1][t];return i(r||t)},u,u.exports,t,e,r,n)}return r[o].exports}for(var a="function"==typeof require&&require,o=0;o<n.length;o++)i(n[o]);return i}}()({1:[function(t,e,r){"use strict";var n=t("../src/lib"),i={"X,X div":"direction:ltr;font-family:\'Open Sans\', verdana, arial, sans-serif;margin:0;padding:0;","X input,X button":"font-family:\'Open Sans\', verdana, arial, sans-serif;","X input:focus,X button:focus":"outline:none;","X a":"text-decoration:none;","X a:hover":"text-decoration:none;","X .crisp":"shape-rendering:crispEdges;","X .user-select-none":"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;","X svg":"overflow:hidden;","X svg a":"fill:#447adb;","X svg a:hover":"fill:#3c6dc5;","X .main-svg":"position:absolute;top:0;left:0;pointer-events:none;","X .main-svg .draglayer":"pointer-events:all;","X .cursor-default":"cursor:default;","X .cursor-pointer":"cursor:pointer;","X .cursor-crosshair":"cursor:crosshair;","X .cursor-move":"cursor:move;","X .cursor-col-resize":"cursor:col-resize;","X .cursor-row-resize":"cursor:row-resize;","X .cursor-ns-resize":"cursor:ns-resize;","X 
.cursor-ew-resize":"cursor:ew-resize;","X .cursor-sw-resize":"cursor:sw-resize;","X .cursor-s-resize":"cursor:s-resize;","X .cursor-se-resize":"cursor:se-resize;","X .cursor-w-resize":"cursor:w-resize;","X .cursor-e-resize":"cursor:e-resize;","X .cursor-nw-resize":"cursor:nw-resize;","X .cursor-n-resize":"cursor:n-resize;","X .cursor-ne-resize":"cursor:ne-resize;","X .cursor-grab":"cursor:-webkit-grab;cursor:grab;","X .modebar":"position:absolute;top:2px;right:2px;z-index:1001;background:rgba(255,255,255,0.7);","X .modebar--hover":"opacity:0;-webkit-transition:opacity 0.3s ease 0s;-moz-transition:opacity 0.3s ease 0s;-ms-transition:opacity 0.3s ease 0s;-o-transition:opacity 0.3s ease 0s;transition:opacity 0.3s ease 0s;","X:hover .modebar--hover":"opacity:1;","X .modebar-group":"float:left;display:inline-block;box-sizing:border-box;margin-left:8px;position:relative;vertical-align:middle;white-space:nowrap;","X .modebar-group:first-child":"margin-left:0px;","X .modebar-btn":"position:relative;font-size:16px;padding:3px 4px;cursor:pointer;line-height:normal;box-sizing:border-box;","X .modebar-btn svg":"position:relative;top:2px;","X .modebar-btn path":"fill:rgba(0,31,95,0.3);","X .modebar-btn.active path,X .modebar-btn:hover path":"fill:rgba(0,22,72,0.5);","X .modebar-btn.modebar-btn--logo":"padding:3px 1px;","X .modebar-btn.modebar-btn--logo path":"fill:#447adb !important;","X [data-title]:before,X [data-title]:after":"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;","X [data-title]:hover:before,X [data-title]:hover:after":"display:block;opacity:1;","X [data-title]:before":"content:\'\';position:absolute;background:transparent;border:6px solid transparent;z-index:1002;margin-top:-12px;border-bottom-color:#69738a;margin-right:-6px;","X 
[data-title]:after":"content:attr(data-title);backgr'..b'!0,arrayOk:!0,dflt:"x",editType:"calc"},order:{valType:"enumerated",values:["ascending","descending"],dflt:"ascending",editType:"calc"},editType:"calc"},r.supplyDefaults=function(t){var e={};function i(i,a){return n.coerce(t,e,r.attributes,i,a)}return i("enabled")&&(i("target"),i("order")),e},r.calcTransform=function(t,e,r){if(r.enabled){var o=n.getTargetArray(e,r);if(o){var s=r.target,l=o.length;e._length&&(l=Math.min(l,e._length));var c,u,h=e._arrayAttrs,f=function(t,e,r,n){var i,a=new Array(n),o=new Array(n);for(i=0;i<n;i++)a[i]={v:e[i],i:i};for(a.sort(function(t,e){switch(t.order){case"ascending":return function(t,r){return e(t.v)-e(r.v)};case"descending":return function(t,r){return e(r.v)-e(t.v)}}}(t,r)),i=0;i<n;i++)o[i]=a[i].i;return o}(r,o,i.getDataToCoordFunc(t,e,s,o),l),p=a(e.transforms,r),d={};for(c=0;c<h.length;c++){var g=n.nestedProperty(e,h[c]),m=g.get(),v=new Array(l);for(u=0;u<l;u++)v[u]=m[f[u]];g.set(v)}for(u=0;u<l;u++)d[u]=p(f[u]);r._indexToPoints=d,e._length=l}}}},{"../lib":684,"../plots/cartesian/axes":732,"./helpers":1128}]},{},[21])(21)});</script><div id="315d311c-52cd-46dc-a366-ad0d4c28f48f" style="height: 100%; width: 100%;" class="plotly-graph-div"></div><script type="text/javascript">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";\n+ Plotly.plot(\n+ \'315d311c-52cd-46dc-a366-ad0d4c28f48f\',\n+ [{"mode": "lines", "name": "column 0 (area = 0.80)", "x": [1.0, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9565217391304348, 0.9130434782608695, 0.9130434782608695, 0.9130434782608695, 0.9130434782608695, 0.9130434782608695, 0.8695652173913043, 0.8695652173913043, 0.8695652173913043, 0.8695652173913043, 0.8260869565217391, 0.8260869565217391, 0.8260869565217391, 0.8260869565217391, 0.8260869565217391, 
0.782608695652174, 0.7391304347826086, 0.6956521739130435, 0.6956521739130435, 0.6521739130434783, 0.6521739130434783, 0.6521739130434783, 0.6086956521739131, 0.5652173913043478, 0.5652173913043478, 0.5217391304347826, 0.5217391304347826, 0.5217391304347826, 0.5217391304347826, 0.5217391304347826, 0.4782608695652174, 0.4782608695652174, 0.43478260869565216, 0.391304347826087, 0.391304347826087, 0.34782608695652173, 0.30434782608695654, 0.2608695652173913, 0.21739130434782608, 0.17391304347826086, 0.13043478260869565, 0.08695652173913043, 0.043478260869565216, 0.0], "y": [0.42592592592592593, 0.41509433962264153, 0.4230769230769231, 0.43137254901960786, 0.44, 0.4489795918367347, 0.4583333333333333, 0.46808510638297873, 0.4782608695652174, 0.4888888888888889, 0.5, 0.5116279069767442, 0.5, 0.5121951219512195, 0.525, 0.5384615384615384, 0.5526315789473685, 0.5405405405405406, 0.5555555555555556, 0.5714285714285714, 0.5882352941176471, 0.5757575757575758, 0.59375, 0.6129032258064516, 0.6333333333333333, 0.6551724137931034, 0.6428571428571429, 0.6296296296296297, 0.6153846153846154, 0.64, 0.625, 0.6521739130434783, 0.6818181818181818, 0.6666666666666666, 0.65, 0.6842105263157895, 0.6666666666666666, 0.7058823529411765, 0.75, 0.8, 0.8571428571428571, 0.8461538461538461, 0.9166666666666666, 0.9090909090909091, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "type": "scatter", "uid": "b87beca2-c63f-11e9-93bf-acbc32846fd5"}],\n+ {"title": "Precision-Recall curve", "xaxis": {"title": "Recall"}, "yaxis": {"title": "Precision"}},\n+ {"showLink": true, "linkText": "Export to plot.ly"}\n+ ).then(function () {return Plotly.addFrames(\'315d311c-52cd-46dc-a366-ad0d4c28f48f\',{});}).then(function(){Plotly.animate(\'315d311c-52cd-46dc-a366-ad0d4c28f48f\');})\n+ </script><script type="text/javascript">window.addEventListener("resize", function(){Plotly.Plots.resize(document.getElementById("315d311c-52cd-46dc-a366-ad0d4c28f48f"));});</script></body></html>\n\\ No newline at end 
of file\n'

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/ml_vis04.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ml_vis04.html Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -0,0 +1,14 @@\n+<html><head><meta charset="utf-8" /></head><body><script type="text/javascript">/**\n+* plotly.js v1.39.4\n+* Copyright 2012-2018, Plotly, Inc.\n+* All rights reserved.\n+* Licensed under the MIT license\n+*/\n+!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).Plotly=t()}}(function(){return function(){return function t(e,r,n){function i(o,s){if(!r[o]){if(!e[o]){var l="function"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var c=new Error("Cannot find module \'"+o+"\'");throw c.code="MODULE_NOT_FOUND",c}var u=r[o]={exports:{}};e[o][0].call(u.exports,function(t){var r=e[o][1][t];return i(r||t)},u,u.exports,t,e,r,n)}return r[o].exports}for(var a="function"==typeof require&&require,o=0;o<n.length;o++)i(n[o]);return i}}()({1:[function(t,e,r){"use strict";var n=t("../src/lib"),i={"X,X div":"direction:ltr;font-family:\'Open Sans\', verdana, arial, sans-serif;margin:0;padding:0;","X input,X button":"font-family:\'Open Sans\', verdana, arial, sans-serif;","X input:focus,X button:focus":"outline:none;","X a":"text-decoration:none;","X a:hover":"text-decoration:none;","X .crisp":"shape-rendering:crispEdges;","X .user-select-none":"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;","X svg":"overflow:hidden;","X svg a":"fill:#447adb;","X svg a:hover":"fill:#3c6dc5;","X .main-svg":"position:absolute;top:0;left:0;pointer-events:none;","X .main-svg .draglayer":"pointer-events:all;","X .cursor-default":"cursor:default;","X .cursor-pointer":"cursor:pointer;","X .cursor-crosshair":"cursor:crosshair;","X .cursor-move":"cursor:move;","X .cursor-col-resize":"cursor:col-resize;","X .cursor-row-resize":"cursor:row-resize;","X .cursor-ns-resize":"cursor:ns-resize;","X 
.cursor-ew-resize":"cursor:ew-resize;","X .cursor-sw-resize":"cursor:sw-resize;","X .cursor-s-resize":"cursor:s-resize;","X .cursor-se-resize":"cursor:se-resize;","X .cursor-w-resize":"cursor:w-resize;","X .cursor-e-resize":"cursor:e-resize;","X .cursor-nw-resize":"cursor:nw-resize;","X .cursor-n-resize":"cursor:n-resize;","X .cursor-ne-resize":"cursor:ne-resize;","X .cursor-grab":"cursor:-webkit-grab;cursor:grab;","X .modebar":"position:absolute;top:2px;right:2px;z-index:1001;background:rgba(255,255,255,0.7);","X .modebar--hover":"opacity:0;-webkit-transition:opacity 0.3s ease 0s;-moz-transition:opacity 0.3s ease 0s;-ms-transition:opacity 0.3s ease 0s;-o-transition:opacity 0.3s ease 0s;transition:opacity 0.3s ease 0s;","X:hover .modebar--hover":"opacity:1;","X .modebar-group":"float:left;display:inline-block;box-sizing:border-box;margin-left:8px;position:relative;vertical-align:middle;white-space:nowrap;","X .modebar-group:first-child":"margin-left:0px;","X .modebar-btn":"position:relative;font-size:16px;padding:3px 4px;cursor:pointer;line-height:normal;box-sizing:border-box;","X .modebar-btn svg":"position:relative;top:2px;","X .modebar-btn path":"fill:rgba(0,31,95,0.3);","X .modebar-btn.active path,X .modebar-btn:hover path":"fill:rgba(0,22,72,0.5);","X .modebar-btn.modebar-btn--logo":"padding:3px 1px;","X .modebar-btn.modebar-btn--logo path":"fill:#447adb !important;","X [data-title]:before,X [data-title]:after":"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;","X [data-title]:hover:before,X [data-title]:hover:after":"display:block;opacity:1;","X [data-title]:before":"content:\'\';position:absolute;background:transparent;border:6px solid transparent;z-index:1002;margin-top:-12px;border-bottom-color:#69738a;margin-right:-6px;","X 
[data-title]:after":"content:attr(data-title);backgr'..b';n.isPlainObject(h)?u.value=n.extendDeep({},h):h&&delete u.value}return o},r.transform=function(t,e){var r,n,i,a=[];for(n=0;n<t.length;n++)for(r=s(t[n],e),i=0;i<r.length;i++)a.push(r[i]);return a}},{"../lib":684,"../plot_api/plot_schema":721,"../plots/plots":795,"./helpers":1128}],1128:[function(t,e,r){"use strict";r.pointsAccessorFunction=function(t,e){for(var r,n,i=0;i<t.length&&(r=t[i])!==e;i++)r._indexToPoints&&!1!==r.enabled&&(n=r._indexToPoints);return n?function(t){return n[t]}:function(t){return[t]}}},{}],1129:[function(t,e,r){"use strict";var n=t("../lib"),i=t("../plots/cartesian/axes"),a=t("./helpers").pointsAccessorFunction;r.moduleType="transform",r.name="sort",r.attributes={enabled:{valType:"boolean",dflt:!0,editType:"calc"},target:{valType:"string",strict:!0,noBlank:!0,arrayOk:!0,dflt:"x",editType:"calc"},order:{valType:"enumerated",values:["ascending","descending"],dflt:"ascending",editType:"calc"},editType:"calc"},r.supplyDefaults=function(t){var e={};function i(i,a){return n.coerce(t,e,r.attributes,i,a)}return i("enabled")&&(i("target"),i("order")),e},r.calcTransform=function(t,e,r){if(r.enabled){var o=n.getTargetArray(e,r);if(o){var s=r.target,l=o.length;e._length&&(l=Math.min(l,e._length));var c,u,h=e._arrayAttrs,f=function(t,e,r,n){var i,a=new Array(n),o=new Array(n);for(i=0;i<n;i++)a[i]={v:e[i],i:i};for(a.sort(function(t,e){switch(t.order){case"ascending":return function(t,r){return e(t.v)-e(r.v)};case"descending":return function(t,r){return e(r.v)-e(t.v)}}}(t,r)),i=0;i<n;i++)o[i]=a[i].i;return o}(r,o,i.getDataToCoordFunc(t,e,s,o),l),p=a(e.transforms,r),d={};for(c=0;c<h.length;c++){var g=n.nestedProperty(e,h[c]),m=g.get(),v=new Array(l);for(u=0;u<l;u++)v[u]=m[f[u]];g.set(v)}for(u=0;u<l;u++)d[u]=p(f[u]);r._indexToPoints=d,e._length=l}}}},{"../lib":684,"../plots/cartesian/axes":732,"./helpers":1128}]},{},[21])(21)});</script><div id="1fb80e80-e509-483d-9953-2833c228f571" style="height: 
100%; width: 100%;" class="plotly-graph-div"></div><script type="text/javascript">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";\n+ Plotly.plot(\n+ \'1fb80e80-e509-483d-9953-2833c228f571\',\n+ [{"mode": "lines", "name": "column 0 (area = 0.88)", "x": [0.0, 0.0, 0.0, 0.019230769230769232, 0.019230769230769232, 0.038461538461538464, 0.038461538461538464, 0.11538461538461539, 0.11538461538461539, 0.1346153846153846, 0.1346153846153846, 0.17307692307692307, 0.17307692307692307, 0.19230769230769232, 0.19230769230769232, 0.2692307692307692, 0.2692307692307692, 0.3269230769230769, 0.3269230769230769, 0.40384615384615385, 0.40384615384615385, 0.5961538461538461, 0.5961538461538461, 1.0], "y": [0.0, 0.043478260869565216, 0.391304347826087, 0.391304347826087, 0.4782608695652174, 0.4782608695652174, 0.5217391304347826, 0.5217391304347826, 0.5652173913043478, 0.5652173913043478, 0.6521739130434783, 0.6521739130434783, 0.6956521739130435, 0.6956521739130435, 0.8260869565217391, 0.8260869565217391, 0.8695652173913043, 0.8695652173913043, 0.9130434782608695, 0.9130434782608695, 0.9565217391304348, 0.9565217391304348, 1.0, 1.0], "type": "scatter", "uid": "cf54f2fa-c63f-11e9-96b2-acbc32846fd5"}, {"line": {"color": "black", "dash": "dash"}, "mode": "lines", "showlegend": false, "x": [0, 1], "y": [0, 1], "type": "scatter", "uid": "cf563868-c63f-11e9-ac6a-acbc32846fd5"}],\n+ {"title": "Receiver operating characteristic curve", "xaxis": {"title": "False Positive Rate"}, "yaxis": {"title": "True Positive Rate"}},\n+ {"showLink": true, "linkText": "Export to plot.ly"}\n+ ).then(function () {return Plotly.addFrames(\'1fb80e80-e509-483d-9953-2833c228f571\',{});}).then(function(){Plotly.animate(\'1fb80e80-e509-483d-9953-2833c228f571\');})\n+ </script><script type="text/javascript">window.addEventListener("resize", function(){Plotly.Plots.resize(document.getElementById("1fb80e80-e509-483d-9953-2833c228f571"));});</script></body></html>\n\\ No newline at 
end of file\n'

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/ml_vis05.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ml_vis05.html Fri Sep 13 12:12:23 2019 -0400

[

b'@@ -0,0 +1,14 @@\n+<html><head><meta charset="utf-8" /></head><body><script type="text/javascript">/**\n+* plotly.js v1.39.4\n+* Copyright 2012-2018, Plotly, Inc.\n+* All rights reserved.\n+* Licensed under the MIT license\n+*/\n+!function(t){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=t();else if("function"==typeof define&&define.amd)define([],t);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).Plotly=t()}}(function(){return function(){return function t(e,r,n){function i(o,s){if(!r[o]){if(!e[o]){var l="function"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var c=new Error("Cannot find module \'"+o+"\'");throw c.code="MODULE_NOT_FOUND",c}var u=r[o]={exports:{}};e[o][0].call(u.exports,function(t){var r=e[o][1][t];return i(r||t)},u,u.exports,t,e,r,n)}return r[o].exports}for(var a="function"==typeof require&&require,o=0;o<n.length;o++)i(n[o]);return i}}()({1:[function(t,e,r){"use strict";var n=t("../src/lib"),i={"X,X div":"direction:ltr;font-family:\'Open Sans\', verdana, arial, sans-serif;margin:0;padding:0;","X input,X button":"font-family:\'Open Sans\', verdana, arial, sans-serif;","X input:focus,X button:focus":"outline:none;","X a":"text-decoration:none;","X a:hover":"text-decoration:none;","X .crisp":"shape-rendering:crispEdges;","X .user-select-none":"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;","X svg":"overflow:hidden;","X svg a":"fill:#447adb;","X svg a:hover":"fill:#3c6dc5;","X .main-svg":"position:absolute;top:0;left:0;pointer-events:none;","X .main-svg .draglayer":"pointer-events:all;","X .cursor-default":"cursor:default;","X .cursor-pointer":"cursor:pointer;","X .cursor-crosshair":"cursor:crosshair;","X .cursor-move":"cursor:move;","X .cursor-col-resize":"cursor:col-resize;","X .cursor-row-resize":"cursor:row-resize;","X .cursor-ns-resize":"cursor:ns-resize;","X 
.cursor-ew-resize":"cursor:ew-resize;","X .cursor-sw-resize":"cursor:sw-resize;","X .cursor-s-resize":"cursor:s-resize;","X .cursor-se-resize":"cursor:se-resize;","X .cursor-w-resize":"cursor:w-resize;","X .cursor-e-resize":"cursor:e-resize;","X .cursor-nw-resize":"cursor:nw-resize;","X .cursor-n-resize":"cursor:n-resize;","X .cursor-ne-resize":"cursor:ne-resize;","X .cursor-grab":"cursor:-webkit-grab;cursor:grab;","X .modebar":"position:absolute;top:2px;right:2px;z-index:1001;background:rgba(255,255,255,0.7);","X .modebar--hover":"opacity:0;-webkit-transition:opacity 0.3s ease 0s;-moz-transition:opacity 0.3s ease 0s;-ms-transition:opacity 0.3s ease 0s;-o-transition:opacity 0.3s ease 0s;transition:opacity 0.3s ease 0s;","X:hover .modebar--hover":"opacity:1;","X .modebar-group":"float:left;display:inline-block;box-sizing:border-box;margin-left:8px;position:relative;vertical-align:middle;white-space:nowrap;","X .modebar-group:first-child":"margin-left:0px;","X .modebar-btn":"position:relative;font-size:16px;padding:3px 4px;cursor:pointer;line-height:normal;box-sizing:border-box;","X .modebar-btn svg":"position:relative;top:2px;","X .modebar-btn path":"fill:rgba(0,31,95,0.3);","X .modebar-btn.active path,X .modebar-btn:hover path":"fill:rgba(0,22,72,0.5);","X .modebar-btn.modebar-btn--logo":"padding:3px 1px;","X .modebar-btn.modebar-btn--logo path":"fill:#447adb !important;","X [data-title]:before,X [data-title]:after":"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;","X [data-title]:hover:before,X [data-title]:hover:after":"display:block;opacity:1;","X [data-title]:before":"content:\'\';position:absolute;background:transparent;border:6px solid transparent;z-index:1002;margin-top:-12px;border-bottom-color:#69738a;margin-right:-6px;","X 
[data-title]:after":"content:attr(data-title);backgr'..b'"groupby",r.attributes={enabled:{valType:"boolean",dflt:!0,editType:"calc"},groups:{valType:"data_array",dflt:[],editType:"calc"},nameformat:{valType:"string",editType:"calc"},styles:{_isLinkedToArray:"style",target:{valType:"string",editType:"calc"},value:{valType:"any",dflt:{},editType:"calc",_compareAsJSON:!0},editType:"calc"},editType:"calc"},r.supplyDefaults=function(t,e,i){var a,o={};function s(e,i){return n.coerce(t,o,r.attributes,e,i)}if(!s("enabled"))return o;s("groups"),s("nameformat",i._dataLength>1?"%{group} (%{trace})":"%{group}");var l=t.styles,c=o.styles=[];if(l)for(a=0;a<l.length;a++){var u=c[a]={};n.coerce(l[a],c[a],r.attributes.styles,"target");var h=n.coerce(l[a],c[a],r.attributes.styles,"value");n.isPlainObject(h)?u.value=n.extendDeep({},h):h&&delete u.value}return o},r.transform=function(t,e){var r,n,i,a=[];for(n=0;n<t.length;n++)for(r=s(t[n],e),i=0;i<r.length;i++)a.push(r[i]);return a}},{"../lib":684,"../plot_api/plot_schema":721,"../plots/plots":795,"./helpers":1128}],1128:[function(t,e,r){"use strict";r.pointsAccessorFunction=function(t,e){for(var r,n,i=0;i<t.length&&(r=t[i])!==e;i++)r._indexToPoints&&!1!==r.enabled&&(n=r._indexToPoints);return n?function(t){return n[t]}:function(t){return[t]}}},{}],1129:[function(t,e,r){"use strict";var n=t("../lib"),i=t("../plots/cartesian/axes"),a=t("./helpers").pointsAccessorFunction;r.moduleType="transform",r.name="sort",r.attributes={enabled:{valType:"boolean",dflt:!0,editType:"calc"},target:{valType:"string",strict:!0,noBlank:!0,arrayOk:!0,dflt:"x",editType:"calc"},order:{valType:"enumerated",values:["ascending","descending"],dflt:"ascending",editType:"calc"},editType:"calc"},r.supplyDefaults=function(t){var e={};function i(i,a){return n.coerce(t,e,r.attributes,i,a)}return i("enabled")&&(i("target"),i("order")),e},r.calcTransform=function(t,e,r){if(r.enabled){var o=n.getTargetArray(e,r);if(o){var 
s=r.target,l=o.length;e._length&&(l=Math.min(l,e._length));var c,u,h=e._arrayAttrs,f=function(t,e,r,n){var i,a=new Array(n),o=new Array(n);for(i=0;i<n;i++)a[i]={v:e[i],i:i};for(a.sort(function(t,e){switch(t.order){case"ascending":return function(t,r){return e(t.v)-e(r.v)};case"descending":return function(t,r){return e(r.v)-e(t.v)}}}(t,r)),i=0;i<n;i++)o[i]=a[i].i;return o}(r,o,i.getDataToCoordFunc(t,e,s,o),l),p=a(e.transforms,r),d={};for(c=0;c<h.length;c++){var g=n.nestedProperty(e,h[c]),m=g.get(),v=new Array(l);for(u=0;u<l;u++)v[u]=m[f[u]];g.set(v)}for(u=0;u<l;u++)d[u]=p(f[u]);r._indexToPoints=d,e._length=l}}}},{"../lib":684,"../plots/cartesian/axes":732,"./helpers":1128}]},{},[21])(21)});</script><div id="edd029ca-914f-4bdd-b3bb-a1170240666b" style="height: 100%; width: 100%;" class="plotly-graph-div"></div><script type="text/javascript">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";\n+ Plotly.plot(\n+ \'edd029ca-914f-4bdd-b3bb-a1170240666b\',\n+ [{"mode": "lines", "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "y": [0.7634899597102532, 0.7953981831108754, 0.7937021172447345, 0.7951323776809974, 0.793206654688313, 0.8046265123256906, 0.7972524937034748, 0.8106427221191455, 0.8072746749161711, 0.8146665413082648, 0.8155998800333571, 0.8056801877422021, 0.8123573954396127, 0.8155472512482351, 0.8164562575257928, 0.8151250518677203, 0.8107710182153142], "type": "scatter", "uid": "793d6528-c63f-11e9-9baf-acbc32846fd5"}],\n+ {"xaxis": {"title": "Number of features selected"}, "yaxis": {"title": "Cross validation score"}},\n+ {"showLink": true, "linkText": "Export to plot.ly"}\n+ ).then(function () {return Plotly.addFrames(\'edd029ca-914f-4bdd-b3bb-a1170240666b\',{});}).then(function(){Plotly.animate(\'edd029ca-914f-4bdd-b3bb-a1170240666b\');})\n+ </script><script type="text/javascript">window.addEventListener("resize", 
function(){Plotly.Plots.resize(document.getElementById("edd029ca-914f-4bdd-b3bb-a1170240666b"));});</script></body></html>\n\\ No newline at end of file\n'

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/ml_vis05.png

Binary file test-data/ml_vis05.png has changed

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/pipeline14

Binary file test-data/pipeline14 has changed

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/pipeline16

Binary file test-data/pipeline16 has changed

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/y_score.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/y_score.tabular Fri Sep 13 12:12:23 2019 -0400

@@ -0,0 +1,75 @@
+0.04521016253284027
+-0.0017878318955413253
+-0.3380009790698638
+-0.15416229901482092
+-0.008989122568787922
+0.3775746361984437
+-0.20342288788672414
+0.21787658306027935
+-0.5322523189136876
+-0.6361907868807346
+-0.036875765955103335
+-0.24857077769453662
+-0.5305978020035378
+-0.5288479779433272
+-0.22579627342382325
+0.4905346629557697
+-0.12238193946346121
+-0.42773421293023084
+0.16878080982659216
+0.051637548704625946
+0.023623352380110763
+-0.3553978552068183
+-0.4597636722184091
+-0.36924223816393
+-0.539585171546133
+-0.4138055622986405
+-0.25401950905817183
+0.35124248378117207
+-0.5767911246317095
+-0.4452974937020068
+0.13456824841567622
+-0.08366761511503285
+-0.5855411774730717
+0.4493951821813167
+-0.0008118901312900162
+-0.375188782981553
+-0.052180286682808386
+-0.3624923116131733
+-0.3212899940903371
+-0.6326134385656439
+-0.5951558341213625
+-0.026698968757988106
+-0.6389295278289815
+-0.4665622957151918
+0.24683878631472084
+0.06670297201702563
+-0.09995075976356604
+-0.0026791784207790825
+-0.26843502542172126
+-0.23167967546053814
+-0.5500853075669638
+-0.07278578744420061
+-0.1908269856404199
+-0.10431209677312014
+-0.40541232698507823
+-1.3031302463301446
+-0.10509162333664135
+-0.06155868232417461
+-0.4347097510343062
+-0.8391150198454305
+-0.5372307413404114
+-0.46030478301666744
+-0.11618205513493052
+-0.021278188504645024
+-0.16029035414173087
+-0.35975375227600914
+-0.4814892536194141
+-0.1385760560857231
+0.3409736022465082
+-0.5355178831501075
+0.22534151535735567
+0.07294052191693523
+-0.3386178239054628
+0.15540977852505278
+0.07383896651967975

diff -r 153f237ddb36 -r 7d7379dfef8b test-data/y_true.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/y_true.tabular Fri Sep 13 12:12:23 2019 -0400

@@ -0,0 +1,75 @@
+0
+1
+0
+0
+1
+1
+1
+1
+0
+0
+0
+0
+0
+0
+0
+1
+0
+1
+1
+0
+0
+0
+0
+0
+0
+0
+0
+1
+0
+0
+1
+1
+0
+1
+0
+0
+1
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+0
+1
+0
+0
+1
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+0
+0
+0
+1
+1
+0
+1
+0
+0
+0
+1

diff -r 153f237ddb36 -r 7d7379dfef8b train_test_eval.py
--- a/train_test_eval.py Fri Aug 09 07:08:07 2019 -0400
+++ b/train_test_eval.py Fri Sep 13 12:12:23 2019 -0400

@@ -403,7 +403,6 @@
             del main_est.validation_data
             if getattr(main_est, 'data_generator_', None):
                 del main_est.data_generator_
-                del main_est.data_batch_generator

         with open(outfile_object, 'wb') as output_handler:
             pickle.dump(estimator, output_handler,