# HG changeset patch # User bgruening # Date 1576491420 18000 # Node ID 64b771b1471ad0cf830468b3e82678a6a22ec989 # Parent 18b39ada6f3545b64aece44bdc655282ac838003 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476" diff -r 18b39ada6f35 -r 64b771b1471a discriminant.xml --- a/discriminant.xml Thu Nov 07 05:25:28 2019 -0500 +++ b/discriminant.xml Mon Dec 16 05:17:00 2019 -0500 @@ -35,7 +35,7 @@ classifier_object = load_model(model_handler) header = 'infer' if params["selected_tasks"]["header"] else None -data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) +data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) prediction = classifier_object.predict(data) prediction_df = pandas.DataFrame(prediction) res = pandas.concat([data, prediction_df], axis=1) diff -r 18b39ada6f35 -r 64b771b1471a keras_deep_learning.py --- a/keras_deep_learning.py Thu Nov 07 05:25:28 2019 -0500 +++ b/keras_deep_learning.py Mon Dec 16 05:17:00 2019 -0500 @@ -73,7 +73,7 @@ } """ constraint_type = config['constraint_type'] - if constraint_type == 'None': + if constraint_type in ('None', ''): return None klass = getattr(keras.constraints, constraint_type) @@ -92,7 +92,7 @@ """Access to handle all kinds of parameters """ for key, value in six.iteritems(params): - if value == 'None': + if value in ('None', ''): params[key] = None continue @@ -205,6 +205,9 @@ config : dictionary, galaxy tool parameters loaded by JSON """ generator_type = config.pop('generator_type') + if generator_type == 'none': + return None + klass = try_get_attr('galaxy_ml.preprocessors', generator_type) if generator_type == 'GenomicIntervalBatchGenerator': @@ -240,7 +243,7 @@ json_string = model.to_json() with open(outfile, 'w') as f: - f.write(json_string) + json.dump(json.loads(json_string), f, indent=2) def build_keras_model(inputs, outfile, model_json, infile_weights=None, diff -r 18b39ada6f35 -r 64b771b1471a keras_macros.xml --- a/keras_macros.xml Thu Nov 07 05:25:28 2019 -0500 +++ b/keras_macros.xml Mon Dec 16 05:17:00 2019 -0500 @@ -1,5 +1,5 @@ - 0.4.2 + 0.5.0 @@ -18,7 +18,7 @@ - + @@ -885,7 +885,7 @@ - + diff -r 18b39ada6f35 -r 64b771b1471a keras_train_and_eval.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/keras_train_and_eval.py Mon Dec 16 05:17:00 2019 -0500 @@ -0,0 +1,491 @@ +import argparse +import joblib +import json +import numpy as np +import os +import pandas as pd +import pickle +import warnings +from itertools import chain +from scipy.io import mmread +from sklearn.pipeline import Pipeline +from sklearn.metrics.scorer import _check_multimetric_scoring +from sklearn import model_selection +from sklearn.model_selection._validation import _score +from sklearn.model_selection import _search, _validation +from sklearn.utils import indexable, safe_indexing + +from galaxy_ml.externals.selene_sdk.utils import compute_score +from galaxy_ml.model_validations import train_test_split +from galaxy_ml.keras_galaxy_models import _predict_generator +from galaxy_ml.utils import (SafeEval, get_scoring, load_model, + read_columns, try_get_attr, get_module, + clean_params, get_main_estimator) + + +_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score') +setattr(_search, '_fit_and_score', _fit_and_score) +setattr(_validation, '_fit_and_score', _fit_and_score) + +N_JOBS 
= int(os.environ.get('GALAXY_SLOTS', 1))
+CACHE_DIR = os.path.join(os.getcwd(), 'cached')
+del os
+NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
+                  'nthread', 'callbacks')
+ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau',
+                     'CSVLogger', 'None')
+
+
+def _eval_swap_params(params_builder):
+    swap_params = {}
+
+    for p in params_builder['param_set']:
+        swap_value = p['sp_value'].strip()
+        if swap_value == '':
+            continue
+
+        param_name = p['sp_name']
+        if param_name.lower().endswith(NON_SEARCHABLE):
+            warnings.warn("Warning: `%s` is not eligible for search and was "
+                          "omitted!" % param_name)
+            continue
+
+        if not swap_value.startswith(':'):
+            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
+            ev = safe_eval(swap_value)
+        else:
+            # a leading `:` before the search list requests estimator
+            # evaluation
+            safe_eval_es = SafeEval(load_estimators=True)
+            swap_value = swap_value[1:].strip()
+            # TODO: maybe add a regular-expression check
+            ev = safe_eval_es(swap_value)
+
+        swap_params[param_name] = ev
+
+    return swap_params
+
+
+def train_test_split_none(*arrays, **kwargs):
+    """Extend train_test_split to accept None arrays
+    and to support splitting by group names.
+    """
+    nones = []
+    new_arrays = []
+    for idx, arr in enumerate(arrays):
+        if arr is None:
+            nones.append(idx)
+        else:
+            new_arrays.append(arr)
+
+    if kwargs['shuffle'] == 'None':
+        kwargs['shuffle'] = None
+
+    group_names = kwargs.pop('group_names', None)
+
+    if group_names is not None and group_names.strip():
+        group_names = [name.strip() for name in
+                       group_names.split(',')]
+        new_arrays = indexable(*new_arrays)
+        groups = kwargs['labels']
+        n_samples = new_arrays[0].shape[0]
+        index_arr = np.arange(n_samples)
+        test = index_arr[np.isin(groups, group_names)]
+        train = index_arr[~np.isin(groups, group_names)]
+        rval = list(chain.from_iterable(
+            (safe_indexing(a, train),
+             safe_indexing(a, test)) for a in new_arrays))
+    else:
+        rval = train_test_split(*new_arrays, **kwargs)
+
+    for pos in nones:
+        # re-insert a (train, test) pair of Nones at the original position
+        rval[pos * 2: pos * 2] = [None, None]
+
+    return rval
+
+
+def _evaluate(y_true, pred_probas, scorer, is_multimetric=True):
+    """Output scores based on the input scorer(s)
+
+    Parameters
+    ----------
+    y_true : array
+        True label or target values
+    pred_probas : array
+        Predicted values; probabilities for classification problems
+    scorer : dict
+        dict of `sklearn.metrics.scorer.SCORER`
+    is_multimetric : bool, default is True
+    """
+    if y_true.ndim == 1 or y_true.shape[-1] == 1:
+        pred_probas = pred_probas.ravel()
+        pred_labels = (pred_probas > 0.5).astype('int32')
+        targets = y_true.ravel().astype('int32')
+        if not is_multimetric:
+            preds = pred_labels if scorer.__class__.__name__ == \
+                '_PredictScorer' else pred_probas
+            score = scorer._score_func(targets, preds, **scorer._kwargs)
+
+            return score
+        else:
+            scores = {}
+            for name, one_scorer in scorer.items():
+                preds = pred_labels if one_scorer.__class__.__name__\
+                    == '_PredictScorer' else pred_probas
+                score = one_scorer._score_func(targets, preds,
+                                               **one_scorer._kwargs)
+                scores[name] = score
+
+    # TODO: multi-class metrics
+    # multi-label
+    else:
+        pred_labels = (pred_probas > 0.5).astype('int32')
+        targets = y_true.astype('int32')
+        if not is_multimetric:
+            preds = pred_labels if scorer.__class__.__name__ == \
+                '_PredictScorer' else pred_probas
+            score, _ = compute_score(preds, targets,
+                                     scorer._score_func)
+            return score
+        else:
+            scores = {}
+            for name, one_scorer in scorer.items():
+                preds = pred_labels if one_scorer.__class__.__name__\
+                    == '_PredictScorer' else pred_probas
+                score, _ = compute_score(preds, targets,
+                                         one_scorer._score_func)
+                scores[name] = score
+
+    return scores
+
+
+def main(inputs, infile_estimator, infile1, infile2,
+         outfile_result, outfile_object=None,
+         outfile_weights=None, outfile_y_true=None,
+         outfile_y_preds=None, groups=None,
+         ref_seq=None, intervals=None, targets=None,
+         fasta_path=None):
+    """
+    Parameters
+    ----------
+    inputs : str
+        File path to galaxy tool parameter
+
+    infile_estimator : str
+        File path to estimator
+
+    infile1 : str
+        File path to dataset containing features
+
+    infile2 : str
+        File path to dataset containing target values
+
+    outfile_result : str
+        File path to save the results, either cv_results or test result
+
+    outfile_object : str, optional
+        File path to save searchCV object
+
+    outfile_weights : str, optional
+        File path to save deep learning model weights
+
+    outfile_y_true : str, optional
+        File path to save true target values used for prediction
+
+    outfile_y_preds : str, optional
+        File path to save deep learning model predictions
+
+    groups : str
+        File path to dataset containing group labels
+
+    ref_seq : str
+        File path to dataset containing genome sequence file
+
+    intervals : str
+        File path to dataset containing interval file
+
+    targets : str
+        File path to dataset compressed target bed file
+
+    fasta_path : str
+        File path to dataset containing fasta file
+    """
+    warnings.simplefilter('ignore')
+
+    with open(inputs, 'r') as param_handler:
+        params = json.load(param_handler)
+
+    # load estimator
+    with open(infile_estimator, 'rb') as estimator_handler:
+        estimator = load_model(estimator_handler)
+
+    estimator = clean_params(estimator)
+
+    # swap hyperparameters
+    swapping = params['experiment_schemes']['hyperparams_swapping']
+    swap_params = _eval_swap_params(swapping)
+    estimator.set_params(**swap_params)
+
+    estimator_params = estimator.get_params()
+
+    # store read dataframe object
+    loaded_df = {}
+
+    input_type = params['input_options']['selected_input']
+    # tabular input
+    if input_type == 'tabular':
+        header = 'infer' if params['input_options']['header1'] else None
+        column_option = (params['input_options']['column_selector_options_1']
+                         ['selected_column_selector_option'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
+            c = params['input_options']['column_selector_options_1']['col1']
+        else:
+            c = None
+
+        df_key = infile1 + repr(header)
+        df = pd.read_csv(infile1, sep='\t', header=header,
+                         parse_dates=True)
+        loaded_df[df_key] = df
+
+        X = read_columns(df, c=c, c_option=column_option).astype(float)
+    # sparse input
+    elif input_type == 'sparse':
+        X = mmread(open(infile1, 'r'))
+
+    # fasta_file input
+    elif input_type == 'seq_fasta':
+        pyfaidx = get_module('pyfaidx')
+        sequences = pyfaidx.Fasta(fasta_path)
+        n_seqs = len(sequences.keys())
+        X = np.arange(n_seqs)[:, np.newaxis]
+        for param in estimator_params.keys():
+            if param.endswith('fasta_path'):
+                estimator.set_params(
+                    **{param: fasta_path})
+                break
+        else:
+            raise ValueError(
+                "The selected estimator doesn't support "
+                "fasta file input!
Please consider using " + "KerasGBatchClassifier with " + "FastaDNABatchGenerator/FastaProteinBatchGenerator " + "or having GenomeOneHotEncoder/ProteinOneHotEncoder " + "in pipeline!") + + elif input_type == 'refseq_and_interval': + path_params = { + 'data_batch_generator__ref_genome_path': ref_seq, + 'data_batch_generator__intervals_path': intervals, + 'data_batch_generator__target_path': targets + } + estimator.set_params(**path_params) + n_intervals = sum(1 for line in open(intervals)) + X = np.arange(n_intervals)[:, np.newaxis] + + # Get target y + header = 'infer' if params['input_options']['header2'] else None + column_option = (params['input_options']['column_selector_options_2'] + ['selected_column_selector_option2']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = params['input_options']['column_selector_options_2']['col2'] + else: + c = None + + df_key = infile2 + repr(header) + if df_key in loaded_df: + infile2 = loaded_df[df_key] + else: + infile2 = pd.read_csv(infile2, sep='\t', + header=header, parse_dates=True) + loaded_df[df_key] = infile2 + + y = read_columns( + infile2, + c=c, + c_option=column_option, + sep='\t', + header=header, + parse_dates=True) + if len(y.shape) == 2 and y.shape[1] == 1: + y = y.ravel() + if input_type == 'refseq_and_interval': + estimator.set_params( + data_batch_generator__features=y.ravel().tolist()) + y = None + # end y + + # load groups + if groups: + groups_selector = (params['experiment_schemes']['test_split'] + ['split_algos']).pop('groups_selector') + + header = 'infer' if groups_selector['header_g'] else None + column_option = \ + (groups_selector['column_selector_options_g'] + ['selected_column_selector_option_g']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = groups_selector['column_selector_options_g']['col_g'] + else: + c = None + + df_key = groups + repr(header) + if df_key in loaded_df: + groups = loaded_df[df_key] + + groups = read_columns( + groups, + c=c, + c_option=column_option, + sep='\t', + header=header, + parse_dates=True) + groups = groups.ravel() + + # del loaded_df + del loaded_df + + # cache iraps_core fits could increase search speed significantly + memory = joblib.Memory(location=CACHE_DIR, verbose=0) + main_est = get_main_estimator(estimator) + if main_est.__class__.__name__ == 'IRAPSClassifier': + main_est.set_params(memory=memory) + + # handle scorer, convert to scorer dict + scoring = params['experiment_schemes']['metrics']['scoring'] + scorer = get_scoring(scoring) + scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer) + + # handle test (first) split + test_split_options = (params['experiment_schemes'] + ['test_split']['split_algos']) + + if test_split_options['shuffle'] == 'group': + test_split_options['labels'] = groups + if test_split_options['shuffle'] == 'stratified': + if y is not None: + test_split_options['labels'] = y + else: + raise ValueError("Stratified shuffle split is not " + "applicable on empty target values!") + + X_train, X_test, y_train, y_test, groups_train, groups_test = \ + train_test_split_none(X, y, groups, **test_split_options) + + exp_scheme = params['experiment_schemes']['selected_exp_scheme'] + + # handle validation (second) split + if exp_scheme == 'train_val_test': + val_split_options = (params['experiment_schemes'] + ['val_split']['split_algos']) + + if val_split_options['shuffle'] == 'group': + val_split_options['labels'] 
= groups_train + if val_split_options['shuffle'] == 'stratified': + if y_train is not None: + val_split_options['labels'] = y_train + else: + raise ValueError("Stratified shuffle split is not " + "applicable on empty target values!") + + X_train, X_val, y_train, y_val, groups_train, groups_val = \ + train_test_split_none(X_train, y_train, groups_train, + **val_split_options) + + # train and eval + if hasattr(estimator, 'validation_data'): + if exp_scheme == 'train_val_test': + estimator.fit(X_train, y_train, + validation_data=(X_val, y_val)) + else: + estimator.fit(X_train, y_train, + validation_data=(X_test, y_test)) + else: + estimator.fit(X_train, y_train) + + if hasattr(estimator, 'evaluate'): + steps = estimator.prediction_steps + batch_size = estimator.batch_size + generator = estimator.data_generator_.flow(X_test, y=y_test, + batch_size=batch_size) + predictions, y_true = _predict_generator(estimator.model_, generator, + steps=steps) + scores = _evaluate(y_true, predictions, scorer, is_multimetric=True) + + else: + if hasattr(estimator, 'predict_proba'): + predictions = estimator.predict_proba(X_test) + else: + predictions = estimator.predict(X_test) + + y_true = y_test + scores = _score(estimator, X_test, y_test, scorer, + is_multimetric=True) + if outfile_y_true: + try: + pd.DataFrame(y_true).to_csv(outfile_y_true, sep='\t', + index=False) + pd.DataFrame(predictions).astype(np.float32).to_csv( + outfile_y_preds, sep='\t', index=False, + float_format='%g', chunksize=10000) + except Exception as e: + print("Error in saving predictions: %s" % e) + + # handle output + for name, score in scores.items(): + scores[name] = [score] + df = pd.DataFrame(scores) + df = df[sorted(df.columns)] + df.to_csv(path_or_buf=outfile_result, sep='\t', + header=True, index=False) + + memory.clear(warn=False) + + if outfile_object: + main_est = estimator + if isinstance(estimator, Pipeline): + main_est = estimator.steps[-1][-1] + + if hasattr(main_est, 'model_') \ + and hasattr(main_est, 'save_weights'): + if outfile_weights: + main_est.save_weights(outfile_weights) + del main_est.model_ + del main_est.fit_params + del main_est.model_class_ + del main_est.validation_data + if getattr(main_est, 'data_generator_', None): + del main_est.data_generator_ + + with open(outfile_object, 'wb') as output_handler: + pickle.dump(estimator, output_handler, + pickle.HIGHEST_PROTOCOL) + + +if __name__ == '__main__': + aparser = argparse.ArgumentParser() + aparser.add_argument("-i", "--inputs", dest="inputs", required=True) + aparser.add_argument("-e", "--estimator", dest="infile_estimator") + aparser.add_argument("-X", "--infile1", dest="infile1") + aparser.add_argument("-y", "--infile2", dest="infile2") + aparser.add_argument("-O", "--outfile_result", dest="outfile_result") + aparser.add_argument("-o", "--outfile_object", dest="outfile_object") + aparser.add_argument("-w", "--outfile_weights", dest="outfile_weights") + aparser.add_argument("-l", "--outfile_y_true", dest="outfile_y_true") + aparser.add_argument("-p", "--outfile_y_preds", dest="outfile_y_preds") + aparser.add_argument("-g", "--groups", dest="groups") + aparser.add_argument("-r", "--ref_seq", dest="ref_seq") + aparser.add_argument("-b", "--intervals", dest="intervals") + aparser.add_argument("-t", "--targets", dest="targets") + aparser.add_argument("-f", "--fasta_path", dest="fasta_path") + args = aparser.parse_args() + + main(args.inputs, args.infile_estimator, args.infile1, args.infile2, + args.outfile_result, outfile_object=args.outfile_object, + 
outfile_weights=args.outfile_weights, + outfile_y_true=args.outfile_y_true, + outfile_y_preds=args.outfile_y_preds, + groups=args.groups, + ref_seq=args.ref_seq, intervals=args.intervals, + targets=args.targets, fasta_path=args.fasta_path) diff -r 18b39ada6f35 -r 64b771b1471a main_macros.xml --- a/main_macros.xml Thu Nov 07 05:25:28 2019 -0500 +++ b/main_macros.xml Mon Dec 16 05:17:00 2019 -0500 @@ -1,12 +1,10 @@ - 1.0.7.12 - - 0.2.0 + 1.0.8.1 python - Galaxy-ML + Galaxy-ML @@ -235,8 +233,8 @@ - - + + @@ -763,6 +761,9 @@ + + + @@ -837,6 +838,42 @@ label="Use a copy of data for inplace scaling" help=" "/> + +
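The new keras_train_and_eval.py tool above relies on a `train_test_split_none` helper that tolerates None arrays. A minimal, self-contained sketch of that idea, using only scikit-learn and made-up toy data (the name `split_allowing_none` is illustrative, not part of the patch):

import numpy as np
from sklearn.model_selection import train_test_split

def split_allowing_none(*arrays, **kwargs):
    # remember which positional arrays were None, then split the rest
    nones = [i for i, a in enumerate(arrays) if a is None]
    present = [a for a in arrays if a is not None]
    rval = train_test_split(*present, **kwargs)
    # re-insert a (train, test) pair of Nones at each original position,
    # so callers can always unpack X/y/groups uniformly
    for pos in nones:
        rval[pos * 2: pos * 2] = [None, None]
    return rval

X = np.arange(20).reshape(10, 2)
X_tr, X_te, y_tr, y_te = split_allowing_none(X, None, test_size=0.3,
                                             random_state=0)
assert y_tr is None and y_te is None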
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1261,6 +1298,7 @@
+
@@ -1291,6 +1329,7 @@
+
@@ -1329,6 +1368,7 @@
+
@@ -1343,32 +1383,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
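The `_evaluate` helper in keras_train_and_eval.py above routes hard labels to prediction-based scorers and raw probabilities to everything else. A small sketch of that dispatch with stock scikit-learn scorers and toy data; the private `_score_func`/`_kwargs` attributes mirror the ones the script itself uses, so this assumes a scikit-learn release from the same era as the patch:

import numpy as np
from sklearn.metrics import get_scorer

y_true = np.array([0, 1, 1, 0, 1])
pred_probas = np.array([0.2, 0.8, 0.6, 0.4, 0.9])
pred_labels = (pred_probas > 0.5).astype('int32')  # same 0.5 threshold

for name in ('accuracy', 'roc_auc'):
    scorer = get_scorer(name)
    # _PredictScorer wraps label-based metrics; the others accept scores
    preds = pred_labels if scorer.__class__.__name__ == '_PredictScorer' \
        else pred_probas
    print(name, scorer._score_func(y_true, preds, **scorer._kwargs))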
@@ -1398,7 +1412,7 @@ - + @@ -1475,6 +1489,8 @@ + + diff -r 18b39ada6f35 -r 64b771b1471a ml_visualization_ex.py --- a/ml_visualization_ex.py Thu Nov 07 05:25:28 2019 -0500 +++ b/ml_visualization_ex.py Mon Dec 16 05:17:00 2019 -0500 @@ -1,6 +1,9 @@ import argparse import json +import matplotlib +import matplotlib.pyplot as plt import numpy as np +import os import pandas as pd import plotly import plotly.graph_objs as go @@ -17,6 +20,251 @@ safe_eval = SafeEval() +# plotly default colors +default_colors = [ + '#1f77b4', # muted blue + '#ff7f0e', # safety orange + '#2ca02c', # cooked asparagus green + '#d62728', # brick red + '#9467bd', # muted purple + '#8c564b', # chestnut brown + '#e377c2', # raspberry yogurt pink + '#7f7f7f', # middle gray + '#bcbd22', # curry yellow-green + '#17becf' # blue-teal +] + + +def visualize_pr_curve_plotly(df1, df2, pos_label, title=None): + """output pr-curve in html using plotly + + df1 : pandas.DataFrame + Containing y_true + df2 : pandas.DataFrame + Containing y_score + pos_label : None + The label of positive class + title : str + Plot title + """ + data = [] + for idx in range(df1.shape[1]): + y_true = df1.iloc[:, idx].values + y_score = df2.iloc[:, idx].values + + precision, recall, _ = precision_recall_curve( + y_true, y_score, pos_label=pos_label) + ap = average_precision_score( + y_true, y_score, pos_label=pos_label or 1) + + trace = go.Scatter( + x=recall, + y=precision, + mode='lines', + marker=dict( + color=default_colors[idx % len(default_colors)] + ), + name='%s (area = %.3f)' % (idx, ap) + ) + data.append(trace) + + layout = go.Layout( + xaxis=dict( + title='Recall', + linecolor='lightslategray', + linewidth=1 + ), + yaxis=dict( + title='Precision', + linecolor='lightslategray', + linewidth=1 + ), + title=dict( + text=title or 'Precision-Recall Curve', + x=0.5, + y=0.92, + xanchor='center', + yanchor='top' + ), + font=dict( + family="sans-serif", + size=11 + ), + # control backgroud colors + plot_bgcolor='rgba(255,255,255,0)' + ) + """ + legend=dict( + x=0.95, + y=0, + traceorder="normal", + font=dict( + family="sans-serif", + size=9, + color="black" + ), + bgcolor="LightSteelBlue", + bordercolor="Black", + borderwidth=2 + ),""" + + fig = go.Figure(data=data, layout=layout) + + plotly.offline.plot(fig, filename="output.html", auto_open=False) + # to be discovered by `from_work_dir` + os.rename('output.html', 'output') + + +def visualize_pr_curve_matplotlib(df1, df2, pos_label, title=None): + """visualize pr-curve using matplotlib and output svg image + """ + backend = matplotlib.get_backend() + if "inline" not in backend: + matplotlib.use("SVG") + plt.style.use('seaborn-colorblind') + plt.figure() + + for idx in range(df1.shape[1]): + y_true = df1.iloc[:, idx].values + y_score = df2.iloc[:, idx].values + + precision, recall, _ = precision_recall_curve( + y_true, y_score, pos_label=pos_label) + ap = average_precision_score( + y_true, y_score, pos_label=pos_label or 1) + + plt.step(recall, precision, 'r-', color="black", alpha=0.3, + lw=1, where="post", label='%s (area = %.3f)' % (idx, ap)) + + plt.xlim([0.0, 1.0]) + plt.ylim([0.0, 1.05]) + plt.xlabel('Recall') + plt.ylabel('Precision') + title = title or 'Precision-Recall Curve' + plt.title(title) + folder = os.getcwd() + plt.savefig(os.path.join(folder, "output.svg"), format="svg") + os.rename(os.path.join(folder, "output.svg"), + os.path.join(folder, "output")) + + +def visualize_roc_curve_plotly(df1, df2, pos_label, + drop_intermediate=True, + title=None): + """output roc-curve in html using 
plotly + + df1 : pandas.DataFrame + Containing y_true + df2 : pandas.DataFrame + Containing y_score + pos_label : None + The label of positive class + drop_intermediate : bool + Whether to drop some suboptimal thresholds + title : str + Plot title + """ + data = [] + for idx in range(df1.shape[1]): + y_true = df1.iloc[:, idx].values + y_score = df2.iloc[:, idx].values + + fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label, + drop_intermediate=drop_intermediate) + roc_auc = auc(fpr, tpr) + + trace = go.Scatter( + x=fpr, + y=tpr, + mode='lines', + marker=dict( + color=default_colors[idx % len(default_colors)] + ), + name='%s (area = %.3f)' % (idx, roc_auc) + ) + data.append(trace) + + layout = go.Layout( + xaxis=dict( + title='False Positive Rate', + linecolor='lightslategray', + linewidth=1 + ), + yaxis=dict( + title='True Positive Rate', + linecolor='lightslategray', + linewidth=1 + ), + title=dict( + text=title or 'Receiver Operating Characteristic (ROC) Curve', + x=0.5, + y=0.92, + xanchor='center', + yanchor='top' + ), + font=dict( + family="sans-serif", + size=11 + ), + # control backgroud colors + plot_bgcolor='rgba(255,255,255,0)' + ) + """ + # legend=dict( + # x=0.95, + # y=0, + # traceorder="normal", + # font=dict( + # family="sans-serif", + # size=9, + # color="black" + # ), + # bgcolor="LightSteelBlue", + # bordercolor="Black", + # borderwidth=2 + # ), + """ + + fig = go.Figure(data=data, layout=layout) + + plotly.offline.plot(fig, filename="output.html", auto_open=False) + # to be discovered by `from_work_dir` + os.rename('output.html', 'output') + + +def visualize_roc_curve_matplotlib(df1, df2, pos_label, + drop_intermediate=True, + title=None): + """visualize roc-curve using matplotlib and output svg image + """ + backend = matplotlib.get_backend() + if "inline" not in backend: + matplotlib.use("SVG") + plt.style.use('seaborn-colorblind') + plt.figure() + + for idx in range(df1.shape[1]): + y_true = df1.iloc[:, idx].values + y_score = df2.iloc[:, idx].values + + fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=pos_label, + drop_intermediate=drop_intermediate) + roc_auc = auc(fpr, tpr) + + plt.step(fpr, tpr, 'r-', color="black", alpha=0.3, lw=1, + where="post", label='%s (area = %.3f)' % (idx, roc_auc)) + + plt.xlim([0.0, 1.0]) + plt.ylim([0.0, 1.05]) + plt.xlabel('False Positive Rate') + plt.ylabel('True Positive Rate') + title = title or 'Receiver Operating Characteristic (ROC) Curve' + plt.title(title) + folder = os.getcwd() + plt.savefig(os.path.join(folder, "output.svg"), format="svg") + os.rename(os.path.join(folder, "output.svg"), + os.path.join(folder, "output")) + def main(inputs, infile_estimator=None, infile1=None, infile2=None, outfile_result=None, @@ -71,6 +319,8 @@ title = params['plotting_selection']['title'].strip() plot_type = params['plotting_selection']['plot_type'] + plot_format = params['plotting_selection']['plot_format'] + if plot_type == 'feature_importances': with open(infile_estimator, 'rb') as estimator_handler: estimator = load_model(estimator_handler) @@ -123,98 +373,46 @@ layout = go.Layout(title=title or "Feature Importances") fig = go.Figure(data=[trace], layout=layout) - elif plot_type == 'pr_curve': - df1 = pd.read_csv(infile1, sep='\t', header=None) - df2 = pd.read_csv(infile2, sep='\t', header=None) + plotly.offline.plot(fig, filename="output.html", + auto_open=False) + # to be discovered by `from_work_dir` + os.rename('output.html', 'output') + + return 0 - precision = {} - recall = {} - ap = {} + elif plot_type in ('pr_curve', 
'roc_curve'):
+        df1 = pd.read_csv(infile1, sep='\t', header='infer')
+        df2 = pd.read_csv(infile2, sep='\t', header='infer').astype(np.float32)
+
+        minimum = params['plotting_selection']['report_minimum_n_positives']
+        # filter out columns whose n_positives is below the threshold
+        if minimum:
+            mask = df1.sum(axis=0) >= minimum
+            df1 = df1.loc[:, mask]
+            df2 = df2.loc[:, mask]

         pos_label = params['plotting_selection']['pos_label'].strip() \
             or None
-        for col in df1.columns:
-            y_true = df1[col].values
-            y_score = df2[col].values
-
-            precision[col], recall[col], _ = precision_recall_curve(
-                y_true, y_score, pos_label=pos_label)
-            ap[col] = average_precision_score(
-                y_true, y_score, pos_label=pos_label or 1)
-
-        if len(df1.columns) > 1:
-            precision["micro"], recall["micro"], _ = precision_recall_curve(
-                df1.values.ravel(), df2.values.ravel(), pos_label=pos_label)
-            ap['micro'] = average_precision_score(
-                df1.values, df2.values, average='micro',
-                pos_label=pos_label or 1)
-
-        data = []
-        for key in precision.keys():
-            trace = go.Scatter(
-                x=recall[key],
-                y=precision[key],
-                mode='lines',
-                name='%s (area = %.2f)' % (key, ap[key]) if key == 'micro'
-                     else 'column %s (area = %.2f)' % (key, ap[key])
-            )
-            data.append(trace)
-
-        layout = go.Layout(
-            title=title or "Precision-Recall curve",
-            xaxis=dict(title='Recall'),
-            yaxis=dict(title='Precision')
-        )
-
-        fig = go.Figure(data=data, layout=layout)
-
-    elif plot_type == 'roc_curve':
-        df1 = pd.read_csv(infile1, sep='\t', header=None)
-        df2 = pd.read_csv(infile2, sep='\t', header=None)
-        fpr = {}
-        tpr = {}
-        roc_auc = {}
-
-        pos_label = params['plotting_selection']['pos_label'].strip() \
-            or None
-        for col in df1.columns:
-            y_true = df1[col].values
-            y_score = df2[col].values
-
-            fpr[col], tpr[col], _ = roc_curve(
-                y_true, y_score, pos_label=pos_label)
-            roc_auc[col] = auc(fpr[col], tpr[col])
-
-        if len(df1.columns) > 1:
-            fpr["micro"], tpr["micro"], _ = roc_curve(
-                df1.values.ravel(), df2.values.ravel(), pos_label=pos_label)
-            roc_auc['micro'] = auc(fpr["micro"], tpr["micro"])
+        if plot_type == 'pr_curve':
+            if plot_format == 'plotly_html':
+                visualize_pr_curve_plotly(df1, df2, pos_label, title=title)
+            else:
+                visualize_pr_curve_matplotlib(df1, df2, pos_label, title)
+        else:    # 'roc_curve'
+            drop_intermediate = (params['plotting_selection']
+                                 ['drop_intermediate'])
+            if plot_format == 'plotly_html':
+                visualize_roc_curve_plotly(df1, df2, pos_label,
+                                           drop_intermediate=drop_intermediate,
+                                           title=title)
+            else:
+                visualize_roc_curve_matplotlib(
+                    df1, df2, pos_label,
+                    drop_intermediate=drop_intermediate,
+                    title=title)
-        data = []
-        for key in fpr.keys():
-            trace = go.Scatter(
-                x=fpr[key],
-                y=tpr[key],
-                mode='lines',
-                name='%s (area = %.2f)' % (key, roc_auc[key]) if key == 'micro'
-                     else 'column %s (area = %.2f)' % (key, roc_auc[key])
-            )
-            data.append(trace)
-
-        trace = go.Scatter(x=[0, 1], y=[0, 1],
-                           mode='lines',
-                           line=dict(color='black', dash='dash'),
-                           showlegend=False)
-        data.append(trace)
-
-        layout = go.Layout(
-            title=title or "Receiver operating characteristic curve",
-            xaxis=dict(title='False Positive Rate'),
-            yaxis=dict(title='True Positive Rate')
-        )
-
-        fig = go.Figure(data=data, layout=layout)
+
+        return 0

     elif plot_type == 'rfecv_gridscores':
         input_df = pd.read_csv(infile1, sep='\t', header='infer')
@@ -231,10 +429,43 @@
         layout = go.Layout(
             xaxis=dict(title="Number of features selected"),
             yaxis=dict(title="Cross validation score"),
-            title=title or None
+            title=dict(
+                text=title or None,
+                x=0.5,
+                y=0.92,
+
xanchor='center', + yanchor='top' + ), + font=dict( + family="sans-serif", + size=11 + ), + # control backgroud colors + plot_bgcolor='rgba(255,255,255,0)' ) + """ + # legend=dict( + # x=0.95, + # y=0, + # traceorder="normal", + # font=dict( + # family="sans-serif", + # size=9, + # color="black" + # ), + # bgcolor="LightSteelBlue", + # bordercolor="Black", + # borderwidth=2 + # ), + """ fig = go.Figure(data=[data], layout=layout) + plotly.offline.plot(fig, filename="output.html", + auto_open=False) + # to be discovered by `from_work_dir` + os.rename('output.html', 'output') + + return 0 elif plot_type == 'learning_curve': input_df = pd.read_csv(infile1, sep='\t', header='infer') @@ -264,23 +495,57 @@ yaxis=dict( title='Performance Score' ), - title=title or 'Learning Curve' + # modify these configurations to customize image + title=dict( + text=title or 'Learning Curve', + x=0.5, + y=0.92, + xanchor='center', + yanchor='top' + ), + font=dict( + family="sans-serif", + size=11 + ), + # control backgroud colors + plot_bgcolor='rgba(255,255,255,0)' ) + """ + # legend=dict( + # x=0.95, + # y=0, + # traceorder="normal", + # font=dict( + # family="sans-serif", + # size=9, + # color="black" + # ), + # bgcolor="LightSteelBlue", + # bordercolor="Black", + # borderwidth=2 + # ), + """ + fig = go.Figure(data=[data1, data2], layout=layout) + plotly.offline.plot(fig, filename="output.html", + auto_open=False) + # to be discovered by `from_work_dir` + os.rename('output.html', 'output') + + return 0 elif plot_type == 'keras_plot_model': with open(model_config, 'r') as f: model_str = f.read() model = model_from_json(model_str) plot_model(model, to_file="output.png") - __import__('os').rename('output.png', 'output') + os.rename('output.png', 'output') return 0 - plotly.offline.plot(fig, filename="output.html", - auto_open=False) - # to be discovered by `from_work_dir` - __import__('os').rename('output.html', 'output') + # save pdf file to disk + # fig.write_image("image.pdf", format='pdf') + # fig.write_image("image.pdf", format='pdf', width=340*2, height=226*2) if __name__ == '__main__': diff -r 18b39ada6f35 -r 64b771b1471a model_prediction.py --- a/model_prediction.py Thu Nov 07 05:25:28 2019 -0500 +++ b/model_prediction.py Mon Dec 16 05:17:00 2019 -0500 @@ -2,13 +2,11 @@ import json import numpy as np import pandas as pd -import tabix import warnings from scipy.io import mmread from sklearn.pipeline import Pipeline -from galaxy_ml.externals.selene_sdk.sequences import Genome from galaxy_ml.utils import (load_model, read_columns, get_module, try_get_attr) @@ -138,45 +136,10 @@ pred_data_generator = klass( ref_genome_path=ref_seq, vcf_path=vcf_path, **options) - pred_data_generator.fit() + pred_data_generator.set_processing_attrs() variants = pred_data_generator.variants - # TODO : remove the following block after galaxy-ml v0.7.13 - blacklist_tabix = getattr(pred_data_generator.reference_genome_, - '_blacklist_tabix', None) - clean_variants = [] - if blacklist_tabix: - start_radius = pred_data_generator.start_radius_ - end_radius = pred_data_generator.end_radius_ - for chrom, pos, name, ref, alt, strand in variants: - center = pos + len(ref) // 2 - start = center - start_radius - end = center + end_radius - - if isinstance(pred_data_generator.reference_genome_, Genome): - if "chr" not in chrom: - chrom = "chr" + chrom - if "MT" in chrom: - chrom = chrom[:-1] - try: - rows = blacklist_tabix.query(chrom, start, end) - found = 0 - for row in rows: - found = 1 - break - if found: - continue - except 
tabix.TabixError: - pass - - clean_variants.append((chrom, pos, name, ref, alt, strand)) - else: - clean_variants = variants - - setattr(pred_data_generator, 'variants', clean_variants) - - variants = np.array(clean_variants) # predict 1600 sample at once then write to file gen_flow = pred_data_generator.flow(batch_size=1600) diff -r 18b39ada6f35 -r 64b771b1471a search_model_validation.py --- a/search_model_validation.py Thu Nov 07 05:25:28 2019 -0500 +++ b/search_model_validation.py Mon Dec 16 05:17:00 2019 -0500 @@ -4,41 +4,35 @@ import joblib import json import numpy as np +import os import pandas as pd import pickle import skrebate -import sklearn import sys -import xgboost import warnings -from imblearn import under_sampling, over_sampling, combine from scipy.io import mmread -from mlxtend import classifier, regressor -from sklearn.base import clone -from sklearn import (cluster, compose, decomposition, ensemble, - feature_extraction, feature_selection, - gaussian_process, kernel_approximation, metrics, - model_selection, naive_bayes, neighbors, - pipeline, preprocessing, svm, linear_model, - tree, discriminant_analysis) +from sklearn import (cluster, decomposition, feature_selection, + kernel_approximation, model_selection, preprocessing) from sklearn.exceptions import FitFailedWarning from sklearn.model_selection._validation import _score, cross_validate from sklearn.model_selection import _search, _validation +from sklearn.pipeline import Pipeline from galaxy_ml.utils import (SafeEval, get_cv, get_scoring, load_model, - read_columns, try_get_attr, get_module) + read_columns, try_get_attr, get_module, + clean_params, get_main_estimator) _fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score') setattr(_search, '_fit_and_score', _fit_and_score) setattr(_validation, '_fit_and_score', _fit_and_score) -N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) -CACHE_DIR = './cached' +N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1)) +# handle disk cache +CACHE_DIR = os.path.join(os.getcwd(), 'cached') +del os NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path', 'nthread', 'callbacks') -ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau', - 'CSVLogger', 'None') def _eval_search_params(params_builder): @@ -164,74 +158,40 @@ return search_params -def main(inputs, infile_estimator, infile1, infile2, - outfile_result, outfile_object=None, - outfile_weights=None, groups=None, - ref_seq=None, intervals=None, targets=None, - fasta_path=None): - """ - Parameter - --------- - inputs : str - File path to galaxy tool parameter +def _handle_X_y(estimator, params, infile1, infile2, loaded_df={}, + ref_seq=None, intervals=None, targets=None, + fasta_path=None): + """read inputs - infile_estimator : str - File path to estimator - + Params + ------- + estimator : estimator object + params : dict + Galaxy tool parameter inputs infile1 : str File path to dataset containing features - infile2 : str File path to dataset containing target values - - outfile_result : str - File path to save the results, either cv_results or test result - - outfile_object : str, optional - File path to save searchCV object - - outfile_weights : str, optional - File path to save model weights - - groups : str - File path to dataset containing groups labels - + loaded_df : dict + Contains loaded DataFrame objects with file path as keys ref_seq : str File path to dataset containing genome sequence file - - intervals : str + interval : str File path to dataset containing 
interval file - targets : str File path to dataset compressed target bed file - fasta_path : str File path to dataset containing fasta file - """ - warnings.simplefilter('ignore') - with open(inputs, 'r') as param_handler: - params = json.load(param_handler) - - # conflict param checker - if params['outer_split']['split_mode'] == 'nested_cv' \ - and params['save'] != 'nope': - raise ValueError("Save best estimator is not possible for nested CV!") - if not (params['search_schemes']['options']['refit']) \ - and params['save'] != 'nope': - raise ValueError("Save best estimator is not possible when refit " - "is False!") - - params_builder = params['search_schemes']['search_params_builder'] - - with open(infile_estimator, 'rb') as estimator_handler: - estimator = load_model(estimator_handler) + Returns + ------- + estimator : estimator object after setting new attributes + X : numpy array + y : numpy array + """ estimator_params = estimator.get_params() - # store read dataframe object - loaded_df = {} - input_type = params['input_options']['selected_input'] # tabular input if input_type == 'tabular': @@ -245,6 +205,10 @@ c = None df_key = infile1 + repr(header) + + if df_key in loaded_df: + infile1 = loaded_df[df_key] + df = pd.read_csv(infile1, sep='\t', header=header, parse_dates=True) loaded_df[df_key] = df @@ -317,6 +281,196 @@ y = None # end y + return estimator, X, y + + +def _do_outer_cv(searcher, X, y, outer_cv, scoring, error_score='raise', + outfile=None): + """Do outer cross-validation for nested CV + + Parameters + ---------- + searcher : object + SearchCV object + X : numpy array + Containing features + y : numpy array + Target values or labels + outer_cv : int or CV splitter + Control the cv splitting + scoring : object + Scorer + error_score: str, float or numpy float + Whether to raise fit error or return an value + outfile : str + File path to store the restuls + """ + if error_score == 'raise': + rval = cross_validate( + searcher, X, y, scoring=scoring, + cv=outer_cv, n_jobs=N_JOBS, verbose=0, + error_score=error_score) + else: + warnings.simplefilter('always', FitFailedWarning) + with warnings.catch_warnings(record=True) as w: + try: + rval = cross_validate( + searcher, X, y, + scoring=scoring, + cv=outer_cv, n_jobs=N_JOBS, + verbose=0, + error_score=error_score) + except ValueError: + pass + for warning in w: + print(repr(warning.message)) + + keys = list(rval.keys()) + for k in keys: + if k.startswith('test'): + rval['mean_' + k] = np.mean(rval[k]) + rval['std_' + k] = np.std(rval[k]) + if k.endswith('time'): + rval.pop(k) + rval = pd.DataFrame(rval) + rval = rval[sorted(rval.columns)] + rval.to_csv(path_or_buf=outfile, sep='\t', header=True, index=False) + + +def _do_train_test_split_val(searcher, X, y, params, error_score='raise', + primary_scoring=None, groups=None, + outfile=None): + """ do train test split, searchCV validates on the train and then use + the best_estimator_ to evaluate on the test + + Returns + -------- + Fitted SearchCV object + """ + train_test_split = try_get_attr( + 'galaxy_ml.model_validations', 'train_test_split') + split_options = params['outer_split'] + + # splits + if split_options['shuffle'] == 'stratified': + split_options['labels'] = y + X, X_test, y, y_test = train_test_split(X, y, **split_options) + elif split_options['shuffle'] == 'group': + if groups is None: + raise ValueError("No group based CV option was choosen for " + "group shuffle!") + split_options['labels'] = groups + if y is None: + X, X_test, groups, _ =\ + 
train_test_split(X, groups, **split_options) + else: + X, X_test, y, y_test, groups, _ =\ + train_test_split(X, y, groups, **split_options) + else: + if split_options['shuffle'] == 'None': + split_options['shuffle'] = None + X, X_test, y, y_test =\ + train_test_split(X, y, **split_options) + + if error_score == 'raise': + searcher.fit(X, y, groups=groups) + else: + warnings.simplefilter('always', FitFailedWarning) + with warnings.catch_warnings(record=True) as w: + try: + searcher.fit(X, y, groups=groups) + except ValueError: + pass + for warning in w: + print(repr(warning.message)) + + scorer_ = searcher.scorer_ + if isinstance(scorer_, collections.Mapping): + is_multimetric = True + else: + is_multimetric = False + + best_estimator_ = getattr(searcher, 'best_estimator_') + + # TODO Solve deep learning models in pipeline + if best_estimator_.__class__.__name__ == 'KerasGBatchClassifier': + test_score = best_estimator_.evaluate( + X_test, scorer=scorer_, is_multimetric=is_multimetric) + else: + test_score = _score(best_estimator_, X_test, + y_test, scorer_, + is_multimetric=is_multimetric) + + if not is_multimetric: + test_score = {primary_scoring: test_score} + for key, value in test_score.items(): + test_score[key] = [value] + result_df = pd.DataFrame(test_score) + result_df.to_csv(path_or_buf=outfile, sep='\t', header=True, + index=False) + + return searcher + + +def main(inputs, infile_estimator, infile1, infile2, + outfile_result, outfile_object=None, + outfile_weights=None, groups=None, + ref_seq=None, intervals=None, targets=None, + fasta_path=None): + """ + Parameter + --------- + inputs : str + File path to galaxy tool parameter + + infile_estimator : str + File path to estimator + + infile1 : str + File path to dataset containing features + + infile2 : str + File path to dataset containing target values + + outfile_result : str + File path to save the results, either cv_results or test result + + outfile_object : str, optional + File path to save searchCV object + + outfile_weights : str, optional + File path to save model weights + + groups : str + File path to dataset containing groups labels + + ref_seq : str + File path to dataset containing genome sequence file + + intervals : str + File path to dataset containing interval file + + targets : str + File path to dataset compressed target bed file + + fasta_path : str + File path to dataset containing fasta file + """ + warnings.simplefilter('ignore') + + # store read dataframe object + loaded_df = {} + + with open(inputs, 'r') as param_handler: + params = json.load(param_handler) + + # Override the refit parameter + params['search_schemes']['options']['refit'] = True \ + if params['save'] != 'nope' else False + + with open(infile_estimator, 'rb') as estimator_handler: + estimator = load_model(estimator_handler) + optimizer = params['search_schemes']['selected_search_scheme'] optimizer = getattr(model_selection, optimizer) @@ -337,8 +491,10 @@ c = None df_key = groups + repr(header) - if df_key in loaded_df: - groups = loaded_df[df_key] + + groups = pd.read_csv(groups, sep='\t', header=header, + parse_dates=True) + loaded_df[df_key] = groups groups = read_columns( groups, @@ -352,7 +508,6 @@ splitter, groups = get_cv(options.pop('cv_selector')) options['cv'] = splitter - options['n_jobs'] = N_JOBS primary_scoring = options['scoring']['primary_scoring'] options['scoring'] = get_scoring(options['scoring']) if options['error_score']: @@ -364,55 +519,56 @@ if 'pre_dispatch' in options and options['pre_dispatch'] == '': 
options['pre_dispatch'] = None - # del loaded_df - del loaded_df + params_builder = params['search_schemes']['search_params_builder'] + param_grid = _eval_search_params(params_builder) + + estimator = clean_params(estimator) - # handle memory - memory = joblib.Memory(location=CACHE_DIR, verbose=0) + # save the SearchCV object without fit + if params['save'] == 'save_no_fit': + searcher = optimizer(estimator, param_grid, **options) + print(searcher) + with open(outfile_object, 'wb') as output_handler: + pickle.dump(searcher, output_handler, + pickle.HIGHEST_PROTOCOL) + return 0 + + # read inputs and loads new attributes, like paths + estimator, X, y = _handle_X_y(estimator, params, infile1, infile2, + loaded_df=loaded_df, ref_seq=ref_seq, + intervals=intervals, targets=targets, + fasta_path=fasta_path) + # cache iraps_core fits could increase search speed significantly - if estimator.__class__.__name__ == 'IRAPSClassifier': - estimator.set_params(memory=memory) - else: - # For iraps buried in pipeline - for p, v in estimator_params.items(): - if p.endswith('memory'): - # for case of `__irapsclassifier__memory` - if len(p) > 8 and p[:-8].endswith('irapsclassifier'): - # cache iraps_core fits could increase search - # speed significantly - new_params = {p: memory} - estimator.set_params(**new_params) - # security reason, we don't want memory being - # modified unexpectedly - elif v: - new_params = {p, None} - estimator.set_params(**new_params) - # For now, 1 CPU is suggested for iprasclassifier - elif p.endswith('n_jobs'): - new_params = {p: 1} - estimator.set_params(**new_params) - # for security reason, types of callbacks are limited - elif p.endswith('callbacks'): - for cb in v: - cb_type = cb['callback_selection']['callback_type'] - if cb_type not in ALLOWED_CALLBACKS: - raise ValueError( - "Prohibited callback type: %s!" 
% cb_type) + memory = joblib.Memory(location=CACHE_DIR, verbose=0) + main_est = get_main_estimator(estimator) + if main_est.__class__.__name__ == 'IRAPSClassifier': + main_est.set_params(memory=memory) - param_grid = _eval_search_params(params_builder) searcher = optimizer(estimator, param_grid, **options) - # do nested split split_mode = params['outer_split'].pop('split_mode') - # nested CV, outer cv using cross_validate + if split_mode == 'nested_cv': + # make sure refit is choosen + # this could be True for sklearn models, but not the case for + # deep learning models + if not options['refit'] and \ + not all(hasattr(estimator, attr) + for attr in ('config', 'model_type')): + warnings.warn("Refit is change to `True` for nested validation!") + setattr(searcher, 'refit', True) + outer_cv, _ = get_cv(params['outer_split']['cv_selector']) - + # nested CV, outer cv using cross_validate if options['error_score'] == 'raise': rval = cross_validate( searcher, X, y, scoring=options['scoring'], - cv=outer_cv, n_jobs=N_JOBS, verbose=0, - error_score=options['error_score']) + cv=outer_cv, n_jobs=N_JOBS, + verbose=options['verbose'], + return_estimator=(params['save'] == 'save_estimator'), + error_score=options['error_score'], + return_train_score=True) else: warnings.simplefilter('always', FitFailedWarning) with warnings.catch_warnings(record=True) as w: @@ -421,13 +577,38 @@ searcher, X, y, scoring=options['scoring'], cv=outer_cv, n_jobs=N_JOBS, - verbose=0, - error_score=options['error_score']) + verbose=options['verbose'], + return_estimator=(params['save'] == 'save_estimator'), + error_score=options['error_score'], + return_train_score=True) except ValueError: pass for warning in w: print(repr(warning.message)) + fitted_searchers = rval.pop('estimator', []) + if fitted_searchers: + import os + pwd = os.getcwd() + save_dir = os.path.join(pwd, 'cv_results_in_folds') + try: + os.mkdir(save_dir) + for idx, obj in enumerate(fitted_searchers): + target_name = 'cv_results_' + '_' + 'split%d' % idx + target_path = os.path.join(pwd, save_dir, target_name) + cv_results_ = getattr(obj, 'cv_results_', None) + if not cv_results_: + print("%s is not available" % target_name) + continue + cv_results_ = pd.DataFrame(cv_results_) + cv_results_ = cv_results_[sorted(cv_results_.columns)] + cv_results_.to_csv(target_path, sep='\t', header=True, + index=False) + except Exception as e: + print(e) + finally: + del os + keys = list(rval.keys()) for k in keys: if k.startswith('test'): @@ -437,46 +618,22 @@ rval.pop(k) rval = pd.DataFrame(rval) rval = rval[sorted(rval.columns)] - rval.to_csv(path_or_buf=outfile_result, sep='\t', - header=True, index=False) - else: - if split_mode == 'train_test_split': - train_test_split = try_get_attr( - 'galaxy_ml.model_validations', 'train_test_split') - # make sure refit is choosen - # this could be True for sklearn models, but not the case for - # deep learning models - if not options['refit'] and \ - not all(hasattr(estimator, attr) - for attr in ('config', 'model_type')): - warnings.warn("Refit is change to `True` for nested " - "validation!") - setattr(searcher, 'refit', True) - split_options = params['outer_split'] + rval.to_csv(path_or_buf=outfile_result, sep='\t', header=True, + index=False) + + return 0 - # splits - if split_options['shuffle'] == 'stratified': - split_options['labels'] = y - X, X_test, y, y_test = train_test_split(X, y, **split_options) - elif split_options['shuffle'] == 'group': - if groups is None: - raise ValueError("No group based CV option was " - 
"choosen for group shuffle!") - split_options['labels'] = groups - if y is None: - X, X_test, groups, _ =\ - train_test_split(X, groups, **split_options) - else: - X, X_test, y, y_test, groups, _ =\ - train_test_split(X, y, groups, **split_options) - else: - if split_options['shuffle'] == 'None': - split_options['shuffle'] = None - X, X_test, y, y_test =\ - train_test_split(X, y, **split_options) - # end train_test_split + # deprecate train test split mode + """searcher = _do_train_test_split_val( + searcher, X, y, params, + primary_scoring=primary_scoring, + error_score=options['error_score'], + groups=groups, + outfile=outfile_result)""" - # shared by both train_test_split and non-split + # no outer split + else: + searcher.set_params(n_jobs=N_JOBS) if options['error_score'] == 'raise': searcher.fit(X, y, groups=groups) else: @@ -489,47 +646,14 @@ for warning in w: print(repr(warning.message)) - # no outer split - if split_mode == 'no': - # save results - cv_results = pd.DataFrame(searcher.cv_results_) - cv_results = cv_results[sorted(cv_results.columns)] - cv_results.to_csv(path_or_buf=outfile_result, sep='\t', - header=True, index=False) - - # train_test_split, output test result using best_estimator_ - # or rebuild the trained estimator using weights if applicable. - else: - scorer_ = searcher.scorer_ - if isinstance(scorer_, collections.Mapping): - is_multimetric = True - else: - is_multimetric = False - - best_estimator_ = getattr(searcher, 'best_estimator_', None) - if not best_estimator_: - raise ValueError("GridSearchCV object has no " - "`best_estimator_` when `refit`=False!") - - if best_estimator_.__class__.__name__ == 'KerasGBatchClassifier' \ - and hasattr(estimator.data_batch_generator, 'target_path'): - test_score = best_estimator_.evaluate( - X_test, scorer=scorer_, is_multimetric=is_multimetric) - else: - test_score = _score(best_estimator_, X_test, - y_test, scorer_, - is_multimetric=is_multimetric) - - if not is_multimetric: - test_score = {primary_scoring: test_score} - for key, value in test_score.items(): - test_score[key] = [value] - result_df = pd.DataFrame(test_score) - result_df.to_csv(path_or_buf=outfile_result, sep='\t', - header=True, index=False) + cv_results = pd.DataFrame(searcher.cv_results_) + cv_results = cv_results[sorted(cv_results.columns)] + cv_results.to_csv(path_or_buf=outfile_result, sep='\t', + header=True, index=False) memory.clear(warn=False) + # output best estimator, and weights if applicable if outfile_object: best_estimator_ = getattr(searcher, 'best_estimator_', None) if not best_estimator_: @@ -538,9 +662,10 @@ "nested gridsearch or `refit` is False!") return - main_est = best_estimator_ - if isinstance(best_estimator_, pipeline.Pipeline): - main_est = best_estimator_.steps[-1][-1] + # clean prams + best_estimator_ = clean_params(best_estimator_) + + main_est = get_main_estimator(best_estimator_) if hasattr(main_est, 'model_') \ and hasattr(main_est, 'save_weights'): @@ -554,6 +679,7 @@ del main_est.data_generator_ with open(outfile_object, 'wb') as output_handler: + print("Best estimator is saved: %s " % repr(best_estimator_)) pickle.dump(best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL) diff -r 18b39ada6f35 -r 64b771b1471a test-data/RandomForestClassifier.zip Binary file test-data/RandomForestClassifier.zip has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/StackingCVRegressor01.zip Binary file test-data/StackingCVRegressor01.zip has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/StackingRegressor02.zip Binary 
file test-data/StackingRegressor02.zip has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/StackingVoting03.zip Binary file test-data/StackingVoting03.zip has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/abc_model01 Binary file test-data/abc_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/abr_model01 Binary file test-data/abr_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/best_estimator_.zip Binary file test-data/best_estimator_.zip has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/brier_score_loss.txt --- a/test-data/brier_score_loss.txt Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/brier_score_loss.txt Mon Dec 16 05:17:00 2019 -0500 @@ -1,2 +1,2 @@ brier_score_loss : -0.5641025641025641 +0.24051282051282052 diff -r 18b39ada6f35 -r 64b771b1471a test-data/classification_report.txt --- a/test-data/classification_report.txt Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/classification_report.txt Mon Dec 16 05:17:00 2019 -0500 @@ -5,7 +5,7 @@ 1 1.00 0.62 0.77 16 2 0.60 1.00 0.75 9 - micro avg 0.85 0.85 0.85 39 + accuracy 0.85 39 macro avg 0.87 0.88 0.84 39 weighted avg 0.91 0.85 0.85 39 diff -r 18b39ada6f35 -r 64b771b1471a test-data/gbc_model01 Binary file test-data/gbc_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/gbr_model01 Binary file test-data/gbr_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/get_params05.tabular --- a/test-data/get_params05.tabular Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/get_params05.tabular Mon Dec 16 05:17:00 2019 -0500 @@ -1,31 +1,18 @@ Parameter Value -* memory memory: None -* steps "steps: [('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None, - max_features='auto', max_leaf_nodes=None, - min_impurity_decrease=0.0, min_impurity_split=None, - min_samples_leaf=1, min_samples_split=2, - min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1, - oob_score=False, random_state=42, verbose=0, warm_start=False))]" -@ randomforestregressor "randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None, - max_features='auto', max_leaf_nodes=None, - min_impurity_decrease=0.0, min_impurity_split=None, - min_samples_leaf=1, min_samples_split=2, - min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1, - oob_score=False, random_state=42, verbose=0, warm_start=False)" -@ randomforestregressor__bootstrap randomforestregressor__bootstrap: True -@ randomforestregressor__criterion randomforestregressor__criterion: 'mse' -@ randomforestregressor__max_depth randomforestregressor__max_depth: None -@ randomforestregressor__max_features randomforestregressor__max_features: 'auto' -@ randomforestregressor__max_leaf_nodes randomforestregressor__max_leaf_nodes: None -@ randomforestregressor__min_impurity_decrease randomforestregressor__min_impurity_decrease: 0.0 -@ randomforestregressor__min_impurity_split randomforestregressor__min_impurity_split: None -@ randomforestregressor__min_samples_leaf randomforestregressor__min_samples_leaf: 1 -@ randomforestregressor__min_samples_split randomforestregressor__min_samples_split: 2 -@ randomforestregressor__min_weight_fraction_leaf randomforestregressor__min_weight_fraction_leaf: 0.0 -@ randomforestregressor__n_estimators randomforestregressor__n_estimators: 100 -* randomforestregressor__n_jobs randomforestregressor__n_jobs: 1 -@ randomforestregressor__oob_score randomforestregressor__oob_score: False -@ randomforestregressor__random_state 
randomforestregressor__random_state: 42 -* randomforestregressor__verbose randomforestregressor__verbose: 0 -@ randomforestregressor__warm_start randomforestregressor__warm_start: False - Note: @, searchable params in searchcv too. +@ bootstrap bootstrap: True +@ criterion criterion: 'mse' +@ max_depth max_depth: None +@ max_features max_features: 'auto' +@ max_leaf_nodes max_leaf_nodes: None +@ min_impurity_decrease min_impurity_decrease: 0.0 +@ min_impurity_split min_impurity_split: None +@ min_samples_leaf min_samples_leaf: 1 +@ min_samples_split min_samples_split: 2 +@ min_weight_fraction_leaf min_weight_fraction_leaf: 0.0 +@ n_estimators n_estimators: 100 +* n_jobs n_jobs: 1 +@ oob_score oob_score: False +@ random_state random_state: 42 +* verbose verbose: 0 +@ warm_start warm_start: False + Note: @, params eligible for search in searchcv tool. diff -r 18b39ada6f35 -r 64b771b1471a test-data/get_params12.tabular --- a/test-data/get_params12.tabular Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/get_params12.tabular Mon Dec 16 05:17:00 2019 -0500 @@ -1,47 +1,32 @@ Parameter Value -* memory memory: None -* steps "steps: [('rfe', RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, - max_depth=3, min_child_weight=1, missing=nan, n_estimators=100, - n_jobs=1, nthread=None, objective='reg:linear', random_state=0, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None, - silent=True, subsample=1), - n_features_to_select=None, step=1, verbose=0))]" -@ rfe "rfe: RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, - max_depth=3, min_child_weight=1, missing=nan, n_estimators=100, - n_jobs=1, nthread=None, objective='reg:linear', random_state=0, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None, - silent=True, subsample=1), - n_features_to_select=None, step=1, verbose=0)" -@ rfe__estimator__base_score rfe__estimator__base_score: 0.5 -@ rfe__estimator__booster rfe__estimator__booster: 'gbtree' -@ rfe__estimator__colsample_bylevel rfe__estimator__colsample_bylevel: 1 -@ rfe__estimator__colsample_bytree rfe__estimator__colsample_bytree: 1 -@ rfe__estimator__gamma rfe__estimator__gamma: 0 -@ rfe__estimator__learning_rate rfe__estimator__learning_rate: 0.1 -@ rfe__estimator__max_delta_step rfe__estimator__max_delta_step: 0 -@ rfe__estimator__max_depth rfe__estimator__max_depth: 3 -@ rfe__estimator__min_child_weight rfe__estimator__min_child_weight: 1 -@ rfe__estimator__missing rfe__estimator__missing: nan -@ rfe__estimator__n_estimators rfe__estimator__n_estimators: 100 -* rfe__estimator__n_jobs rfe__estimator__n_jobs: 1 -* rfe__estimator__nthread rfe__estimator__nthread: None -@ rfe__estimator__objective rfe__estimator__objective: 'reg:linear' -@ rfe__estimator__random_state rfe__estimator__random_state: 0 -@ rfe__estimator__reg_alpha rfe__estimator__reg_alpha: 0 -@ rfe__estimator__reg_lambda rfe__estimator__reg_lambda: 1 -@ rfe__estimator__scale_pos_weight rfe__estimator__scale_pos_weight: 1 -@ rfe__estimator__seed rfe__estimator__seed: None -@ rfe__estimator__silent rfe__estimator__silent: True -@ rfe__estimator__subsample rfe__estimator__subsample: 1 -@ rfe__estimator "rfe__estimator: XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1, +@ estimator "estimator: XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, 
max_delta_step=0, max_depth=3, min_child_weight=1, missing=nan, n_estimators=100, n_jobs=1, nthread=None, objective='reg:linear', random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None, silent=True, subsample=1)" -@ rfe__n_features_to_select rfe__n_features_to_select: None -@ rfe__step rfe__step: 1 -* rfe__verbose rfe__verbose: 0 - Note: @, searchable params in searchcv too. +@ n_features_to_select n_features_to_select: None +* step step: 1 +* verbose verbose: 0 +@ estimator__base_score estimator__base_score: 0.5 +@ estimator__booster estimator__booster: 'gbtree' +@ estimator__colsample_bylevel estimator__colsample_bylevel: 1 +@ estimator__colsample_bytree estimator__colsample_bytree: 1 +@ estimator__gamma estimator__gamma: 0 +@ estimator__learning_rate estimator__learning_rate: 0.1 +@ estimator__max_delta_step estimator__max_delta_step: 0 +@ estimator__max_depth estimator__max_depth: 3 +@ estimator__min_child_weight estimator__min_child_weight: 1 +@ estimator__missing estimator__missing: nan +@ estimator__n_estimators estimator__n_estimators: 100 +* estimator__n_jobs estimator__n_jobs: 1 +* estimator__nthread estimator__nthread: None +@ estimator__objective estimator__objective: 'reg:linear' +@ estimator__random_state estimator__random_state: 0 +@ estimator__reg_alpha estimator__reg_alpha: 0 +@ estimator__reg_lambda estimator__reg_lambda: 1 +@ estimator__scale_pos_weight estimator__scale_pos_weight: 1 +@ estimator__seed estimator__seed: None +@ estimator__silent estimator__silent: True +@ estimator__subsample estimator__subsample: 1 + Note: @, params eligible for search in searchcv tool. diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model01 Binary file test-data/glm_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model02 Binary file test-data/glm_model02 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model03 Binary file test-data/glm_model03 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model04 Binary file test-data/glm_model04 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model05 Binary file test-data/glm_model05 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model06 Binary file test-data/glm_model06 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model07 Binary file test-data/glm_model07 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_model08 Binary file test-data/glm_model08 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_result01 --- a/test-data/glm_result01 Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/glm_result01 Mon Dec 16 05:17:00 2019 -0500 @@ -1,5 +1,5 @@ -86.97021227350001 1.00532111569 -1.01739601979 -0.613139481654 0.641846874331 3703215242836.872 -91.2021798817 -0.6215229712070001 1.11914889596 0.390012184498 1.28956938152 3875943636708.156 --47.4101632272 -0.638416457964 -0.7327774684530001 -0.8640261049779999 -1.06109770116 -2071574726112.0168 -61.712804630200004 -1.0999480057700002 -0.739679672932 0.585657963012 1.4890682753600002 2642119730255.405 --206.998295124 0.130238853011 0.70574123041 1.3320656526399999 -1.3322092373799999 -8851040854159.11 +86.97021227350001 1.00532111569 -1.01739601979 -0.613139481654 0.641846874331 20479602419382.055 +91.2021798817 -0.6215229712070001 1.11914889596 0.390012184498 1.28956938152 21460309408632.004 +-47.4101632272 -0.638416457964 -0.7327774684530001 -0.8640261049779999 -1.06109770116 -11245419999724.842 +61.712804630200004 -1.0999480057700002 -0.739679672932 0.585657963012 
1.4890682753600002 14574106078789.26 +-206.998295124 0.130238853011 0.70574123041 1.3320656526399999 -1.3322092373799999 -48782519807586.32 diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_result02 --- a/test-data/glm_result02 Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/glm_result02 Mon Dec 16 05:17:00 2019 -0500 @@ -1,5 +1,5 @@ 3.68258022948 2.82110345641 -3.9901407239999998 -1.9523364774 1 0.015942057224 -0.7119585943469999 0.125502976978 -0.972218263337 0 -2.0869076882499997 0.929399321468 -2.1292408448400004 -1.9971402218799998 0 -1.4132105208399999 0.523750660422 -1.4210539291 -1.49298569451 0 +2.0869076882499997 0.929399321468 -2.1292408448400004 -1.9971402218799998 1 +1.4132105208399999 0.523750660422 -1.4210539291 -1.49298569451 1 0.7683140439399999 1.38267855169 -0.989045048734 0.649504257894 1 diff -r 18b39ada6f35 -r 64b771b1471a test-data/glm_result08 --- a/test-data/glm_result08 Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/glm_result08 Mon Dec 16 05:17:00 2019 -0500 @@ -1,4 +1,4 @@ -3.68258022948 2.82110345641 -3.9901407239999998 -1.9523364774 0 +3.68258022948 2.82110345641 -3.9901407239999998 -1.9523364774 1 0.015942057224 -0.7119585943469999 0.125502976978 -0.972218263337 0 2.0869076882499997 0.929399321468 -2.1292408448400004 -1.9971402218799998 0 1.4132105208399999 0.523750660422 -1.4210539291 -1.49298569451 0 diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras01.json --- a/test-data/keras01.json Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/keras01.json Mon Dec 16 05:17:00 2019 -0500 @@ -1,1 +1,90 @@ -{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 784], "dtype": "float32", "units": 32, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "relu"}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 10, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "activation": "softmax"}}]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file +{ + "class_name": "Sequential", + "config": { + "name": "sequential_1", + "layers": [ + { + "class_name": "Dense", + "config": { + "name": "dense_1", + "trainable": true, + "batch_input_shape": [ + null, + 784 + ], + "dtype": "float32", + "units": 32, + "activation": "linear", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + 
"kernel_constraint": null, + "bias_constraint": null + } + }, + { + "class_name": "Activation", + "config": { + "name": "activation_1", + "trainable": true, + "dtype": "float32", + "activation": "relu" + } + }, + { + "class_name": "Dense", + "config": { + "name": "dense_2", + "trainable": true, + "dtype": "float32", + "units": 10, + "activation": "linear", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + } + }, + { + "class_name": "Activation", + "config": { + "name": "activation_2", + "trainable": true, + "dtype": "float32", + "activation": "softmax" + } + } + ] + }, + "keras_version": "2.3.1", + "backend": "tensorflow" +} \ No newline at end of file diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras02.json --- a/test-data/keras02.json Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/keras02.json Mon Dec 16 05:17:00 2019 -0500 @@ -1,1 +1,385 @@ -{"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "main_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 100], "dtype": "int32", "sparse": false, "name": "main_input"}, "inbound_nodes": []}, {"name": "embedding_1", "class_name": "Embedding", "config": {"name": "embedding_1", "trainable": true, "batch_input_shape": [null, 100], "dtype": "float32", "input_dim": 10000, "output_dim": 512, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 100}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "lstm_1", "class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 32, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}, "inbound_nodes": [[["embedding_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"name": "aux_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 5], "dtype": "float32", "sparse": false, "name": "aux_input"}, 
"inbound_nodes": []}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["dense_1", 0, 0, {}], ["aux_input", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "dense_3", "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "dense_4", "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_3", 0, 0, {}]]]}, {"name": "dense_5", "class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_4", 0, 0, {}]]]}], "input_layers": [["main_input", 0, 0], ["aux_input", 0, 0]], "output_layers": [["dense_1", 0, 0], ["dense_5", 0, 0]]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file +{ + "class_name": "Model", + "config": { + "name": "model_1", + "layers": [ + { + "name": "main_input", + "class_name": "InputLayer", + "config": { + "batch_input_shape": [ + null, + 100 + ], + "dtype": "int32", + "sparse": false, + "name": "main_input" + }, + "inbound_nodes": [] + }, + { + "name": "embedding_1", + "class_name": "Embedding", + "config": { + "name": "embedding_1", + "trainable": true, + "batch_input_shape": [ + null, + 100 + ], + "dtype": "float32", + "input_dim": 10000, + "output_dim": 512, + "embeddings_initializer": { + "class_name": "RandomUniform", + "config": { + "minval": -0.05, + "maxval": 0.05, + "seed": null + } + }, + "embeddings_regularizer": null, + "activity_regularizer": null, + "embeddings_constraint": null, + "mask_zero": false, + "input_length": 100 + }, + "inbound_nodes": [ + [ + [ + "main_input", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "lstm_1", + "class_name": "LSTM", + "config": { + "name": "lstm_1", + "trainable": true, + "dtype": "float32", + 
"return_sequences": false, + "return_state": false, + "go_backwards": false, + "stateful": false, + "unroll": false, + "units": 32, + "activation": "tanh", + "recurrent_activation": "sigmoid", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "recurrent_initializer": { + "class_name": "Orthogonal", + "config": { + "gain": 1.0, + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "unit_forget_bias": true, + "kernel_regularizer": null, + "recurrent_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "recurrent_constraint": null, + "bias_constraint": null, + "dropout": 0.0, + "recurrent_dropout": 0.0, + "implementation": 2 + }, + "inbound_nodes": [ + [ + [ + "embedding_1", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "dense_1", + "class_name": "Dense", + "config": { + "name": "dense_1", + "trainable": true, + "dtype": "float32", + "units": 1, + "activation": "sigmoid", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + }, + "inbound_nodes": [ + [ + [ + "lstm_1", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "aux_input", + "class_name": "InputLayer", + "config": { + "batch_input_shape": [ + null, + 5 + ], + "dtype": "float32", + "sparse": false, + "name": "aux_input" + }, + "inbound_nodes": [] + }, + { + "name": "concatenate_1", + "class_name": "Concatenate", + "config": { + "name": "concatenate_1", + "trainable": true, + "dtype": "float32", + "axis": -1 + }, + "inbound_nodes": [ + [ + [ + "dense_1", + 0, + 0, + {} + ], + [ + "aux_input", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "dense_2", + "class_name": "Dense", + "config": { + "name": "dense_2", + "trainable": true, + "dtype": "float32", + "units": 64, + "activation": "relu", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + }, + "inbound_nodes": [ + [ + [ + "concatenate_1", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "dense_3", + "class_name": "Dense", + "config": { + "name": "dense_3", + "trainable": true, + "dtype": "float32", + "units": 64, + "activation": "relu", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + }, + "inbound_nodes": [ + [ + [ + "dense_2", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "dense_4", + "class_name": "Dense", + "config": { + "name": "dense_4", + "trainable": true, + "dtype": "float32", + "units": 64, + "activation": "relu", + 
"use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + }, + "inbound_nodes": [ + [ + [ + "dense_3", + 0, + 0, + {} + ] + ] + ] + }, + { + "name": "dense_5", + "class_name": "Dense", + "config": { + "name": "dense_5", + "trainable": true, + "dtype": "float32", + "units": 1, + "activation": "sigmoid", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + }, + "inbound_nodes": [ + [ + [ + "dense_4", + 0, + 0, + {} + ] + ] + ] + } + ], + "input_layers": [ + [ + "main_input", + 0, + 0 + ], + [ + "aux_input", + 0, + 0 + ] + ], + "output_layers": [ + [ + "dense_1", + 0, + 0 + ], + [ + "dense_5", + 0, + 0 + ] + ] + }, + "keras_version": "2.3.1", + "backend": "tensorflow" +} \ No newline at end of file diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras04.json --- a/test-data/keras04.json Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/keras04.json Mon Dec 16 05:17:00 2019 -0500 @@ -1,1 +1,90 @@ -{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 17], "dtype": "float32", "units": 32, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "linear"}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "activation": "linear"}}]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file +{ + "class_name": "Sequential", + "config": { + "name": "sequential_1", + "layers": [ + { + "class_name": "Dense", + "config": { + "name": "dense_1", + "trainable": true, + "batch_input_shape": [ + null, + 17 + ], + "dtype": "float32", + "units": 32, + "activation": "linear", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + 
"activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + } + }, + { + "class_name": "Activation", + "config": { + "name": "activation_1", + "trainable": true, + "dtype": "float32", + "activation": "linear" + } + }, + { + "class_name": "Dense", + "config": { + "name": "dense_2", + "trainable": true, + "dtype": "float32", + "units": 1, + "activation": "linear", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1.0, + "mode": "fan_avg", + "distribution": "uniform", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null + } + }, + { + "class_name": "Activation", + "config": { + "name": "activation_2", + "trainable": true, + "dtype": "float32", + "activation": "linear" + } + } + ] + }, + "keras_version": "2.3.1", + "backend": "tensorflow" +} \ No newline at end of file diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_batch_model01 Binary file test-data/keras_batch_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_batch_model02 Binary file test-data/keras_batch_model02 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_batch_model04 Binary file test-data/keras_batch_model04 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_batch_params01.tabular --- a/test-data/keras_batch_params01.tabular Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/keras_batch_params01.tabular Mon Dec 16 05:17:00 2019 -0500 @@ -6,15 +6,14 @@ @ callbacks callbacks: [{'callback_selection': {'callback_type': 'None'}}] @ class_positive_factor class_positive_factor: 1.0 @ config config: {'name': 'sequential_1', 'layers': [{'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable -@ data_batch_generator "data_batch_generator: FastaDNABatchGenerator(fasta_path='to_be_determined', seed=999, - seq_length=1000, shuffle=True)" +@ data_batch_generator "data_batch_generator: FastaDNABatchGenerator(fasta_path='to_be_determined', seed=999, seq_length=1000, + shuffle=True)" @ decay decay: 0.0 @ epochs epochs: 100 -@ epsilon epsilon: None @ layers_0_Dense layers_0_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None, -@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'activation': 're -@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'units': 10, 'activation': -@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'activation': 'so +@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'dtype': 'float32 +@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'dtype': 'float32', 'units' +@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'dtype': 'float32 @ loss loss: 'binary_crossentropy' @ lr lr: 0.01 @ metrics metrics: ['acc'] @@ -60,12 +59,13 @@ @ layers_0_Dense__config__units layers_0_Dense__config__units: 32 @ layers_0_Dense__config__use_bias layers_0_Dense__config__use_bias: True * layers_1_Activation__class_name layers_1_Activation__class_name: 'Activation' -@ 
layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'activation': 'relu'} +@ layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'dtype': 'float32', 'activation': 'relu'} @ layers_1_Activation__config__activation layers_1_Activation__config__activation: 'relu' +@ layers_1_Activation__config__dtype layers_1_Activation__config__dtype: 'float32' * layers_1_Activation__config__name layers_1_Activation__config__name: 'activation_1' @ layers_1_Activation__config__trainable layers_1_Activation__config__trainable: True * layers_2_Dense__class_name layers_2_Dense__class_name: 'Dense' -@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'units': 10, 'activation': 'linear', 'use_bias': True, 'kerne +@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'dtype': 'float32', 'units': 10, 'activation': 'linear', 'use @ layers_2_Dense__config__activation layers_2_Dense__config__activation: 'linear' @ layers_2_Dense__config__activity_regularizer layers_2_Dense__config__activity_regularizer: None @ layers_2_Dense__config__bias_constraint layers_2_Dense__config__bias_constraint: None @@ -73,6 +73,7 @@ * layers_2_Dense__config__bias_initializer__class_name layers_2_Dense__config__bias_initializer__class_name: 'Zeros' @ layers_2_Dense__config__bias_initializer__config layers_2_Dense__config__bias_initializer__config: {} @ layers_2_Dense__config__bias_regularizer layers_2_Dense__config__bias_regularizer: None +@ layers_2_Dense__config__dtype layers_2_Dense__config__dtype: 'float32' @ layers_2_Dense__config__kernel_constraint layers_2_Dense__config__kernel_constraint: None @ layers_2_Dense__config__kernel_initializer layers_2_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo * layers_2_Dense__config__kernel_initializer__class_name layers_2_Dense__config__kernel_initializer__class_name: 'VarianceScaling' @@ -87,8 +88,9 @@ @ layers_2_Dense__config__units layers_2_Dense__config__units: 10 @ layers_2_Dense__config__use_bias layers_2_Dense__config__use_bias: True * layers_3_Activation__class_name layers_3_Activation__class_name: 'Activation' -@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'activation': 'softmax'} +@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'dtype': 'float32', 'activation': 'softmax'} @ layers_3_Activation__config__activation layers_3_Activation__config__activation: 'softmax' +@ layers_3_Activation__config__dtype layers_3_Activation__config__dtype: 'float32' * layers_3_Activation__config__name layers_3_Activation__config__name: 'activation_2' @ layers_3_Activation__config__trainable layers_3_Activation__config__trainable: True Note: @, params eligible for search in searchcv tool. 
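
The keras_batch_params01.tabular update above tracks the Keras 2.2.4 -> 2.3.1 bump recorded throughout this changeset: Keras 2.3 serializes an explicit per-layer 'dtype' (hence the new layers_*__config__dtype rows) and the wrapper's 'epsilon' row disappears. The table itself is sklearn-style introspection: get_params(deep=True) flattens nested components into double-underscore keys, and the leading '@' or '*' flag records whether the searchcv tool will accept that key in a parameter grid. Below is a minimal sketch of producing such a table; the non-searchable key set is a simplified assumption for illustration, not the tool's actual list:

    from sklearn.ensemble import RandomForestRegressor

    # Assumed, simplified set of keys flagged '*' (not searchable);
    # the real tool maintains its own list.
    NON_SEARCHABLE = {'n_jobs', 'verbose', 'nthread', 'memory', 'steps'}

    est = RandomForestRegressor(random_state=42)
    for key, value in sorted(est.get_params(deep=True).items()):
        flag = '*' if key.split('__')[-1] in NON_SEARCHABLE else '@'
        print('%s %s\t%s: %r' % (flag, key, key, value))

This mirrors the get_params05.tabular layout above, where the same RandomForestRegressor(random_state=42) now reports bare keys instead of 'randomforestregressor__'-prefixed ones, consistent with inspecting the estimator itself rather than its enclosing Pipeline.
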
diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_batch_params04.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/keras_batch_params04.tabular Mon Dec 16 05:17:00 2019 -0500 @@ -0,0 +1,91 @@ + Parameter Value +@ amsgrad amsgrad: None +@ batch_size batch_size: 32 +@ beta_1 beta_1: None +@ beta_2 beta_2: None +@ callbacks callbacks: [{'callback_selection': {'callback_type': 'None'}}] +@ class_positive_factor class_positive_factor: 1.0 +@ config config: {'name': 'sequential_1', 'layers': [{'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable +@ data_batch_generator data_batch_generator: None +@ decay decay: 0.0 +@ epochs epochs: 100 +@ layers_0_Dense layers_0_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None, +@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'dtype': 'float32 +@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'dtype': 'float32', 'units' +@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'dtype': 'float32 +@ loss loss: 'binary_crossentropy' +@ lr lr: 0.01 +@ metrics metrics: ['acc'] +@ model_type model_type: 'sequential' +@ momentum momentum: 0.0 +* n_jobs n_jobs: 1 +@ nesterov nesterov: False +@ optimizer optimizer: 'sgd' +@ prediction_steps prediction_steps: None +@ rho rho: None +@ schedule_decay schedule_decay: None +@ seed seed: None +@ steps_per_epoch steps_per_epoch: None +@ validation_data validation_data: None +@ validation_steps validation_steps: None +@ verbose verbose: 0 +* layers_0_Dense__class_name layers_0_Dense__class_name: 'Dense' +@ layers_0_Dense__config layers_0_Dense__config: {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None, 784], 'dtype': 'float32', 'units' +@ layers_0_Dense__config__activation layers_0_Dense__config__activation: 'linear' +@ layers_0_Dense__config__activity_regularizer layers_0_Dense__config__activity_regularizer: None +@ layers_0_Dense__config__batch_input_shape layers_0_Dense__config__batch_input_shape: [None, 784] +@ layers_0_Dense__config__bias_constraint layers_0_Dense__config__bias_constraint: None +@ layers_0_Dense__config__bias_initializer layers_0_Dense__config__bias_initializer: {'class_name': 'Zeros', 'config': {}} +* layers_0_Dense__config__bias_initializer__class_name layers_0_Dense__config__bias_initializer__class_name: 'Zeros' +@ layers_0_Dense__config__bias_initializer__config layers_0_Dense__config__bias_initializer__config: {} +@ layers_0_Dense__config__bias_regularizer layers_0_Dense__config__bias_regularizer: None +@ layers_0_Dense__config__dtype layers_0_Dense__config__dtype: 'float32' +@ layers_0_Dense__config__kernel_constraint layers_0_Dense__config__kernel_constraint: None +@ layers_0_Dense__config__kernel_initializer layers_0_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo +* layers_0_Dense__config__kernel_initializer__class_name layers_0_Dense__config__kernel_initializer__class_name: 'VarianceScaling' +@ layers_0_Dense__config__kernel_initializer__config layers_0_Dense__config__kernel_initializer__config: {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None} +@ layers_0_Dense__config__kernel_initializer__config__distribution layers_0_Dense__config__kernel_initializer__config__distribution: 
'uniform' +@ layers_0_Dense__config__kernel_initializer__config__mode layers_0_Dense__config__kernel_initializer__config__mode: 'fan_avg' +@ layers_0_Dense__config__kernel_initializer__config__scale layers_0_Dense__config__kernel_initializer__config__scale: 1.0 +@ layers_0_Dense__config__kernel_initializer__config__seed layers_0_Dense__config__kernel_initializer__config__seed: None +@ layers_0_Dense__config__kernel_regularizer layers_0_Dense__config__kernel_regularizer: None +* layers_0_Dense__config__name layers_0_Dense__config__name: 'dense_1' +@ layers_0_Dense__config__trainable layers_0_Dense__config__trainable: True +@ layers_0_Dense__config__units layers_0_Dense__config__units: 32 +@ layers_0_Dense__config__use_bias layers_0_Dense__config__use_bias: True +* layers_1_Activation__class_name layers_1_Activation__class_name: 'Activation' +@ layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'dtype': 'float32', 'activation': 'relu'} +@ layers_1_Activation__config__activation layers_1_Activation__config__activation: 'relu' +@ layers_1_Activation__config__dtype layers_1_Activation__config__dtype: 'float32' +* layers_1_Activation__config__name layers_1_Activation__config__name: 'activation_1' +@ layers_1_Activation__config__trainable layers_1_Activation__config__trainable: True +* layers_2_Dense__class_name layers_2_Dense__class_name: 'Dense' +@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'dtype': 'float32', 'units': 10, 'activation': 'linear', 'use +@ layers_2_Dense__config__activation layers_2_Dense__config__activation: 'linear' +@ layers_2_Dense__config__activity_regularizer layers_2_Dense__config__activity_regularizer: None +@ layers_2_Dense__config__bias_constraint layers_2_Dense__config__bias_constraint: None +@ layers_2_Dense__config__bias_initializer layers_2_Dense__config__bias_initializer: {'class_name': 'Zeros', 'config': {}} +* layers_2_Dense__config__bias_initializer__class_name layers_2_Dense__config__bias_initializer__class_name: 'Zeros' +@ layers_2_Dense__config__bias_initializer__config layers_2_Dense__config__bias_initializer__config: {} +@ layers_2_Dense__config__bias_regularizer layers_2_Dense__config__bias_regularizer: None +@ layers_2_Dense__config__dtype layers_2_Dense__config__dtype: 'float32' +@ layers_2_Dense__config__kernel_constraint layers_2_Dense__config__kernel_constraint: None +@ layers_2_Dense__config__kernel_initializer layers_2_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo +* layers_2_Dense__config__kernel_initializer__class_name layers_2_Dense__config__kernel_initializer__class_name: 'VarianceScaling' +@ layers_2_Dense__config__kernel_initializer__config layers_2_Dense__config__kernel_initializer__config: {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None} +@ layers_2_Dense__config__kernel_initializer__config__distribution layers_2_Dense__config__kernel_initializer__config__distribution: 'uniform' +@ layers_2_Dense__config__kernel_initializer__config__mode layers_2_Dense__config__kernel_initializer__config__mode: 'fan_avg' +@ layers_2_Dense__config__kernel_initializer__config__scale layers_2_Dense__config__kernel_initializer__config__scale: 1.0 +@ layers_2_Dense__config__kernel_initializer__config__seed layers_2_Dense__config__kernel_initializer__config__seed: None +@ layers_2_Dense__config__kernel_regularizer layers_2_Dense__config__kernel_regularizer: 
None +* layers_2_Dense__config__name layers_2_Dense__config__name: 'dense_2' +@ layers_2_Dense__config__trainable layers_2_Dense__config__trainable: True +@ layers_2_Dense__config__units layers_2_Dense__config__units: 10 +@ layers_2_Dense__config__use_bias layers_2_Dense__config__use_bias: True +* layers_3_Activation__class_name layers_3_Activation__class_name: 'Activation' +@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'dtype': 'float32', 'activation': 'softmax'} +@ layers_3_Activation__config__activation layers_3_Activation__config__activation: 'softmax' +@ layers_3_Activation__config__dtype layers_3_Activation__config__dtype: 'float32' +* layers_3_Activation__config__name layers_3_Activation__config__name: 'activation_2' +@ layers_3_Activation__config__trainable layers_3_Activation__config__trainable: True + Note: @, params eligible for search in searchcv tool. diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_model01 Binary file test-data/keras_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_model02 Binary file test-data/keras_model02 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_model04 Binary file test-data/keras_model04 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_params04.tabular --- a/test-data/keras_params04.tabular Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/keras_params04.tabular Mon Dec 16 05:17:00 2019 -0500 @@ -7,11 +7,10 @@ @ config config: {'name': 'sequential_1', 'layers': [{'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable @ decay decay: 0.0 @ epochs epochs: 100 -@ epsilon epsilon: None @ layers_0_Dense layers_0_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None, -@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'activation': 'li -@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'units': 1, 'activation': ' -@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'activation': 'li +@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'dtype': 'float32 +@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'dtype': 'float32', 'units' +@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'dtype': 'float32 @ loss loss: 'mean_squared_error' @ lr lr: 0.001 @ metrics metrics: ['mse'] @@ -51,12 +50,13 @@ @ layers_0_Dense__config__units layers_0_Dense__config__units: 32 @ layers_0_Dense__config__use_bias layers_0_Dense__config__use_bias: True * layers_1_Activation__class_name layers_1_Activation__class_name: 'Activation' -@ layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'activation': 'linear'} +@ layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'dtype': 'float32', 'activation': 'linear'} @ layers_1_Activation__config__activation layers_1_Activation__config__activation: 'linear' +@ layers_1_Activation__config__dtype layers_1_Activation__config__dtype: 'float32' * layers_1_Activation__config__name layers_1_Activation__config__name: 'activation_1' @ layers_1_Activation__config__trainable 
layers_1_Activation__config__trainable: True * layers_2_Dense__class_name layers_2_Dense__class_name: 'Dense' -@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'units': 1, 'activation': 'linear', 'use_bias': True, 'kernel +@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'dtype': 'float32', 'units': 1, 'activation': 'linear', 'use_ @ layers_2_Dense__config__activation layers_2_Dense__config__activation: 'linear' @ layers_2_Dense__config__activity_regularizer layers_2_Dense__config__activity_regularizer: None @ layers_2_Dense__config__bias_constraint layers_2_Dense__config__bias_constraint: None @@ -64,6 +64,7 @@ * layers_2_Dense__config__bias_initializer__class_name layers_2_Dense__config__bias_initializer__class_name: 'Zeros' @ layers_2_Dense__config__bias_initializer__config layers_2_Dense__config__bias_initializer__config: {} @ layers_2_Dense__config__bias_regularizer layers_2_Dense__config__bias_regularizer: None +@ layers_2_Dense__config__dtype layers_2_Dense__config__dtype: 'float32' @ layers_2_Dense__config__kernel_constraint layers_2_Dense__config__kernel_constraint: None @ layers_2_Dense__config__kernel_initializer layers_2_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo * layers_2_Dense__config__kernel_initializer__class_name layers_2_Dense__config__kernel_initializer__class_name: 'VarianceScaling' @@ -78,8 +79,9 @@ @ layers_2_Dense__config__units layers_2_Dense__config__units: 1 @ layers_2_Dense__config__use_bias layers_2_Dense__config__use_bias: True * layers_3_Activation__class_name layers_3_Activation__class_name: 'Activation' -@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'activation': 'linear'} +@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'dtype': 'float32', 'activation': 'linear'} @ layers_3_Activation__config__activation layers_3_Activation__config__activation: 'linear' +@ layers_3_Activation__config__dtype layers_3_Activation__config__dtype: 'float32' * layers_3_Activation__config__name layers_3_Activation__config__name: 'activation_2' @ layers_3_Activation__config__trainable layers_3_Activation__config__trainable: True Note: @, params eligible for search in searchcv tool. diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_prefitted01.zip Binary file test-data/keras_prefitted01.zip has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_save_weights01.h5 Binary file test-data/keras_save_weights01.h5 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/keras_train_eval_y_true02.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/keras_train_eval_y_true02.tabular Mon Dec 16 05:17:00 2019 -0500 @@ -0,0 +1,54 @@ +0 +54 +54 +41 +48 +46 +74 +57 +52 +54 +54 +45 +57 +54 +51 +68 +71 +68 +68 +40 +46 +79 +46 +49 +55 +68 +76 +85 +42 +79 +77 +80 +64 +59 +48 +67 +50 +77 +88 +76 +75 +66 +61 +89 +49 +59 +71 +60 +55 +77 +75 +54 +75 +60 diff -r 18b39ada6f35 -r 64b771b1471a test-data/lda_model01 Binary file test-data/lda_model01 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/lda_model02 Binary file test-data/lda_model02 has changed diff -r 18b39ada6f35 -r 64b771b1471a test-data/ml_vis01.html --- a/test-data/ml_vis01.html Thu Nov 07 05:25:28 2019 -0500 +++ b/test-data/ml_vis01.html Mon Dec 16 05:17:00 2019 -0500 @@ -1,14 +1,31 @@
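
The rewritten keras01/02/04.json fixtures record the same version bump: keras_version is now 2.3.1, every layer config carries an explicit 'dtype', keras02.json's LSTM picks up the Keras 2.3 defaults (recurrent_activation 'sigmoid' rather than 'hard_sigmoid', implementation 2), and the files are stored as indented JSON instead of a single-line to_json() dump. A sketch of regenerating a fixture in the new layout, reusing the keras01 architecture; the output path is hypothetical:

    import json

    from keras.layers import Activation, Dense
    from keras.models import Sequential

    # Same stack as keras01.json: Dense(32) over 784 inputs (linear by
    # default), relu activation, Dense(10), softmax.
    model = Sequential([
        Dense(32, input_shape=(784,)),
        Activation('relu'),
        Dense(10),
        Activation('softmax'),
    ])

    # Round-tripping to_json() through json.loads/json.dump produces the
    # indent-2 layout of the updated fixtures.
    with open('keras01_regenerated.json', 'w') as f:
        json.dump(json.loads(model.to_json()), f, indent=2)
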