Mercurial > repos > bgruening > plotly_regression_performance_plots
view plot_regression_performance.py @ 1:389227fa1864 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_regression_performance_plots commit 2473a53fde6d8e646e90d2a5201999c8c6a48695
author | bgruening |
---|---|
date | Wed, 09 Jan 2019 02:55:46 -0500 |
parents | 0800a1b66bbd |
children |
line wrap: on
line source
import argparse

import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go


def _read_last_column(path):
    """Return the last column of a tab-separated file as a float array.

    Args:
        path: str, path of the tabular file to read

    Returns:
        1-D numpy array of floats holding the values of the last column
    """
    frame = pd.read_csv(path, sep='\t', parse_dates=True)
    # A plain array (rather than a Series) keeps all later arithmetic
    # positional instead of aligned on the pandas index.
    return np.asarray(frame.iloc[:, -1], dtype=float)


def _regression_metrics(true_values, predicted_values):
    """Compute RMSE and R2 for two equally long float arrays.

    NaN entries are skipped by the reductions. R2 is reported as NaN when
    the true values have no variance, because the score is undefined then.

    Returns:
        tuple (rmse, r2)
    """
    squared_error = (true_values - predicted_values) ** 2
    rmse = np.sqrt(np.nanmean(squared_error))
    res_total = np.nansum((true_values - np.nanmean(true_values)) ** 2)
    if res_total == 0:
        return rmse, np.nan
    r2 = 1 - (np.nansum(squared_error) / res_total)
    return rmse, r2


def _write_plot(traces, layout, filename):
    """Render the traces with the given layout into an HTML file."""
    figure = go.Figure(data=traces, layout=layout)
    plotly.offline.plot(figure, filename=filename, auto_open=False)


def main(infile_input, infile_output):
    """
    Produce an interactive actual vs predicted curves and residual plots

    Three HTML files are written to the current working directory:
    "output_actual_vs_pred.html", "output_scatter_plot.html" and
    "output_residual_plot.html".

    Args:
        infile_input: str, input tabular file with true values
        infile_output: str, input tabular file with predicted values

    Raises:
        ValueError: if a file holds no data rows or the two files do not
            contain the same number of rows
    """
    true_values = _read_last_column(infile_input)
    predicted_values = _read_last_column(infile_output)

    if len(true_values) == 0 or len(predicted_values) == 0:
        raise ValueError("Input files must contain at least one data row")
    if len(true_values) != len(predicted_values):
        # Every plot pairs the values row by row, so a mismatch would
        # silently produce misleading metrics and residuals.
        raise ValueError(
            "True and predicted values differ in length (%d vs %d)"
            % (len(true_values), len(predicted_values))
        )

    axis_labels = list(range(1, len(true_values) + 1))

    # true vs predicted curves
    trace_true = go.Scatter(
        x=axis_labels,
        y=true_values,
        mode='lines+markers',
        name='True values'
    )
    trace_predicted = go.Scatter(
        x=axis_labels,
        y=predicted_values,
        mode='lines+markers',
        name='Predicted values'
    )
    layout_tp = go.Layout(
        title='True vs predicted values',
        xaxis=dict(title='Number of data points'),
        yaxis=dict(title='Values')
    )
    _write_plot(
        [trace_true, trace_predicted],
        layout_tp,
        "output_actual_vs_pred.html"
    )

    # scatter plot
    rmse, r2 = _regression_metrics(true_values, predicted_values)

    # The reference line spans the full range of both series so that it
    # stays visible for negative, fractional or far-from-zero data.
    all_values = np.concatenate([true_values, predicted_values])
    x_y_values = [np.nanmin(all_values), np.nanmax(all_values)]

    trace_x_eq_y = go.Scatter(
        x=x_y_values,
        y=x_y_values,
        mode='lines',
        name='X = Y curve'
    )
    trace_true_pred = go.Scatter(
        x=true_values,
        y=predicted_values,
        mode='markers',
        name='True and predicted values'
    )
    layout_true_pred = go.Layout(
        title='True vs predicted values (RMSE: %s, R2: %s)' % (
            str(np.round(rmse, 2)), str(np.round(r2, 2))
        ),
        xaxis=dict(title='True values'),
        yaxis=dict(title='Predicted values')
    )
    _write_plot(
        [trace_true_pred, trace_x_eq_y],
        layout_true_pred,
        "output_scatter_plot.html"
    )

    # residual plot
    residual = predicted_values - true_values
    trace_residual = go.Scatter(
        x=predicted_values,
        y=residual,
        mode='markers'
    )
    layout_residual = go.Layout(
        title='Residual vs predicted values',
        xaxis=dict(title='Predicted values'),
        yaxis=dict(title='Residual (Predicted - True)')
    )
    _write_plot([trace_residual], layout_residual, "output_residual_plot.html")


if __name__ == "__main__":
    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--input", dest="infile_input", required=True)
    aparser.add_argument("-j", "--output", dest="infile_output", required=True)
    args = aparser.parse_args()
    main(args.infile_input, args.infile_output)