view regression_metrics.xml @ 32:7acb9b46c066 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8850f42c2c3763e614f7454c9c006f3d2ff572c0
author bgruening
date Fri, 27 May 2022 11:59:03 +0000
parents 0cc5098a9bff
children 642452aeed0d
line wrap: on
line source

<tool id="sklearn_regression_metrics" name="Calculate metrics" version="@VERSION@" profile="20.05">
    <description>for regression performance</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <version_command>echo "@VERSION@"</version_command>
    <!-- Runs the generated Python script; its single argument is the path of the
         JSON file holding the tool parameters (the "inputs" configfile below).
         Both substituted paths are single-quoted so the shell never interpolates them. -->
    <command>
        <![CDATA[
        python '$regression_metrics_script' '$inputs'
        ]]>
    </command>
    <configfiles>
        <inputs name="inputs" />
        <configfile name="regression_metrics_script">
            <![CDATA[
import sys
import json
import pandas
import numpy as np

from sklearn import metrics
from galaxy_ml.utils import read_columns


## Comments in this script use the Cheetah double-hash form: Galaxy renders this
## configfile as a Cheetah template, so they are dropped from the generated script.

## Column selector modes that come with an explicit column specification.
_COLUMN_BASED_OPTIONS = (
    "by_index_number",
    "all_but_by_index_number",
    "by_header_name",
    "all_but_by_header_name",
)


def _read_target(path, has_header, selector, option_key, column_key):
    """Load the selected column(s) of one tab-separated dataset via read_columns.

    path: location of the dataset on disk
    has_header: truthy when the first row of the dataset holds column names
    selector: the column selector section of the tool parameters for this input
    option_key: key inside selector that holds the chosen selection mode
    column_key: key inside selector that holds the column specification

    Returns whatever read_columns returns for the selection.
    """
    mode = selector[option_key]
    ## Only the column-based modes carry a column specification.
    columns = selector[column_key] if mode in _COLUMN_BASED_OPTIONS else None
    return read_columns(
        path,
        c=columns,
        c_option=mode,
        sep='\t',
        header='infer' if has_header else None,
        parse_dates=True)


## The first command-line argument is the JSON dump of the tool parameters.
input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
    params = json.load(param_handler)

settings = params["regression_metrics"]

## First input: passed to the metric as its first positional argument.
y_t = _read_target(
    "$regression_metrics.infile1",
    settings["header1"],
    settings["column_selector_options_1"],
    "selected_column_selector_option",
    "col1")

## Second input: passed to the metric as its second positional argument.
y_p = _read_target(
    "$regression_metrics.infile2",
    settings["header2"],
    settings["column_selector_options_2"],
    "selected_column_selector_option2",
    "col2")

## No "options" section is currently enabled in the tool form, so this is
## normally an empty dict. NOTE(review): the "average" handling looks like a
## leftover from the classification metrics tool; kept for compatibility.
options = settings.get("options", {})
if options and options.get('average', '') == 'None':
    options['average'] = None

## The select value is the name of the function to call in sklearn.metrics.
metric = settings["selected_metric"]
metric_function = getattr(metrics, metric)
res = format(metric_function(y_t, y_p, **options), '.4f')

## The result file is only written, never read back, hence plain write mode.
with open("$outfile", 'w') as out_file:
    out_file.write(metric + ' : ' + '\n' + res + '\n')

            ]]>
        </configfile>
    </configfiles>
    <!-- The chosen metric name is looked up verbatim as an attribute of sklearn.metrics
         by the configfile script, so every option value must match a function name there.
         Each branch expands the clf_inputs macro (defined outside this file, presumably
         in main_macros.xml); the script expects it to supply infile1 / header1 /
         column_selector_options_1 and infile2 / header2 / column_selector_options_2.
         The commented-out "options" sections are placeholders for sample_weight and
         multioutput, which are not exposed at the moment. -->
    <inputs>
        <conditional name="regression_metrics">
            <param name="selected_metric" type="select" label="Metrics">
                <option value="explained_variance_score" selected="true">explained_variance_score - Explained variance regression score function</option>
                <option value="mean_absolute_error">mean_absolute_error - Mean absolute error regression loss</option>
                <option value="mean_squared_error">mean_squared_error - Mean squared error regression loss</option>
                <option value="mean_squared_log_error">mean_squared_log_error - Mean squared logarithmic error regression loss</option>
                <option value="median_absolute_error">median_absolute_error - Median absolute error regression loss</option>
                <option value="r2_score">r2_score - R^2 (coefficient of determination) regression score function</option>
            </param>
            <when value="explained_variance_score">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False">
                    <!- -sample_weight- ->
                    <!- -multioutput- ->
                </section-->
            </when>
            <when value="mean_absolute_error">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False">
                    <!- -sample_weight- ->
                    <!- -multioutput- ->
                </section-->
            </when>
            <when value="mean_squared_error">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False">
                    <!- -sample_weight- ->
                    <!- -multioutput- ->
                </section-->
            </when>
            <when value="mean_squared_log_error">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False">
                    <!- -sample_weight- ->
                    <!- -multioutput- ->
                </section-->
            </when>
            <when value="median_absolute_error">
                <expand macro="clf_inputs" />
            </when>
            <when value="r2_score">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False">
                    <!- -sample_weight- ->
                    <!- -multioutput- ->
                </section-->
            </when>
        </conditional>
    </inputs>
    <!-- Single plain-text dataset. The configfile script writes two lines to it:
         the metric name followed by " : ", then the value formatted to four decimals. -->
    <outputs>
        <data format="txt" name="outfile" />
    </outputs>
    <!-- One test per metric. All of them read both inputs from regression_test_y.tabular
         with header rows enabled. The first four only set col1=1 and col2=2 and leave the
         column selector mode at whatever default the clf_inputs macro defines (not visible
         in this file). The last two additionally exercise the all_but_by_index_number
         mode, on the first and on the second input respectively. -->
    <tests>
        <test>
            <param name="selected_metric" value="explained_variance_score" />
            <param name="infile1" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header1" value="True" />
            <param name="col1" value="1" />
            <param name="infile2" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header2" value="True" />
            <param name="col2" value="2" />
            <output name="outfile" file="regression_metrics_result01" />
        </test>
        <test>
            <param name="selected_metric" value="mean_absolute_error" />
            <param name="infile1" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header1" value="True" />
            <param name="col1" value="1" />
            <param name="infile2" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header2" value="True" />
            <param name="col2" value="2" />
            <output name="outfile" file="regression_metrics_result02" />
        </test>
        <test>
            <param name="selected_metric" value="mean_squared_error" />
            <param name="infile1" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header1" value="True" />
            <param name="col1" value="1" />
            <param name="infile2" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header2" value="True" />
            <param name="col2" value="2" />
            <output name="outfile" file="regression_metrics_result03" />
        </test>
        <test>
            <param name="selected_metric" value="mean_squared_log_error" />
            <param name="infile1" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header1" value="True" />
            <param name="col1" value="1" />
            <param name="infile2" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header2" value="True" />
            <param name="col2" value="2" />
            <output name="outfile" file="regression_metrics_result04" />
        </test>
        <test>
            <param name="selected_metric" value="median_absolute_error" />
            <param name="infile1" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header1" value="True" />
            <param name="selected_column_selector_option" value="all_but_by_index_number" />
            <param name="col1" value="2" />
            <param name="infile2" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header2" value="True" />
            <param name="col2" value="2" />
            <output name="outfile" file="regression_metrics_result05" />
        </test>
        <test>
            <param name="selected_metric" value="r2_score" />
            <param name="infile1" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header1" value="True" />
            <param name="col1" value="1" />
            <param name="infile2" value="regression_test_y.tabular" ftype="tabular" />
            <param name="header2" value="True" />
            <param name="selected_column_selector_option2" value="all_but_by_index_number" />
            <param name="col2" value="1" />
            <output name="outfile" file="regression_metrics_result06" />
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

This tool provides several loss and score functions to measure regression performance. It takes two tabular datasets, one holding the true target values and one holding the predicted values, and writes the selected metric, rounded to four decimal places, to a text file. This tool is based on the
sklearn.metrics package.
For information about regression metric functions and their parameter settings please refer to `Scikit-learn regression metrics`_.

.. _`Scikit-learn regression metrics`: https://scikit-learn.org/stable/modules/model_evaluation.html#regression-metrics
        ]]>
    </help>
    <expand macro="sklearn_citation" />
</tool>