Mercurial > repos > bgruening > sklearn_clf_metrics
view clf_metrics.xml @ 40:0ee984c1cbd6 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f031d8ddfb73cec24572648666ac44ee47f08aad
author | bgruening |
---|---|
date | Thu, 11 Aug 2022 09:27:14 +0000 |
parents | a7067bbf1e58 |
children | e3c787437a6d |
line wrap: on
line source
<tool id="sklearn_clf_metrics" name="Calculate metrics" version="@VERSION@" profile="20.05">
    <description>for classification performance</description>
    <!--
        Galaxy wrapper around the functions of sklearn.metrics.
        The selected metric name is looked up on the sklearn.metrics module and
        called with the true/predicted columns plus the "options" section of the
        chosen branch as keyword arguments. The result is written as text to "outfile".
    -->
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <version_command>echo "@VERSION@"</version_command>
    <command>
        <![CDATA[
        python "$clf_metrics_script" '$inputs'
        ]]>
    </command>
    <!--
        "inputs" is the JSON dump of the tool parameters; "clf_metrics_script" is the
        Python script (rendered through Cheetah) that receives its path as argv[1].
    -->
    <configfiles>
        <inputs name="inputs" />
        <configfile name="clf_metrics_script">
            <![CDATA[
import sys
import json
import pandas
import numpy as np
from sklearn import metrics
from galaxy_ml.utils import read_columns


COLUMN_LIST_OPTIONS = (
    "by_index_number",
    "all_but_by_index_number",
    "by_header_name",
    "all_but_by_header_name",
)


def read_selected_columns(path, use_header, selector, option_key, column_key):
    """Load the selected columns of a tab-separated file via read_columns.

    path: location of the tabular dataset.
    use_header: truthy when the header should be inferred from the file.
    selector: the column selector section of the tool parameters.
    option_key: key in selector holding the chosen selection mode.
    column_key: key in selector holding the column list; it is only read
        when the selection mode is one of COLUMN_LIST_OPTIONS.
    """
    header = 'infer' if use_header else None
    column_option = selector[option_key]
    if column_option in COLUMN_LIST_OPTIONS:
        c = selector[column_key]
    else:
        c = None
    return read_columns(
        path,
        c=c,
        c_option=column_option,
        sep='\t',
        header=header,
        parse_dates=True)


input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
    params = json.load(param_handler)

clf_params = params["clf_metrics"]

y_t = read_selected_columns(
    "$clf_metrics.infile1",
    clf_params.get("header1", None),
    clf_params["column_selector_options_1"],
    "selected_column_selector_option",
    "col1")

y_p = read_selected_columns(
    "$clf_metrics.infile2",
    clf_params.get("header2", None),
    clf_params["column_selector_options_2"],
    "selected_column_selector_option2",
    "col2")

options = clf_params.get("options", {})
print(options)
if options and options.get('average', '') == 'None':
    options['average'] = None

metric = clf_params["selected_metric"]
metric_function = getattr(metrics, metric)
res = metric_function(y_t, y_p, **options)

with open("$outfile", 'w') as out_file:
    out_file.write(metric + ' : ' + '\n' + str(res) + '\n')
            ]]>
        </configfile>
    </configfiles>
    <inputs>
        <conditional name="clf_metrics">
            <param name="selected_metric" type="select" label="Metrics">
                <option value="accuracy_score" selected="true">Accuracy classification score</option>
                <option value="classification_report">Text report of the main classification metrics</option>
                <option value="f1_score">F1 score (aka balanced F-score or F-measure)</option>
                <option value="fbeta_score">F-beta score</option>
                <option value="hamming_loss">Average Hamming loss</option>
                <option value="jaccard_similarity_score">Jaccard similarity coefficient score</option>
                <option value="precision_recall_fscore_support">Compute precision, recall, F-measure and support for each class</option>
                <option value="precision_score">Precision</option>
                <option value="recall_score">Recall</option>
                <option value="zero_one_loss">Zero-one classification loss</option>
                <option value="auc">Area under the curve (AUC)</option>
                <option value="brier_score_loss">Brier score</option>
                <option value="matthews_corrcoef">Matthews correlation coefficient (MCC) for binary classes</option>
                <option value="confusion_matrix">Confusion matrix (evaluate the accuracy of a classification)</option>
                <option value="precision_recall_curve">Precision-recall pairs for different probability thresholds</option>
                <option value="roc_curve">Receiver operating characteristic (ROC)</option>
                <option value="hinge_loss">Average hinge loss (non-regularized)</option>
                <option value="log_loss">Log loss (aka logistic loss or cross-entropy loss)</option>
                <option value="average_precision_score">Average precision (AP) from prediction scores</option>
                <option value="roc_auc_score">Area Under the Curve (AUC) from prediction scores</option>
            </param>
            <when value="accuracy_score">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--sample_weight-->
                    <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalize" help="If false, returns the number of correctly classified samples. Otherwise, returns the fraction of correctly classified samples. " />
                </section>
            </when>
            <when value="classification_report">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--labels-->
                    <!--target_names-->
                    <!--sample_weight-->
                    <param argument="digits" type="integer" value="2" label="Number of digits for formatting output floating point values" help="" />
                </section>
            </when>
            <when value="f1_score">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--labels-->
                    <!--sample_weight-->
                    <expand macro="pos_label" default_value="1" />
                    <expand macro="average">
                        <option value="binary" selected="true">Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred}) are binary. (binary)</option>
                    </expand>
                </section>
            </when>
            <when value="fbeta_score">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <expand macro="beta" />
                    <!--labels-->
                    <!--sample_weight-->
                    <expand macro="pos_label" default_value="1" />
                    <expand macro="average">
                        <option value="binary" selected="true">Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred}) are binary. (binary)</option>
                    </expand>
                </section>
            </when>
            <when value="hamming_loss">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False"> <!- -classes- -> </section-->
            </when>
            <when value="jaccard_similarity_score">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalize" help="If false, returns the sum of the Jaccard similarity coefficient over the sample set. Otherwise, returns the average of Jaccard similarity coefficient. " />
                </section>
            </when>
            <when value="precision_recall_fscore_support">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <expand macro="beta" />
                    <!--labels-->
                    <!--sample_weight-->
                    <!--warn_for-->
                    <expand macro="pos_label" default_value="1" />
                    <expand macro="average">
                        <option value="binary" selected="true">Only report results for the class specified by pos_label. Applicable only on binary classification. (binary)</option>
                    </expand>
                </section>
            </when>
            <when value="precision_score">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--labels-->
                    <!--sample_weight-->
                    <expand macro="pos_label" default_value="1" />
                    <expand macro="average">
                        <option value="binary" selected="true">Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred}) are binary. (binary)</option>
                    </expand>
                </section>
            </when>
            <when value="recall_score">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--labels-->
                    <!--sample_weight-->
                    <expand macro="pos_label" default_value="1" />
                    <expand macro="average">
                        <option value="binary" selected="true">Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred}) are binary. (binary)</option>
                    </expand>
                </section>
            </when>
            <when value="zero_one_loss">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--sample_weight-->
                    <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalize" help="If false, returns the number of misclassifications. Otherwise, returns the fraction of misclassifications. " />
                </section>
            </when>
            <when value="auc">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <param argument="reorder" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Assume an ascending curve in the case of ties" help="If the curve is non-ascending, the result will be wrong. " />
                </section>
            </when>
            <when value="brier_score_loss">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--weights-->
                    <expand macro="pos_label" />
                </section>
            </when>
            <when value="matthews_corrcoef">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False"> </section-->
            </when>
            <when value="confusion_matrix">
                <expand macro="clf_inputs" />
                <!--section name="options" title="Advanced Options" expanded="False"> <!- -labels- -> </section-->
            </when>
            <when value="precision_recall_curve">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <expand macro="pos_label" />
                    <!--sample_weight-->
                </section>
            </when>
            <when value="roc_curve">
                <expand macro="clf_inputs" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--sample_weight-->
                    <expand macro="pos_label" />
                    <param argument="drop_intermediate" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label=" Drop suboptimal thresholds" help="This is useful in order to create lighter ROC curves. " />
                </section>
            </when>
            <when value="hinge_loss">
                <expand macro="clf_inputs" multiple="true" />
                <!--section name="options" title="Advanced Options" expanded="False"> <!- -labels- -> <!- -sample_weight- -> </section-->
            </when>
            <when value="log_loss">
                <expand macro="clf_inputs" multiple="true" />
                <section name="options" title="Advanced Options" expanded="False">
                    <!--sample_weight-->
                    <param argument="eps" type="float" value="1e-15" label="Clipping factor" help="Log loss is undefined for p=0 or p=1, so probabilities are clipped to max(eps, min(1 - eps, p)). " />
                    <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Normalize" help="If true, returns the mean loss per sample. Otherwise, returns the sum of the per-sample losses. " />
                </section>
            </when>
            <when value="average_precision_score">
                <expand macro="clf_inputs" multiple1="true" multiple="true" />
                <!-- section name="options" title="Advanced Options" expanded="False"> <!- -average='macro', sample_weight=None- -> </section-->
            </when>
            <when value="roc_auc_score">
                <expand macro="clf_inputs" multiple1="true" multiple="true" />
                <section name="options" title="Advanced Options" expanded="False">
                    <expand macro="average">
                    </expand>
                </section>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="txt" name="outfile" />
    </outputs>
    <tests>
        <test>
            <param name="selected_metric" value="accuracy_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <output name="outfile" file="accuracy_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="classification_report" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <output name="outfile" file="classification_report.txt" />
        </test>
        <test>
            <param name="selected_metric" value="f1_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <param name="average" value="micro" />
            <output name="outfile" file="f1_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="fbeta_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <param name="average" value="micro" />
            <output name="outfile" file="fbeta_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="hamming_loss" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <output name="outfile" file="hamming_loss.txt" />
        </test>
        <test>
            <param name="selected_metric" value="jaccard_similarity_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <output name="outfile" file="jaccard_similarity_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="precision_recall_fscore_support" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <param name="average" value="micro" />
            <output name="outfile" file="precision_recall_fscore_support.txt" />
        </test>
        <test>
            <param name="selected_metric" value="precision_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <param name="average" value="micro" />
            <output name="outfile" file="precision_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="recall_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <param name="average" value="micro" />
            <output name="outfile" file="recall_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="zero_one_loss" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <output name="outfile" file="zero_one_loss.txt" />
        </test>
        <test>
            <param name="selected_metric" value="auc" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <param name="reorder" value="true" />
            <output name="outfile" file="auc.txt" />
        </test>
        <test>
            <param name="selected_metric" value="brier_score_loss" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="6" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="4" />
            <output name="outfile" file="brier_score_loss.txt" />
        </test>
        <test>
            <param name="selected_metric" value="matthews_corrcoef" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="6" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="7" />
            <output name="outfile" file="matthews_corrcoef.txt" />
        </test>
        <test>
            <param name="selected_metric" value="confusion_matrix" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="2" />
            <output name="outfile" file="confusion_matrix.txt" />
        </test>
        <test>
            <param name="selected_metric" value="precision_recall_curve" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="6" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="7" />
            <output name="outfile" file="precision_recall_curve.txt" />
        </test>
        <test>
            <param name="selected_metric" value="roc_curve" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="6" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="7" />
            <output name="outfile" file="roc_curve.txt" />
        </test>
        <test>
            <param name="selected_metric" value="hinge_loss" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="1" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="8,9,10" />
            <output name="outfile" file="hinge_loss.txt" />
        </test>
        <test>
            <param name="selected_metric" value="log_loss" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="2" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="3,4,5" />
            <output name="outfile" file="log_loss.txt" />
        </test>
        <test>
            <param name="selected_metric" value="average_precision_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="6" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="7" />
            <output name="outfile" file="average_precision_score.txt" />
        </test>
        <test>
            <param name="selected_metric" value="roc_auc_score" />
            <param name="infile1" value="y.tabular" ftype="tabular" />
            <param name="col1" value="6" />
            <param name="infile2" value="y.tabular" ftype="tabular" />
            <param name="col2" value="7" />
            <output name="outfile" file="roc_auc_score.txt" />
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

This tool provides several loss, score, and utility functions to measure classification performance. Some metrics might require probability estimates of the positive class, confidence values, or binary decisions values. This tool is based on
sklearn.metrics package.

For information about classification metric functions and their parameter settings please refer to `Scikit-learn classification metrics`_.

.. _`Scikit-learn classification metrics`: https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
        ]]>
    </help>
    <expand macro="sklearn_citation" />
</tool>