diff estimator_attributes.xml @ 0:2ad4c2798be7 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author bgruening
date Tue, 14 May 2019 18:12:53 -0400
parents
children c411ff569a26
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/estimator_attributes.xml	Tue May 14 18:12:53 2019 -0400
@@ -0,0 +1,179 @@
+<tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@">
+    <description>get important attributes from an estimator or scikit object</description>
+    <macros>
+        <import>main_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <command>
+        <![CDATA[
+        python '$main_script'
+        ]]>
+    </command>
+    <configfiles>
+        <configfile name="main_script">
+            <![CDATA[
+import json
+import pandas
+import pickle
+import skrebate
+import sys
+import warnings
+import xgboost
+from mlxtend import regressor, classifier
+from sklearn import (
+    cluster, compose, decomposition, ensemble, feature_extraction,
+    feature_selection, gaussian_process, kernel_approximation, metrics,
+    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
+    svm, linear_model, tree, discriminant_analysis)
+from imblearn.pipeline import Pipeline as imbPipeline
+from sklearn.pipeline import Pipeline
+
+sys.path.insert(0, '$__tool_directory__')
+from utils import load_model, get_search_params
+
+warnings.simplefilter('ignore')
+
+infile_object = '$infile_object'
+attribute = '$attribute_type'
+
+with open(infile_object, 'rb') as f:
+    est_obj = load_model(f)
+
+if attribute == 'get_params':
+    ## get_params()
+    results = get_search_params(est_obj)
+    df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value'])
+    df.to_csv('$outfile', sep='\t', index=False)
+elif attribute == 'final_estimator':
+    res = est_obj.steps[-1][-1]
+    print(repr(res))
+    with open('$outfile', 'wb') as f:
+        pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
+elif attribute in ['best_estimator_', 'init_']:
+    res = getattr(est_obj, attribute)
+    print(repr(res))
+    with open('$outfile', 'wb') as f:
+        pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
+elif attribute in ['oob_score_', 'best_score_', 'n_features_']:
+    res = getattr(est_obj, attribute)
+    res = pandas.DataFrame([res], columns=[attribute])
+    res.to_csv('$outfile', sep='\t', index=False)
+elif attribute in ['best_params_', 'named_steps']:
+    res = getattr(est_obj, attribute)
+    with open('$outfile', 'w') as f:
+        f.write(repr(res))
+elif attribute == 'cv_results_':
+    res = pandas.DataFrame(est_obj.cv_results_)
+    res = res[sorted(res.columns)]
+    res.to_csv('$outfile', sep='\t', index=False)
+else:
+    res = getattr(est_obj, attribute)
+    columns = []
+    if res.ndim == 1 or res.shape[-1] == 1:
+        columns = [attribute]
+    else:
+        for i in range(res.shape[-1]):
+            columns.append(attribute + '_' + str(i))
+    res = pandas.DataFrame(res, columns=columns)
+    res.to_csv('$outfile', sep='\t', index=False)
+
+            ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object"/>
+        <param name="attribute_type" type="select" label="Select an attribute retrival type">
+            <option value="get_params" selected="true">Estimator - get_params()</option>
+            <option value="feature_importances_" >Fitted estimator - feature_importances_ </option>
+            <option value="coef_">Fitted estimator - coef_ </option>
+            <option value="train_score_">Fitted estimator - train_score_ </option>
+            <option value="oob_score_">Fitted estimator - oob_score_ </option>
+            <option value="init_">Fitted estimator - init_ </option>
+            <option value="named_steps">Pipeline - named_steps </option>
+            <option value="final_estimator">Pipeline - final_estimator </option>
+            <option value="cv_results_">SearchCV - cv_results_ </option>
+            <option value="best_estimator_">SearchCV - best_estimator_ </option>
+            <option value="best_score_">SearchCV - best_score_ </option>
+            <option value="best_params_">SearchCV - best_params_ </option>
+            <option value="scores_">Feature_selection - scores_ </option>
+            <option value="pvalues_">Feature_selection - pvalues_ </option>
+            <option value="ranking_">Feature_selection - ranking_ </option>
+            <option value="n_features_">Feature_selection - n_features_ </option>
+            <option value="grid_scores_">Feature_selection - grid_scores_ </option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}">
+            <change_format>
+                <when input="attribute_type" value="named_steps" format="txt" />
+                <when input="attribute_type" value="best_params_" format="txt" />
+                <when input="attribute_type" value="final_estimator" format="zip" />
+                <when input="attribute_type" value="best_estimator_" format="zip" />
+                <when input="attribute_type" value="init_" format="zip" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/>
+            <param name="attribute_type" value="best_score_"/>
+            <output name="outfile" file="best_score_.tabular"/>
+        </test>
+        <test>
+            <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/>
+            <param name="attribute_type" value="best_params_"/>
+            <output name="outfile" file="best_params_.txt"/>
+        </test>
+        <test>
+            <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/>
+            <param name="attribute_type" value="best_estimator_"/>
+            <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10"/>
+        </test>
+         <test>
+            <param name="infile_object" value="best_estimator_.zip" ftype="zip"/>
+            <param name="attribute_type" value="final_estimator"/>
+            <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10"/>
+        </test>
+        <test>
+            <param name="infile_object" value="best_estimator_.zip" ftype="zip"/>
+            <param name="attribute_type" value="named_steps"/>
+            <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5"/>
+        </test>
+        <test>
+            <param name="infile_object" value="final_estimator.zip" ftype="zip"/>
+            <param name="attribute_type" value="feature_importances_"/>
+            <output name="outfile" file="feature_importances_.tabular"/>
+        </test>
+        <test>
+            <param name="infile_object" value="RFE.zip" ftype="zip"/>
+            <param name="attribute_type" value="ranking_"/>
+            <output name="outfile" file="ranking_.tabular"/>
+        </test>
+        <test>
+            <param name="infile_object" value="LinearRegression02.zip" ftype="zip"/>
+            <param name="attribute_type" value="get_params"/>
+            <output name="outfile" value="get_params.tabular"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+Output attribute from an estimator or any scikit object.
+
+Common attributes are :
+
+  - ``estimator.`` *feature_importances_*
+  - ``RFE``. *ranking_*
+  - ``RFECV``. *grid_scores_*
+  - ``GridSearchCV``. *best_estimator_*
+
+        ]]>
+    </help>
+    <expand macro="sklearn_citation">
+        <expand macro="skrebate_citation"/>
+        <expand macro="xgboost_citation"/>
+        <expand macro="imblearn_citation"/>
+    </expand>
+</tool>