Mercurial > repos > bgruening > sklearn_estimator_attributes
diff estimator_attributes.xml @ 0:2ad4c2798be7 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author | bgruening |
---|---|
date | Tue, 14 May 2019 18:12:53 -0400 |
parents | |
children | c411ff569a26 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/estimator_attributes.xml Tue May 14 18:12:53 2019 -0400 @@ -0,0 +1,179 @@ +<tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@"> + <description>get important attributes from an estimator or scikit object</description> + <macros> + <import>main_macros.xml</import> + </macros> + <expand macro="python_requirements"/> + <expand macro="macro_stdio"/> + <version_command>echo "@VERSION@"</version_command> + <command> + <![CDATA[ + python '$main_script' + ]]> + </command> + <configfiles> + <configfile name="main_script"> + <![CDATA[ +import json +import pandas +import pickle +import skrebate +import sys +import warnings +import xgboost +from mlxtend import regressor, classifier +from sklearn import ( + cluster, compose, decomposition, ensemble, feature_extraction, + feature_selection, gaussian_process, kernel_approximation, metrics, + model_selection, naive_bayes, neighbors, pipeline, preprocessing, + svm, linear_model, tree, discriminant_analysis) +from imblearn.pipeline import Pipeline as imbPipeline +from sklearn.pipeline import Pipeline + +sys.path.insert(0, '$__tool_directory__') +from utils import load_model, get_search_params + +warnings.simplefilter('ignore') + +infile_object = '$infile_object' +attribute = '$attribute_type' + +with open(infile_object, 'rb') as f: + est_obj = load_model(f) + +if attribute == 'get_params': + ## get_params() + results = get_search_params(est_obj) + df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value']) + df.to_csv('$outfile', sep='\t', index=False) +elif attribute == 'final_estimator': + res = est_obj.steps[-1][-1] + print(repr(res)) + with open('$outfile', 'wb') as f: + pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) +elif attribute in ['best_estimator_', 'init_']: + res = getattr(est_obj, attribute) + print(repr(res)) + with open('$outfile', 'wb') as f: + pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) +elif attribute in ['oob_score_', 'best_score_', 'n_features_']: + res = getattr(est_obj, attribute) + res = pandas.DataFrame([res], columns=[attribute]) + res.to_csv('$outfile', sep='\t', index=False) +elif attribute in ['best_params_', 'named_steps']: + res = getattr(est_obj, attribute) + with open('$outfile', 'w') as f: + f.write(repr(res)) +elif attribute == 'cv_results_': + res = pandas.DataFrame(est_obj.cv_results_) + res = res[sorted(res.columns)] + res.to_csv('$outfile', sep='\t', index=False) +else: + res = getattr(est_obj, attribute) + columns = [] + if res.ndim == 1 or res.shape[-1] == 1: + columns = [attribute] + else: + for i in range(res.shape[-1]): + columns.append(attribute + '_' + str(i)) + res = pandas.DataFrame(res, columns=columns) + res.to_csv('$outfile', sep='\t', index=False) + + ]]> + </configfile> + </configfiles> + <inputs> + <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object"/> + <param name="attribute_type" type="select" label="Select an attribute retrival type"> + <option value="get_params" selected="true">Estimator - get_params()</option> + <option value="feature_importances_" >Fitted estimator - feature_importances_ </option> + <option value="coef_">Fitted estimator - coef_ </option> + <option value="train_score_">Fitted estimator - train_score_ </option> + <option value="oob_score_">Fitted estimator - oob_score_ </option> + <option value="init_">Fitted estimator - init_ </option> + <option value="named_steps">Pipeline - named_steps </option> + <option value="final_estimator">Pipeline - final_estimator </option> + <option value="cv_results_">SearchCV - cv_results_ </option> + <option value="best_estimator_">SearchCV - best_estimator_ </option> + <option value="best_score_">SearchCV - best_score_ </option> + <option value="best_params_">SearchCV - best_params_ </option> + <option value="scores_">Feature_selection - scores_ </option> + <option value="pvalues_">Feature_selection - pvalues_ </option> + <option value="ranking_">Feature_selection - ranking_ </option> + <option value="n_features_">Feature_selection - n_features_ </option> + <option value="grid_scores_">Feature_selection - grid_scores_ </option> + </param> + </inputs> + <outputs> + <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}"> + <change_format> + <when input="attribute_type" value="named_steps" format="txt" /> + <when input="attribute_type" value="best_params_" format="txt" /> + <when input="attribute_type" value="final_estimator" format="zip" /> + <when input="attribute_type" value="best_estimator_" format="zip" /> + <when input="attribute_type" value="init_" format="zip" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/> + <param name="attribute_type" value="best_score_"/> + <output name="outfile" file="best_score_.tabular"/> + </test> + <test> + <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/> + <param name="attribute_type" value="best_params_"/> + <output name="outfile" file="best_params_.txt"/> + </test> + <test> + <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/> + <param name="attribute_type" value="best_estimator_"/> + <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10"/> + </test> + <test> + <param name="infile_object" value="best_estimator_.zip" ftype="zip"/> + <param name="attribute_type" value="final_estimator"/> + <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10"/> + </test> + <test> + <param name="infile_object" value="best_estimator_.zip" ftype="zip"/> + <param name="attribute_type" value="named_steps"/> + <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5"/> + </test> + <test> + <param name="infile_object" value="final_estimator.zip" ftype="zip"/> + <param name="attribute_type" value="feature_importances_"/> + <output name="outfile" file="feature_importances_.tabular"/> + </test> + <test> + <param name="infile_object" value="RFE.zip" ftype="zip"/> + <param name="attribute_type" value="ranking_"/> + <output name="outfile" file="ranking_.tabular"/> + </test> + <test> + <param name="infile_object" value="LinearRegression02.zip" ftype="zip"/> + <param name="attribute_type" value="get_params"/> + <output name="outfile" value="get_params.tabular"/> + </test> + </tests> + <help> + <![CDATA[ +**What it does** +Output attribute from an estimator or any scikit object. + +Common attributes are : + + - ``estimator.`` *feature_importances_* + - ``RFE``. *ranking_* + - ``RFECV``. *grid_scores_* + - ``GridSearchCV``. *best_estimator_* + + ]]> + </help> + <expand macro="sklearn_citation"> + <expand macro="skrebate_citation"/> + <expand macro="xgboost_citation"/> + <expand macro="imblearn_citation"/> + </expand> +</tool>