Mercurial > repos > bgruening > sklearn_estimator_attributes
comparison estimator_attributes.xml @ 0:2ad4c2798be7 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author | bgruening |
---|---|
date | Tue, 14 May 2019 18:12:53 -0400 |
parents | |
children | c411ff569a26 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2ad4c2798be7 |
---|---|
1 <tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@"> | |
2 <description>get important attributes from an estimator or scikit object</description> | |
3 <macros> | |
4 <import>main_macros.xml</import> | |
5 </macros> | |
6 <expand macro="python_requirements"/> | |
7 <expand macro="macro_stdio"/> | |
8 <version_command>echo "@VERSION@"</version_command> | |
9 <command> | |
10 <![CDATA[ | |
11 python '$main_script' | |
12 ]]> | |
13 </command> | |
14 <configfiles> | |
15 <configfile name="main_script"> | |
16 <![CDATA[ | |
17 import json | |
18 import pandas | |
19 import pickle | |
20 import skrebate | |
21 import sys | |
22 import warnings | |
23 import xgboost | |
24 from mlxtend import regressor, classifier | |
25 from sklearn import ( | |
26 cluster, compose, decomposition, ensemble, feature_extraction, | |
27 feature_selection, gaussian_process, kernel_approximation, metrics, | |
28 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | |
29 svm, linear_model, tree, discriminant_analysis) | |
30 from imblearn.pipeline import Pipeline as imbPipeline | |
31 from sklearn.pipeline import Pipeline | |
32 | |
33 sys.path.insert(0, '$__tool_directory__') | |
34 from utils import load_model, get_search_params | |
35 | |
36 warnings.simplefilter('ignore') | |
37 | |
38 infile_object = '$infile_object' | |
39 attribute = '$attribute_type' | |
40 | |
41 with open(infile_object, 'rb') as f: | |
42 est_obj = load_model(f) | |
43 | |
44 if attribute == 'get_params': | |
45 ## get_params() | |
46 results = get_search_params(est_obj) | |
47 df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value']) | |
48 df.to_csv('$outfile', sep='\t', index=False) | |
49 elif attribute == 'final_estimator': | |
50 res = est_obj.steps[-1][-1] | |
51 print(repr(res)) | |
52 with open('$outfile', 'wb') as f: | |
53 pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) | |
54 elif attribute in ['best_estimator_', 'init_']: | |
55 res = getattr(est_obj, attribute) | |
56 print(repr(res)) | |
57 with open('$outfile', 'wb') as f: | |
58 pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) | |
59 elif attribute in ['oob_score_', 'best_score_', 'n_features_']: | |
60 res = getattr(est_obj, attribute) | |
61 res = pandas.DataFrame([res], columns=[attribute]) | |
62 res.to_csv('$outfile', sep='\t', index=False) | |
63 elif attribute in ['best_params_', 'named_steps']: | |
64 res = getattr(est_obj, attribute) | |
65 with open('$outfile', 'w') as f: | |
66 f.write(repr(res)) | |
67 elif attribute == 'cv_results_': | |
68 res = pandas.DataFrame(est_obj.cv_results_) | |
69 res = res[sorted(res.columns)] | |
70 res.to_csv('$outfile', sep='\t', index=False) | |
71 else: | |
72 res = getattr(est_obj, attribute) | |
73 columns = [] | |
74 if res.ndim == 1 or res.shape[-1] == 1: | |
75 columns = [attribute] | |
76 else: | |
77 for i in range(res.shape[-1]): | |
78 columns.append(attribute + '_' + str(i)) | |
79 res = pandas.DataFrame(res, columns=columns) | |
80 res.to_csv('$outfile', sep='\t', index=False) | |
81 | |
82 ]]> | |
83 </configfile> | |
84 </configfiles> | |
85 <inputs> | |
86 <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object"/> | |
87 <param name="attribute_type" type="select" label="Select an attribute retrieval type"> | |
88 <option value="get_params" selected="true">Estimator - get_params()</option> | |
89 <option value="feature_importances_" >Fitted estimator - feature_importances_ </option> | |
90 <option value="coef_">Fitted estimator - coef_ </option> | |
91 <option value="train_score_">Fitted estimator - train_score_ </option> | |
92 <option value="oob_score_">Fitted estimator - oob_score_ </option> | |
93 <option value="init_">Fitted estimator - init_ </option> | |
94 <option value="named_steps">Pipeline - named_steps </option> | |
95 <option value="final_estimator">Pipeline - final_estimator </option> | |
96 <option value="cv_results_">SearchCV - cv_results_ </option> | |
97 <option value="best_estimator_">SearchCV - best_estimator_ </option> | |
98 <option value="best_score_">SearchCV - best_score_ </option> | |
99 <option value="best_params_">SearchCV - best_params_ </option> | |
100 <option value="scores_">Feature_selection - scores_ </option> | |
101 <option value="pvalues_">Feature_selection - pvalues_ </option> | |
102 <option value="ranking_">Feature_selection - ranking_ </option> | |
103 <option value="n_features_">Feature_selection - n_features_ </option> | |
104 <option value="grid_scores_">Feature_selection - grid_scores_ </option> | |
105 </param> | |
106 </inputs> | |
107 <outputs> | |
108 <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}"> | |
109 <change_format> | |
110 <when input="attribute_type" value="named_steps" format="txt" /> | |
111 <when input="attribute_type" value="best_params_" format="txt" /> | |
112 <when input="attribute_type" value="final_estimator" format="zip" /> | |
113 <when input="attribute_type" value="best_estimator_" format="zip" /> | |
114 <when input="attribute_type" value="init_" format="zip" /> | |
115 </change_format> | |
116 </data> | |
117 </outputs> | |
118 <tests> | |
119 <test> | |
120 <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/> | |
121 <param name="attribute_type" value="best_score_"/> | |
122 <output name="outfile" file="best_score_.tabular"/> | |
123 </test> | |
124 <test> | |
125 <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/> | |
126 <param name="attribute_type" value="best_params_"/> | |
127 <output name="outfile" file="best_params_.txt"/> | |
128 </test> | |
129 <test> | |
130 <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/> | |
131 <param name="attribute_type" value="best_estimator_"/> | |
132 <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10"/> | |
133 </test> | |
134 <test> | |
135 <param name="infile_object" value="best_estimator_.zip" ftype="zip"/> | |
136 <param name="attribute_type" value="final_estimator"/> | |
137 <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10"/> | |
138 </test> | |
139 <test> | |
140 <param name="infile_object" value="best_estimator_.zip" ftype="zip"/> | |
141 <param name="attribute_type" value="named_steps"/> | |
142 <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5"/> | |
143 </test> | |
144 <test> | |
145 <param name="infile_object" value="final_estimator.zip" ftype="zip"/> | |
146 <param name="attribute_type" value="feature_importances_"/> | |
147 <output name="outfile" file="feature_importances_.tabular"/> | |
148 </test> | |
149 <test> | |
150 <param name="infile_object" value="RFE.zip" ftype="zip"/> | |
151 <param name="attribute_type" value="ranking_"/> | |
152 <output name="outfile" file="ranking_.tabular"/> | |
153 </test> | |
154 <test> | |
155 <param name="infile_object" value="LinearRegression02.zip" ftype="zip"/> | |
156 <param name="attribute_type" value="get_params"/> | |
157 <output name="outfile" value="get_params.tabular"/> | |
158 </test> | |
159 </tests> | |
160 <help> | |
161 <![CDATA[ | |
162 **What it does** | |
163 Outputs the selected attribute of an estimator or other scikit-learn object. | |
164 | |
165 Common attributes are: | |
166 | |
167 - ``estimator``. *feature_importances_* | |
168 - ``RFE``. *ranking_* | |
169 - ``RFECV``. *grid_scores_* | |
170 - ``GridSearchCV``. *best_estimator_* | |
171 | |
172 ]]> | |
173 </help> | |
174 <expand macro="sklearn_citation"> | |
175 <expand macro="skrebate_citation"/> | |
176 <expand macro="xgboost_citation"/> | |
177 <expand macro="imblearn_citation"/> | |
178 </expand> | |
179 </tool> |