comparison estimator_attributes.xml @ 0:2ad4c2798be7 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author bgruening
date Tue, 14 May 2019 18:12:53 -0400
parents
children c411ff569a26
comparison
equal deleted inserted replaced
-1:000000000000 0:2ad4c2798be7
1 <tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@">
2 <description>get important attributes from an estimator or scikit object</description>
<!-- Shared macro definitions; the expand elements below refer to macros by name
     (presumably defined in main_macros.xml - confirm there). -->
<macros>
    <import>main_macros.xml</import>
</macros>
<expand macro="python_requirements"/>
<expand macro="macro_stdio"/>
<version_command>echo "@VERSION@"</version_command>
<!-- The tool's whole job is done by the generated config file 'main_script',
     which is run with the Python interpreter. -->
<command>
    <![CDATA[
        python '$main_script'
    ]]>
</command>
<configfiles>
    <!-- Cheetah-templated Python script. Galaxy substitutes the dollar-prefixed
         placeholders before the script is run. Lines starting with a double hash
         are Cheetah comments and are not part of the generated script. -->
    <configfile name="main_script">
        <![CDATA[
## NOTE(review): several of the imports below are never referenced in this
## script; presumably they are kept so that load_model can resolve the classes
## of the stored object - confirm before removing any of them.
import json
import pandas
import pickle
import skrebate
import sys
import warnings
import xgboost
from mlxtend import regressor, classifier
from sklearn import (
    cluster, compose, decomposition, ensemble, feature_extraction,
    feature_selection, gaussian_process, kernel_approximation, metrics,
    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
    svm, linear_model, tree, discriminant_analysis)
from imblearn.pipeline import Pipeline as imbPipeline
from sklearn.pipeline import Pipeline

## Make the tool directory importable so the local utils module can be loaded.
sys.path.insert(0, '$__tool_directory__')
from utils import load_model, get_search_params

warnings.simplefilter('ignore')

infile_object = '$infile_object'
attribute = '$attribute_type'

with open(infile_object, 'rb') as f:
    est_obj = load_model(f)

if attribute == 'get_params':
    ## Parameter listing, written as a three-column tab-separated table.
    results = get_search_params(est_obj)
    df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value'])
    df.to_csv('$outfile', sep='\t', index=False)
elif attribute == 'final_estimator':
    ## Last step of a pipeline: echo its repr and pickle the object itself.
    res = est_obj.steps[-1][-1]
    print(repr(res))
    with open('$outfile', 'wb') as f:
        pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
elif attribute in ['best_estimator_', 'init_']:
    ## Attributes that are themselves objects: echo the repr and pickle them.
    res = getattr(est_obj, attribute)
    print(repr(res))
    with open('$outfile', 'wb') as f:
        pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
elif attribute in ['oob_score_', 'best_score_', 'n_features_']:
    ## Single values: one-row, one-column table named after the attribute.
    res = getattr(est_obj, attribute)
    res = pandas.DataFrame([res], columns=[attribute])
    res.to_csv('$outfile', sep='\t', index=False)
elif attribute in ['best_params_', 'named_steps']:
    ## Written as the plain-text repr of the attribute.
    res = getattr(est_obj, attribute)
    with open('$outfile', 'w') as f:
        f.write(repr(res))
elif attribute == 'cv_results_':
    ## Columns are sorted by name so the column order of the table is stable.
    res = pandas.DataFrame(est_obj.cv_results_)
    res = res[sorted(res.columns)]
    res.to_csv('$outfile', sep='\t', index=False)
else:
    ## Every remaining choice is handled as an array with ndim and shape.
    res = getattr(est_obj, attribute)
    if res is None:
        ## Some fitted attributes are legitimately None (scikit-learn documents
        ## this for pvalues_ when the score function returns only scores).
        ## Stop with a readable message instead of failing on res.ndim below.
        sys.exit("Attribute '%s' is None for this object; there is nothing to output." % attribute)
    if res.ndim == 1 or res.shape[-1] == 1:
        columns = [attribute]
    else:
        ## One column per entry along the last axis: name_0, name_1, ...
        columns = [attribute + '_' + str(i) for i in range(res.shape[-1])]
    res = pandas.DataFrame(res, columns=columns)
    res.to_csv('$outfile', sep='\t', index=False)

        ]]>
    </configfile>
</configfiles>
<inputs>
    <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object"/>
    <!-- Each option value is used verbatim by the main script as the name of the
         attribute to retrieve (or as one of its special cases, e.g. get_params
         and final_estimator). -->
    <param name="attribute_type" type="select" label="Select an attribute retrieval type">
        <option value="get_params" selected="true">Estimator - get_params()</option>
        <option value="feature_importances_">Fitted estimator - feature_importances_ </option>
        <option value="coef_">Fitted estimator - coef_ </option>
        <option value="train_score_">Fitted estimator - train_score_ </option>
        <option value="oob_score_">Fitted estimator - oob_score_ </option>
        <option value="init_">Fitted estimator - init_ </option>
        <option value="named_steps">Pipeline - named_steps </option>
        <option value="final_estimator">Pipeline - final_estimator </option>
        <option value="cv_results_">SearchCV - cv_results_ </option>
        <option value="best_estimator_">SearchCV - best_estimator_ </option>
        <option value="best_score_">SearchCV - best_score_ </option>
        <option value="best_params_">SearchCV - best_params_ </option>
        <option value="scores_">Feature_selection - scores_ </option>
        <option value="pvalues_">Feature_selection - pvalues_ </option>
        <option value="ranking_">Feature_selection - ranking_ </option>
        <option value="n_features_">Feature_selection - n_features_ </option>
        <option value="grid_scores_">Feature_selection - grid_scores_ </option>
    </param>
</inputs>
<outputs>
    <!-- Single output dataset. It is tabular unless the selected attribute type
         matches one of the change_format entries below. -->
    <data name="outfile" format="tabular" label="${attribute_type} from ${on_string}">
        <change_format>
            <!-- Selections that switch the output to zip. -->
            <when input="attribute_type" value="final_estimator" format="zip"/>
            <when input="attribute_type" value="best_estimator_" format="zip"/>
            <when input="attribute_type" value="init_" format="zip"/>
            <!-- Selections that switch the output to txt. -->
            <when input="attribute_type" value="named_steps" format="txt"/>
            <when input="attribute_type" value="best_params_" format="txt"/>
        </change_format>
    </data>
</outputs>
<tests>
    <!-- Single values taken from a search object. -->
    <test>
        <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/>
        <param name="attribute_type" value="best_score_"/>
        <output name="outfile" file="best_score_.tabular"/>
    </test>
    <test>
        <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/>
        <param name="attribute_type" value="best_params_"/>
        <output name="outfile" file="best_params_.txt"/>
    </test>
    <!-- Outputs below that use compare="sim_size" are checked by size within the
         given delta instead of by exact content. -->
    <test>
        <param name="infile_object" value="GridSearchCV.zip" ftype="zip"/>
        <param name="attribute_type" value="best_estimator_"/>
        <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10"/>
    </test>
    <test>
        <param name="infile_object" value="best_estimator_.zip" ftype="zip"/>
        <param name="attribute_type" value="final_estimator"/>
        <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10"/>
    </test>
    <test>
        <param name="infile_object" value="best_estimator_.zip" ftype="zip"/>
        <param name="attribute_type" value="named_steps"/>
        <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5"/>
    </test>
    <!-- Array-valued attributes written as tables. -->
    <test>
        <param name="infile_object" value="final_estimator.zip" ftype="zip"/>
        <param name="attribute_type" value="feature_importances_"/>
        <output name="outfile" file="feature_importances_.tabular"/>
    </test>
    <test>
        <param name="infile_object" value="RFE.zip" ftype="zip"/>
        <param name="attribute_type" value="ranking_"/>
        <output name="outfile" file="ranking_.tabular"/>
    </test>
    <!-- Parameter listing; the expected file is referenced with file= like every
         other test in this section. -->
    <test>
        <param name="infile_object" value="LinearRegression02.zip" ftype="zip"/>
        <param name="attribute_type" value="get_params"/>
        <output name="outfile" file="get_params.tabular"/>
    </test>
</tests>
<!-- The help text is reStructuredText; it is kept at column 0 because leading
     indentation is significant in that format. -->
<help>
    <![CDATA[
**What it does**

Outputs an attribute of an estimator or any scikit object.

Common attributes are:

- ``estimator``. *feature_importances_*
- ``RFE``. *ranking_*
- ``RFECV``. *grid_scores_*
- ``GridSearchCV``. *best_estimator_*

    ]]>
</help>
<!-- Citations: the nested expand elements are passed as content to the
     sklearn_citation macro (presumably inserted at a yield point in its
     definition - confirm in the macros file). -->
<expand macro="sklearn_citation">
    <expand macro="skrebate_citation"/>
    <expand macro="xgboost_citation"/>
    <expand macro="imblearn_citation"/>
</expand>
179 </tool>