comparison estimator_attributes.xml @ 17:a01fa4e8fe4f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 12:54:40 +0000
parents 27fabe5feedc
children
comparison
equal deleted inserted replaced
16:d0352e8b4c10 17:a01fa4e8fe4f
1 <tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="20.05"> 1 <tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="@PROFILE@">
2 <description>get important attributes from an estimator or scikit object</description> 2 <description>get important attributes from an estimator or scikit object</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements" /> 6 <expand macro="python_requirements" />
14 <configfiles> 14 <configfiles>
15 <configfile name="main_script"> 15 <configfile name="main_script">
16 <![CDATA[ 16 <![CDATA[
17 import json 17 import json
18 import pandas 18 import pandas
19 import pickle
20 import skrebate 19 import skrebate
21 import sys 20 import sys
22 import warnings 21 import warnings
23 import xgboost 22 import xgboost
24 from mlxtend import regressor, classifier 23 from mlxtend import regressor, classifier
28 model_selection, naive_bayes, neighbors, pipeline, preprocessing, 27 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
29 svm, linear_model, tree, discriminant_analysis) 28 svm, linear_model, tree, discriminant_analysis)
30 from imblearn.pipeline import Pipeline as imbPipeline 29 from imblearn.pipeline import Pipeline as imbPipeline
31 from sklearn.pipeline import Pipeline 30 from sklearn.pipeline import Pipeline
32 31
33 from galaxy_ml.utils import load_model, get_search_params 32 from galaxy_ml.model_persist import load_model_from_h5, dump_model_to_h5
33 from galaxy_ml.utils import get_search_params
34 34
35 warnings.simplefilter('ignore') 35 warnings.simplefilter('ignore')
36 36
37 infile_object = '$infile_object' 37 infile_object = '$infile_object'
38 attribute = '$attribute_type' 38 attribute = '$attribute_type'
39 39
40 with open(infile_object, 'rb') as f: 40 est_obj = load_model_from_h5(infile_object)
41 est_obj = load_model(f)
42 41
43 if attribute == 'get_params': 42 if attribute == 'get_params':
44 ## get_params() 43 ## get_params()
45 results = get_search_params(est_obj) 44 results = get_search_params(est_obj)
46 df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value']) 45 df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value'])
47 df.to_csv('$outfile', sep='\t', index=False) 46 df.to_csv('$outfile', sep='\t', index=False)
48 elif attribute == 'final_estimator': 47 elif attribute == 'final_estimator':
49 res = est_obj.steps[-1][-1] 48 res = est_obj.steps[-1][-1]
50 print(repr(res)) 49 print(repr(res))
51 with open('$outfile', 'wb') as f: 50 dump_model_to_h5(res, '$outfile')
52 pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
53 elif attribute in ['best_estimator_', 'init_', 'classifier_', 'regressor_']: 51 elif attribute in ['best_estimator_', 'init_', 'classifier_', 'regressor_']:
54 res = getattr(est_obj, attribute) 52 res = getattr(est_obj, attribute)
55 print(repr(res)) 53 print(repr(res))
56 with open('$outfile', 'wb') as f: 54 dump_model_to_h5(res, '$outfile')
57 pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) 55 elif attribute in ['oob_score_', 'best_score_', 'n_features_in']:
58 elif attribute in ['oob_score_', 'best_score_', 'n_features_']:
59 res = getattr(est_obj, attribute) 56 res = getattr(est_obj, attribute)
60 res = pandas.DataFrame([res], columns=[attribute]) 57 res = pandas.DataFrame([res], columns=[attribute])
61 res.to_csv('$outfile', sep='\t', index=False) 58 res.to_csv('$outfile', sep='\t', index=False)
62 elif attribute in ['best_params_', 'named_steps']: 59 elif attribute in ['best_params_', 'named_steps']:
63 res = getattr(est_obj, attribute) 60 res = getattr(est_obj, attribute)
65 f.write(repr(res)) 62 f.write(repr(res))
66 elif attribute == 'cv_results_': 63 elif attribute == 'cv_results_':
67 res = pandas.DataFrame(est_obj.cv_results_) 64 res = pandas.DataFrame(est_obj.cv_results_)
68 res = res[sorted(res.columns)] 65 res = res[sorted(res.columns)]
69 res.to_csv('$outfile', sep='\t', index=False) 66 res.to_csv('$outfile', sep='\t', index=False)
70 elif attribute == 'save_weights':
71 est_obj.save_weights('$outfile')
72 else: 67 else:
73 if attribute == 'get_signature': 68 if attribute == 'get_signature':
74 res = est_obj.get_signature() 69 res = est_obj.get_signature()
75 else: 70 else:
76 res = getattr(est_obj, attribute) 71 res = getattr(est_obj, attribute)
85 80
86 ]]> 81 ]]>
87 </configfile> 82 </configfile>
88 </configfiles> 83 </configfiles>
89 <inputs> 84 <inputs>
90 <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object" /> 85 <param name="infile_object" type="data" format="h5mlm" label="Choose the dataset containing estimator/pipeline object" />
91 <param name="attribute_type" type="select" label="Select an attribute retrival type"> 86 <param name="attribute_type" type="select" label="Select an attribute retrival type">
92 <option value="get_params" selected="true">Estimator - get_params()</option> 87 <option value="get_params" selected="true">Estimator - get_params()</option>
93 <option value="feature_importances_">Fitted estimator - feature_importances_ </option> 88 <option value="feature_importances_" >Fitted estimator - feature_importances_ </option>
94 <option value="coef_">Fitted estimator - coef_ </option> 89 <option value="coef_">Fitted estimator - coef_ </option>
95 <option value="train_score_">Fitted estimator - train_score_ </option> 90 <option value="train_score_">Fitted estimator - train_score_ </option>
96 <option value="oob_score_">Fitted estimator - oob_score_ </option> 91 <option value="oob_score_">Fitted estimator - oob_score_ </option>
97 <option value="init_">Fitted estimator - init_ </option> 92 <option value="init_">Fitted estimator - init_ </option>
98 <option value="classifier_">Fitted BinarizeTargetClassifier - classifier_</option> 93 <option value="classifier_">Fitted BinarizeTargetClassifier - classifier_</option>
105 <option value="best_score_">SearchCV - best_score_ </option> 100 <option value="best_score_">SearchCV - best_score_ </option>
106 <option value="best_params_">SearchCV - best_params_ </option> 101 <option value="best_params_">SearchCV - best_params_ </option>
107 <option value="scores_">Feature_selection - scores_ </option> 102 <option value="scores_">Feature_selection - scores_ </option>
108 <option value="pvalues_">Feature_selection - pvalues_ </option> 103 <option value="pvalues_">Feature_selection - pvalues_ </option>
109 <option value="ranking_">Feature_selection - ranking_ </option> 104 <option value="ranking_">Feature_selection - ranking_ </option>
110 <option value="n_features_">Feature_selection - n_features_ </option> 105 <option value="n_features_in">Feature_selection - n_features_in </option>
111 <option value="grid_scores_">Feature_selection - grid_scores_ </option> 106 <option value="grid_scores_">Feature_selection - grid_scores_ </option>
112 <option value="save_weights">KerasGClassifier/KerasGRegressor - save_weights</option>
113 </param> 107 </param>
114 </inputs> 108 </inputs>
115 <outputs> 109 <outputs>
116 <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}"> 110 <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}">
117 <change_format> 111 <change_format>
118 <when input="attribute_type" value="named_steps" format="txt" /> 112 <when input="attribute_type" value="named_steps" format="txt" />
119 <when input="attribute_type" value="best_params_" format="txt" /> 113 <when input="attribute_type" value="best_params_" format="txt" />
120 <when input="attribute_type" value="final_estimator" format="zip" /> 114 <when input="attribute_type" value="final_estimator" format="h5mlm" />
121 <when input="attribute_type" value="best_estimator_" format="zip" /> 115 <when input="attribute_type" value="best_estimator_" format="h5mlm" />
122 <when input="attribute_type" value="init_" format="zip" /> 116 <when input="attribute_type" value="init_" format="h5mlm" />
123 <when input="attribute_type" value="classifier_" format="zip" /> 117 <when input="attribute_type" value="classifier_" format="h5mlm" />
124 <when input="attribute_type" value="regressor_" format="zip" /> 118 <when input="attribute_type" value="regressor_" format="h5mlm" />
125 <when input="attribute_type" value="save_weights" format="h5" />
126 </change_format> 119 </change_format>
127 </data> 120 </data>
128 </outputs> 121 </outputs>
129 <tests> 122 <tests>
130 <test> 123 <test>
131 <param name="infile_object" value="GridSearchCV.zip" ftype="zip" /> 124 <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
132 <param name="attribute_type" value="best_score_" /> 125 <param name="attribute_type" value="best_score_" />
133 <output name="outfile" file="best_score_.tabular" /> 126 <output name="outfile" file="best_score_.tabular" />
134 </test> 127 </test>
135 <test> 128 <test>
136 <param name="infile_object" value="GridSearchCV.zip" ftype="zip" /> 129 <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
137 <param name="attribute_type" value="best_params_" /> 130 <param name="attribute_type" value="best_params_" />
138 <output name="outfile" file="best_params_.txt" /> 131 <output name="outfile" file="best_params_.txt" />
139 </test> 132 </test>
140 <test> 133 <test>
141 <param name="infile_object" value="GridSearchCV.zip" ftype="zip" /> 134 <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
142 <param name="attribute_type" value="best_estimator_" /> 135 <param name="attribute_type" value="best_estimator_" />
143 <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10" /> 136 <output name="outfile" file="best_estimator_.h5mlm" compare="sim_size" delta="10" />
137 </test>
138 <test>
139 <param name="infile_object" value="searchCV01" ftype="h5mlm" />
140 <param name="attribute_type" value="final_estimator" />
141 <output name="outfile" file="final_estimator.h5mlm" compare="sim_size" delta="10" />
144 </test> 142 </test>
145 <test> 143 <test>
146 <param name="infile_object" value="best_estimator_.zip" ftype="zip" /> 144 <param name="infile_object" value="searchCV01" ftype="h5mlm" />
147 <param name="attribute_type" value="final_estimator" />
148 <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10" />
149 </test>
150 <test>
151 <param name="infile_object" value="best_estimator_.zip" ftype="zip" />
152 <param name="attribute_type" value="named_steps" /> 145 <param name="attribute_type" value="named_steps" />
153 <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5" /> 146 <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5" />
154 </test> 147 </test>
155 <test> 148 <test>
156 <param name="infile_object" value="final_estimator.zip" ftype="zip" /> 149 <param name="infile_object" value="best_estimator_.h5mlm" ftype="h5mlm" />
157 <param name="attribute_type" value="feature_importances_" /> 150 <param name="attribute_type" value="feature_importances_" />
158 <output name="outfile" file="feature_importances_.tabular" /> 151 <output name="outfile" file="feature_importances_.tabular" />
159 </test> 152 </test>
160 <test> 153 <test>
161 <param name="infile_object" value="RFE.zip" ftype="zip" /> 154 <param name="infile_object" value="RFE.h5mlm" ftype="h5mlm" />
162 <param name="attribute_type" value="ranking_" /> 155 <param name="attribute_type" value="ranking_" />
163 <output name="outfile" file="ranking_.tabular" /> 156 <output name="outfile" file="ranking_.tabular" />
164 </test> 157 </test>
165 <test> 158 <test>
166 <param name="infile_object" value="LinearRegression02.zip" ftype="zip" /> 159 <param name="infile_object" value="LinearRegression01.h5mlm" ftype="h5mlm" />
167 <param name="attribute_type" value="get_params" /> 160 <param name="attribute_type" value="get_params" />
168 <output name="outfile" value="get_params.tabular" /> 161 <output name="outfile" value="get_params.tabular" />
169 </test>
170 <test>
171 <param name="infile_object" value="fitted_keras_g_regressor01.zip" ftype="zip" />
172 <param name="attribute_type" value="save_weights" />
173 <output name="outfile" value="keras_save_weights01.h5" compare="sim_size" delta="5" />
174 </test> 162 </test>
175 </tests> 163 </tests>
176 <help> 164 <help>
177 <![CDATA[ 165 <![CDATA[
178 **What it does** 166 **What it does**