Mercurial > repos > bgruening > sklearn_build_pipeline
diff pipeline.xml @ 10:775b004b7920 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:18:27 -0400 |
parents | 913ee94945f3 |
children | 3f3c6dc38f3e |
line wrap: on
line diff
--- a/pipeline.xml Tue Jul 09 19:27:47 2019 -0400 +++ b/pipeline.xml Fri Aug 09 07:18:27 2019 -0400 @@ -17,12 +17,12 @@ <![CDATA[ import imblearn import json +import pandas as pd import pickle import pprint import skrebate import sys import warnings -from mlxtend import classifier, regressor from sklearn import ( cluster, compose, decomposition, ensemble, feature_extraction, feature_selection, gaussian_process, kernel_approximation, metrics, @@ -30,11 +30,9 @@ svm, linear_model, tree, discriminant_analysis) from sklearn.pipeline import make_pipeline from imblearn.pipeline import make_pipeline as imb_make_pipeline - -sys.path.insert(0, '$__tool_directory__') +from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator, + try_get_attr, get_search_params) -from utils import SafeEval, feature_selector, get_estimator, try_get_attr -from preprocessors import Z_RandomOverSampler N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) @@ -69,6 +67,11 @@ if input_json['component_type'] == 'pre_processor': preprocessor = input_json['pre_processors']['selected_pre_processor'] pre_processor_options = input_json['pre_processors']['options'] + if 'feature_range' in pre_processor_options: + feature_range = safe_eval(pre_processor_options['feature_range'].strip()) + if not feature_range: + feature_range = (0, 1) + pre_processor_options['feature_range'] = feature_range my_class = getattr(preprocessing, preprocessor) obj = my_class(**pre_processor_options) elif input_json['component_type'] == 'feature_selection': @@ -110,6 +113,8 @@ if algorithm == 'over_sampling.SMOTENC': obj = over_sampling.SMOTENC(categorical_features=[]) elif algorithm == 'Z_RandomOverSampler': + Z_RandomOverSampler = try_get_attr('galaxy_ml.preprocessors', + 'Z_RandomOverSampler') obj = Z_RandomOverSampler() else: globals = algorithm.split('.') @@ -120,7 +125,7 @@ options = safe_eval( 'dict(' + options + ')' ) obj.set_params(**options) elif input_json['component_type'] == 'IRAPS': - iraps_core = try_get_attr('iraps_classifier','IRAPSCore')() + iraps_core = try_get_attr('galaxy_ml.iraps_classifier','IRAPSCore')() core_params = input_json['text_params'].strip() if core_params != '': try: @@ -137,8 +142,14 @@ options['occurrence'] = input_json['occurrence'] if input_json['discretize'] is not None: options['discretize'] = input_json['discretize'] - IRAPSClassifier = try_get_attr('iraps_classifier','IRAPSClassifier') + IRAPSClassifier = try_get_attr('galaxy_ml.iraps_classifier','IRAPSClassifier') obj = IRAPSClassifier(iraps_core, **options) + elif input_json['component_type'] == 'preprocessors': + encoder_selection = input_json['encoder_selection'] + encoder_type = encoder_selection.pop('encoder_type') + klass = try_get_attr('galaxy_ml.preprocessors', encoder_type) + obj = klass(**encoder_selection) + if 'n_jobs' in obj.get_params(): obj.set_params( n_jobs=N_JOBS ) return obj, is_imblearn @@ -172,6 +183,7 @@ final_est = pipeline_steps[-1] print(final_est) pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL) +out_obj = final_est #else: if has_imblearn: pipeline = imb_make_pipeline(*pipeline_steps) @@ -181,6 +193,13 @@ with open('$outfile', 'wb') as out_handler: pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL) +out_obj = pipeline +#end if + +#if $get_params +results = get_search_params(out_obj) +df = pd.DataFrame(results, columns=['', 'Parameter', 'Value']) +df.to_csv('$outfile_params', sep='\t', index=False) #end if ]]> </configfile> @@ -195,9 +214,10 @@ <option value="decomposition">Matrix Decomposition</option> <option value="kernel_approximation">Kernel Approximation</option> <option value="FeatureAgglomeration">Agglomerate Features</option> - <option value="skrebate">SK-rebate feature selection</option> - <option value="imblearn">imbalanced-learn sampling</option> + <option value="skrebate">SK-rebate Feature Selection</option> + <option value="imblearn">Imbalanced-learn Sampling</option> <option value="IRAPS">IRAPS -- feature selector and classifier</option> + <option value="preprocessors">Bio-sequence Encoders</option> </param> <when value="None"/> <when value="pre_processor"> @@ -233,6 +253,9 @@ <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/> <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/> </when> + <when value="preprocessors"> + <expand macro="preprocessors_sequence_encoders"/> + </when> </conditional> </repeat> <section name="final_estimator" title="Final Estimator" expanded="true"> @@ -266,9 +289,14 @@ <option value="Pipeline_Builder" selected="true">Pipeline</option> <option value="Final_Estimator_Builder">Final Estimator</option> </param> + <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?" + help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/> </inputs> <outputs> <data format="zip" name="outfile" label="${output_type}"/> + <data format="tabular" name="outfile_params" label="get_params for ${output_type}"> + <filter>get_params</filter> + </data> </outputs> <tests> <test> @@ -472,6 +500,23 @@ <param name="output_type" value="Final_Estimator_Builder"/> <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/> </test> + <test> + <conditional name="component_selector"> + <param name="component_type" value="preprocessors"/> + <conditional name="encoder_selection"> + <param name="encoder_type" value="GenomeOneHotEncoder"/> + <param name="seq_length" value="1000"/> + <param name="padding" value="True"/> + </conditional> + </conditional> + <section name="final_estimator"> + <conditional name="estimator_selector"> + <param name="selected_module" value="custom_estimator"/> + <param name="c_estimator" value="keras_model02" ftype="zip"/> + </conditional> + </section> + <output name="outfile" file="pipeline16" compare="sim_size" delta="5"/> + </test> </tests> <help> <![CDATA[