Mercurial > repos > bgruening > sklearn_feature_selection
changeset 3:3a1acc39b39b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978
author | bgruening |
---|---|
date | Wed, 30 May 2018 08:25:49 -0400 |
parents | 2eb90e73f0d5 |
children | 44e26f8a82c6 |
files | feature_selection.xml main_macros.xml test-data/mv_result07.tabular |
diffstat | 3 files changed, 57 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/feature_selection.xml Tue May 22 19:31:59 2018 -0400 +++ b/feature_selection.xml Wed May 30 08:25:49 2018 -0400 @@ -25,6 +25,8 @@ @COLUMNS_FUNCTION@ +@FEATURE_SELECTOR_FUNCTION@ + input_json_path = sys.argv[1] params = json.load(open(input_json_path, "r")) @@ -57,42 +59,10 @@ y=y.ravel() ## Create feature selector -selector = params["feature_selection_algorithms"]["selected_algorithm"] -selector = getattr(sklearn.feature_selection, selector) -options = params["feature_selection_algorithms"]["options"] - -if params['feature_selection_algorithms']['selected_algorithm'] == 'SelectFromModel': - if not options['threshold'] or options['threshold'] == 'None': - options['threshold'] = None - if 'extra_estimator' in params['feature_selection_algorithms'] and params['feature_selection_algorithms']['extra_estimator']['has_estimator'] == 'no_load': - fitted_estimator = pickle.load(open("params['feature_selection_algorithms']['extra_estimator']['fitted_estimator']", 'r')) - new_selector = selector(fitted_estimator, prefit=True, **options) - else: - estimator=params["feature_selection_algorithms"]["estimator"] - if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': - estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] - estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) - new_selector = selector(estimator, **options) - new_selector.fit(X, y) - -elif params['feature_selection_algorithms']['selected_algorithm'] in ['RFE', 'RFECV']: - if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): - options['scoring'] = None - estimator=params["feature_selection_algorithms"]["estimator"] - if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': - estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] - estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) - new_selector = selector(estimator, **options) - new_selector.fit(X, y) - -elif params['feature_selection_algorithms']['selected_algorithm'] == "VarianceThreshold": - new_selector = selector(**options) - new_selector.fit(X, y) - -else: - score_func = params["feature_selection_algorithms"]["score_func"] - score_func = getattr(sklearn.feature_selection, score_func) - new_selector = selector(score_func, **options) +new_selector = feature_selector(params['feature_selection_algorithms']) +if params['feature_selection_algorithms']['selected_algorithm'] != 'SelectFromModel' or \ + 'extra_estimator' not in params['feature_selection_algorithms'] or \ + params['feature_selection_algorithms']['extra_estimator']['has_estimator'] != 'no_load' : new_selector.fit(X, y) ## Transform to select features
--- a/main_macros.xml Tue May 22 19:31:59 2018 -0400 +++ b/main_macros.xml Wed May 30 08:25:49 2018 -0400 @@ -16,6 +16,47 @@ return y </token> +## generate an instance for one of sklearn.feature_selection classes +## must call "@COLUMNS_FUNCTION@" + <token name="@FEATURE_SELECTOR_FUNCTION@"> +def feature_selector(inputs): + selector = inputs["selected_algorithm"] + selector = getattr(sklearn.feature_selection, selector) + options = inputs["options"] + + if inputs['selected_algorithm'] == 'SelectFromModel': + if not options['threshold'] or options['threshold'] == 'None': + options['threshold'] = None + if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load': + fitted_estimator = pickle.load(open("inputs['extra_estimator']['fitted_estimator']", 'r')) + new_selector = selector(fitted_estimator, prefit=True, **options) + else: + estimator=inputs["estimator"] + if inputs["extra_estimator"]["has_estimator"]=='no': + estimator=inputs["extra_estimator"]["new_estimator"] + estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) + new_selector = selector(estimator, **options) + + elif inputs['selected_algorithm'] in ['RFE', 'RFECV']: + if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): + options['scoring'] = None + estimator=inputs["estimator"] + if inputs["extra_estimator"]["has_estimator"]=='no': + estimator=inputs["extra_estimator"]["new_estimator"] + estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) + new_selector = selector(estimator, **options) + + elif inputs['selected_algorithm'] == "VarianceThreshold": + new_selector = selector(**options) + + else: + score_func = inputs["score_func"] + score_func = getattr(sklearn.feature_selection, score_func) + new_selector = selector(score_func, **options) + + return new_selector + </token> + <xml name="python_requirements"> <requirements> <requirement type="package" version="2.7">python</requirement> @@ -794,6 +835,13 @@ </when> <yield/> </xml> + <xml name="estimator_input_no_fit"> + <expand macro="feature_selection_estimator" /> + <conditional name="extra_estimator"> + <expand macro="feature_selection_extra_estimator" /> + <expand macro="feature_selection_estimator_choices" /> + </conditional> + </xml> <xml name="feature_selection_all"> <conditional name="feature_selection_algorithms"> <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> @@ -975,8 +1023,8 @@ <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A metric used to evaluate the estimator"/> </xml> - <xml name="pre_dispatch"> - <param argument="pre_dispatch" type="text" value="all" optional="true" label="pre_dispatch" help="Number of predispatched jobs for parallel execution"/> + <xml name="pre_dispatch" token_type="text" token_default_value="all" token_help="Number of predispatched jobs for parallel execution"> + <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/> </xml> <!-- Outputs -->