# HG changeset patch # User bgruening # Date 1527031919 14400 # Node ID 2eb90e73f0d5a6d659cedf3ceea948e7cf7630a9 # Parent 58322d3c7bd3ae0327bde60b9824bb3a75034225 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 feature_selection.xml --- a/feature_selection.xml Sat Apr 28 18:08:48 2018 -0400 +++ b/feature_selection.xml Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ - + module, including univariate filter selection methods and recursive feature elimination algorithm main_macros.xml @@ -28,12 +28,16 @@ input_json_path = sys.argv[1] params = json.load(open(input_json_path, "r")) +## Read features +features_has_header = params["input_options"]["header1"] input_type = params["input_options"]["selected_input"] if input_type=="tabular": + header = 'infer' if features_has_header else None header = 'infer' if params["input_options"]["header1"] else None - X = read_columns( + X, input_df = read_columns( "$input_options.infile1", "$input_options.col1", + return_df = True, sep='\t', header=header, parse_dates=True @@ -41,6 +45,7 @@ else: X = mmread(open("$input_options.infile1", 'r')) +## Read labels header = 'infer' if params["input_options"]["header2"] else None y = read_columns( "$input_options.infile2", @@ -51,54 +56,55 @@ ) y=y.ravel() +## Create feature selector selector = params["feature_selection_algorithms"]["selected_algorithm"] selector = getattr(sklearn.feature_selection, selector) options = params["feature_selection_algorithms"]["options"] -#if $feature_selection_algorithms.selected_algorithm == 'SelectFromModel': -if not options['threshold'] or options['threshold'] == 'None': - options['threshold'] = None -#if $feature_selection_algorithms.extra_estimator.has_estimator == 'no_load': -fitted_estimator = pickle.load(open("$feature_selection_algorithms.extra_estimator.fitted_estimator", 'r')) -new_selector = selector(fitted_estimator, prefit=True, **options) -#else: -estimator=params["feature_selection_algorithms"]["estimator"] -if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': - estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] -estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) -new_selector = selector(estimator, **options) -new_selector.fit(X, y) -#end if +if params['feature_selection_algorithms']['selected_algorithm'] == 'SelectFromModel': + if not options['threshold'] or options['threshold'] == 'None': + options['threshold'] = None + if 'extra_estimator' in params['feature_selection_algorithms'] and params['feature_selection_algorithms']['extra_estimator']['has_estimator'] == 'no_load': + fitted_estimator = pickle.load(open("params['feature_selection_algorithms']['extra_estimator']['fitted_estimator']", 'r')) + new_selector = selector(fitted_estimator, prefit=True, **options) + else: + estimator=params["feature_selection_algorithms"]["estimator"] + if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': + estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] + estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) + new_selector = selector(estimator, **options) + new_selector.fit(X, y) -#elif $feature_selection_algorithms.selected_algorithm in ['RFE', 'RFECV']: -if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): - options['scoring'] = None -estimator=params["feature_selection_algorithms"]["estimator"] -if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': - estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] -estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) -new_selector = selector(estimator, **options) -new_selector.fit(X, y) +elif params['feature_selection_algorithms']['selected_algorithm'] in ['RFE', 'RFECV']: + if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'): + options['scoring'] = None + estimator=params["feature_selection_algorithms"]["estimator"] + if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no': + estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"] + estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'")) + new_selector = selector(estimator, **options) + new_selector.fit(X, y) -#elif $feature_selection_algorithms.selected_algorithm == "VarianceThreshold": -new_selector = selector(**options) -new_selector.fit(X, y) +elif params['feature_selection_algorithms']['selected_algorithm'] == "VarianceThreshold": + new_selector = selector(**options) + new_selector.fit(X, y) -#else: -score_func = params["feature_selection_algorithms"]["score_func"] -score_func = getattr(sklearn.feature_selection, score_func) -new_selector = selector(score_func, **options) -new_selector.fit(X, y) -#end if +else: + score_func = params["feature_selection_algorithms"]["score_func"] + score_func = getattr(sklearn.feature_selection, score_func) + new_selector = selector(score_func, **options) + new_selector.fit(X, y) -#if $select_methods.selected_method == "fit_transform": -res = new_selector.transform(X) +## Transform to select features +selected_names = None +if "$select_methods.selected_method" == "fit_transform": + res = new_selector.transform(X) + if features_has_header: + selected_names = input_df.columns[new_selector.get_support(indices=True)] +else: + res = new_selector.get_support(params["select_methods"]["indices"]) -#else: -res = new_selector.get_support(params["select_methods"]["indices"]) -#end if - -res = pandas.DataFrame(res) +res = pandas.DataFrame(res, columns = selected_names) res.to_csv(path_or_buf="$outfile", sep='\t', index=False) @@ -106,131 +112,12 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - -

- - - -

- - - - - - - - -

- - - -

- -

- - - -

- -

- - - -

- -

- - - -

- -

- - - -

- -

- - - - - - - -

- - - -

- - - - - - - -

- - - - - -

- - -

- -

- - - + - + diff -r 58322d3c7bd3 -r 2eb90e73f0d5 main_macros.xml --- a/main_macros.xml Sat Apr 28 18:08:48 2018 -0400 +++ b/main_macros.xml Tue May 22 19:31:59 2018 -0400 @@ -2,12 +2,17 @@ 0.9 -def read_columns(f, c, **args): +def read_columns(f, c, return_df=False, **args): data = pandas.read_csv(f, **args) cols = c.split (',') cols = map(int, cols) cols = list(map(lambda x: x - 1, cols)) - y = data.iloc[:,cols].values + data = data.iloc[:,cols] + y = data.values + if return_df: + return y, data + else: + return y return y @@ -789,6 +794,128 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ + +

+ + + +

+ + + + + + + + +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + + + + + +

+ + + +

+ + + + + + + +

+ + + + + +

+ + +

+ +

+ + + + diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result01 --- a/test-data/feature_selection_result01 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result01 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 +temp_1 average 69.0 69.7 59.0 58.1 88.0 77.3 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result02 --- a/test-data/feature_selection_result02 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result02 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 +temp_2 temp_1 forecast_noaa friend 68.0 69.0 65.0 88.0 60.0 59.0 57.0 66.0 85.0 88.0 75.0 70.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result03 --- a/test-data/feature_selection_result03 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result03 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 +temp_1 friend 69.0 88.0 59.0 66.0 88.0 70.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result04 --- a/test-data/feature_selection_result04 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result04 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Mon 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 1.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result05 --- a/test-data/feature_selection_result05 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result05 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result06 --- a/test-data/feature_selection_result06 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result06 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result07 --- a/test-data/feature_selection_result07 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result07 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result08 --- a/test-data/feature_selection_result08 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result08 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 +day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result09 --- a/test-data/feature_selection_result09 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result09 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 10 11 12 13 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Tues 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0 diff -r 58322d3c7bd3 -r 2eb90e73f0d5 test-data/feature_selection_result10 --- a/test-data/feature_selection_result10 Sat Apr 28 18:08:48 2018 -0400 +++ b/test-data/feature_selection_result10 Tue May 22 19:31:59 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Thurs week_Tues week_Wed 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0