# NOTE(review): a stray token "0.9" preceded this code in the source --
# presumably a version marker from the enclosing file; confirm.


def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
    """Read a delimited file with pandas and return the selected columns.

    Parameters
    ----------
    f : path or file-like object handed to ``pandas.read_csv``.
    c : column selection, or None to keep every column.
        For the ``*by_index_number`` options it is an iterable of 1-based
        column numbers; for the ``*by_header_name`` options it is a
        comma-separated string of header names.
    c_option : one of ``'by_index_number'``, ``'all_but_by_index_number'``,
        ``'by_header_name'``, ``'all_but_by_header_name'``. Any other value
        leaves the table untouched.
    return_df : when true, also return the (filtered) DataFrame.
    **args : forwarded verbatim to ``pandas.read_csv``.

    Returns
    -------
    ``data.values`` of the filtered table, or the tuple
    ``(data.values, data)`` when ``return_df`` is true.
    """
    data = pandas.read_csv(f, **args)

    # With c=None there is nothing to select on; previously the default
    # arguments crashed with a TypeError while iterating over None.
    if c is not None:
        if c_option == 'by_index_number':
            # Incoming column numbers are 1-based; iloc is 0-based.
            data = data.iloc[:, [i - 1 for i in c]]
        elif c_option == 'all_but_by_index_number':
            data = data.drop(data.columns[[i - 1 for i in c]], axis=1)
        elif c_option == 'by_header_name':
            data = data[[name.strip() for name in c.split(',')]]
        elif c_option == 'all_but_by_header_name':
            data = data.drop([name.strip() for name in c.split(',')], axis=1)

    y = data.values
    if return_df:
        return y, data
    return y


def _resolve_estimator(inputs):
    """Build the estimator object described by ``inputs``.

    Uses ``inputs["estimator"]`` unless
    ``inputs["extra_estimator"]["has_estimator"] == 'no'``, in which case
    ``inputs["extra_estimator"]["new_estimator"]`` is used instead.
    """
    estimator = inputs["estimator"]
    extra = inputs.get("extra_estimator") or {}
    if extra.get("has_estimator") == 'no':
        estimator = extra["new_estimator"]
    # SECURITY: eval() executes whatever expression the estimator string
    # holds. Kept for compatibility; it must only ever receive trusted input.
    # '__dq__' / '__sq__' are placeholders for double / single quotes.
    return eval(estimator.replace('__dq__', '"').replace("__sq__", "'"))


def feature_selector(inputs):
    """Generate an instance for one of sklearn.feature_selection classes.

    Parameters
    ----------
    inputs : dict with at least ``"selected_algorithm"`` (class name looked
        up on ``sklearn.feature_selection``) and ``"options"`` (keyword
        arguments for that class). Depending on the algorithm it also reads
        ``"estimator"``, ``"extra_estimator"`` and ``"score_func"``.

    Returns
    -------
    The constructed selector instance.
    """
    algorithm = inputs["selected_algorithm"]
    selector = getattr(sklearn.feature_selection, algorithm)
    # Work on a copy so normalising values below does not alter the
    # caller's dictionary.
    options = dict(inputs["options"])

    if algorithm == 'SelectFromModel':
        # An empty value or the literal string 'None' means "no threshold".
        if not options['threshold'] or options['threshold'] == 'None':
            options['threshold'] = None
        extra = inputs.get('extra_estimator') or {}
        if extra.get('has_estimator') == 'no_load':
            # Open the path stored in the inputs (the previous code opened a
            # file literally named "inputs['extra_estimator'][...]").
            # SECURITY: pickle.load can execute arbitrary code; only load
            # model files from trusted sources.
            with open(extra['fitted_estimator'], 'rb') as model_handler:
                fitted_estimator = pickle.load(model_handler)
            return selector(fitted_estimator, prefit=True, **options)
        return selector(_resolve_estimator(inputs), **options)

    if algorithm in ('RFE', 'RFECV'):
        # An empty value or the literal string 'None' means "no scoring".
        if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
            options['scoring'] = None
        return selector(_resolve_estimator(inputs), **options)

    if algorithm == "VarianceThreshold":
        return selector(**options)

    # Every remaining algorithm takes a scoring function as first argument.
    score_func = getattr(sklearn.feature_selection, inputs["score_func"])
    return selector(score_func, **options)


def get_X_y(params, file1, file2):
    """Load the feature matrix X and the target vector y.

    Parameters
    ----------
    params : nested dict; everything is read from
        ``params["selected_tasks"]["selected_algorithms"]["input_options"]``.
    file1 : source of X. Read as tab-separated text when ``selected_input``
        is ``"tabular"``, otherwise passed to ``mmread``.
    file2 : tab-separated source of y.

    Returns
    -------
    ``(X, y)`` where ``y`` has been flattened with ``ravel()``.
    """
    column_options = (
        "by_index_number",
        "all_but_by_index_number",
        "by_header_name",
        "all_but_by_header_name",
    )
    input_options = params["selected_tasks"]["selected_algorithms"]["input_options"]

    if input_options["selected_input"] == "tabular":
        header = 'infer' if input_options["header1"] else None
        selector_1 = input_options["column_selector_options_1"]
        column_option = selector_1["selected_column_selector_option"]
        # Only the four column-based options carry a column list.
        c = selector_1["col1"] if column_option in column_options else None
        X = read_columns(
            file1,
            c=c,
            c_option=column_option,
            sep='\t',
            header=header,
            parse_dates=True,
        )
    else:
        X = mmread(file1)

    header = 'infer' if input_options["header2"] else None
    selector_2 = input_options["column_selector_options_2"]
    column_option = selector_2["selected_column_selector_option2"]
    c = selector_2["col2"] if column_option in column_options else None
    y = read_columns(
        file2,
        c=c,
        c_option=column_option,
        sep='\t',
        header=header,
        parse_dates=True,
    )
    y = y.ravel()
    return X, y


# NOTE(review): the tokens "python scikit-learn pandas" followed this code in
# the source -- presumably a dependency list from the enclosing file; confirm.

selected_tasks['selected_task'] == 'load' selected_tasks['selected_task'] == 'train' 10.5281/zenodo.15094 @article{scikit-learn, title={Scikit-learn: Machine Learning in {P}ython}, author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, journal={Journal of Machine Learning Research}, volume={12}, pages={2825--2830}, year={2011}, url = {https://github.com/scikit-learn/scikit-learn} } @Misc{, author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, title = {{SciPy}: Open source scientific tools for {Python}}, year = {2001--}, url = "http://www.scipy.org/", note = {[Online; accessed 2016-04-09]} }