0.9
def read_columns(f, c, return_df=False, **args):
    """Load a delimited file and keep only the requested columns.

    Parameters
    ----------
    f : path or file-like object
        Passed straight through to ``pandas.read_csv``.
    c : str or iterable
        1-based column numbers, either as a comma-separated string such as
        ``"1,3,4"`` or as an iterable of integers / numeric strings.
    return_df : bool
        When true, the column-subset DataFrame is returned alongside the
        array.
    **args
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    The selected columns as an array (``DataFrame.values``), or the tuple
    ``(array, DataFrame)`` when ``return_df`` is true.

    Raises
    ------
    ValueError
        If a column token cannot be converted with ``int()``.
    """
    frame = pandas.read_csv(f, **args)
    # Strings are split on commas; anything else is taken as an iterable of
    # column numbers already.
    tokens = c.split(',') if hasattr(c, 'split') else c
    # Callers supply 1-based column numbers; ``iloc`` is 0-based.
    positions = [int(token) - 1 for token in tokens]
    subset = frame.iloc[:, positions]
    values = subset.values
    if return_df:
        return values, subset
    return values
## generate an instance of one of the sklearn.feature_selection classes
## must call "@COLUMNS_FUNCTION@"
def _resolve_estimator(inputs):
    """Build the estimator object that a wrapper-style selector will use.

    Starts from the expression string in ``inputs["estimator"]``; when
    ``inputs["extra_estimator"]["has_estimator"]`` is ``'no'`` the string in
    ``inputs["extra_estimator"]["new_estimator"]`` is used instead. The
    ``__dq__`` / ``__sq__`` placeholders are turned back into double and
    single quotes before the expression is evaluated.
    """
    expression = inputs["estimator"]
    # 'extra_estimator' is optional; a missing section means "use the default".
    extra = inputs.get("extra_estimator") or {}
    if extra.get("has_estimator") == 'no':
        expression = extra["new_estimator"]
    # NOTE(security): eval() executes arbitrary code. This is only acceptable
    # if the expression comes from a trusted source -- review before exposing
    # this to untrusted input.
    return eval(expression.replace('__dq__', '"').replace("__sq__", "'"))


def feature_selector(inputs):
    """Instantiate the ``sklearn.feature_selection`` class named in ``inputs``.

    Parameters
    ----------
    inputs : dict
        Must provide ``"selected_algorithm"`` (a class name looked up on
        ``sklearn.feature_selection``) and ``"options"`` (keyword arguments
        for that class). Depending on the algorithm it is also read for
        ``"estimator"``, ``"extra_estimator"`` and ``"score_func"``.

    Returns
    -------
    A new, unfitted selector instance. For ``SelectFromModel`` with
    ``has_estimator == 'no_load'`` the selector wraps an estimator unpickled
    from disk and is created with ``prefit=True``.

    Raises
    ------
    KeyError
        If a key required by the chosen algorithm is missing from ``inputs``.
    AttributeError
        If the algorithm or score function name does not exist in
        ``sklearn.feature_selection``.
    """
    algorithm = inputs["selected_algorithm"]
    selector = getattr(sklearn.feature_selection, algorithm)
    # Work on a copy so normalising option values does not alter the
    # caller's dictionary.
    options = dict(inputs["options"])

    if algorithm == 'SelectFromModel':
        # An empty value or the text 'None' both mean "no explicit threshold".
        if 'threshold' in options and (
                not options['threshold'] or options['threshold'] == 'None'):
            options['threshold'] = None
        extra = inputs.get('extra_estimator') or {}
        if extra.get('has_estimator') == 'no_load':
            # Open the configured path (not a literal placeholder string), in
            # binary mode as pickle requires, and close the handle afterwards.
            # NOTE(security): unpickling can execute arbitrary code; only load
            # model files from a trusted source.
            with open(extra['fitted_estimator'], 'rb') as model_handle:
                fitted_estimator = pickle.load(model_handle)
            return selector(fitted_estimator, prefit=True, **options)
        return selector(_resolve_estimator(inputs), **options)

    if algorithm in ('RFE', 'RFECV'):
        # Same normalisation as above, for the optional 'scoring' argument.
        if 'scoring' in options and (
                not options['scoring'] or options['scoring'] == 'None'):
            options['scoring'] = None
        return selector(_resolve_estimator(inputs), **options)

    if algorithm == "VarianceThreshold":
        # Takes no estimator and no score function.
        return selector(**options)

    # Every remaining algorithm is constructed from a scoring function that
    # is looked up by name on sklearn.feature_selection.
    score_func = getattr(sklearn.feature_selection, inputs["score_func"])
    return selector(score_func, **options)
pythonscikit-learnpandasselected_tasks['selected_task'] == 'load'selected_tasks['selected_task'] == 'train'10.5281/zenodo.15094
@article{scikit-learn,
title={Scikit-learn: Machine Learning in {P}ython},
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
journal={Journal of Machine Learning Research},
volume={12},
pages={2825--2830},
year={2011},
url = {https://github.com/scikit-learn/scikit-learn}
}
@Misc{scipy,
author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
title = {{SciPy}: Open source scientific tools for {Python}},
year = {2001--},
url = "http://www.scipy.org/",
note = {[Online; accessed 2016-04-09]}
}