0.9
def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
    """Load a delimited file with pandas and keep only the requested columns.

    Parameters
    ----------
    f
        Path or file-like object passed straight to ``pandas.read_csv``.
    c
        Column selection; its expected form depends on ``c_option``:
        an iterable of 1-based column numbers for the two
        ``*by_index_number`` options, or a comma-separated string of header
        names for the two ``*by_header_name`` options. It must be supplied
        for those four options.
    c_option
        ``'by_index_number'``         keep the columns numbered in ``c``
        ``'all_but_by_index_number'`` drop the columns numbered in ``c``
        ``'by_header_name'``          keep the columns named in ``c``
        ``'all_but_by_header_name'``  drop the columns named in ``c``
        Any other value leaves every column in place and ignores ``c``.
    return_df
        When true, also return the filtered DataFrame.
    **args
        Extra keyword arguments forwarded to ``pandas.read_csv``.

    Returns
    -------
    ``data.values`` of the filtered frame, or the tuple
    ``(data.values, data)`` when ``return_df`` is true.
    """
    data = pandas.read_csv(f, **args)

    if c_option in ('by_index_number', 'all_but_by_index_number'):
        ## Callers number columns from 1; pandas positions start at 0.
        positions = [number - 1 for number in c]
        if c_option == 'by_index_number':
            data = data.iloc[:, positions]
        else:
            data = data.drop(data.columns[positions], axis=1)
    elif c_option in ('by_header_name', 'all_but_by_header_name'):
        ## Tolerate spaces around the commas in the header list.
        names = [name.strip() for name in c.split(',')]
        if c_option == 'by_header_name':
            data = data[names]
        else:
            data = data.drop(names, axis=1)

    values = data.values
    if return_df:
        return values, data
    return values
## generate an instance for one of sklearn.feature_selection classes
def feature_selector(inputs):
    """Instantiate the sklearn.feature_selection class named in ``inputs``.

    Keys read from ``inputs``:

    ``selected_algorithm``
        Attribute name looked up on ``sklearn.feature_selection``.
    ``options``
        Keyword arguments for the selector constructor. The literal string
        ``'None'`` (or a falsy value) in ``threshold`` / ``scoring`` is
        rewritten to ``None`` in this dict itself, so the caller sees the
        normalized value.
    ``estimator``, ``extra_estimator``
        Used by ``SelectFromModel``, ``RFE`` and ``RFECV``; see
        ``_estimator_expression``. For ``SelectFromModel`` with
        ``extra_estimator['has_estimator'] == 'no_load'`` an already fitted
        estimator is unpickled from ``extra_estimator['fitted_estimator']``
        and wrapped with ``prefit=True``.
    ``score_func``
        Attribute name looked up on ``sklearn.feature_selection``; used by
        every algorithm not listed above except ``VarianceThreshold``.

    Returns the constructed selector; nothing is fitted here.
    """
    algorithm = inputs["selected_algorithm"]
    selector = getattr(sklearn.feature_selection, algorithm)
    options = inputs["options"]

    if algorithm == 'SelectFromModel':
        if not options['threshold'] or options['threshold'] == 'None':
            options['threshold'] = None
        if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
            ## Open the path stored in inputs. The previous code quoted the
            ## whole subscript expression and so tried to open a file
            ## literally named after it.
            with open(inputs['extra_estimator']['fitted_estimator'], 'rb') as model_handler:
                ## NOTE(security): unpickling runs arbitrary code; only load
                ## model files from a trusted source.
                fitted_estimator = pickle.load(model_handler)
            new_selector = selector(fitted_estimator, prefit=True, **options)
        else:
            ## NOTE(security): eval executes whatever expression was
            ## supplied; it must never receive untrusted text.
            estimator = eval(_estimator_expression(inputs))
            new_selector = selector(estimator, **options)
    elif algorithm in ['RFE', 'RFECV']:
        if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
            options['scoring'] = None
        ## NOTE(security): same eval caveat as above.
        estimator = eval(_estimator_expression(inputs))
        new_selector = selector(estimator, **options)
    elif algorithm == "VarianceThreshold":
        new_selector = selector(**options)
    else:
        score_func = getattr(sklearn.feature_selection, inputs["score_func"])
        new_selector = selector(score_func, **options)

    return new_selector


def _estimator_expression(inputs):
    """Return the Python expression text describing the estimator to build.

    ``inputs['estimator']`` is the default. When ``inputs['extra_estimator']``
    is present and its ``has_estimator`` is ``'no'``, the text in
    ``extra_estimator['new_estimator']`` is used instead. A missing
    ``extra_estimator`` entry falls back to the default instead of raising
    ``KeyError``.
    """
    expression = inputs["estimator"]
    if 'extra_estimator' in inputs and inputs["extra_estimator"]["has_estimator"] == 'no':
        expression = inputs["extra_estimator"]["new_estimator"]
    ## Quotes arrive encoded as placeholder tokens; restore them before
    ## the text is evaluated.
    return expression.replace('__dq__', '"').replace("__sq__", "'")
def get_X_y(params, file1, file2):
    """Read the X and y inputs described by the tool parameters.

    All settings come from
    ``params["selected_tasks"]["selected_algorithms"]["input_options"]``.

    X: when ``selected_input`` is ``"tabular"``, ``file1`` is read through
    ``read_columns`` as a tab-separated table using ``header1`` and
    ``column_selector_options_1``; otherwise ``file1`` is handed to
    ``mmread`` (defined outside this block, presumably scipy.io.mmread;
    confirm against the file's imports).

    y: ``file2`` is always read through ``read_columns`` using ``header2``
    and ``column_selector_options_2``, then flattened with ``ravel()``.

    Returns the tuple ``(X, y)``.
    """
    input_options = params["selected_tasks"]["selected_algorithms"]["input_options"]
    ## Selector modes that need an explicit column list from the form.
    needs_columns = ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]

    if input_options["selected_input"] != "tabular":
        X = mmread(file1)
    else:
        x_header = 'infer' if input_options["header1"] else None
        x_selector = input_options["column_selector_options_1"]
        x_option = x_selector["selected_column_selector_option"]
        x_columns = x_selector["col1"] if x_option in needs_columns else None
        X = read_columns(
            file1,
            c=x_columns,
            c_option=x_option,
            sep='\t',
            header=x_header,
            parse_dates=True,
        )

    y_header = 'infer' if input_options["header2"] else None
    y_selector = input_options["column_selector_options_2"]
    y_option = y_selector["selected_column_selector_option2"]
    y_columns = y_selector["col2"] if y_option in needs_columns else None
    targets = read_columns(
        file2,
        c=y_columns,
        c_option=y_option,
        sep='\t',
        header=y_header,
        parse_dates=True,
    )
    return X, targets.ravel()
python
scikit-learn
pandas
selected_tasks['selected_task'] == 'load'
selected_tasks['selected_task'] == 'train'
10.5281/zenodo.15094
@article{scikit-learn,
title={Scikit-learn: Machine Learning in {P}ython},
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
journal={Journal of Machine Learning Research},
volume={12},
pages={2825--2830},
year={2011},
url = {https://github.com/scikit-learn/scikit-learn}
}
@Misc{,
author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
title = {{SciPy}: Open source scientific tools for {Python}},
year = {2001--},
url = "http://www.scipy.org/",
note = {[Online; accessed 2016-04-09]}
}