Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 21:9ce3e347506c draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
author | bgruening |
---|---|
date | Sat, 29 Sep 2018 07:30:08 -0400 |
parents | 038cecaa9e7c |
children | 2e69c6ca6e91 |
comparison
equal
deleted
inserted
replaced
20:038cecaa9e7c | 21:9ce3e347506c |
---|---|
20 import numpy as np | 20 import numpy as np |
21 import sklearn.ensemble | 21 import sklearn.ensemble |
22 import pandas | 22 import pandas |
23 from scipy.io import mmread | 23 from scipy.io import mmread |
24 | 24 |
25 execfile("$__tool_directory__/sk_whitelist.py") | 25 with open("$__tool_directory__/sk_whitelist.json", "r") as f: |
26 execfile("$__tool_directory__/utils.py", globals()) | 26 sk_whitelist = json.load(f) |
27 exec(open("$__tool_directory__/utils.py").read(), globals()) | |
27 | 28 |
28 # Get inputs, outputs. | 29 # Get inputs, outputs. |
29 input_json_path = sys.argv[1] | 30 input_json_path = sys.argv[1] |
30 with open(input_json_path, "r") as param_handler: | 31 with open(input_json_path, "r") as param_handler: |
31 params = json.load(param_handler) | 32 params = json.load(param_handler) |
73 with open(outfile_fit, 'wb') as out_handler: | 74 with open(outfile_fit, 'wb') as out_handler: |
74 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) | 75 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) |
75 | 76 |
76 else: | 77 else: |
77 with open(infile_model, 'rb') as model_handler: | 78 with open(infile_model, 'rb') as model_handler: |
78 classifier_object = SafePickler.load(model_handler) | 79 classifier_object = load_model(model_handler) |
79 header = 'infer' if params["selected_tasks"]["header"] else None | 80 header = 'infer' if params["selected_tasks"]["header"] else None |
80 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | 81 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) |
81 prediction = classifier_object.predict(data) | 82 prediction = classifier_object.predict(data) |
82 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) | 83 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) |
83 res = pandas.concat([data, prediction_df], axis=1) | 84 res = pandas.concat([data, prediction_df], axis=1) |
263 <param name="header2" value="True"/> | 264 <param name="header2" value="True"/> |
264 <param name="col2" value="1"/> | 265 <param name="col2" value="1"/> |
265 <param name="selected_task" value="train"/> | 266 <param name="selected_task" value="train"/> |
266 <param name="selected_algorithm" value="GradientBoostingRegressor"/> | 267 <param name="selected_algorithm" value="GradientBoostingRegressor"/> |
267 <param name="max_features" value="number_input"/> | 268 <param name="max_features" value="number_input"/> |
268 <param name="num_max_features" value=""/> | 269 <param name="num_max_features" value="0.5"/> |
269 <param name="random_state" value="42"/> | 270 <param name="random_state" value="42"/> |
270 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="500"/> | 271 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="500"/> |
271 </test> | 272 </test> |
272 <test> | 273 <test> |
273 <param name="infile_model" value="gbr_model01" ftype="zip"/> | 274 <param name="infile_model" value="gbr_model01" ftype="zip"/> |