Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 15:f02eeabab5d1 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author | bgruening |
---|---|
date | Fri, 13 Jul 2018 03:55:19 -0400 |
parents | 84724d805bfa |
children | 4570575d060c |
comparison
equal
deleted
inserted
replaced
14:84724d805bfa | 15:f02eeabab5d1 |
---|---|
25 @COLUMNS_FUNCTION@ | 25 @COLUMNS_FUNCTION@ |
26 @GET_X_y_FUNCTION@ | 26 @GET_X_y_FUNCTION@ |
27 | 27 |
28 # Get inputs, outputs. | 28 # Get inputs, outputs. |
29 input_json_path = sys.argv[1] | 29 input_json_path = sys.argv[1] |
30 params = json.load(open(input_json_path, "r")) | 30 with open(input_json_path, "r") as param_handler: |
31 print params | 31 params = json.load(param_handler) |
 | 32 print(params) |
32 | 33 |
33 # Put all cheetah up here to avoid confusion. | 34 # Put all cheetah up here to avoid confusion. |
34 #if $selected_tasks.selected_task == "train": | 35 #if $selected_tasks.selected_task == "train": |
35 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1" | 36 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1" |
36 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2" | 37 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2" |
61 options["min_samples_leaf"] = 1 | 62 options["min_samples_leaf"] = 1 |
62 if "min_samples_split" in options and options["min_samples_split"] > 1.0: | 63 if "min_samples_split" in options and options["min_samples_split"] > 1.0: |
63 options["min_samples_split"] = int(options["min_samples_split"]) | 64 options["min_samples_split"] = int(options["min_samples_split"]) |
64 | 65 |
65 X, y = get_X_y(params, infile1, infile2) | 66 X, y = get_X_y(params, infile1, infile2) |
66 | 67 |
67 my_class = getattr(sklearn.ensemble, algorithm) | 68 my_class = getattr(sklearn.ensemble, algorithm) |
68 estimator = my_class(**options) | 69 estimator = my_class(**options) |
69 estimator.fit(X,y) | 70 estimator.fit(X,y) |
70 pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL) | 71 with open(outfile_fit, 'wb') as out_handler: |
 | 72 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) |
71 | 73 |
72 else: | 74 else: |
73 classifier_object = pickle.load(open(infile_model, 'r')) | 75 with open(infile_model, 'rb') as model_handler: |
 | 76 classifier_object = pickle.load(model_handler) |
74 header = 'infer' if params["selected_tasks"]["header"] else None | 77 header = 'infer' if params["selected_tasks"]["header"] else None |
75 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | 78 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) |
76 prediction = classifier_object.predict(data) | 79 prediction = classifier_object.predict(data) |
77 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) | 80 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) |
78 res = pandas.concat([data, prediction_df], axis=1) | 81 res = pandas.concat([data, prediction_df], axis=1) |