Mercurial > repos > bgruening > sklearn_ensemble
diff ensemble.xml @ 14:84724d805bfa draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 7c2fd140e89605fe689c39e21d70a400545e38cf
author | bgruening |
---|---|
date | Tue, 10 Jul 2018 03:11:34 -0400 |
parents | 6352834b1c99 |
children | f02eeabab5d1 |
line wrap: on
line diff
--- a/ensemble.xml Mon Jul 09 14:32:15 2018 -0400
+++ b/ensemble.xml Tue Jul 10 03:11:34 2018 -0400
@@ -25,45 +25,58 @@
 @COLUMNS_FUNCTION@
 @GET_X_y_FUNCTION@
 
+# Get inputs, outputs.
 input_json_path = sys.argv[1]
 params = json.load(open(input_json_path, "r"))
+print params
 
+# Put all cheetah up here to avoid confusion.
 #if $selected_tasks.selected_task == "train":
+infile1 = "$selected_tasks.selected_algorithms.input_options.infile1"
+infile2 = "$selected_tasks.selected_algorithms.input_options.infile2"
+#else:
+infile_model = "$selected_tasks.infile_model"
+infile_data = "$selected_tasks.infile_data"
+#end if
+outfile_fit = "$outfile_fit"
+outfile_predict = "$outfile_predict"
+
+# All Python from here on out:
 
-algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
-options = params["selected_tasks"]["selected_algorithms"]["options"]
-if "select_max_features" in options:
-    if options["select_max_features"]["max_features"] == "number_input":
-        options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
-        options["select_max_features"].pop("num_max_features")
-    options["max_features"] = options["select_max_features"]["max_features"]
-    options.pop("select_max_features")
-if "presort" in options:
-    if options["presort"] == "true":
-        options["presort"] = True
-    if options["presort"] == "false":
-        options["presort"] = False
-if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
-    options["min_samples_leaf"] = 1
-if "min_samples_split" in options and options["min_samples_split"] > 1.0:
-    options["min_samples_split"] = int(options["min_samples_split"])
+if params["selected_tasks"]["selected_task"] == "train":
+    algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
+    options = params["selected_tasks"]["selected_algorithms"]["options"]
+    if "select_max_features" in options:
+        if options["select_max_features"]["max_features"] == "number_input":
+            options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
+            options["select_max_features"].pop("num_max_features")
+        options["max_features"] = options["select_max_features"]["max_features"]
+        options.pop("select_max_features")
+    if "presort" in options:
+        if options["presort"] == "true":
+            options["presort"] = True
+        if options["presort"] == "false":
+            options["presort"] = False
+    if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
+        options["min_samples_leaf"] = 1
+    if "min_samples_split" in options and options["min_samples_split"] > 1.0:
+        options["min_samples_split"] = int(options["min_samples_split"])
 
-X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
-
-my_class = getattr(sklearn.ensemble, algorithm)
-estimator = my_class(**options)
-estimator.fit(X,y)
-pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL)
+    X, y = get_X_y(params, infile1, infile2)
+
+    my_class = getattr(sklearn.ensemble, algorithm)
+    estimator = my_class(**options)
+    estimator.fit(X,y)
+    pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL)
 
-#else:
-classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r'))
-header = 'infer' if params["selected_tasks"]["header"] else None
-data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
-prediction = classifier_object.predict(data)
-prediction_df = pandas.DataFrame(prediction)
-res = pandas.concat([data, prediction_df], axis=1)
-res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)
-#end if
+else:
+    classifier_object = pickle.load(open(infile_model, 'r'))
+    header = 'infer' if params["selected_tasks"]["header"] else None
+    data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
+    prediction = classifier_object.predict(data)
+    prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
+    res = pandas.concat([data, prediction_df], axis=1)
+    res.to_csv(path_or_buf = outfile_predict, sep="\t", index=False)
 
 ]]>
 </configfile>