Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 14:84724d805bfa draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 7c2fd140e89605fe689c39e21d70a400545e38cf
author | bgruening |
---|---|
date | Tue, 10 Jul 2018 03:11:34 -0400 |
parents | 6352834b1c99 |
children | f02eeabab5d1 |
comparison
equal
deleted
inserted
replaced
13:6352834b1c99 | 14:84724d805bfa |
---|---|
23 from scipy.io import mmread | 23 from scipy.io import mmread |
24 | 24 |
25 @COLUMNS_FUNCTION@ | 25 @COLUMNS_FUNCTION@ |
26 @GET_X_y_FUNCTION@ | 26 @GET_X_y_FUNCTION@ |
27 | 27 |
28 # Get inputs, outputs. | |
28 input_json_path = sys.argv[1] | 29 input_json_path = sys.argv[1] |
29 params = json.load(open(input_json_path, "r")) | 30 params = json.load(open(input_json_path, "r")) |
30 | 31 print params |
32 | |
33 # Put all cheetah up here to avoid confusion. | |
31 #if $selected_tasks.selected_task == "train": | 34 #if $selected_tasks.selected_task == "train": |
32 | 35 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1" |
33 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] | 36 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2" |
34 options = params["selected_tasks"]["selected_algorithms"]["options"] | |
35 if "select_max_features" in options: | |
36 if options["select_max_features"]["max_features"] == "number_input": | |
37 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"] | |
38 options["select_max_features"].pop("num_max_features") | |
39 options["max_features"] = options["select_max_features"]["max_features"] | |
40 options.pop("select_max_features") | |
41 if "presort" in options: | |
42 if options["presort"] == "true": | |
43 options["presort"] = True | |
44 if options["presort"] == "false": | |
45 options["presort"] = False | |
46 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: | |
47 options["min_samples_leaf"] = 1 | |
48 if "min_samples_split" in options and options["min_samples_split"] > 1.0: | |
49 options["min_samples_split"] = int(options["min_samples_split"]) | |
50 | |
51 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2") | |
52 | |
53 my_class = getattr(sklearn.ensemble, algorithm) | |
54 estimator = my_class(**options) | |
55 estimator.fit(X,y) | |
56 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) | |
57 | |
58 #else: | 37 #else: |
59 classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r')) | 38 infile_model = "$selected_tasks.infile_model" |
60 header = 'infer' if params["selected_tasks"]["header"] else None | 39 infile_data = "$selected_tasks.infile_data" |
61 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | |
62 prediction = classifier_object.predict(data) | |
63 prediction_df = pandas.DataFrame(prediction) | |
64 res = pandas.concat([data, prediction_df], axis=1) | |
65 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) | |
66 #end if | 40 #end if |
41 outfile_fit = "$outfile_fit" | |
42 outfile_predict = "$outfile_predict" | |
43 | |
44 # All Python from here on out: | |
45 | |
46 if params["selected_tasks"]["selected_task"] == "train": | |
47 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] | |
48 options = params["selected_tasks"]["selected_algorithms"]["options"] | |
49 if "select_max_features" in options: | |
50 if options["select_max_features"]["max_features"] == "number_input": | |
51 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"] | |
52 options["select_max_features"].pop("num_max_features") | |
53 options["max_features"] = options["select_max_features"]["max_features"] | |
54 options.pop("select_max_features") | |
55 if "presort" in options: | |
56 if options["presort"] == "true": | |
57 options["presort"] = True | |
58 if options["presort"] == "false": | |
59 options["presort"] = False | |
60 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: | |
61 options["min_samples_leaf"] = 1 | |
62 if "min_samples_split" in options and options["min_samples_split"] > 1.0: | |
63 options["min_samples_split"] = int(options["min_samples_split"]) | |
64 | |
65 X, y = get_X_y(params, infile1, infile2) | |
66 | |
67 my_class = getattr(sklearn.ensemble, algorithm) | |
68 estimator = my_class(**options) | |
69 estimator.fit(X,y) | |
70 pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL) | |
71 | |
72 else: | |
73 classifier_object = pickle.load(open(infile_model, 'r')) | |
74 header = 'infer' if params["selected_tasks"]["header"] else None | |
75 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | |
76 prediction = classifier_object.predict(data) | |
77 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) | |
78 res = pandas.concat([data, prediction_df], axis=1) | |
79 res.to_csv(path_or_buf = outfile_predict, sep="\t", index=False) | |
67 | 80 |
68 ]]> | 81 ]]> |
69 </configfile> | 82 </configfile> |
70 </configfiles> | 83 </configfiles> |
71 <inputs> | 84 <inputs> |