comparison ensemble.xml @ 15:f02eeabab5d1 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author bgruening
date Fri, 13 Jul 2018 03:55:19 -0400
parents 84724d805bfa
children 4570575d060c
comparison
equal deleted inserted replaced
14:84724d805bfa 15:f02eeabab5d1
25 @COLUMNS_FUNCTION@ 25 @COLUMNS_FUNCTION@
26 @GET_X_y_FUNCTION@ 26 @GET_X_y_FUNCTION@
27 27
28 # Get inputs, outputs. 28 # Get inputs, outputs.
29 input_json_path = sys.argv[1] 29 input_json_path = sys.argv[1]
30 params = json.load(open(input_json_path, "r")) 30 with open(input_json_path, "r") as param_handler:
31 print params 31 params = json.load(param_handler)
32 print(params)
32 33
33 # Put all cheetah up here to avoid confusion. 34 # Put all cheetah up here to avoid confusion.
34 #if $selected_tasks.selected_task == "train": 35 #if $selected_tasks.selected_task == "train":
35 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1" 36 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1"
36 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2" 37 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2"
61 options["min_samples_leaf"] = 1 62 options["min_samples_leaf"] = 1
62 if "min_samples_split" in options and options["min_samples_split"] > 1.0: 63 if "min_samples_split" in options and options["min_samples_split"] > 1.0:
63 options["min_samples_split"] = int(options["min_samples_split"]) 64 options["min_samples_split"] = int(options["min_samples_split"])
64 65
65 X, y = get_X_y(params, infile1, infile2) 66 X, y = get_X_y(params, infile1, infile2)
66 67
67 my_class = getattr(sklearn.ensemble, algorithm) 68 my_class = getattr(sklearn.ensemble, algorithm)
68 estimator = my_class(**options) 69 estimator = my_class(**options)
69 estimator.fit(X,y) 70 estimator.fit(X,y)
70 pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL) 71 with open(outfile_fit, 'wb') as out_handler:
72 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
71 73
72 else: 74 else:
73 classifier_object = pickle.load(open(infile_model, 'r')) 75 with open(infile_model, 'rb') as model_handler:
76 classifier_object = pickle.load(model_handler)
74 header = 'infer' if params["selected_tasks"]["header"] else None 77 header = 'infer' if params["selected_tasks"]["header"] else None
75 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) 78 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
76 prediction = classifier_object.predict(data) 79 prediction = classifier_object.predict(data)
77 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) 80 prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
78 res = pandas.concat([data, prediction_df], axis=1) 81 res = pandas.concat([data, prediction_df], axis=1)