comparison ensemble.xml @ 14:84724d805bfa draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 7c2fd140e89605fe689c39e21d70a400545e38cf
author bgruening
date Tue, 10 Jul 2018 03:11:34 -0400
parents 6352834b1c99
children f02eeabab5d1
comparison
equal deleted inserted replaced
13:6352834b1c99 14:84724d805bfa
23 from scipy.io import mmread 23 from scipy.io import mmread
24 24
25 @COLUMNS_FUNCTION@ 25 @COLUMNS_FUNCTION@
26 @GET_X_y_FUNCTION@ 26 @GET_X_y_FUNCTION@
27 27
28 # Get inputs, outputs.
28 input_json_path = sys.argv[1] 29 input_json_path = sys.argv[1]
29 params = json.load(open(input_json_path, "r")) 30 params = json.load(open(input_json_path, "r"))
30 31 print params
32
33 # Put all cheetah up here to avoid confusion.
31 #if $selected_tasks.selected_task == "train": 34 #if $selected_tasks.selected_task == "train":
32 35 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1"
33 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] 36 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2"
34 options = params["selected_tasks"]["selected_algorithms"]["options"]
35 if "select_max_features" in options:
36 if options["select_max_features"]["max_features"] == "number_input":
37 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
38 options["select_max_features"].pop("num_max_features")
39 options["max_features"] = options["select_max_features"]["max_features"]
40 options.pop("select_max_features")
41 if "presort" in options:
42 if options["presort"] == "true":
43 options["presort"] = True
44 if options["presort"] == "false":
45 options["presort"] = False
46 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
47 options["min_samples_leaf"] = 1
48 if "min_samples_split" in options and options["min_samples_split"] > 1.0:
49 options["min_samples_split"] = int(options["min_samples_split"])
50
51 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
52
53 my_class = getattr(sklearn.ensemble, algorithm)
54 estimator = my_class(**options)
55 estimator.fit(X,y)
56 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL)
57
58 #else: 37 #else:
59 classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r')) 38 infile_model = "$selected_tasks.infile_model"
60 header = 'infer' if params["selected_tasks"]["header"] else None 39 infile_data = "$selected_tasks.infile_data"
61 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
62 prediction = classifier_object.predict(data)
63 prediction_df = pandas.DataFrame(prediction)
64 res = pandas.concat([data, prediction_df], axis=1)
65 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)
66 #end if 40 #end if
41 outfile_fit = "$outfile_fit"
42 outfile_predict = "$outfile_predict"
43
44 # All Python from here on out:
45
46 if params["selected_tasks"]["selected_task"] == "train":
47 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
48 options = params["selected_tasks"]["selected_algorithms"]["options"]
49 if "select_max_features" in options:
50 if options["select_max_features"]["max_features"] == "number_input":
51 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
52 options["select_max_features"].pop("num_max_features")
53 options["max_features"] = options["select_max_features"]["max_features"]
54 options.pop("select_max_features")
55 if "presort" in options:
56 if options["presort"] == "true":
57 options["presort"] = True
58 if options["presort"] == "false":
59 options["presort"] = False
60 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
61 options["min_samples_leaf"] = 1
62 if "min_samples_split" in options and options["min_samples_split"] > 1.0:
63 options["min_samples_split"] = int(options["min_samples_split"])
64
65 X, y = get_X_y(params, infile1, infile2)
66
67 my_class = getattr(sklearn.ensemble, algorithm)
68 estimator = my_class(**options)
69 estimator.fit(X,y)
70 pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL)
71
72 else:
73 classifier_object = pickle.load(open(infile_model, 'r'))
74 header = 'infer' if params["selected_tasks"]["header"] else None
75 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
76 prediction = classifier_object.predict(data)
77 prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
78 res = pandas.concat([data, prediction_df], axis=1)
79 res.to_csv(path_or_buf = outfile_predict, sep="\t", index=False)
67 80
68 ]]> 81 ]]>
69 </configfile> 82 </configfile>
70 </configfiles> 83 </configfiles>
71 <inputs> 84 <inputs>