comparison ensemble.xml @ 13:6352834b1c99 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
author bgruening
date Mon, 09 Jul 2018 14:32:15 -0400
parents a78f96f64939
children 84724d805bfa
comparison
Legend: equal, deleted, inserted, replaced
Revisions compared: 12:a78f96f64939 (left, parent) vs. 13:6352834b1c99 (right, this changeset)
21 import pandas 21 import pandas
22 import pickle 22 import pickle
23 from scipy.io import mmread 23 from scipy.io import mmread
24 24
25 @COLUMNS_FUNCTION@ 25 @COLUMNS_FUNCTION@
26 @GET_X_y_FUNCTION@
26 27
27 input_json_path = sys.argv[1] 28 input_json_path = sys.argv[1]
28 params = json.load(open(input_json_path, "r")) 29 params = json.load(open(input_json_path, "r"))
29 30
30 #if $selected_tasks.selected_task == "train": 31 #if $selected_tasks.selected_task == "train":
44 options["presort"] = False 45 options["presort"] = False
45 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: 46 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
46 options["min_samples_leaf"] = 1 47 options["min_samples_leaf"] = 1
47 if "min_samples_split" in options and options["min_samples_split"] > 1.0: 48 if "min_samples_split" in options and options["min_samples_split"] > 1.0:
48 options["min_samples_split"] = int(options["min_samples_split"]) 49 options["min_samples_split"] = int(options["min_samples_split"])
49 input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] 50
50 if input_type=="tabular": 51 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
51 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None
52 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
53 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
54 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["col1"]
55 else:
56 c = None
57 X = read_columns(
58 "$selected_tasks.selected_algorithms.input_options.infile1",
59 c = c,
60 c_option = column_option,
61 sep='\t',
62 header=header,
63 parse_dates=True
64 )
65 else:
66 X = mmread(open("$selected_tasks.selected_algorithms.input_options.infile1", 'r'))
67
68 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None
69 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
70 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
71 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"]
72 else:
73 c = None
74 y = read_columns(
75 "$selected_tasks.selected_algorithms.input_options.infile2",
76 c = c,
77 c_option = column_option,
78 sep='\t',
79 header=header,
80 parse_dates=True
81 )
82 y=y.ravel()
83 52
84 my_class = getattr(sklearn.ensemble, algorithm) 53 my_class = getattr(sklearn.ensemble, algorithm)
85 estimator = my_class(**options) 54 estimator = my_class(**options)
86 estimator.fit(X,y) 55 estimator.fit(X,y)
87 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) 56 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL)