Repository 'sklearn_ensemble'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble

Changeset 14:84724d805bfa (2018-07-10)
Previous changeset 13:6352834b1c99 (2018-07-09) Next changeset 15:f02eeabab5d1 (2018-07-13)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 7c2fd140e89605fe689c39e21d70a400545e38cf
modified:
ensemble.xml
test-data/gbc_result01
test-data/rfr_result01
diff -r 6352834b1c99 -r 84724d805bfa ensemble.xml
--- a/ensemble.xml Mon Jul 09 14:32:15 2018 -0400
+++ b/ensemble.xml Tue Jul 10 03:11:34 2018 -0400
@@ -25,45 +25,58 @@
 @COLUMNS_FUNCTION@
 @GET_X_y_FUNCTION@
 
+# Get inputs, outputs.
 input_json_path = sys.argv[1]
 params = json.load(open(input_json_path, "r"))
+print params
 
+# Put all cheetah up here to avoid confusion.
 #if $selected_tasks.selected_task == "train":
+infile1 = "$selected_tasks.selected_algorithms.input_options.infile1"
+infile2 = "$selected_tasks.selected_algorithms.input_options.infile2"
+#else:
+infile_model = "$selected_tasks.infile_model"
+infile_data = "$selected_tasks.infile_data"
+#end if
+outfile_fit = "$outfile_fit"
+outfile_predict = "$outfile_predict"
+
+# All Python from here on out:
 
-algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
-options = params["selected_tasks"]["selected_algorithms"]["options"]
-if "select_max_features" in options:
-    if options["select_max_features"]["max_features"] == "number_input":
-        options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
-        options["select_max_features"].pop("num_max_features")
-    options["max_features"] = options["select_max_features"]["max_features"]
-    options.pop("select_max_features")
-if "presort" in options:
-    if options["presort"] == "true":
-        options["presort"] = True
-    if options["presort"] == "false":
-        options["presort"] = False
-if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
-    options["min_samples_leaf"] = 1
-if "min_samples_split" in options and options["min_samples_split"] > 1.0:
-    options["min_samples_split"] = int(options["min_samples_split"])
+if params["selected_tasks"]["selected_task"] == "train":
+    algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
+    options = params["selected_tasks"]["selected_algorithms"]["options"]
+    if "select_max_features" in options:
+        if options["select_max_features"]["max_features"] == "number_input":
+            options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
+            options["select_max_features"].pop("num_max_features")
+        options["max_features"] = options["select_max_features"]["max_features"]
+        options.pop("select_max_features")
+    if "presort" in options:
+        if options["presort"] == "true":
+            options["presort"] = True
+        if options["presort"] == "false":
+            options["presort"] = False
+    if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
+        options["min_samples_leaf"] = 1
+    if "min_samples_split" in options and options["min_samples_split"] > 1.0:
+        options["min_samples_split"] = int(options["min_samples_split"])
 
-X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
-
-my_class = getattr(sklearn.ensemble, algorithm)
-estimator = my_class(**options)
-estimator.fit(X,y)
-pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL)
+    X, y = get_X_y(params, infile1, infile2)
+                   
+    my_class = getattr(sklearn.ensemble, algorithm)
+    estimator = my_class(**options)
+    estimator.fit(X,y)
+    pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL)
 
-#else:
-classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r'))
-header = 'infer' if params["selected_tasks"]["header"] else None
-data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
-prediction = classifier_object.predict(data)
-prediction_df = pandas.DataFrame(prediction)
-res = pandas.concat([data, prediction_df], axis=1)
-res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)
-#end if
+else:
+    classifier_object = pickle.load(open(infile_model, 'r'))
+    header = 'infer' if params["selected_tasks"]["header"] else None
+    data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
+    prediction = classifier_object.predict(data)
+    prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
+    res = pandas.concat([data, prediction_df], axis=1)
+    res.to_csv(path_or_buf = outfile_predict, sep="\t", index=False)
 
 ]]>
         </configfile>
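For reference, a minimal standalone sketch (Python 3) of the train/predict flow the rewritten configfile implements: the task branching is now done in Python on params["selected_tasks"]["selected_task"], the chosen class is looked up by name in sklearn.ensemble, fitted, pickled, and at prediction time the model is unpickled and its predictions are appended to the input table. Function names and file paths here (train, predict, model.pkl, data.tsv) are illustrative only, not the tool's actual Cheetah-substituted inputs.

    # Simplified sketch of the configfile's logic, outside Galaxy/Cheetah.
    import pickle

    import pandas
    import sklearn.ensemble


    def train(algorithm, options, X, y, outfile_fit="model.pkl"):
        # Look up the ensemble class by name (e.g. "RandomForestRegressor")
        # and fit it with the user-supplied keyword options.
        my_class = getattr(sklearn.ensemble, algorithm)
        estimator = my_class(**options)
        estimator.fit(X, y)
        with open(outfile_fit, "wb") as fh:  # binary mode for pickle
            pickle.dump(estimator, fh, pickle.HIGHEST_PROTOCOL)
        return estimator


    def predict(infile_model="model.pkl", infile_data="data.tsv",
                outfile_predict="predicted.tsv", header="infer"):
        with open(infile_model, "rb") as fh:
            estimator = pickle.load(fh)
        data = pandas.read_csv(infile_data, sep="\t", header=header, index_col=None)
        prediction = estimator.predict(data)
        # Name the new column "predicted" so the output header matches
        # the updated test-data files below.
        prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
        res = pandas.concat([data, prediction_df], axis=1)
        res.to_csv(outfile_predict, sep="\t", index=False)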
diff -r 6352834b1c99 -r 84724d805bfa test-data/gbc_result01
--- a/test-data/gbc_result01 Mon Jul 09 14:32:15 2018 -0400
+++ b/test-data/gbc_result01 Tue Jul 10 03:11:34 2018 -0400
@@ -1,4 +1,4 @@
-0 1 2 3 0
+0 1 2 3 predicted
 3.68258022948 2.82110345641 -3.990140724 -1.9523364774 1
 0.015942057224 -0.711958594347 0.125502976978 -0.972218263337 0
 2.08690768825 0.929399321468 -2.12924084484 -1.99714022188 1
diff -r 6352834b1c99 -r 84724d805bfa test-data/rfr_result01
--- a/test-data/rfr_result01 Mon Jul 09 14:32:15 2018 -0400
+++ b/test-data/rfr_result01 Tue Jul 10 03:11:34 2018 -0400
@@ -1,4 +1,4 @@
-86.9702122735 1.00532111569 -1.01739601979 -0.613139481654 0.641846874331 0
+86.9702122735 1.00532111569 -1.01739601979 -0.613139481654 0.641846874331 predicted
 91.2021798817 -0.6215229712070001 1.11914889596 0.390012184498 1.28956938152 0.8511213285107001
 -47.4101632272 -0.638416457964 -0.7327774684530001 -0.8640261049779999 -1.06109770116 0.05344095304070007
 61.712804630200004 -1.0999480057700002 -0.739679672932 0.585657963012 1.4890682753600002 1.1892759745694002
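The only change in both test-data files is the header of the final column: because the prediction DataFrame is now built with columns=["predicted"], the output header ends in "predicted" instead of pandas' default integer label 0. A small illustration (sample values are made up):

    # Why the last column header changes from "0" to "predicted".
    import pandas

    data = pandas.DataFrame({"0": [3.68, 0.02], "1": [2.82, -0.71]})
    prediction = [1, 0]

    unnamed = pandas.concat([data, pandas.DataFrame(prediction)], axis=1)
    named = pandas.concat([data, pandas.DataFrame(prediction, columns=["predicted"])], axis=1)

    print(list(unnamed.columns))  # ['0', '1', 0]            -> old header ended in "0"
    print(list(named.columns))    # ['0', '1', 'predicted']  -> new header ends in "predicted"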