# HG changeset patch
# User bgruening
# Date 1531161108 14400
# Node ID 478034e9826b14dc649d9dabc96f0760421aeb74
# Parent 20cd3b38d194c1ccc2928ad071cf74e5064c808e
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
diff -r 20cd3b38d194 -r 478034e9826b main_macros.xml
--- a/main_macros.xml Sun Jul 01 03:19:30 2018 -0400
+++ b/main_macros.xml Mon Jul 09 14:31:48 2018 -0400
@@ -64,6 +64,45 @@
return new_selector
+
+def get_X_y(params, file1, file2):
+    """Load the feature matrix X from file1 and the labels y from file2.
+
+    X: when "selected_input" is "tabular", file1 is read with read_columns
+    using "header1" and "column_selector_options_1"; for any other input
+    type file1 is parsed with mmread (presumably a sparse Matrix Market
+    file; confirm against the tool form). The handle is opened in a with
+    block so it is closed once the matrix has been read.
+
+    y: file2 is always read with read_columns using "header2" and
+    "column_selector_options_2", then flattened with ravel.
+
+    A truthy header flag maps to header='infer', otherwise header=None.
+    Column values ("col1" / "col2") are only looked up for the by-index and
+    by-header selector options; every other option passes c=None.
+
+    Returns the tuple (X, y).
+    """
+    input_options = params["selected_tasks"]["selected_algorithms"]["input_options"]
+    by_column = ("by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name")
+    if input_options["selected_input"] == "tabular":
+        header = 'infer' if input_options["header1"] else None
+        selector = input_options["column_selector_options_1"]
+        column_option = selector["selected_column_selector_option"]
+        c = selector["col1"] if column_option in by_column else None
+        X = read_columns(file1, c=c, c_option=column_option, sep='\t', header=header, parse_dates=True)
+    else:
+        with open(file1, 'r') as matrix_file:
+            X = mmread(matrix_file)
+
+    header = 'infer' if input_options["header2"] else None
+    selector = input_options["column_selector_options_2"]
+    column_option = selector["selected_column_selector_option2"]
+    c = selector["col2"] if column_option in by_column else None
+    y = read_columns(file2, c=c, c_option=column_option, sep='\t', header=header, parse_dates=True)
+    return X, y.ravel()
+
+
python
@@ -81,34 +120,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 20cd3b38d194 -r 478034e9826b nn_classifier.xml
--- a/nn_classifier.xml Sun Jul 01 03:19:30 2018 -0400
+++ b/nn_classifier.xml Mon Jul 09 14:31:48 2018 -0400
@@ -21,6 +21,9 @@
import pandas
import pickle
+@COLUMNS_FUNCTION@
+@GET_X_y_FUNCTION@
+
input_json_path = sys.argv[1]
params = json.load(open(input_json_path, "r"))
@@ -29,7 +32,8 @@
classifier_object = pickle.load(open("$infile_model", 'r'))
-data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
+header = 'infer' if params["selected_tasks"]["header"] else None
+data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
prediction = classifier_object.predict(data)
prediction_df = pandas.DataFrame(prediction)
res = pandas.concat([data, prediction_df], axis=1)
@@ -37,10 +41,7 @@
#else:
-data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
-
-data = data_train.ix[:,0:len(data_train.columns)-1]
-labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]])
+X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
@@ -55,7 +56,7 @@
my_class = getattr(sklearn.neighbors, classifier)
classifier_object = my_class(**options)
-classifier_object.fit(data,labels)
+classifier_object.fit(X, y)
pickle.dump(classifier_object,open("$outfile_fit", 'w+'))
@@ -65,12 +66,13 @@
-
+
+
@@ -90,6 +92,7 @@
+
@@ -104,7 +107,12 @@
-
+
+
+
+
+
+
@@ -112,7 +120,12 @@
-
+
+
+
+
+
+
@@ -120,7 +133,12 @@
-
+
+
+
+
+
+
@@ -128,18 +146,21 @@
+
+
+