# HG changeset patch
# User bgruening
# Date 1531161121 14400
# Node ID 40f3318b61c285a04fb2150d1084f9805808c451
# Parent 3bd31820d63ea50d1c04489e3c0a93a1700c2a42
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
diff -r 3bd31820d63e -r 40f3318b61c2 main_macros.xml
--- a/main_macros.xml Sun Jul 01 03:19:41 2018 -0400
+++ b/main_macros.xml Mon Jul 09 14:32:01 2018 -0400
@@ -64,6 +64,45 @@
return new_selector
+
+def get_X_y(params, file1, file2):
+    """Load the feature matrix X and the target vector y.
+
+    params: parsed tool parameters. Every setting used here is read from
+        the input_options mapping found under selected_tasks and then
+        selected_algorithms.
+    file1: path of the feature dataset. It is read with read_columns as
+        tab-separated text when selected_input equals tabular; for any
+        other value it is handed to mmread.
+    file2: path of the tab-separated dataset holding the target values,
+        always read with read_columns.
+
+    Returns the tuple (X, y). y is the result of calling ravel() on what
+    read_columns returned for file2 (presumably a NumPy array; confirm
+    against read_columns).
+    """
+    input_options = params["selected_tasks"]["selected_algorithms"]["input_options"]
+    # Only these selector modes come with an explicit column list.
+    explicit_column_options = ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]
+
+    if input_options["selected_input"]=="tabular":
+        header = 'infer' if input_options["header1"] else None
+        column_option = input_options["column_selector_options_1"]["selected_column_selector_option"]
+        if column_option in explicit_column_options:
+            c = input_options["column_selector_options_1"]["col1"]
+        else:
+            c = None
+        X = read_columns(
+            file1,
+            c = c,
+            c_option = column_option,
+            sep='\t',
+            header=header,
+            parse_dates=True
+        )
+    else:
+        # Close the handle as soon as mmread has consumed the file.
+        with open(file1, 'r') as matrix_file:
+            X = mmread(matrix_file)
+
+    header = 'infer' if input_options["header2"] else None
+    column_option = input_options["column_selector_options_2"]["selected_column_selector_option2"]
+    if column_option in explicit_column_options:
+        c = input_options["column_selector_options_2"]["col2"]
+    else:
+        c = None
+    y = read_columns(
+        file2,
+        c = c,
+        c_option = column_option,
+        sep='\t',
+        header=header,
+        parse_dates=True
+    )
+    # Flatten the selected target column(s) into one dimension.
+    y=y.ravel()
+    return X, y
+
+
python
@@ -81,34 +120,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 3bd31820d63e -r 40f3318b61c2 numeric_clustering.xml
--- a/numeric_clustering.xml Sun Jul 01 03:19:41 2018 -0400
+++ b/numeric_clustering.xml Mon Jul 09 14:32:01 2018 -0400
@@ -22,6 +22,8 @@
from sklearn import metrics
from scipy.io import mmread
+@COLUMNS_FUNCTION@
+
input_json_path = sys.argv[1]
params = json.load(open(input_json_path, "r"))
@@ -37,17 +39,22 @@
data_matrix = mmread(open("$infile", 'r'))
#else:
data = pandas.read_csv("$infile", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
-
-start_column = $input_types.start_column
-end_column = $input_types.end_column
-
-if end_column and start_column:
- if end_column >= start_column:
- data_matrix = data.values[:, start_column-1:end_column]
- else:
- data_matrix = data.values
+header = 'infer' if params["input_types"]["header"] else None  # 'infer' when the "header" parameter is truthy, otherwise None
+column_option = params["input_types"]["column_selector_options"]["selected_column_selector_option"]
+if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:  # only these selector modes read an explicit column list from "col"
+ c = params["input_types"]["column_selector_options"]["col"]
else:
- data_matrix = data.values
+ c = None  # any other selector mode passes no column list
+data_matrix = read_columns(  # NOTE(review): read_columns is not defined in this hunk; presumably supplied by the COLUMNS_FUNCTION macro token added earlier in this file - confirm
+ "$infile",
+ c = c,
+ c_option = column_option,
+ sep='\t',
+ header=header,
+ parse_dates=True,
+ encoding=None,
+ tupleize_cols=False
+)
#end if
prediction = cluster_object.fit_predict( data_matrix )
@@ -82,8 +89,10 @@
-
-
+
+
+
+
@@ -168,8 +177,7 @@
-
-
+
@@ -179,8 +187,7 @@
-
-
+
@@ -190,8 +197,7 @@
-
-
+
@@ -201,8 +207,7 @@
-
-
+
@@ -211,8 +216,7 @@
-
-
+
@@ -220,8 +224,7 @@
-
-
+
@@ -230,8 +233,7 @@
-
-
+
@@ -239,8 +241,7 @@
-
-
+
@@ -248,8 +249,7 @@
-
-
+
@@ -257,8 +257,7 @@
-
-
+
@@ -268,8 +267,7 @@
-
-
+
@@ -278,8 +276,7 @@
-
-
+
@@ -291,8 +288,7 @@
-
-
+
@@ -302,8 +298,7 @@
-
-
+
@@ -313,8 +308,7 @@
-
-
+
@@ -325,8 +319,7 @@
-
-
+