Repository 'sklearn_model_validation'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_model_validation

Changeset 3:424d8d21744d (2018-06-05)
Previous changeset 2:dd502cb0d567 (2018-05-30) | Next changeset 4:60b8a683ba99 (2018-06-06)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 97c4f22cdcfa6cddeeffc7b102c418a7ff12a888
modified:
main_macros.xml
model_validation.xml
diff -r dd502cb0d567 -r 424d8d21744d main_macros.xml
--- a/main_macros.xml Wed May 30 08:27:01 2018 -0400
+++ b/main_macros.xml Tue Jun 05 06:48:01 2018 -0400
@@ -2,12 +2,11 @@
   <token name="@VERSION@">0.9</token>
 
   <token name="@COLUMNS_FUNCTION@">
-def read_columns(f, c, return_df=False, **args):
+def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
   data = pandas.read_csv(f, **args)
-  cols = c.split (',')
-  cols = map(int, cols)
-  cols = list(map(lambda x: x - 1, cols))
-  data = data.iloc[:,cols]
+  if c_option == 'by_index_number':
+    cols = list(map(lambda x: x - 1, c))
+    data = data.iloc[:,cols]
   y = data.values
   if return_df:
     return y, data
@@ -17,7 +16,6 @@
   </token>
 
 ## generate an instance for one of sklearn.feature_selection classes
-## must call "@COLUMNS_FUNCTION@"
   <token name="@FEATURE_SELECTOR_FUNCTION@">
 def feature_selector(inputs):
   selector = inputs["selected_algorithm"]
@@ -428,16 +426,37 @@
     <yield/>
   </xml>
 
-  <xml name="samples_tabular" token_multiple1="False" token_multiple2="False">
+  <xml name="samples_tabular" token_multiple1="false" token_multiple2="false">
     <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>
-    <param name="header1" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select target column(s):"/>
+    <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
+    <conditional name="column_selector_options_1">
+      <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@"/>
+    </conditional>
     <param name="infile2" type="data" format="tabular" label="Dataset containing class labels:"/>
-    <param name="header2" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
+    <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
+    <conditional name="column_selector_options_2">
+      <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE2@" infile="infile2"/>
+    </conditional>
     <yield/>
   </xml>
 
+  <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1">
+    <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:">
+      <option value="by_index_number" selected="true">Select columns by column index number(s)</option>
+      <!--
+      <option value="by_header_name">Select columns by column header name(s)</option>
+      <option value="all_but_by_index_number">All columns but by column index number(s)</option>
+      <option value="all_but_by_header_name">All columns but by column header name(s)</option> 
+      -->
+      <option value="all_columns">All columns</option>
+    </param>
+    <when value="by_index_number">
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+    </when>
+    <when value="all_columns">
+    </when>
+  </xml>
+
   <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False">
     <conditional name="true_columns">
       <param name="selected_input1" type="select" label="Select the input type of true labels dataset:">
@@ -470,10 +489,14 @@
   <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False">
     <param name="infile1" type="data" format="tabular" label="@LABEL1@"/>
     <param name="header1" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select the target column:"/>
+    <conditional name="column_selector_options_1">
+      <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@"/>
+    </conditional>
     <param name="infile2" type="data" format="tabular" label="@LABEL2@"/>
     <param name="header2" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
-    <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
+    <conditional name="column_selector_options_2">
+      <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE@" infile="infile2"/>
+    </conditional>
   </xml>
 
   <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format.">
diff -r dd502cb0d567 -r 424d8d21744d model_validation.xml
--- a/model_validation.xml Wed May 30 08:27:01 2018 -0400
+++ b/model_validation.xml Tue Jun 05 06:48:01 2018 -0400
@@ -35,9 +35,15 @@
 input_type = params["input_options"]["selected_input"]
 if input_type=="tabular":
     header = 'infer' if params["input_options"]["header1"] else None
+    column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
+    if column_option == "by_index_number":
+        c = params["input_options"]["column_selector_options_1"]["col1"]
+    else:
+        c = None
     X = read_columns(
             "$input_options.infile1",
-            "$input_options.col1",
+            c = c,
+            c_option = column_option,
             sep='\t',
             header=header,
             parse_dates=True
@@ -46,9 +52,15 @@
     X = mmread(open("$input_options.infile1", 'r'))
 
 header = 'infer' if params["input_options"]["header2"] else None
+column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
+if column_option == "by_index_number":
+    c = params["input_options"]["column_selector_options_2"]["col2"]
+else:
+    c = None
 y = read_columns(
         "$input_options.infile2",
-        "$input_options.col2",
+        c = c,
+        c_option = column_option,
         sep='\t',
         header=header,
         parse_dates=True
@@ -318,7 +330,7 @@
             <param name="has_estimator" value="yes"/>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
-            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
+            <param name="selected_column_selector_option" value="all_columns"/>
             <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
             <param name="header2" value="true" />
             <param name="col2" value="1"/>
@@ -336,10 +348,10 @@
             <param name="return_type" value="best_score_"/>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
-            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
+            <param name="selected_column_selector_option" value="all_columns"/>
             <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
             <param name="header2" value="true" />
-            <param name="col2" value="1"/>
+            <param name="selected_column_selector_option2" value="all_columns"/>
             <output name="outfile" file="mv_result07.tabular"/>
         </test>
     </tests>