Repository 'sklearn_feature_selection'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_feature_selection

Changeset 6:b0d554b38770 (2018-07-01)
Previous changeset 5:2d681d0f9393 (2018-06-06) Next changeset 7:9de6fb8c4a56 (2018-07-09)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit cd4a8b019168acd5a513c57a1b1f380622f230f6
modified:
feature_selection.xml
main_macros.xml
added:
test-data/feature_selection_result11
test-data/test3.tabular
b
diff -r 2d681d0f9393 -r b0d554b38770 feature_selection.xml
--- a/feature_selection.xml Wed Jun 06 17:43:13 2018 -0400
+++ b/feature_selection.xml Sun Jul 01 03:20:10 2018 -0400
[
@@ -36,7 +36,7 @@
 if input_type=="tabular":
     header = 'infer' if features_has_header else None
     column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
-    if column_option in ["by_index_number", "all_but_by_index_number"]:
+    if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
         c = params["input_options"]["column_selector_options_1"]["col1"]
     else:
         c = None
@@ -55,7 +55,7 @@
 ## Read labels
 header = 'infer' if params["input_options"]["header2"] else None
 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
-if column_option in ["by_index_number", "all_but_by_index_number"]:
+if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
     c = params["input_options"]["column_selector_options_2"]["col2"]
 else:
     c = None
@@ -212,6 +212,19 @@
             <param name="header2" value="True"/>
             <output name="outfile" file="feature_selection_result10"/>
         </test>
+        <test>
+            <param name="selected_algorithm" value="SelectKBest"/>
+            <param name="k" value="3"/>
+            <param name="infile1" value="test3.tabular" ftype="tabular"/>
+            <param name="header1" value="True"/>
+            <param name="selected_column_selector_option" value="all_but_by_header_name"/>
+            <param name="col1" value="target"/>
+            <param name="infile2" value="test3.tabular" ftype="tabular"/>
+            <param name="header2" value="True"/>
+            <param name="selected_column_selector_option2" value="by_header_name"/>
+            <param name="col2" value="target"/>
+            <output name="outfile" file="feature_selection_result11"/>
+        </test>
     </tests>
     <help>
         <![CDATA[
b
diff -r 2d681d0f9393 -r b0d554b38770 main_macros.xml
--- a/main_macros.xml Wed Jun 06 17:43:13 2018 -0400
+++ b/main_macros.xml Sun Jul 01 03:20:10 2018 -0400
[
@@ -10,6 +10,12 @@
   if c_option == 'all_but_by_index_number':
     cols = list(map(lambda x: x - 1, c))
     data.drop(data.columns[cols], axis=1, inplace=True)
+  if c_option == 'by_header_name':
+    cols = [e.strip() for e in c.split(',')]
+    data = data[cols]
+  if c_option == 'all_but_by_header_name':
+    cols = [e.strip() for e in c.split(',')]
+    data.drop(cols, axis=1, inplace=True)
   y = data.values
   if return_df:
     return y, data
@@ -446,17 +452,23 @@
   <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1">
     <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:">
       <option value="by_index_number" selected="true">Select columns by column index number(s)</option>
-      <!--<option value="by_header_name">Select columns by column header name(s)</option>-->
+      <option value="by_header_name">Select columns by column header name(s)</option>
       <option value="all_but_by_index_number">All columns but by column index number(s)</option>
-      <!--<option value="all_but_by_header_name">All columns but by column header name(s)</option> -->
+      <option value="all_but_by_header_name">All columns but by column header name(s)</option>
       <option value="all_columns">All columns</option>
     </param>
     <when value="by_index_number">
       <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
+    <when value="by_header_name">
+      <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated header name(s). For example: target1,target2"/>
+    </when>
     <when value="all_but_by_index_number">
       <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
+    <when value="all_but_by_header_name">
+      <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated header name(s). For example: target1,target2"/>
+    </when>
     <when value="all_columns">
     </when>
   </xml>
b
diff -r 2d681d0f9393 -r b0d554b38770 test-data/feature_selection_result11
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_selection_result11 Sun Jul 01 03:20:10 2018 -0400
b
@@ -0,0 +1,51 @@
+Race AIDS Total
+4.0 2555.0 14443382.0
+4.0 55300.0 14704293.0
+4.0 82334.0 16641977.0
+4.0 38006.0 13888285.0
+4.0 16068.0 21845911.0
+2.0 2489.0 2367256.0
+2.0 34204.0 2410019.0
+2.0 51776.0 2727604.0
+2.0 23896.0 2276276.0
+2.0 10169.0 3580523.0
+3.0 1363.0 1542563.0
+3.0 20712.0 1570428.0
+3.0 27200.0 1777374.0
+3.0 11251.0 1483278.0
+3.0 4674.0 2333158.0
+1.0 38.0 699627.0
+1.0 731.0 712265.0
+1.0 1162.0 806125.0
+1.0 560.0 672738.0
+1.0 258.0 1058200.0
+0.0 26.0 169115.0
+0.0 390.0 172170.0
+0.0 417.0 194858.0
+0.0 140.0 162616.0
+0.0 48.0 255790.0
+4.0 490.0 14999423.0
+4.0 4788.0 15270378.0
+4.0 5377.0 17282659.0
+4.0 2152.0 14422956.0
+4.0 1790.0 22686934.0
+2.0 1490.0 2458391.0
+2.0 12280.0 2502800.0
+2.0 15713.0 2832611.0
+2.0 5788.0 2363908.0
+2.0 2534.0 3718366.0
+3.0 493.0 1601948.0
+3.0 4660.0 1630887.0
+3.0 5153.0 1845800.0
+3.0 1944.0 1540381.0
+3.0 910.0 2422980.0
+1.0 6.0 726561.0
+1.0 83.0 739686.0
+1.0 106.0 837159.0
+1.0 69.0 698637.0
+1.0 55.0 1098938.0
+0.0 3.0 175626.0
+0.0 78.0 178798.0
+0.0 77.0 202360.0
+0.0 31.0 168876.0
+0.0 14.0 265637.0
b
diff -r 2d681d0f9393 -r b0d554b38770 test-data/test3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test3.tabular Sun Jul 01 03:20:10 2018 -0400
b
@@ -0,0 +1,51 @@
+Age Race AIDS Total target
+0 4 2555.0 14443382.0 1
+1 4 55300.0 14704293.0 1
+2 4 82334.0 16641977.0 1
+3 4 38006.0 13888285.0 1
+4 4 16068.0 21845911.0 1
+0 2 2489.0 2367256.0 1
+1 2 34204.0 2410019.0 1
+2 2 51776.0 2727604.0 1
+3 2 23896.0 2276276.0 1
+4 2 10169.0 3580523.0 1
+0 3 1363.0 1542563.0 1
+1 3 20712.0 1570428.0 1
+2 3 27200.0 1777374.0 1
+3 3 11251.0 1483278.0 1
+4 3 4674.0 2333158.0 1
+0 1 38.0 699627.0 1
+1 1 731.0 712265.0 1
+2 1 1162.0 806125.0 1
+3 1 560.0 672738.0 1
+4 1 258.0 1058200.0 1
+0 0 26.0 169115.0 1
+1 0 390.0 172170.0 1
+2 0 417.0 194858.0 1
+3 0 140.0 162616.0 1
+4 0 48.0 255790.0 1
+0 4 490.0 14999423.0 0
+1 4 4788.0 15270378.0 0
+2 4 5377.0 17282659.0 0
+3 4 2152.0 14422956.0 0
+4 4 1790.0 22686934.0 0
+0 2 1490.0 2458391.0 0
+1 2 12280.0 2502800.0 0
+2 2 15713.0 2832611.0 0
+3 2 5788.0 2363908.0 0
+4 2 2534.0 3718366.0 0
+0 3 493.0 1601948.0 0
+1 3 4660.0 1630887.0 0
+2 3 5153.0 1845800.0 0
+3 3 1944.0 1540381.0 0
+4 3 910.0 2422980.0 0
+0 1 6.0 726561.0 0
+1 1 83.0 739686.0 0
+2 1 106.0 837159.0 0
+3 1 69.0 698637.0 0
+4 1 55.0 1098938.0 0
+0 0 3.0 175626.0 0
+1 0 78.0 178798.0 0
+2 0 77.0 202360.0 0
+3 0 31.0 168876.0 0
+4 0 14.0 265637.0 0