comparison feature_selection/featureSelection.xml @ 0:76a728a52df6 draft default tip

planemo upload for repository https://github.com/jaidevjoshi83/MicroBiomML commit 5ef78d4decc95ac107c468499328e7f086289ff9-dirty
author jay
date Tue, 17 Feb 2026 10:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:76a728a52df6
<tool id="feature_selector" name="Feature Selector" version="1.0.0">
    <description>Perform feature selection using SequentialFeatureSelector for microbiome data analysis.</description>

    <!-- Package dependencies (pinned versions) needed by featureSelection.py. -->
    <requirements>
        <requirement type="package" version="2.1.4">pandas</requirement>
        <requirement type="package" version="1.3.2">scikit-learn</requirement>
        <requirement type="package" version="0.1.18">hdlib</requirement>
    </requirements>

    <!-- Runs the bundled featureSelection.py; a non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code">
<![CDATA[
        python3 '$__tool_directory__/featureSelection.py'
            --input '$input'
            --metadata '$metadata_file'
            ## Forward the optional column selection. Without this the "Drop Columns"
            ## choice in the form would be silently ignored.
            ## NOTE(review): a multiple data_column parameter renders as a comma-separated
            ## list of column indices; confirm that featureSelection.py parses
            ## --dp_columns in that form.
            #if str($drop_columns.advanced_setup) == 'settings' and str($drop_columns.columns_to_drop) != 'None'
                --dp_columns '$drop_columns.columns_to_drop'
            #end if
            --threads '$threads'
            --classifier '$classifier'
            --label '$label_clm'
            --tol '$tol'
            --log '$log_file'
            --feature_out '$selected_features'
]]>
    </command>

    <inputs>
        <param name="input" type="data" format="tabular" label="Count Matrix File"
               help="A TSV file containing the count matrix with a header row." />
        <param name="metadata_file" type="data" format="tabular" label="Metadata File"
               help="A TSV file containing the metadata with a header row." />

        <!-- Optional step: choose columns of the count matrix to drop. -->
        <conditional name="drop_columns">
            <param name="advanced_setup" type="select" label="Drop Columns from Training Data.">
                <option value="default" selected="true">Do Not Drop Columns</option>
                <option value="settings">Drop Columns</option>
            </param>

            <when value="default">
            </when>

            <when value="settings">
                <!-- Column choices are read from the count matrix selected in "input". -->
                <param name="columns_to_drop" type="data_column" data_ref="input"
                       label="Columns to Drop from Training Data" argument="--dp_columns"
                       multiple="true" use_header_names="true"
                       help="Select the columns to drop from the training data." />
            </when>
        </conditional>

        <param name="threads" type="integer" value="4" label="Number of Threads"
               help="The number of threads to use for SequentialFeatureSelector." />

        <param name="classifier" type="select" label="Classifier"
               help="The classifier to use for feature selection.">
            <option value="lr">Logistic Regression</option>
            <option value="dt">Decision Tree</option>
            <option value="sv">Support Vector Classifier</option>
            <option value="rf">Random Forest</option>
            <option value="hdc">HDC Classifier</option>
        </param>

        <!-- Column choices are read from the metadata file, not from the count matrix. -->
        <param name="label_clm" type="data_column" data_ref="metadata_file" multiple="false"
               use_header_names="true" label="Class Label Column"
               help="Select the column in the metadata file that contains the class labels for feature selection." />

        <param name="tol" type="float" value="0.00001" label="Tolerance"
               help="The tolerance for SequentialFeatureSelector convergence. Lower values mean stricter convergence (default: 0.00001)." />
    </inputs>

    <outputs>
        <data name="log_file" format="txt" label="Feature Selection Log."/>
        <data name="selected_features" format="tsv" label="Selected Features."/>
    </outputs>

    <!--
        One test per classifier option, all on the same inputs and label column.
        NOTE(review): every test compares the log byte-for-byte against the same out.log,
        while the help text describes the log as containing timing information; confirm
        these comparisons are stable across runs and classifiers.
        NOTE(review): no test exercises the "Drop Columns" branch.
    -->
    <tests>
        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="lr"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>

        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="dt"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>

        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="sv"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>

        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="rf"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>

        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="hdc"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>
    </tests>

    <help><![CDATA[
**Feature Selector**

This tool performs feature selection on a count matrix using scikit-learn's `SequentialFeatureSelector`, with class labels taken from a separate metadata file.
You can choose from multiple classifiers and configure parameters such as tolerance and the number of threads.

**Inputs**

- **Count Matrix File**: A TSV file containing the features (columns) and samples (rows).
- **Metadata File**: A TSV file containing the sample metadata.
- **Drop Columns from Training Data**: Optionally select columns of the count matrix to drop from the training data.
- **Class Label Column**: The column in the metadata file that contains the class labels for feature selection.
- **Classifier**: The classifier type to use for feature selection.
- **Number of Threads**: The number of threads to use for computation.
- **Tolerance**: The convergence tolerance for the SequentialFeatureSelector (default: 0.00001).

**Outputs**

- **Feature Selection Log**: A text file containing the run details and timing information.
- **Selected Features**: A TSV file listing the selected feature names.
    ]]></help>

    <citations>
        <citation type="bibtex">
@article{cumbo2023hdlib,
title={hdlib: A Python library for designing Vector-Symbolic Architectures},
author={Cumbo, Fabio and Weitschek, Emanuel and Blankenberg, Daniel},
journal={Journal of Open Source Software},
volume={8},
number={89},
pages={5704},
year={2023}
}
        </citation>
        <citation type="bibtex">
@article{cumbo2025feature,
title={Feature selection with vector-symbolic architectures: a case study on microbial profiles of shotgun metagenomic samples of colorectal cancer},
author={Cumbo, Fabio and Truglia, Simone and Weitschek, Emanuel and Blankenberg, Daniel},
journal={Briefings in Bioinformatics},
volume={26},
number={2},
pages={bbaf177},
year={2025},
publisher={Oxford University Press}
}
        </citation>
    </citations>
</tool>