Mercurial > repos > jay > feature_selector
view feature_selection/featureSelection.xml @ 0:76a728a52df6 draft default tip
planemo upload for repository https://github.com/jaidevjoshi83/MicroBiomML commit 5ef78d4decc95ac107c468499328e7f086289ff9-dirty
| author | jay |
|---|---|
| date | Tue, 17 Feb 2026 10:52:45 +0000 |
| parents | |
| children | |
line wrap: on
line source
<tool id="feature_selector" name="Feature Selector" version="1.0.0">
    <!--
        Galaxy wrapper around featureSelection.py (shipped next to this file).
        The command passes a count matrix, a metadata table, a class-label
        column, a classifier code, a tolerance and a thread count to the
        script, and collects a log file plus a table of selected features.
    -->
    <description>Perform feature selection using SequentialFeatureSelector for microbiome data analysis.</description>

    <requirements>
        <requirement type="package" version="2.1.4">pandas</requirement>
        <requirement type="package" version="1.3.2">scikit-learn</requirement>
        <requirement type="package" version="0.1.18">hdlib</requirement>
    </requirements>

    <!-- Line breaks inside the command template are for readability only;
         the Cheetah directives must sit on their own lines. -->
    <command detect_errors="exit_code"><![CDATA[
python3 '$__tool_directory__/featureSelection.py'
    --input '$input'
    --metadata '$metadata_file'
    ## Forward the columns to drop only when the user opted in. Previously the
    ## selection was collected in the form but never reached the script.
    ## NOTE(review): assumes featureSelection.py accepts --dp_columns with the
    ## comma-separated 1-based column indices Galaxy emits for a multiple
    ## data_column parameter; confirm against the script's argument parser.
    #if str($drop_columns.advanced_setup) == 'settings'
        --dp_columns '$drop_columns.columns_to_drop'
    #end if
    --threads '$threads'
    --classifier '$classifier'
    --label '$label_clm'
    --tol '$tol'
    --log '$log_file'
    --feature_out '$selected_features'
    ]]></command>

    <inputs>
        <param name="input" type="data" format="tabular" label="Count Matrix File"
               help="A TSV file containing the count matrix with a header row." />
        <param name="metadata_file" type="data" format="tabular" label="Metadata File"
               help="A TSV file containing the metadata with a header row." />

        <!-- Optional step: choose columns of the count matrix to exclude. -->
        <conditional name="drop_columns">
            <param name="advanced_setup" type="select" label="Drop Columns from Training Data.">
                <option value="default" selected="true">Do Not Drop Columns</option>
                <option value="settings">Drop Columns</option>
            </param>
            <when value="default"/>
            <when value="settings">
                <param name="columns_to_drop" type="data_column" data_ref="input"
                       label="Columns to Drop from Training Data" argument="--dp_columns"
                       multiple="true" use_header_names="true"
                       help="Select the columns to drop from the training data." />
            </when>
        </conditional>

        <param name="threads" type="integer" value="4" label="Number of Threads"
               help="The number of threads to use for SequentialFeatureSelector." />

        <!-- Option values are the short codes handed to the script's classifier option. -->
        <param name="classifier" type="select" label="Classifier"
               help="The classifier to use for feature selection.">
            <option value="lr">Logistic Regression</option>
            <option value="dt">Decision Tree</option>
            <option value="sv">Support Vector Classifier</option>
            <option value="rf">Random Forest</option>
            <option value="hdc">HDC Classifier</option>
        </param>

        <!-- Column is picked from the metadata file, not from the count matrix. -->
        <param name="label_clm" type="data_column" data_ref="metadata_file"
               multiple="false" use_header_names="true" label="Class Label Column"
               help="Select the column in the metadata file that contains the class labels for feature selection." />

        <param name="tol" type="float" value="0.00001" label="Tolerance"
               help="The tolerance for SequentialFeatureSelector convergence. Lower values mean stricter convergence (default: 0.00001)." />
    </inputs>

    <outputs>
        <data name="log_file" format="txt" label="Feature Selection Log."/>
        <data name="selected_features" format="tsv" label="Selected Features."/>
    </outputs>

    <!--
        One test per classifier code; inputs are otherwise identical.
        NOTE(review): every test compares against the same out.log and out.tsv,
        and the help text says the log contains timing information. Confirm
        that exact comparison is intended, or switch to content assertions.
    -->
    <tests>
        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="lr"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>
        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="dt"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>
        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="sv"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>
        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="rf"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>
        <test>
            <param name="input" value="test_count.tsv"/>
            <param name="metadata_file" value="test_metadata.tsv"/>
            <param name="threads" value="4"/>
            <param name="classifier" value="hdc"/>
            <param name="label_clm" value="2"/>
            <param name="tol" value="1e-05"/>
            <output name="log_file" file="out.log"/>
            <output name="selected_features" file="out.tsv"/>
        </test>
    </tests>

    <help><![CDATA[
**Feature Selector**

This tool performs feature selection on a count matrix (TSV) using scikit-learn's `SequentialFeatureSelector`, with class labels taken from a metadata file. You can choose from multiple classifiers and configure parameters such as tolerance and the number of threads.

**Inputs**

- **Count Matrix File**: A TSV file containing the features (columns) and samples (rows).
- **Metadata File**: A TSV file containing the sample metadata.
- **Drop Columns from Training Data**: Optionally select columns of the count matrix to drop from the training data.
- **Class Label Column**: The column in the metadata file that contains the class labels for feature selection.
- **Classifier**: The classifier type to use for feature selection.
- **Number of Threads**: The number of threads to use for computation.
- **Tolerance**: The convergence tolerance for the SequentialFeatureSelector (default: 0.00001).

**Outputs**

- **Feature Selection Log**: A text file containing the run details and timing information.
- **Selected Features**: A TSV file listing the selected feature names.
    ]]></help>

    <citations>
        <citation type="bibtex">
@article{cumbo2023hdlib,
  title={hdlib: A Python library for designing Vector-Symbolic Architectures},
  author={Cumbo, Fabio and Weitschek, Emanuel and Blankenberg, Daniel},
  journal={Journal of Open Source Software},
  volume={8},
  number={89},
  pages={5704},
  year={2023}
}
        </citation>
        <citation type="bibtex">
@article{cumbo2025feature,
  title={Feature selection with vector-symbolic architectures: a case study on microbial profiles of shotgun metagenomic samples of colorectal cancer},
  author={Cumbo, Fabio and Truglia, Simone and Weitschek, Emanuel and Blankenberg, Daniel},
  journal={Briefings in Bioinformatics},
  volume={26},
  number={2},
  pages={bbaf177},
  year={2025},
  publisher={Oxford University Press}
}
        </citation>
    </citations>
</tool>
