Mercurial > repos > jay > feature_selector
comparison feature_selection/featureSelection.xml @ 0:76a728a52df6 draft default tip
planemo upload for repository https://github.com/jaidevjoshi83/MicroBiomML commit 5ef78d4decc95ac107c468499328e7f086289ff9-dirty
| author | jay |
|---|---|
| date | Tue, 17 Feb 2026 10:52:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:76a728a52df6 |
|---|---|
| 1 <tool id="feature_selector" name="Feature Selector" version="1.0.0"> | |
| 2 <description>Perform feature selection using SequentialFeatureSelector for microbiome data analysis.</description> | |
| 3 <!-- Packages the tool depends on, each pinned to an exact version. --> | |
| 4 <requirements> | |
| 5 <requirement type="package" version="2.1.4">pandas</requirement> | |
| 6 <requirement type="package" version="1.3.2">scikit-learn</requirement> | |
| 7 <requirement type="package" version="0.1.18">hdlib</requirement> | |
| 8 </requirements> | |
| 9 <!-- Runs featureSelection.py from the tool directory. The dp_columns flag is emitted only when "Drop Columns" is selected; previously that selection was never forwarded. NOTE(review): Galaxy renders a multi-select data_column as a comma-separated list of column indices; confirm the script parses that form. --> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 python3 '$__tool_directory__/featureSelection.py' | |
| 12 --input '$input' | |
| 13 --metadata '$metadata_file' | |
| 14 #if str($drop_columns.advanced_setup) == 'settings' | |
| 15 --dp_columns '$drop_columns.columns_to_drop' | |
| 16 #end if | |
| 17 --threads '$threads' | |
| 18 --classifier '$classifier' | |
| 19 --label '$label_clm' | |
| 20 --tol '$tol' | |
| 21 --log '$log_file' | |
| 22 --feature_out '$selected_features' | |
| 23 ]]></command> | |
| 24 <inputs> | |
| 25 <param name="input" type="data" format="tabular" label="Count Matrix File" help="A TSV file containing the count matrix with a header row."/> | |
| 26 <param name="metadata_file" type="data" format="tabular" label="Metadata File" help="A TSV file containing the metadata with a header row."/> | |
| 27 <!-- Optional choice of count-matrix columns to leave out of the training data. --> | |
| 28 <conditional name="drop_columns"> | |
| 29 <param name="advanced_setup" type="select" label="Drop Columns from Training Data."> | |
| 30 <option value="default" selected="true">Do Not Drop Columns</option> | |
| 31 <option value="settings">Drop Columns</option> | |
| 32 </param> | |
| 33 | |
| 34 <!-- Nothing further to configure when no columns are dropped. --> | |
| 35 <when value="default"/> | |
| 36 | |
| 37 <when value="settings"> | |
| 38 <param name="columns_to_drop" argument="--dp_columns" type="data_column" data_ref="input" multiple="true" use_header_names="true" label="Columns to Drop from Training Data" help="Select the columns to drop from the training data."/> | |
| 39 </when> | |
| 40 </conditional> | |
| 41 | |
| 42 <param name="threads" type="integer" value="4" | |
| 43 label="Number of Threads" help="The number of threads to use for SequentialFeatureSelector."/> | |
| 44 | |
| 45 <param name="classifier" type="select" | |
| 46 label="Classifier" help="The classifier to use for feature selection."> | |
| 47 <option value="lr">Logistic Regression</option> | |
| 48 <option value="dt">Decision Tree</option> | |
| 49 <option value="sv">Support Vector Classifier</option> | |
| 50 <option value="rf">Random Forest</option> | |
| 51 <option value="hdc">HDC Classifier</option> | |
| 52 </param> | |
| 53 | |
| 54 <param name="label_clm" type="data_column" data_ref="metadata_file" | |
| 55 multiple="false" use_header_names="true" | |
| 56 label="Class Label Column" help="Select the column in the metadata file that contains the class labels for feature selection."/> | |
| 57 | |
| 58 <param name="tol" type="float" value="0.00001" label="Tolerance" help="The tolerance for SequentialFeatureSelector convergence. Lower values mean stricter convergence (default: 0.00001)."/> | |
| 59 | |
| 60 </inputs> | |
| 61 <!-- Both datasets are written by the command: log_file through its log option and selected_features through its feature_out option. --> | |
| 62 <outputs> | |
| 63 <data name="log_file" format="txt" label="Feature Selection Log."/> | |
| 64 <data name="selected_features" format="tsv" label="Selected Features."/> | |
| 65 </outputs> | |
| 66 <!-- One test per classifier option; every test uses the same inputs and the same expected files. NOTE(review): the help text says the log holds timing information, so confirm that an exact comparison against out.log is stable. --> | |
| 67 <tests> | |
| 68 <test> <!-- lr: Logistic Regression --> | |
| 69 <param name="classifier" value="lr"/> | |
| 70 <param name="input" value="test_count.tsv"/> | |
| 71 <param name="metadata_file" value="test_metadata.tsv"/> | |
| 72 <param name="label_clm" value="2"/> | |
| 73 <param name="threads" value="4"/> | |
| 74 <param name="tol" value="1e-05"/> | |
| 75 <output name="log_file" file="out.log"/> | |
| 76 <output name="selected_features" file="out.tsv"/> | |
| 77 </test> | |
| 78 | |
| 79 <test> <!-- dt: Decision Tree --> | |
| 80 <param name="classifier" value="dt"/> | |
| 81 <param name="input" value="test_count.tsv"/> | |
| 82 <param name="metadata_file" value="test_metadata.tsv"/> | |
| 83 <param name="label_clm" value="2"/> | |
| 84 <param name="threads" value="4"/> | |
| 85 <param name="tol" value="1e-05"/> | |
| 86 <output name="log_file" file="out.log"/> | |
| 87 <output name="selected_features" file="out.tsv"/> | |
| 88 </test> | |
| 89 | |
| 90 <test> <!-- sv: Support Vector Classifier --> | |
| 91 <param name="classifier" value="sv"/> | |
| 92 <param name="input" value="test_count.tsv"/> | |
| 93 <param name="metadata_file" value="test_metadata.tsv"/> | |
| 94 <param name="label_clm" value="2"/> | |
| 95 <param name="threads" value="4"/> | |
| 96 <param name="tol" value="1e-05"/> | |
| 97 <output name="log_file" file="out.log"/> | |
| 98 <output name="selected_features" file="out.tsv"/> | |
| 99 </test> | |
| 100 | |
| 101 <test> <!-- rf: Random Forest --> | |
| 102 <param name="classifier" value="rf"/> | |
| 103 <param name="input" value="test_count.tsv"/> | |
| 104 <param name="metadata_file" value="test_metadata.tsv"/> | |
| 105 <param name="label_clm" value="2"/> | |
| 106 <param name="threads" value="4"/> | |
| 107 <param name="tol" value="1e-05"/> | |
| 108 <output name="log_file" file="out.log"/> | |
| 109 <output name="selected_features" file="out.tsv"/> | |
| 110 </test> | |
| 111 | |
| 112 <test> <!-- hdc: HDC Classifier --> | |
| 113 <param name="classifier" value="hdc"/> | |
| 114 <param name="input" value="test_count.tsv"/> | |
| 115 <param name="metadata_file" value="test_metadata.tsv"/> | |
| 116 <param name="label_clm" value="2"/> | |
| 117 <param name="threads" value="4"/> | |
| 118 <param name="tol" value="1e-05"/> | |
| 119 <output name="log_file" file="out.log"/> | |
| 120 <output name="selected_features" file="out.tsv"/> | |
| 121 </test> | |
| 122 | |
| 123 </tests> | |
| 124 | |
| 125 <help><![CDATA[ | |
| 126 **Feature Selector** | |
| 127 | |
| 128 This tool performs feature selection on a tabular count matrix, reading the class labels from a separate metadata file, using scikit-learn's ``SequentialFeatureSelector``. | |
| 129 You can choose from multiple classifiers and configure parameters such as tolerance and the number of threads. | |
| 130 | |
| 131 **Inputs** | |
| 132 | |
| 133 - **Count Matrix File**: A TSV file containing the features (columns) and samples (rows). | |
| 134 - **Metadata File**: A TSV file containing the sample metadata. | |
| 135 - **Class Label Column**: The column in the metadata file that contains the class labels for feature selection. | |
| 136 - **Classifier**: The classifier type to use for feature selection. | |
| 137 - **Number of Threads**: The number of threads to use for computation. | |
| 138 - **Tolerance**: The convergence tolerance for the SequentialFeatureSelector (default: 0.00001). | |
| 139 | |
| 140 **Outputs** | |
| 141 | |
| 142 - **Feature Selection Log**: A text file containing the run details and timing information. | |
| 143 - **Selected Features**: A TSV file listing the selected feature names. | |
| 144 ]]></help> | |
| 145 <citations> <!-- References: the hdlib library paper and a feature-selection case study using vector-symbolic architectures. --> | |
| 146 <citation type="bibtex"> | |
| 147 @article{cumbo2023hdlib, | |
| 148 title={hdlib: A Python library for designing Vector-Symbolic Architectures}, | |
| 149 author={Cumbo, Fabio and Weitschek, Emanuel and Blankenberg, Daniel}, | |
| 150 journal={Journal of Open Source Software}, | |
| 151 volume={8}, | |
| 152 number={89}, | |
| 153 pages={5704}, | |
| 154 year={2023} | |
| 155 } | |
| 156 </citation> | |
| 157 <citation type="bibtex"> | |
| 158 @article{cumbo2025feature, | |
| 159 title={Feature selection with vector-symbolic architectures: a case study on microbial profiles of shotgun metagenomic samples of colorectal cancer}, | |
| 160 author={Cumbo, Fabio and Truglia, Simone and Weitschek, Emanuel and Blankenberg, Daniel}, | |
| 161 journal={Briefings in Bioinformatics}, | |
| 162 volume={26}, | |
| 163 number={2}, | |
| 164 pages={bbaf177}, | |
| 165 year={2025}, | |
| 166 publisher={Oxford University Press} | |
| 167 } | |
| 168 </citation> | |
| 169 </citations> | |
| 170 </tool> |
