comparison chopin2.xml @ 0:d49893faf877 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chopin2 commit 77562e028c37bc7afd254b33be7b48a21003818c
author iuc
date Tue, 31 Jan 2023 16:31:19 +0000
parents
children 693bfd012601
comparison
equal deleted inserted replaced
-1:000000000000 0:d49893faf877
1 <?xml version="1.0"?>
2 <tool name="chopin2" id="chopin2" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@" profile="@PROFILE@" license="GPL-3.0-or-later">
3 <description>Domain-Agnostic Supervised Learning with Hyperdimensional Computing</description>
4
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="creator"/>
9 <expand macro="requirements"/>
10
<!-- Command template: symlinks the input dataset under its element identifier and runs chopin2 on that link.
     The field separator is a comma when the dataset extension is csv and a tab otherwise.
     The feature selection options are appended only when enable_fs equals "true".
     The number of processes is taken from GALAXY_SLOTS, falling back to 4 when it is unset. -->
11 <command detect_errors="exit_code">
12 <![CDATA[
13 ln -s '$dataset' '${dataset.element_identifier}' &&
14
15 chopin2
16
17 --dataset '${dataset.element_identifier}'
18 #if $dataset.ext == 'csv':
19 --fieldsep ','
20 #else:
21 --fieldsep \$'\t'
22 #end if
23
24 --dimensionality ${dimensionality}
25 --levels ${levels}
26 --retrain ${retrain}
27 --stop
28 --crossv_k ${folds}
29
30 #if $feature_selection.enable_fs == "true":
31 --select_features
32 --group_min ${feature_selection.group_min}
33 --accuracy_threshold ${feature_selection.accuracy_threshold}
34 --accuracy_uncertainty_perc ${feature_selection.accuracy_uncertainty_perc}
35 #end if
36
37 --dump
38 --cleanup
39 --nproc "\${GALAXY_SLOTS:-4}"
40 --verbose
41 ]]>
42 </command>
43
<!-- Tool parameters. "dataset" accepts csv or tabular input; "dimensionality", "levels", "retrain" and "folds"
     are integers with lower bounds set by their min attributes. The "feature_selection" conditional is driven
     by the "enable_fs" select, whose values are the strings "false" (default) and "true"; its three extra
     parameters exist only in the "true" branch. -->
44 <inputs>
45 <param name="dataset" type="data" format="csv,tabular"
46 label="Select a dataset"
47 help="Input dataset with features on columns and observations on rows. The first column must contain the observation IDs, while the last column must contain classes. The header line is also required." />
48
49 <param name="dimensionality" type="integer" value="10000" min="100"
50 label="Vectors dimensionality"
51 help="Size of hypervectors is usually 10,000 in vector-symbolic architectures. However, lower values could work
52 with small datasets in terms of number of features and observations. Please note that you may require
53 to increase this number in case of datasets with a huge number of features." />
54
55 <param name="levels" type="integer" value="1000" min="2"
56 label="Levels"
57 help="Number of level vectors. You may consider to look at the distribution of your data in order to choose
58 the most appropriate value." />
59
60 <param name="retrain" type="integer" value="0" min="0"
61 label="Model retraining iterations"
62 help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise." />
63
64 <param name="folds" type="integer" value="2" min="2"
65 label="Number of folds for cross-validation"
66 help="This tool makes use of k-folds cross-validation to evaluate the accuracy of the hyperdimensional model.
67 Make sure to choose a good number of folds for validating the classification model. Please note that higher number
68 of folds could significantly increase the running time." />
69
70 <conditional name="feature_selection">
71 <param name="enable_fs" type="select"
72 label="Enable feature selection"
73 help="If selected, this will extract a set of features with the better discriminative power among classes.
74 The feature selection algorithm is defined as a backward variable selection method.">
75 <option value="false" selected="true">Disabled</option>
76 <option value="true">Enabled</option>
77 </param>
78
79 <when value="false" />
80
81 <when value="true">
82 <param name="group_min" type="integer" value="1" min="1"
83 label="Minimum number of selected features"
84 help="Tool will stop removing features if its number will reach this value." />
85
86 <param name="accuracy_threshold" type="float" value="60.0" min="0.0" max="100.0"
87 label="Accuracy threshold"
88 help="Stop the execution if the best accuracy reached for a group of features is lower than this value." />
89
90 <param name="accuracy_uncertainty_perc" type="float" value="5.0" min="0.0" max="100.0"
91 label="Accuracy uncertainty percentage"
92 help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage." />
93 </when>
94 </conditional>
95 </inputs>
96
<!-- Outputs collected from the job working directory.
     "summary" (summary.txt) is always produced.
     "selection" (selection.txt) is produced only when feature selection is enabled. -->
97 <outputs>
98 <data format="tabular" name="summary" label="${tool.name} on ${on_string}: Summary" from_work_dir="summary.txt">
99 <actions>
100 <action name="column_names" type="metadata" default="Run ID,Group Size,Retraining,Accuracy,Excluded Feature" />
101 <action name="column_types" type="metadata" default="str,int,int,float,str" />
102 <action name="comment_lines" type="metadata" default="7" />
103 </actions>
104 </data>
105
106 <data format="tabular" name="selection" label="${tool.name} on ${on_string}: Selection" from_work_dir="selection.txt">
<!-- enable_fs is a select whose values are the strings "false" and "true". Both are non-empty and
     therefore truthy in the filter expression, so the value must be compared explicitly; otherwise
     this output is created even when feature selection is disabled. -->
107 <filter>feature_selection["enable_fs"] == "true"</filter>
108 <actions>
109 <action name="column_names" type="metadata" default="Selected Features:" />
110 <action name="column_types" type="metadata" default="str" />
111 <action name="comment_lines" type="metadata" default="3" />
112 </actions>
113 </data>
114 </outputs>
115
<!-- Three test cases, all with dimensionality 1000, levels 100, retrain 10 and 5 folds:
     1. iris.csv with feature selection left at its default;
     2. iris.tabular with feature selection left at its default;
     3. iris.csv with feature selection enabled.
     Each case checks the summary header line and a fixed 32-character hexadecimal string in the summary;
     the third case also checks the header and four feature names in the selection output. -->
116 <tests>
117 <test>
118 <param name="dataset" value="iris.csv" />
119 <param name="dimensionality" value="1000" />
120 <param name="levels" value="100" />
121 <param name="retrain" value="10" />
122 <param name="folds" value="5" />
123
124 <output name="summary" ftype="tabular" value="summary.txt">
125 <assert_contents>
126 <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/>
127 <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" />
128 </assert_contents>
129 </output>
130 </test>
131
132 <test>
133 <param name="dataset" value="iris.tabular" />
134 <param name="dimensionality" value="1000" />
135 <param name="levels" value="100" />
136 <param name="retrain" value="10" />
137 <param name="folds" value="5" />
138
139 <output name="summary" ftype="tabular" value="summary.txt">
140 <assert_contents>
141 <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/>
142 <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" />
143 </assert_contents>
144 </output>
145 </test>
146
147 <test>
148 <param name="dataset" value="iris.csv" />
149 <param name="dimensionality" value="1000" />
150 <param name="levels" value="100" />
151 <param name="retrain" value="10" />
152 <param name="folds" value="5" />
153
154 <conditional name="feature_selection">
155 <param name="enable_fs" value="true" />
156 <param name="group_min" value="1" />
157 <param name="accuracy_threshold" value="60.0" />
158 <param name="accuracy_uncertainty_perc" value="5.0" />
159 </conditional>
160
161 <output name="summary" ftype="tabular" value="summary.txt">
162 <assert_contents>
163 <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy" />
164 <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" />
165 </assert_contents>
166 </output>
167
168 <output name="selection" ftype="tabular" value="selection.txt">
169 <assert_contents>
170 <has_text text="# Selected Features:" />
171 <has_text text="PetalLengthCm" />
172 <has_text text="PetalWidthCm" />
173 <has_text text="SepalLengthCm" />
174 <has_text text="SepalWidthCm" />
175 </assert_contents>
176 </output>
177 </test>
178 </tests>
179
180 <help><![CDATA[
181 chopin2 is a domain-agnostic supervised learning classifier built according to the Hyperdimensional Computing paradigm.
182 It also implements a feature selection method based on the backward variable elimination strategy.
183
184 -----
185
186 **Input**
187
188 The input is a CSV or tabular (tab-separated) file representing a matrix with the observations on the rows and features on columns.
189 The first column must contain the observation IDs, while the last column contains the classes.
190 Also, the first line must contain the header with the column names.
191
192 The tool doesn't support datasets with missing values. It also supports numerical datasets only.
193 Please note that categorical values are allowed under the first and last columns.
194
195 -----
196
197 **Output**
198
199 The output is a summary table with information about the accuracy of the hyperdimensional model and
200 the number of retraining iterations that were required to achieve that level of accuracy.
201
202 In case the feature selection is enabled, it also returns a file with the list of selected features
203 that come out from the hyperdimensional classification model with the best accuracy.
204
205 -----
206
207 .. class:: infomark
208
209 **Notes**
210
211 Please visit the official GitHub repository_ for other information about `chopin2`.
212
213 .. _repository: https://github.com/cumbof/chopin2
214 ]]></help>
215
216 <expand macro="citations"/>
217 </tool>