comparison sample_generator.xml @ 35:1e99cfb71f40 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author bgruening
date Tue, 13 Apr 2021 17:52:15 +0000
parents 4ba68dd788b3
children 7f8fa89929e0
comparison
equal deleted inserted replaced
34:7068b5fcd623 35:1e99cfb71f40
1 <tool id="sklearn_sample_generator" name="Generate" version="@VERSION@"> 1 <tool id="sklearn_sample_generator" name="Generate" version="@VERSION@" profile="20.05">
2 <description>random samples with controlled size and complexity</description> 2 <description>random samples with controlled size and complexity</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements"/> 6 <expand macro="python_requirements" />
7 <expand macro="macro_stdio"/> 7 <expand macro="macro_stdio" />
8 <version_command>echo "@VERSION@"</version_command> 8 <version_command>echo "@VERSION@"</version_command>
9 <command> 9 <command>
10 <![CDATA[ 10 <![CDATA[
11 python "$sample_generator_script" '$inputs' 11 python "$sample_generator_script" '$inputs'
12 ]]> 12 ]]>
61 <!--option value="make_biclusters">Array with constant block diagonal structure for biclustering</option> 61 <!--option value="make_biclusters">Array with constant block diagonal structure for biclustering</option>
62 <option value="make_checkerboard">Array with block checkerboard structure for biclustering</option--> 62 <option value="make_checkerboard">Array with block checkerboard structure for biclustering</option-->
63 </param> 63 </param>
64 <when value="make_blobs"> 64 <when value="make_blobs">
65 <section name="options" title="Advanced Options" expanded="False"> 65 <section name="options" title="Advanced Options" expanded="False">
66 <expand macro="n_samples"/> 66 <expand macro="n_samples" />
67 <expand macro="n_features"/> 67 <expand macro="n_features" />
68 <param argument="centers" type="integer" optional="true" value="3" label="Number of centers to generate" help=" "/> 68 <param argument="centers" type="integer" optional="true" value="3" label="Number of centers to generate" help=" " />
69 <!--todo: expand centers type : int or array of shape [n_centers, n_features]--> 69 <!--todo: expand centers type : int or array of shape [n_centers, n_features]-->
70 <param argument="cluster_std" type="float" optional="true" value="1.0" label="Standard deviation of the clusters" help=" "/> 70 <param argument="cluster_std" type="float" optional="true" value="1.0" label="Standard deviation of the clusters" help=" " />
71 <!--todo: expand cluster_std type : float or sequence of floats--> 71 <!--todo: expand cluster_std type : float or sequence of floats-->
72 <!--param argument=center_box--> 72 <!--param argument=center_box-->
73 <expand macro="shuffle" label="Shuffle the samples"/> 73 <expand macro="shuffle" label="Shuffle the samples" />
74 <expand macro="random_state"/> 74 <expand macro="random_state" />
75 </section> 75 </section>
76 </when> 76 </when>
77 <when value="make_classification"> 77 <when value="make_classification">
78 <section name="options" title="Advanced Options" expanded="False"> 78 <section name="options" title="Advanced Options" expanded="False">
79 <expand macro="n_samples"/> 79 <expand macro="n_samples" />
80 <expand macro="n_features" default_value="20"/> 80 <expand macro="n_features" default_value="20" />
81 <param argument="n_informative" type="integer" optional="true" value="2" label="Number of informative features" help="Each class is composed of a number of gaussian clusters each located around the vertices of a hypercube in a subspace of dimension n_informative. For each cluster, informative features are drawn independently from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then placed on the vertices of the hypercube. "/> 81 <param argument="n_informative" type="integer" optional="true" value="2" label="Number of informative features" help="Each class is composed of a number of gaussian clusters each located around the vertices of a hypercube in a subspace of dimension n_informative. For each cluster, informative features are drawn independently from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then placed on the vertices of the hypercube. " />
82 <param argument="n_redundant" type="integer" optional="true" value="2" label="Number of redundant features" help="These features are generated as random linear combinations of the informative features. "/> 82 <param argument="n_redundant" type="integer" optional="true" value="2" label="Number of redundant features" help="These features are generated as random linear combinations of the informative features. " />
83 <param argument="n_repeated" type="integer" optional="true" value="0" label="Number of duplicated features" help="These are drawn randomly from the informative and the redundant features. "/> 83 <param argument="n_repeated" type="integer" optional="true" value="0" label="Number of duplicated features" help="These are drawn randomly from the informative and the redundant features. " />
84 <param argument="n_classes" type="integer" optional="true" value="2" label="Number of classes" help="The number of classes (or labels) of the classification problem. "/> 84 <param argument="n_classes" type="integer" optional="true" value="2" label="Number of classes" help="The number of classes (or labels) of the classification problem. " />
85 <param argument="n_clusters_per_class" type="integer" optional="true" value="2" label="Number of clusters per class" help=" "/> 85 <param argument="n_clusters_per_class" type="integer" optional="true" value="2" label="Number of clusters per class" help=" " />
86 <!--param argument = weights--> 86 <!--param argument = weights-->
87 <param argument="flip_y" type="float" optional="true" value="0.01" label="Fraction of samples with randomly exchanged class labels" help=" "/> 87 <param argument="flip_y" type="float" optional="true" value="0.01" label="Fraction of samples with randomly exchanged class labels" help=" " />
88 <!--param argument = class_sep--> 88 <!--param argument = class_sep-->
89 <!--param argument = hypercube--> 89 <!--param argument = hypercube-->
90 <!--param argument = shift--> 90 <!--param argument = shift-->
91 <!--param argument = scale--> 91 <!--param argument = scale-->
92 <expand macro="shuffle" label="Shuffle the samples"/> 92 <expand macro="shuffle" label="Shuffle the samples" />
93 <expand macro="random_state"/> 93 <expand macro="random_state" />
94 </section> 94 </section>
95 </when> 95 </when>
96 <when value="make_gaussian_quantiles"> 96 <when value="make_gaussian_quantiles">
97 <section name="options" title="Advanced Options" expanded="False"> 97 <section name="options" title="Advanced Options" expanded="False">
98 <!--param argument = mean--> 98 <!--param argument = mean-->
99 <expand macro="n_samples"/> 99 <expand macro="n_samples" />
100 <expand macro="n_features"/> 100 <expand macro="n_features" />
101 <param argument="cov" type="float" optional="true" value="1" label="Unit matrix coefficient" help="The covariance matrix will be this value times the unit matrix. This dataset only produces symmetric normal distributions. "/> 101 <param argument="cov" type="float" optional="true" value="1" label="Unit matrix coefficient" help="The covariance matrix will be this value times the unit matrix. This dataset only produces symmetric normal distributions. " />
102 <param argument="n_classes" type="integer" optional="true" value="2" label="Number of classes" help="The number of classes (or labels) of the classification problem. "/> 102 <param argument="n_classes" type="integer" optional="true" value="2" label="Number of classes" help="The number of classes (or labels) of the classification problem. " />
103 <expand macro="shuffle" label="Shuffle the samples"/> 103 <expand macro="shuffle" label="Shuffle the samples" />
104 <expand macro="random_state"/> 104 <expand macro="random_state" />
105 </section> 105 </section>
106 </when> 106 </when>
107 <when value="make_hastie_10_2"> 107 <when value="make_hastie_10_2">
108 <section name="options" title="Advanced Options" expanded="False"> 108 <section name="options" title="Advanced Options" expanded="False">
109 <expand macro="n_samples" default_value="12000"/> 109 <expand macro="n_samples" default_value="12000" />
110 <expand macro="random_state"/> 110 <expand macro="random_state" />
111 </section> 111 </section>
112 </when> 112 </when>
113 <when value="make_circles"> 113 <when value="make_circles">
114 <section name="options" title="Advanced Options" expanded="False"> 114 <section name="options" title="Advanced Options" expanded="False">
115 <expand macro="n_samples"/> 115 <expand macro="n_samples" />
116 <expand macro="shuffle" label="Shuffle the samples"/> 116 <expand macro="shuffle" label="Shuffle the samples" />
117 <expand macro="noise" default_value=""/> 117 <expand macro="noise" default_value="" />
118 <param argument="factor" type="float" optional="true" value="0.8" label="Scale factor between inner and outer circle" help=" Floating point number less than 1. "/> 118 <param argument="factor" type="float" optional="true" value="0.8" label="Scale factor between inner and outer circle" help=" Floating point number less than 1. " />
119 <expand macro="random_state"/> 119 <expand macro="random_state" />
120 </section> 120 </section>
121 </when> 121 </when>
122 <when value="make_moons"> 122 <when value="make_moons">
123 <section name="options" title="Advanced Options" expanded="False"> 123 <section name="options" title="Advanced Options" expanded="False">
124 <expand macro="n_samples"/> 124 <expand macro="n_samples" />
125 <expand macro="shuffle" label="Shuffle the samples"/> 125 <expand macro="shuffle" label="Shuffle the samples" />
126 <expand macro="noise" default_value=""/> 126 <expand macro="noise" default_value="" />
127 <expand macro="random_state"/> 127 <expand macro="random_state" />
128 </section> 128 </section>
129 </when> 129 </when>
130 <when value="make_regression"> 130 <when value="make_regression">
131 <section name="options" title="Advanced Options" expanded="False"> 131 <section name="options" title="Advanced Options" expanded="False">
132 <expand macro="n_samples"/> 132 <expand macro="n_samples" />
133 <expand macro="n_features" default_value="100"/> 133 <expand macro="n_features" default_value="100" />
134 <param argument="n_informative" type="integer" optional="true" value="10" label="Number of informative features" help="the number of features used to build the linear model used to generate the output "/> 134 <param argument="n_informative" type="integer" optional="true" value="10" label="Number of informative features" help="the number of features used to build the linear model used to generate the output " />
135 <param argument="n_targets" type="integer" optional="true" value="1" label="Number of regression targets" help="The dimension of the y output vector associated with a sample. By default, the output is a scalar."/> 135 <param argument="n_targets" type="integer" optional="true" value="1" label="Number of regression targets" help="The dimension of the y output vector associated with a sample. By default, the output is a scalar." />
136 <param argument="bias" type="float" optional="true" value="0.0" label="Bias of the true function" help="The bias term in the underlying linear model. "/> 136 <param argument="bias" type="float" optional="true" value="0.0" label="Bias of the true function" help="The bias term in the underlying linear model. " />
137 <!--param argument = effective_rank--> 137 <!--param argument = effective_rank-->
138 <!--param argument = tail_strength--> 138 <!--param argument = tail_strength-->
139 <!--param argument = coef--> 139 <!--param argument = coef-->
140 <expand macro="noise"/> 140 <expand macro="noise" />
141 <expand macro="random_state"/> 141 <expand macro="random_state" />
142 </section> 142 </section>
143 </when> 143 </when>
144 <when value="make_sparse_uncorrelated"> 144 <when value="make_sparse_uncorrelated">
145 <section name="options" title="Advanced Options" expanded="False"> 145 <section name="options" title="Advanced Options" expanded="False">
146 <expand macro="n_samples"/> 146 <expand macro="n_samples" />
147 <expand macro="n_features" default_value="10"/> 147 <expand macro="n_features" default_value="10" />
148 <expand macro="random_state"/> 148 <expand macro="random_state" />
149 </section> 149 </section>
150 </when> 150 </when>
151 <when value="make_friedman1"> 151 <when value="make_friedman1">
152 <section name="options" title="Advanced Options" expanded="False"> 152 <section name="options" title="Advanced Options" expanded="False">
153 <expand macro="n_samples"/> 153 <expand macro="n_samples" />
154 <expand macro="n_features" default_value="10"/> 154 <expand macro="n_features" default_value="10" />
155 <expand macro="noise"/> 155 <expand macro="noise" />
156 <expand macro="random_state"/> 156 <expand macro="random_state" />
157 </section> 157 </section>
158 </when> 158 </when>
159 <when value="make_friedman2"> 159 <when value="make_friedman2">
160 <section name="options" title="Advanced Options" expanded="False"> 160 <section name="options" title="Advanced Options" expanded="False">
161 <expand macro="n_samples"/> 161 <expand macro="n_samples" />
162 <expand macro="noise"/> 162 <expand macro="noise" />
163 <expand macro="random_state"/> 163 <expand macro="random_state" />
164 </section> 164 </section>
165 </when> 165 </when>
166 <when value="make_friedman3"> 166 <when value="make_friedman3">
167 <section name="options" title="Advanced Options" expanded="False"> 167 <section name="options" title="Advanced Options" expanded="False">
168 <expand macro="n_samples"/> 168 <expand macro="n_samples" />
169 <expand macro="noise"/> 169 <expand macro="noise" />
170 <expand macro="random_state"/> 170 <expand macro="random_state" />
171 </section> 171 </section>
172 </when> 172 </when>
173 <!--when value="make_low_rank_matrix"> 173 <!--when value="make_low_rank_matrix">
174 <section name="options" title="Advanced Options" expanded="False"> 174 <section name="options" title="Advanced Options" expanded="False">
175 <expand macro="n_samples"/> 175 <expand macro="n_samples"/>
184 <expand macro="random_state"/> 184 <expand macro="random_state"/>
185 </section> 185 </section>
186 </when--> 186 </when-->
187 <when value="make_s_curve"> 187 <when value="make_s_curve">
188 <section name="options" title="Advanced Options" expanded="False"> 188 <section name="options" title="Advanced Options" expanded="False">
189 <expand macro="n_samples"/> 189 <expand macro="n_samples" />
190 <expand macro="noise"/> 190 <expand macro="noise" />
191 <expand macro="random_state"/> 191 <expand macro="random_state" />
192 </section> 192 </section>
193 </when> 193 </when>
194 <when value="make_swiss_roll"> 194 <when value="make_swiss_roll">
195 <section name="options" title="Advanced Options" expanded="False"> 195 <section name="options" title="Advanced Options" expanded="False">
196 <expand macro="n_samples"/> 196 <expand macro="n_samples" />
197 <expand macro="noise"/> 197 <expand macro="noise" />
198 <expand macro="random_state"/> 198 <expand macro="random_state" />
199 </section> 199 </section>
200 </when> 200 </when>
201 <!--when value="make_sparse_coded_signal"> 201 <!--when value="make_sparse_coded_signal">
202 <section name="options" title="Advanced Options" expanded="False"> 202 <section name="options" title="Advanced Options" expanded="False">
203 <expand macro="n_samples" default_value=""/> 203 <expand macro="n_samples" default_value=""/>
230 </section> 230 </section>
231 </when--> 231 </when-->
232 </conditional> 232 </conditional>
233 </inputs> 233 </inputs>
234 <outputs> 234 <outputs>
235 <data format="tabular" name="outfile"/> 235 <data format="tabular" name="outfile" />
236 </outputs> 236 </outputs>
237 <tests> 237 <tests>
238 <test> 238 <test>
239 <param name="selected_generator" value="make_blobs"/> 239 <param name="selected_generator" value="make_blobs" />
240 <param name="random_state" value="100"/> 240 <param name="random_state" value="100" />
241 <output name="outfile" file="blobs.txt" compare="sim_size" delta="1"/> 241 <output name="outfile" file="blobs.txt" compare="sim_size" delta="1" />
242 </test> 242 </test>
243 <test> 243 <test>
244 <param name="selected_generator" value="make_classification"/> 244 <param name="selected_generator" value="make_classification" />
245 <param name="random_state" value="100"/> 245 <param name="random_state" value="100" />
246 <output name="outfile" file="class.txt" compare="sim_size" /> 246 <output name="outfile" file="class.txt" compare="sim_size" />
247 </test> 247 </test>
248 <test> 248 <test>
249 <param name="selected_generator" value="make_circles"/> 249 <param name="selected_generator" value="make_circles" />
250 <param name="random_state" value="100"/> 250 <param name="random_state" value="100" />
251 <output name="outfile" file="circles.txt" compare="sim_size" delta="1"/> 251 <output name="outfile" file="circles.txt" compare="sim_size" delta="1" />
252 </test> 252 </test>
253 <test> 253 <test>
254 <param name="selected_generator" value="make_friedman1"/> 254 <param name="selected_generator" value="make_friedman1" />
255 <param name="random_state" value="100"/> 255 <param name="random_state" value="100" />
256 <output name="outfile" file="friedman1.txt" compare="sim_size" delta="1"/> 256 <output name="outfile" file="friedman1.txt" compare="sim_size" delta="1" />
257 </test> 257 </test>
258 <test> 258 <test>
259 <param name="selected_generator" value="make_friedman2"/> 259 <param name="selected_generator" value="make_friedman2" />
260 <param name="random_state" value="100"/> 260 <param name="random_state" value="100" />
261 <output name="outfile" file="friedman2.txt" compare="sim_size" delta="1"/> 261 <output name="outfile" file="friedman2.txt" compare="sim_size" delta="1" />
262 </test> 262 </test>
263 <test> 263 <test>
264 <param name="selected_generator" value="make_friedman3"/> 264 <param name="selected_generator" value="make_friedman3" />
265 <param name="random_state" value="100"/> 265 <param name="random_state" value="100" />
266 <output name="outfile" file="friedman3.txt" compare="sim_size" delta="1"/> 266 <output name="outfile" file="friedman3.txt" compare="sim_size" delta="1" />
267 </test> 267 </test>
268 <test> 268 <test>
269 <param name="selected_generator" value="make_gaussian_quantiles"/> 269 <param name="selected_generator" value="make_gaussian_quantiles" />
270 <param name="random_state" value="100"/> 270 <param name="random_state" value="100" />
271 <output name="outfile" file="gaus.txt" compare="sim_size" delta="1"/> 271 <output name="outfile" file="gaus.txt" compare="sim_size" delta="1" />
272 </test> 272 </test>
273 <test> 273 <test>
274 <param name="selected_generator" value="make_hastie_10_2"/> 274 <param name="selected_generator" value="make_hastie_10_2" />
275 <param name="random_state" value="100"/> 275 <param name="random_state" value="100" />
276 <output name="outfile" file="hastie.txt" compare="sim_size" delta="1"/> 276 <output name="outfile" file="hastie.txt" compare="sim_size" delta="1" />
277 </test> 277 </test>
278 <test> 278 <test>
279 <param name="selected_generator" value="make_moons"/> 279 <param name="selected_generator" value="make_moons" />
280 <param name="random_state" value="100"/> 280 <param name="random_state" value="100" />
281 <output name="outfile" file="moons.txt" compare="sim_size" delta="1"/> 281 <output name="outfile" file="moons.txt" compare="sim_size" delta="1" />
282 </test> 282 </test>
283 <test> 283 <test>
284 <param name="selected_generator" value="make_regression"/> 284 <param name="selected_generator" value="make_regression" />
285 <param name="random_state" value="100"/> 285 <param name="random_state" value="100" />
286 <output name="outfile" file="regression.txt" compare="sim_size" delta="1" /> 286 <output name="outfile" file="regression.txt" compare="sim_size" delta="1" />
287 </test> 287 </test>
288 <test> 288 <test>
289 <param name="selected_generator" value="make_s_curve"/> 289 <param name="selected_generator" value="make_s_curve" />
290 <param name="random_state" value="100"/> 290 <param name="random_state" value="100" />
291 <output name="outfile" file="scurve.txt" compare="sim_size" delta="1"/> 291 <output name="outfile" file="scurve.txt" compare="sim_size" delta="1" />
292 </test> 292 </test>
293 <test> 293 <test>
294 <param name="selected_generator" value="make_sparse_uncorrelated"/> 294 <param name="selected_generator" value="make_sparse_uncorrelated" />
295 <param name="random_state" value="100"/> 295 <param name="random_state" value="100" />
296 <output name="outfile" file="sparse_u.txt" compare="sim_size" delta="1"/> 296 <output name="outfile" file="sparse_u.txt" compare="sim_size" delta="1" />
297 </test> 297 </test>
298 <test> 298 <test>
299 <param name="selected_generator" value="make_swiss_roll"/> 299 <param name="selected_generator" value="make_swiss_roll" />
300 <param name="random_state" value="100"/> 300 <param name="random_state" value="100" />
301 <output name="outfile" file="swiss_r.txt" compare="sim_size" delta="1"/> 301 <output name="outfile" file="swiss_r.txt" compare="sim_size" delta="1" />
302 </test> 302 </test>
303 </tests> 303 </tests>
304 <help> 304 <help>
305 <![CDATA[ 305 <![CDATA[
306 **What it does** 306 **What it does**
372 372
373 Generators belonging to this group produce datasets suitable for non-linear dimensionality reduction problems. The idea behind this type of problem is that the dimensionality of many data sets is only artificially high. **S curve dataset** and **Swiss roll dataset** produce the same points-targets output format, sample points are 3-dimensional and the target column indicates the univariate position of the sample according to the main dimension of the points in the manifold. 373 Generators belonging to this group produce datasets suitable for non-linear dimensionality reduction problems. The idea behind this type of problem is that the dimensionality of many data sets is only artificially high. **S curve dataset** and **Swiss roll dataset** produce the same points-targets output format, sample points are 3-dimensional and the target column indicates the univariate position of the sample according to the main dimension of the points in the manifold.
374 374
375 ]]> 375 ]]>
376 </help> 376 </help>
377 <expand macro="sklearn_citation"/> 377 <expand macro="sklearn_citation" />
378 </tool> 378 </tool>