comparison sampler.xml @ 5:5e7468a35380 draft

planemo upload for repository ['https://github.com/brsynth/icfree-ml', 'https://github.com/pablocarb/doebase'] commit 20770dfcb79499aa201b8b6faed420babef8d053
author tduigou
date Tue, 23 Jul 2024 09:30:47 +0000
parents d69cb64336e7
children 04159a12c757
comparison
equal deleted inserted replaced
4:d69cb64336e7 5:5e7468a35380
1 <tool id="doe_synbio_sampler" name="iCFree sampler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="@LICENCE@"> 1 <tool id="doe_synbio_sampler" name="iCFree sampler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="@LICENCE@">
2 <description>Generate data points using latin hypercube sampling (LHS)</description> 2 <description>Generate Latin Hypercube Samples for given components.</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <token name="@TOOL_VERSION@">2.2.0</token> 5 <token name="@TOOL_VERSION@">2.4.0</token>
6 </macros> 6 </macros>
7 <requirements> 7 <requirements>
8 <requirement type="package" version="@TOOL_VERSION@">icfree</requirement> 8 <requirement type="package" version="@TOOL_VERSION@">icfree-ml</requirement>
9 </requirements> 9 </requirements>
10 <expand macro="stdio"/> 10 <expand macro="stdio"/>
11 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
12 python -m icfree.sampler 12 python -m icfree.sampler
13 '$cfps_file' 13 '$input_file'
14 --nb-sampling-steps '$adv.nb_sampling_steps' 14 '$output_sampling'
15 --nb-samples '$nb_samples' 15 '$num_samples'
16 #if str($adv.sampling_ratio) != "" 16 --step '$adv.step'
17 #set ratios = ' '.join(['"' + $x + '"' for $x in $adv.sampling_ratio.split()])
18 --sampling-ratios $ratios
19 #end if
20 #if str($adv.seed_cond.seed_param) == 'not_random' 17 #if str($adv.seed_cond.seed_param) == 'not_random'
21 --seed '$adv.seed_cond.seed' 18 --seed '$adv.seed_cond.seed'
22 #end if 19 #end if
23 --output-format 'tsv'
24 --output-folder . &&
25 mv sampling.tsv '$output_sampling'
26 ]]></command> 20 ]]></command>
27 <inputs> 21 <inputs>
28 <param name="cfps_file" type="data" format="tabular" label="CFPS parameters and features" /> 22 <param name="input_file" type="data" format="tabular" label="Input file with components and their max values." />
29 <param name="nb_samples" type="integer" value="99" min="1" max="198" label="Number of samples to generate" /> 23 <param name="num_samples" type="integer" value="99" min="1" max="198" label="Number of samples to generate" />
30 <section name="adv" title="Advanced Options" expanded="false"> 24 <section name="adv" title="Advanced Options" expanded="false">
31 <param name="nb_sampling_steps" type="integer" value="5" min="1" max="10" label="Number of values for all factors when performing the sampling" /> 25 <param name="step" type="float" value="2.5" min="1" max="10" label="Step size for creating discrete ranges" />
32 <param name="sampling_ratio" type="text" value="" label="Ratios associated for all factors" help="Only float, space separated, are allowed">
33 <validator type="regex" message="Float separated by a space between in the range: 0.0 - 1.0">^(?:(0.\d+|1\.0)(\s0?\.\d+|1\.0)*)*$</validator>
34 </param>
35 <conditional name="seed_cond"> 26 <conditional name="seed_cond">
36 <param name="seed_param" type="select" label="Seed" help="Choose a seed or let it as random"> 27 <param name="seed_param" type="select" label="Seed" help="Choose a seed or let it as random">
37 <option value="random" selected="true">random</option> 28 <option value="random" selected="true">random</option>
38 <option value="not_random">fixed</option> 29 <option value="not_random">fixed</option>
39 </param> 30 </param>
51 <data name="output_sampling" format="tabular" label="${tool.name}" /> 42 <data name="output_sampling" format="tabular" label="${tool.name}" />
52 </outputs> 43 </outputs>
53 <tests> 44 <tests>
54 <!-- test 1: check if identical outputs are produced with default parameters --> 45 <!-- test 1: check if identical outputs are produced with default parameters -->
55 <test> 46 <test>
56 <param name="cfps_file" value="sampler/input/parameters.tsv" /> 47 <param name="input_file" value="sampler/input/components.tsv" />
48 <param name="num_samples" value="3" />
57 <param name="seed_param" value="not_random" /> 49 <param name="seed_param" value="not_random" />
58 <param name="seed" value="0" /> 50 <param name="seed" value="0" />
59 <output name="output_sampling" file="sampler/output/sampling.test-1.tsv" ftype="tabular" compare="diff" /> 51 <output name="output_sampling" file="sampler/output/sampling.test-1.tsv" ftype="tabular" compare="diff" />
60 </test> 52 </test>
61 <!-- test 2: advanced arguments --> 53 <!-- test 2: advanced arguments (non-default step) -->
62 <test> 54 <test>
63 <param name="cfps_file" value="sampler/input/parameters.tsv" /> 55 <param name="input_file" value="sampler/input/components.tsv" />
64 <param name="nb_sampling_steps" value="3" /> 56 <param name="num_samples" value="4" />
65 <param name="nb_samples" value="100" /> 57 <param name="step" value="3.1" />
66 <param name="sampling_ratio" value="0.4 0.2 0.3" />
67 <param name="seed_param" value="not_random" /> 58 <param name="seed_param" value="not_random" />
68 <param name="seed" value="0" /> 59 <param name="seed" value="0" />
69 <output name="output_sampling" file="sampler/output/sampling.test-2.tsv" ftype="tabular" compare="diff" /> 60 <output name="output_sampling" file="sampler/output/sampling.test-2.tsv" ftype="tabular" compare="diff" />
70 </test> 61 </test>
71 </tests> 62 </tests>
72 <help><![CDATA[ 63 <help><![CDATA[
73 Sampler 64 Sampler
74 ======= 65 =======
75 66
76 This module generates a list of values for all parameters given in the input file. 67 Generate Latin Hypercube Samples for given components.
77 The values are generated using a Latin Hypercube Sampling (LHS) method (lhs function from the pyDOE package).
78 The number of values generated is given by the user, and the values are saved in a CSV or TSV file.
79
80 It is important to note that the user can pass some values that they wish to combine. In this case, we are dealing with a discrete space, and because LHS works on a continuous space, the resulting sampling can contain duplicates.
81 To avoid this, we have set some filters to select the appropriate sampling method:
82
83 * If the resulting sampling contains duplicates, then we replace them with random samples.
84 * Proceed with full random sampling.
85 * Generate all the combinations.
86
87 Input
88 -----
89
90 Below is an example of an input file:
91
92 +-----------+----------+---------------------+
93 | Parameter | maxValue | Ratios |
94 +===========+==========+=====================+
95 | CP | 125 | 0.0 0.1 0.3 0.5 1.0 |
96 +-----------+----------+---------------------+
97 | CPK | 125 | 1 |
98 +-----------+----------+---------------------+
99 | tRNA | 125 | |
100 +-----------+----------+---------------------+
101 | AA | 125 | |
102 +-----------+----------+---------------------+
103 | ribosomes | 125 | |
104 +-----------+----------+---------------------+
105 | mRNA | 125 | |
106 +-----------+----------+---------------------+
107 | Mg | 125 | |
108 +-----------+----------+---------------------+
109 | K | 125 | |
110 +-----------+----------+---------------------+
111
112 The first column is the parameter (or factor) names.
113
114 The second column is the maxValue of the parameter that will be used in the sampling.
115
116 The third column holds the specific ratios we want to have for this parameter. If nothing is defined, then the ratios given in the program options are used.
117 If a single number is given, then this number is used as a constant value.
118 If no value is given, then the default ratios are used (`nb_samples` linear ratios from 0 to 1).
119
120 Advanced options:
121 -----------------
122 * **Number of sampling steps**: Number of values for all factors when performing the sampling (default: 5)
123 * **Sampling ratios**: Ratios for all factors when performing the sampling
124 * **Number of samples**: Number of samples to generate for all factors when performing the sampling (default: 99)
125
126 Output
127 ------
128 * a TSV file with the sampling values for each parameter
129 ]]></help> 68 ]]></help>
130 <expand macro="creator"/> 69 <expand macro="creator"/>
131 <citations> 70 <citations>
132 <citation type="bibtex"> 71 <citation type="bibtex">
133 @unpublished{icfree 72 @unpublished{icfree