Mercurial > repos > tduigou > doe_synbio_sampler
changeset 5:5e7468a35380 draft
planemo upload for repository ['https://github.com/brsynth/icfree-ml', 'https://github.com/pablocarb/doebase'] commit 20770dfcb79499aa201b8b6faed420babef8d053
author | tduigou |
---|---|
date | Tue, 23 Jul 2024 09:30:47 +0000 |
parents | d69cb64336e7 |
children | 04159a12c757 |
files | sampler.xml test-data/sampler/input/components.tsv test-data/sampler/input/parameters.tsv test-data/sampler/output/sampling.test-1.tsv test-data/sampler/output/sampling.test-2.tsv |
diffstat | 5 files changed, 31 insertions(+), 288 deletions(-) [+] |
line wrap: on
line diff
--- a/sampler.xml Wed Feb 14 15:24:19 2024 +0000 +++ b/sampler.xml Tue Jul 23 09:30:47 2024 +0000 @@ -1,37 +1,28 @@ <tool id="doe_synbio_sampler" name="iCFree sampler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="@LICENCE@"> - <description>Generate data points using latin hypercube sampling (LHS)</description> + <description>Generate Latin Hypercube Samples for given components.</description> <macros> <import>macros.xml</import> - <token name="@TOOL_VERSION@">2.2.0</token> + <token name="@TOOL_VERSION@">2.4.0</token> </macros> <requirements> - <requirement type="package" version="@TOOL_VERSION@">icfree</requirement> + <requirement type="package" version="@TOOL_VERSION@">icfree-ml</requirement> </requirements> <expand macro="stdio"/> <command detect_errors="exit_code"><![CDATA[ python -m icfree.sampler - '$cfps_file' - --nb-sampling-steps '$adv.nb_sampling_steps' - --nb-samples '$nb_samples' - #if str($adv.sampling_ratio) != "" - #set ratios = ' '.join(['"' + $x + '"' for $x in $adv.sampling_ratio.split()]) - --sampling-ratios $ratios - #end if + '$input_file' + '$output_sampling' + '$num_samples' + --step '$adv.step' #if str($adv.seed_cond.seed_param) == 'not_random' --seed '$adv.seed_cond.seed' #end if - --output-format 'tsv' - --output-folder . && - mv sampling.tsv '$output_sampling' ]]></command> <inputs> - <param name="cfps_file" type="data" format="tabular" label="CFPS parameters and features" /> - <param name="nb_samples" type="integer" value="99" min="1" max="198" label="Number of samples to generate" /> + <param name="input_file" type="data" format="tabular" label="Input file with components and their max values." /> + <param name="num_samples" type="integer" value="99" min="1" max="198" label="Number of samples to generate" /> <section name="adv" title="Advanced Options" expanded="false"> - <param name="nb_sampling_steps" type="integer" value="5" min="1" max="10" label="Number of values for all factors when performing the sampling" /> - <param name="sampling_ratio" type="text" value="" label="Ratios associated for all factors" help="Only float, space separated, are allowed"> - <validator type="regex" message="Float separated by a space between in the range: 0.0 - 1.0">^(?:(0.\d+|1\.0)(\s0?\.\d+|1\.0)*)*$</validator> - </param> + <param name="step" type="float" value="2.5" min="1" max="10" label="Step size for creating discrete ranges" /> <conditional name="seed_cond"> <param name="seed_param" type="select" label="Seed" help="Choose a seed or let it as random"> <option value="random" selected="true">random</option> @@ -53,17 +44,17 @@ <tests> <!-- test 1: check if identical outputs are produced with default parameters --> <test> - <param name="cfps_file" value="sampler/input/parameters.tsv" /> + <param name="input_file" value="sampler/input/components.tsv" /> + <param name="num_samples" value="3" /> <param name="seed_param" value="not_random" /> <param name="seed" value="0" /> <output name="output_sampling" file="sampler/output/sampling.test-1.tsv" ftype="tabular" compare="diff" /> </test> - <!-- test 2: advanced arguments --> + <!-- test 1: check if identical outputs are produced with default parameters --> <test> - <param name="cfps_file" value="sampler/input/parameters.tsv" /> - <param name="nb_sampling_steps" value="3" /> - <param name="nb_samples" value="100" /> - <param name="sampling_ratio" value="0.4 0.2 0.3" /> + <param name="input_file" value="sampler/input/components.tsv" /> + <param name="num_samples" value="4" /> + <param name="step" value="3.1" /> <param name="seed_param" value="not_random" /> <param name="seed" value="0" /> <output name="output_sampling" file="sampler/output/sampling.test-2.tsv" ftype="tabular" compare="diff" /> @@ -73,59 +64,7 @@ Sampler ======= -This module generates a list of values for all parameters given in the input file. -The values are generated using a Latin Hypercube Sampling (LHS) method (lhs function from the pyDOE package). -The number of values generated is given by the user and the values are saved in csv or tsv file. - -It is important to note that the user can pass some values that he whishes to combine. In this case, we are dealing with discrete space, and because LHS is working on continuous space the result sampling can contain duplicates. -To avoid this, we have set some filters to select the appropriate sampling method: - -* If the result sampling contain duplicates, then we replace them by random samples. -* Proceed with full random sampling. -* Generate all the combinations. - -Input ------ - -Below is an example of an input file: - -+-----------+----------+---------------------+ -| Parameter | maxValue | Ratios | -+===========+==========+=====================+ -| CP | 125 | 0.0 0.1 0.3 0.5 1.0 | -+-----------+----------+---------------------+ -| CPK | 125 | 1 | -+-----------+----------+---------------------+ -| tRNA | 125 | | -+-----------+----------+---------------------+ -| AA | 125 | | -+-----------+----------+---------------------+ -| ribosomes | 125 | | -+-----------+----------+---------------------+ -| mRNA | 125 | | -+-----------+----------+---------------------+ -| Mg | 125 | | -+-----------+----------+---------------------+ -| K | 125 | | -+-----------+----------+---------------------+ - -The first column is the parameter (or factor) names. - -The second column is the maxValue of the parameter that will be used in the sampling. - -The third column is the specific ratios we want to have for this parameter. If nothing defined, then take ratios given in program options. -If one single number is given, then take this number as a const value. -If no value is given, then take the default ratios (`nb_samples` linear ratios from 0 to 1). - -Advanced options: ------------------ -* **Number of sampling steps**: Number of values for all factors when performing the sampling (default: 5) -* **Sampling ratios**: Ratios for all factors when performing the sampling -* **Number of samples**: Number of samples to generate for all factors when performing the sampling (default: 99) - -Output ------- -* a TSV file with the sampling values for each parameter +Generate Latin Hypercube Samples for given components. ]]></help> <expand macro="creator"/> <citations>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampler/input/components.tsv Tue Jul 23 09:30:47 2024 +0000 @@ -0,0 +1,5 @@ +Component maxValue +Hela lysate 1000 +Access prot 50% 400 +Reaction mix 400 +RNA 1ug/uL 200
--- a/test-data/sampler/input/parameters.tsv Wed Feb 14 15:24:19 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -Component maxValue Ratios -Component_1 125 0.0 0.1 0.3 0.5 1.0 -Component_2 125 1 -Component_3 125 -Component_4 125 -Component_5 125 -Component_6 125 -Component_7 125 -Component_8 125
--- a/test-data/sampler/output/sampling.test-1.tsv Wed Feb 14 15:24:19 2024 +0000 +++ b/test-data/sampler/output/sampling.test-1.tsv Tue Jul 23 09:30:47 2024 +0000 @@ -1,100 +1,4 @@ -Component_1 Component_2 Component_3 Component_4 Component_5 Component_6 Component_7 Component_8 -0.0 125.0 125.0 0.0 31.25 93.75 62.5 93.75 -37.5 125.0 31.25 93.75 125.0 62.5 125.0 125.0 -37.5 125.0 0.0 93.75 31.25 31.25 31.25 0.0 -37.5 125.0 31.25 62.5 125.0 125.0 93.75 93.75 -12.5 125.0 31.25 31.25 93.75 125.0 31.25 31.25 -125.0 125.0 31.25 93.75 125.0 0.0 125.0 93.75 -0.0 125.0 93.75 0.0 93.75 125.0 31.25 125.0 -125.0 125.0 125.0 31.25 0.0 0.0 125.0 31.25 -37.5 125.0 125.0 125.0 125.0 93.75 62.5 0.0 -125.0 125.0 62.5 62.5 62.5 62.5 0.0 0.0 -62.5 125.0 93.75 0.0 93.75 62.5 62.5 62.5 -37.5 125.0 93.75 62.5 93.75 0.0 93.75 0.0 -125.0 125.0 62.5 62.5 93.75 93.75 93.75 125.0 -37.5 125.0 0.0 93.75 125.0 125.0 62.5 0.0 -0.0 125.0 31.25 0.0 0.0 0.0 93.75 0.0 -37.5 125.0 62.5 31.25 0.0 93.75 93.75 62.5 -12.5 125.0 31.25 62.5 125.0 125.0 125.0 62.5 -12.5 125.0 62.5 62.5 31.25 125.0 125.0 125.0 -125.0 125.0 0.0 125.0 31.25 62.5 93.75 31.25 -12.5 125.0 31.25 0.0 62.5 93.75 125.0 93.75 -62.5 125.0 0.0 125.0 0.0 31.25 125.0 31.25 -12.5 125.0 0.0 93.75 62.5 93.75 0.0 93.75 -0.0 125.0 62.5 125.0 62.5 125.0 0.0 125.0 -62.5 125.0 31.25 125.0 62.5 31.25 0.0 93.75 -37.5 125.0 0.0 62.5 0.0 62.5 62.5 31.25 -62.5 125.0 0.0 0.0 93.75 31.25 62.5 0.0 -12.5 125.0 0.0 93.75 0.0 62.5 31.25 31.25 -0.0 125.0 62.5 0.0 31.25 62.5 31.25 31.25 -125.0 125.0 0.0 125.0 125.0 31.25 62.5 62.5 -0.0 125.0 31.25 31.25 125.0 31.25 125.0 31.25 -12.5 125.0 31.25 0.0 125.0 0.0 62.5 93.75 -125.0 125.0 62.5 31.25 62.5 93.75 0.0 62.5 -37.5 125.0 125.0 62.5 0.0 0.0 125.0 0.0 -125.0 125.0 93.75 31.25 93.75 125.0 0.0 0.0 -0.0 125.0 62.5 31.25 0.0 62.5 93.75 62.5 -37.5 125.0 125.0 93.75 93.75 62.5 62.5 93.75 -12.5 125.0 31.25 62.5 0.0 0.0 0.0 93.75 -125.0 125.0 31.25 125.0 93.75 0.0 31.25 0.0 -62.5 125.0 93.75 0.0 62.5 125.0 31.25 125.0 -12.5 125.0 0.0 62.5 31.25 31.25 31.25 93.75 -62.5 125.0 93.75 31.25 125.0 125.0 62.5 125.0 -0.0 125.0 62.5 93.75 125.0 93.75 62.5 31.25 -37.5 125.0 93.75 93.75 62.5 31.25 0.0 0.0 -37.5 125.0 93.75 125.0 0.0 0.0 93.75 0.0 -0.0 125.0 62.5 0.0 31.25 93.75 93.75 0.0 -125.0 125.0 125.0 31.25 31.25 125.0 31.25 93.75 -62.5 125.0 125.0 93.75 31.25 31.25 0.0 125.0 -0.0 125.0 62.5 125.0 93.75 93.75 0.0 31.25 -37.5 125.0 31.25 31.25 0.0 62.5 62.5 0.0 -12.5 125.0 0.0 0.0 31.25 93.75 62.5 125.0 -62.5 125.0 0.0 125.0 125.0 93.75 93.75 0.0 -0.0 125.0 93.75 93.75 31.25 125.0 0.0 62.5 -125.0 125.0 0.0 0.0 93.75 31.25 31.25 62.5 -37.5 125.0 125.0 31.25 93.75 0.0 93.75 93.75 -62.5 125.0 31.25 0.0 93.75 31.25 125.0 0.0 -0.0 125.0 62.5 62.5 62.5 31.25 31.25 93.75 -37.5 125.0 125.0 62.5 62.5 0.0 93.75 62.5 -62.5 125.0 62.5 125.0 62.5 125.0 31.25 62.5 -125.0 125.0 0.0 31.25 125.0 93.75 93.75 31.25 -37.5 125.0 62.5 125.0 62.5 62.5 31.25 62.5 -12.5 125.0 0.0 31.25 0.0 62.5 62.5 125.0 -12.5 125.0 93.75 93.75 31.25 125.0 125.0 125.0 -12.5 125.0 93.75 93.75 31.25 31.25 31.25 93.75 -62.5 125.0 0.0 31.25 31.25 31.25 125.0 62.5 -37.5 125.0 31.25 93.75 125.0 62.5 0.0 62.5 -0.0 125.0 125.0 125.0 0.0 62.5 125.0 125.0 -62.5 125.0 31.25 125.0 0.0 31.25 93.75 31.25 -125.0 125.0 93.75 125.0 125.0 125.0 31.25 62.5 -12.5 125.0 62.5 31.25 62.5 93.75 125.0 31.25 -12.5 125.0 0.0 125.0 93.75 0.0 31.25 31.25 -12.5 125.0 93.75 125.0 125.0 62.5 93.75 31.25 -0.0 125.0 93.75 93.75 62.5 31.25 0.0 125.0 -62.5 125.0 125.0 0.0 93.75 0.0 125.0 93.75 -0.0 125.0 0.0 0.0 31.25 62.5 0.0 62.5 -62.5 125.0 62.5 31.25 31.25 62.5 31.25 125.0 -62.5 125.0 125.0 93.75 31.25 0.0 0.0 93.75 -125.0 125.0 62.5 0.0 125.0 125.0 93.75 62.5 -125.0 125.0 31.25 125.0 0.0 31.25 93.75 62.5 -0.0 125.0 125.0 62.5 93.75 0.0 0.0 31.25 -125.0 125.0 125.0 31.25 62.5 0.0 0.0 62.5 -0.0 125.0 0.0 31.25 93.75 31.25 62.5 31.25 -125.0 125.0 93.75 93.75 93.75 93.75 62.5 125.0 -0.0 125.0 125.0 31.25 62.5 0.0 62.5 125.0 -12.5 125.0 93.75 62.5 0.0 125.0 93.75 93.75 -12.5 125.0 125.0 0.0 125.0 62.5 0.0 62.5 -37.5 125.0 31.25 93.75 0.0 125.0 0.0 125.0 -0.0 125.0 93.75 0.0 62.5 125.0 31.25 125.0 -62.5 125.0 125.0 93.75 125.0 31.25 125.0 0.0 -125.0 125.0 93.75 62.5 62.5 0.0 125.0 125.0 -62.5 125.0 93.75 62.5 31.25 93.75 31.25 125.0 -37.5 125.0 31.25 0.0 93.75 93.75 125.0 93.75 -0.0 125.0 0.0 125.0 31.25 93.75 125.0 31.25 -62.5 125.0 125.0 31.25 62.5 62.5 62.5 0.0 -62.5 125.0 62.5 93.75 93.75 31.25 93.75 0.0 -62.5 125.0 93.75 62.5 31.25 0.0 125.0 93.75 -12.5 125.0 31.25 0.0 0.0 0.0 93.75 93.75 -12.5 125.0 125.0 62.5 125.0 93.75 62.5 31.25 -125.0 125.0 125.0 125.0 0.0 93.75 31.25 31.25 -125.0 125.0 62.5 62.5 0.0 125.0 0.0 0.0 +Hela lysate,Access prot 50%,Reaction mix,RNA 1ug/uL +492.5,25.0,310.0,152.5 +970.0,165.0,257.5,112.5 +537.5,325.0,192.5,185.0
--- a/test-data/sampler/output/sampling.test-2.tsv Wed Feb 14 15:24:19 2024 +0000 +++ b/test-data/sampler/output/sampling.test-2.tsv Tue Jul 23 09:30:47 2024 +0000 @@ -1,101 +1,5 @@ -Component_1 Component_2 Component_3 Component_4 Component_5 Component_6 Component_7 Component_8 -62.5 125.0 25.0 50.0 50.0 25.0 25.0 37.5 -0.0 125.0 25.0 37.5 25.0 50.0 37.5 25.0 -125.0 125.0 25.0 25.0 37.5 25.0 50.0 50.0 -12.5 125.0 25.0 25.0 50.0 37.5 37.5 25.0 -37.5 125.0 25.0 25.0 37.5 37.5 50.0 37.5 -62.5 125.0 25.0 37.5 25.0 25.0 37.5 25.0 -37.5 125.0 50.0 25.0 50.0 50.0 37.5 25.0 -62.5 125.0 37.5 50.0 37.5 50.0 37.5 37.5 -125.0 125.0 37.5 50.0 25.0 50.0 25.0 50.0 -37.5 125.0 50.0 37.5 50.0 50.0 25.0 37.5 -62.5 125.0 50.0 37.5 25.0 25.0 25.0 25.0 -12.5 125.0 25.0 37.5 37.5 37.5 37.5 37.5 -0.0 125.0 25.0 50.0 37.5 37.5 37.5 37.5 -0.0 125.0 37.5 25.0 50.0 50.0 50.0 37.5 -62.5 125.0 50.0 25.0 37.5 50.0 50.0 25.0 -37.5 125.0 25.0 50.0 37.5 50.0 37.5 25.0 -125.0 125.0 37.5 50.0 50.0 50.0 25.0 25.0 -62.5 125.0 37.5 50.0 37.5 37.5 25.0 37.5 -12.5 125.0 25.0 50.0 25.0 50.0 50.0 25.0 -125.0 125.0 25.0 37.5 50.0 50.0 25.0 50.0 -62.5 125.0 25.0 25.0 37.5 25.0 37.5 50.0 -62.5 125.0 50.0 25.0 37.5 25.0 50.0 50.0 -12.5 125.0 25.0 50.0 37.5 25.0 25.0 50.0 -12.5 125.0 37.5 25.0 25.0 50.0 50.0 25.0 -125.0 125.0 25.0 25.0 37.5 25.0 37.5 37.5 -0.0 125.0 50.0 37.5 37.5 37.5 25.0 37.5 -62.5 125.0 50.0 37.5 37.5 50.0 25.0 37.5 -125.0 125.0 50.0 25.0 50.0 37.5 50.0 50.0 -125.0 125.0 37.5 25.0 25.0 25.0 37.5 25.0 -12.5 125.0 50.0 50.0 50.0 25.0 37.5 50.0 -125.0 125.0 25.0 37.5 25.0 25.0 50.0 50.0 -62.5 125.0 37.5 37.5 25.0 50.0 37.5 37.5 -62.5 125.0 50.0 50.0 37.5 37.5 25.0 25.0 -0.0 125.0 37.5 37.5 25.0 25.0 50.0 50.0 -0.0 125.0 50.0 37.5 37.5 37.5 37.5 50.0 -37.5 125.0 25.0 37.5 50.0 37.5 37.5 37.5 -37.5 125.0 25.0 25.0 37.5 50.0 50.0 50.0 -62.5 125.0 37.5 37.5 50.0 50.0 50.0 37.5 -125.0 125.0 37.5 25.0 50.0 37.5 50.0 50.0 -37.5 125.0 50.0 50.0 50.0 37.5 25.0 25.0 -12.5 125.0 37.5 50.0 25.0 37.5 50.0 25.0 -0.0 125.0 25.0 50.0 25.0 25.0 37.5 50.0 -62.5 125.0 50.0 50.0 50.0 37.5 50.0 37.5 -12.5 125.0 25.0 37.5 50.0 50.0 25.0 37.5 -12.5 125.0 50.0 25.0 37.5 25.0 37.5 50.0 -37.5 125.0 25.0 25.0 37.5 50.0 37.5 50.0 -0.0 125.0 25.0 37.5 25.0 37.5 25.0 25.0 -62.5 125.0 50.0 25.0 37.5 37.5 25.0 25.0 -12.5 125.0 50.0 50.0 50.0 37.5 37.5 25.0 -37.5 125.0 37.5 37.5 37.5 37.5 25.0 37.5 -37.5 125.0 50.0 25.0 37.5 25.0 25.0 25.0 -37.5 125.0 37.5 37.5 25.0 50.0 25.0 37.5 -62.5 125.0 25.0 37.5 37.5 37.5 50.0 25.0 -125.0 125.0 25.0 25.0 25.0 25.0 50.0 37.5 -37.5 125.0 37.5 50.0 50.0 25.0 50.0 25.0 -125.0 125.0 37.5 25.0 25.0 37.5 50.0 25.0 -37.5 125.0 50.0 50.0 37.5 50.0 37.5 37.5 -12.5 125.0 25.0 25.0 37.5 25.0 37.5 25.0 -37.5 125.0 37.5 50.0 37.5 37.5 25.0 50.0 -125.0 125.0 37.5 25.0 50.0 25.0 37.5 50.0 -0.0 125.0 25.0 25.0 50.0 25.0 25.0 37.5 -125.0 125.0 50.0 50.0 50.0 37.5 37.5 50.0 -37.5 125.0 25.0 37.5 37.5 25.0 50.0 50.0 -12.5 125.0 50.0 25.0 37.5 37.5 37.5 50.0 -0.0 125.0 50.0 50.0 50.0 25.0 50.0 25.0 -125.0 125.0 37.5 37.5 50.0 50.0 25.0 50.0 -0.0 125.0 37.5 50.0 25.0 25.0 50.0 37.5 -0.0 125.0 25.0 50.0 25.0 25.0 25.0 50.0 -62.5 125.0 37.5 25.0 50.0 37.5 50.0 37.5 -0.0 125.0 50.0 25.0 25.0 25.0 50.0 50.0 -37.5 125.0 50.0 50.0 25.0 50.0 50.0 50.0 -12.5 125.0 37.5 50.0 25.0 50.0 25.0 37.5 -12.5 125.0 50.0 37.5 37.5 37.5 50.0 37.5 -37.5 125.0 50.0 37.5 50.0 37.5 37.5 50.0 -0.0 125.0 50.0 50.0 50.0 50.0 37.5 25.0 -37.5 125.0 50.0 25.0 50.0 25.0 25.0 25.0 -37.5 125.0 50.0 37.5 50.0 37.5 37.5 25.0 -12.5 125.0 25.0 25.0 50.0 37.5 25.0 50.0 -12.5 125.0 25.0 25.0 25.0 50.0 50.0 25.0 -12.5 125.0 50.0 25.0 37.5 25.0 50.0 25.0 -0.0 125.0 37.5 50.0 25.0 37.5 37.5 37.5 -62.5 125.0 37.5 37.5 50.0 25.0 25.0 37.5 -0.0 125.0 37.5 37.5 25.0 50.0 37.5 37.5 -125.0 125.0 25.0 25.0 37.5 50.0 25.0 25.0 -62.5 125.0 50.0 50.0 50.0 37.5 25.0 50.0 -62.5 125.0 25.0 37.5 25.0 25.0 25.0 37.5 -12.5 125.0 50.0 25.0 25.0 50.0 50.0 25.0 -62.5 125.0 25.0 37.5 37.5 25.0 50.0 25.0 -0.0 125.0 37.5 50.0 25.0 25.0 25.0 50.0 -125.0 125.0 50.0 37.5 25.0 25.0 37.5 25.0 -0.0 125.0 37.5 37.5 25.0 50.0 50.0 37.5 -125.0 125.0 37.5 37.5 25.0 37.5 50.0 50.0 -0.0 125.0 50.0 50.0 50.0 25.0 50.0 50.0 -12.5 125.0 37.5 25.0 50.0 50.0 25.0 25.0 -125.0 125.0 25.0 37.5 37.5 37.5 37.5 37.5 -12.5 125.0 37.5 50.0 50.0 50.0 25.0 37.5 -37.5 125.0 37.5 37.5 25.0 37.5 25.0 50.0 -0.0 125.0 25.0 50.0 25.0 50.0 37.5 37.5 -125.0 125.0 25.0 50.0 25.0 25.0 37.5 50.0 -0.0 125.0 50.0 50.0 37.5 25.0 37.5 37.5 +Hela lysate,Access prot 50%,Reaction mix,RNA 1ug/uL +610.7,403.0,378.2,52.7 +666.5,384.40000000000003,282.1,111.60000000000001 +62.0,319.3,170.5,52.7 +409.2,238.70000000000002,399.90000000000003,37.2