Mercurial > repos > muon-spectroscopy-computational-project > muspinsim_combine
changeset 0:06aabd70b869 draft default tip
planemo upload for repository https://github.com/muon-spectroscopy-computational-project/muon-galaxy-tools/main/muspinsim_combine commit 70a4d37ecdf5d586703cfc509922311e95d3205c
author | muon-spectroscopy-computational-project |
---|---|
date | Tue, 18 Jul 2023 13:26:03 +0000 |
parents | |
children | |
files | combine.py muon_macros.xml muspinsim_combine.xml test-data/high.dat test-data/low.dat test-data/mid.dat test-data/out_all.dat test-data/out_all_10.dat test-data/out_low.dat test-data/out_low_high.dat |
diffstat | 10 files changed, 189 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Combine several two-column ``.dat`` files into one averaged dataset.

The script is driven by a JSON file whose path is given as the first
command line argument. The JSON object is expected to contain:

* ``data_in``: a list of paths to whitespace separated data files whose
  first two columns are read as x and y values.
* ``bins`` (optional, may be ``null``): the number of bins to use when
  the x values of the inputs differ.

The result is always written to ``data_out.dat`` in the current working
directory.
"""
import json
import sys

import numpy as np

import scipy.stats as stats


def _load_xy(path):
    """Return the first two columns of ``path`` as a pair of 1-D arrays."""
    # ndmin=2 keeps a file with a single row two-dimensional; without it
    # loadtxt hands back a 1-D array and the column slicing would fail.
    data = np.loadtxt(path, usecols=(0, 1), ndmin=2)
    return data[:, 0], data[:, 1]


def main():
    """Average the y values of every input file and write ``data_out.dat``.

    If every file has the same number of rows and matching x values (as
    judged by ``np.allclose``), the y values are averaged point by point
    and written against the x values of the first file.

    Otherwise all points are pooled and averaged with
    ``scipy.stats.binned_statistic``. The number of bins is ``bins`` from
    the JSON parameters when given, else the row count of the shortest
    input. The x value written for each bin is the midpoint of its edges.

    Raises:
        ValueError: if ``data_in`` contains no paths.
    """
    input_json_path = sys.argv[1]
    # Use a context manager so the parameter file is closed promptly.
    with open(input_json_path, "r") as handle:
        params = json.load(handle)

    paths = params["data_in"]
    if not paths:
        raise ValueError("No input files were provided in 'data_in'")

    first_x, first_y = _load_xy(paths[0])
    x_values = [first_x]
    y_values = [first_y]
    bins = len(first_x)
    x_equal = True

    for path in paths[1:]:
        x, y = _load_xy(path)
        x_values.append(x)
        y_values.append(y)
        length_equal = bins == len(x)
        bins = min(bins, len(x))
        # The length test short-circuits first, so np.allclose is only
        # ever evaluated while all arrays collected so far share a length.
        x_equal = (
            x_equal and length_equal and np.allclose(x_values, x_values[-1])
        )

    if x_equal:
        print(
            "All x ranges were identical, performing direct average over "
            f"{len(x_values)} files"
        )
        means = np.mean(y_values, axis=0)
        np.savetxt("data_out.dat", np.column_stack((x_values[0], means)))
        return

    # A missing "bins" key is treated the same as an explicit null.
    requested_bins = params.get("bins")
    if requested_bins is not None:
        bins = requested_bins

    x_flat = np.concatenate(x_values)
    y_flat = np.concatenate(y_values)
    print(f"Averaging {len(x_flat)} data points into {bins} bins")
    means, edges, _ = stats.binned_statistic(x_flat, y_flat, bins=bins)
    data_out = np.column_stack(((edges[1:] + edges[:-1]) / 2, means))
    np.savetxt("data_out.dat", data_out)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/muon_macros.xml Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,29 @@ +<macros> + <xml name="dftb_set"> + <param type="select" name="dftb_set" value="3ob-3-1" display="radio" label="DFTB parameter set" help="The parameter set to use for DFTB+. Currently supported are: 3ob-3-1 and pbc-0-3. See help section at the bottom of the page for details."> + <option value="3ob-3-1">3ob-3-1</option> + <option value="pbc-0-3">pbc-0-3</option> + </param> + </xml> + <xml name="dftb+"> + <expand macro="dftb_set"/> + <param type="boolean" name="dftb_pbc" label="Use periodic boundary conditions" help="Whether to turn on periodic boundary conditions in DFTB+." checked="true"/> + </xml> + <xml name="dftb_optionals"> + <param type="text" argument="dftb_optionals" value="[]" optional="true" label="DFTB optional files" help="Additional optional json files to activate for DFTBArgs (for example, dftd3.json will use DFTD3 dispersion forces for 3ob-3-1 if DFTB+ has been compiled to support them)."/> + </xml> + <xml name="k_points_grid"> + <param type="text" argument="k_points_grid" value="[1,1,1]" label="K-points grid" help="List of three integer k-points. Default is [1,1,1]."> + <validator type="regex" message="Input should only contain whitespace, '[', ']', ',' and digits.">^[\s\d,\[\]]+$</validator> + </param> + </xml> + <!-- version of underlying tool (PEP 440) --> + <!-- citation should be updated with every underlying tool version --> + <!-- concept is not updated, and should only be used for referencing the idea of the software --> + <token name="@MUSPINSIM_VERSION@">2.2.1</token> + <token name="@MUSPINSIM_CITATION@">10.5281/zenodo.7733979</token> + <token name="@MUSPINSIM_CONCEPT@">10.5281/zenodo.6517626</token> + <token name="@PYMUONSUITE_VERSION@">0.3.0</token> + <token name="@PYMUONSUITE_CITATION@">10.5281/zenodo.8026711</token> + <token name="@PYMUONSUITE_CONCEPT@">10.5281/zenodo.7025643</token> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/muspinsim_combine.xml Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,75 @@ +<tool id="muspinsim_combine" name="MuSpinSim Combine" version="@MUSPINSIM_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT"> + <description>combine datafiles generated from MuSpinSim</description> + <macros> + <!-- Don't have MuSpinSim as dependency, but use that to version as need + to maintain compatibility with its output files --> + <!-- version of this tool wrapper (integer) --> + <token name="@WRAPPER_VERSION@">0</token> + <import>muon_macros.xml</import> + </macros> + <creator> + <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/> + <organization url="https://muon-spectroscopy-computational-project.github.io/index.html" name="The Muon Spectroscopy Computational Project"/> + </creator> + <requirements> + <!-- Use the same dependency versions as the current MuSpinSim has --> + <requirement type="package" version="1.22.0">numpy</requirement> + <requirement type="package" version="1.8.1">scipy</requirement> + </requirements> + <required_files> + <include type="literal" path="combine.py"/> + </required_files> + <command detect_errors="exit_code"><![CDATA[ + python '${__tool_directory__}/combine.py' inputs.json + ]]></command> + <configfiles> + <inputs name="inputs" data_style="paths" filename="inputs.json"/> + </configfiles> + <inputs> + <param name="data_in" type="data" format="txt" multiple="true" label="Muspinsim Experiment Data (.dat)"/> + <param name="bins" type="integer" min="1" optional="true" label="Number of X Bins" help="Optional. 
If unset, then will use as many bins as there are x points in the smallest .dat file."/> + </inputs> + <outputs> + <data format="txt" name="data_out" from_work_dir="data_out.dat"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="data_in" value="low.dat"/> + <output name="data_out" ftype="txt" file="out_low.dat"/> + </test> + <test expect_num_outputs="1"> + <param name="data_in" value="low.dat,low.dat"/> + <output name="data_out" ftype="txt" file="out_low.dat"/> + </test> + <test expect_num_outputs="1"> + <param name="data_in" value="low.dat,high.dat"/> + <output name="data_out" ftype="txt" file="out_low_high.dat"/> + </test> + <test expect_num_outputs="1"> + <param name="data_in" value="low.dat,mid.dat,high.dat"/> + <output name="data_out" ftype="txt" file="out_all.dat"/> + </test> + <test expect_num_outputs="1"> + <param name="data_in" value="low.dat,mid.dat,high.dat"/> + <param name="bins" value="10"/> + <output name="data_out" ftype="txt" file="out_all_10.dat"/> + </test> + </tests> + <help><![CDATA[ + Utility tool for combining multiple MuSpinSim `.dat` files. + + In cases where all input files have the same x values, the tool will simply + take the mean value at each point. In cases where this isn't the case, + the data is binned, and the y values in each bin averaged. The bins are + chosen to be equally spaced and cover the entire x range. Note that the + x points in the resultant `.dat` file will be taken as the centre of + each bin, and so generally will not align with any of the original + ranges. + + In cases where there is an empty bin (for example when the ranges covered + by two files do not overlap), the output will be `nan`. + ]]></help> + <citations> + <citation type="doi">@MUSPINSIM_CITATION@</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/high.dat Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,3 @@ +7.000000000000000000e-02 1.500000000000000000e-01 +8.000000000000000000e-02 1.000000000000000000e-01 +9.000000000000000000e-02 0.500000000000000000e-01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/low.dat Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,3 @@ +0.000000000000000000e+00 5.000000000000000000e-01 +1.000000000000000000e-02 4.500000000000000000e-01 +2.000000000000000000e-02 4.000000000000000000e-01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mid.dat Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,10 @@ +0.500000000000000000e-02 2.500000000000000000e-01 +1.500000000000000000e-02 2.500000000000000000e-01 +2.500000000000000000e-02 2.500000000000000000e-01 +3.500000000000000000e-02 2.500000000000000000e-01 +4.500000000000000000e-02 2.500000000000000000e-01 +5.500000000000000000e-02 2.500000000000000000e-01 +6.500000000000000000e-02 2.500000000000000000e-01 +7.500000000000000000e-02 2.500000000000000000e-01 +8.500000000000000000e-02 2.500000000000000000e-01 +9.500000000000000000e-02 2.500000000000000000e-01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_all.dat Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,3 @@ +1.583333333333333467e-02 3.500000000000000333e-01 +4.750000000000000056e-02 2.500000000000000000e-01 +7.916666666666666297e-02 1.857142857142857206e-01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_all_10.dat Tue Jul 18 13:26:03 2023 +0000 @@ -0,0 +1,10 @@ +4.749999999999999882e-03 3.750000000000000000e-01 +1.424999999999999878e-02 3.499999999999999778e-01 +2.375000000000000028e-02 3.250000000000000111e-01 +3.325000000000000178e-02 2.500000000000000000e-01 +4.274999999999999634e-02 2.500000000000000000e-01 +5.224999999999999784e-02 2.500000000000000000e-01 +6.174999999999999933e-02 2.500000000000000000e-01 +7.125000000000000777e-02 2.000000000000000111e-01 +8.074999999999998845e-02 1.749999999999999889e-01 +9.024999999999999689e-02 1.499999999999999944e-01