Repository 'muspinsim_combine'
hg clone https://toolshed.g2.bx.psu.edu/repos/muon-spectroscopy-computational-project/muspinsim_combine

Changeset 0:06aabd70b869 (2023-07-18)
Commit message:
planemo upload for repository https://github.com/muon-spectroscopy-computational-project/muon-galaxy-tools/main/muspinsim_combine commit 70a4d37ecdf5d586703cfc509922311e95d3205c
added:
combine.py
muon_macros.xml
muspinsim_combine.xml
test-data/high.dat
test-data/low.dat
test-data/mid.dat
test-data/out_all.dat
test-data/out_all_10.dat
test-data/out_low.dat
test-data/out_low_high.dat
b
diff -r 000000000000 -r 06aabd70b869 combine.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/combine.py Tue Jul 18 13:26:03 2023 +0000
[
@@ -0,0 +1,50 @@
+import json
+import sys
+
+import numpy as np
+
+import scipy.stats as stats
+
+
+def main():
+    input_json_path = sys.argv[1]
+    params = json.load(open(input_json_path, "r"))
+
+    x_equal = True
+    data = np.loadtxt(params["data_in"][0], usecols=(0, 1))
+    x_values = [data[:, 0]]
+    y_values = [data[:, 1]]
+    bins = len(data)
+
+    for path in params["data_in"][1:]:
+        data = np.loadtxt(path, usecols=(0, 1))
+        x_values.append(data[:, 0])
+        y_values.append(data[:, 1])
+        length_equal = bins == len(data)
+        bins = min(bins, len(data))
+        x_equal = (
+            x_equal and length_equal and np.allclose(x_values, x_values[-1])
+        )
+
+    if x_equal:
+        print(
+            "All x ranges were identical, performing direct average over "
+            f"{len(x_values)} files"
+        )
+        means = np.mean(y_values, axis=0)
+        np.savetxt("data_out.dat", np.column_stack((x_values[0], means)))
+        return
+
+    if params["bins"] is not None:
+        bins = params["bins"]
+
+    x_flat = np.concatenate(x_values)
+    y_flat = np.concatenate(y_values)
+    print(f"Averaging {len(x_flat)} data points into {bins} bins")
+    means, edges, _ = stats.binned_statistic(x_flat, y_flat, bins=bins)
+    data_out = np.column_stack(((edges[1:] + edges[:-1]) / 2, means))
+    np.savetxt("data_out.dat", data_out)
+
+
+if __name__ == "__main__":
+    main()
b
diff -r 000000000000 -r 06aabd70b869 muon_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/muon_macros.xml Tue Jul 18 13:26:03 2023 +0000
[
@@ -0,0 +1,29 @@
+<macros>
+    <xml name="dftb_set">
+        <param type="select" name="dftb_set" value="3ob-3-1" display="radio" label="DFTB parameter set" help="The parameter set to use for DFTB+. Currently supported are: 3ob-3-1 and pbc-0-3. See help section at the bottom of the page for details.">
+            <option value="3ob-3-1">3ob-3-1</option>
+            <option value="pbc-0-3">pbc-0-3</option>
+        </param>
+    </xml>
+    <xml name="dftb+">
+        <expand macro="dftb_set"/>
+        <param type="boolean" name="dftb_pbc" label="Use periodic boundary conditions" help="Whether to turn on periodic boundary conditions in DFTB+." checked="true"/>
+    </xml>
+    <xml name="dftb_optionals">
+        <param type="text" argument="dftb_optionals" value="[]" optional="true" label="DFTB optional files" help="Additional optional json files to activate for DFTBArgs (for example, dftd3.json will use DFTD3 dispersion forces for 3ob-3-1 if DFTB+ has been compiled to support them)."/>
+    </xml>
+    <xml name="k_points_grid">
+        <param type="text" argument="k_points_grid" value="[1,1,1]" label="K-points grid" help="List of three integer k-points. Default is [1,1,1].">
+            <validator type="regex" message="Input should only contain whitespace, '[', ']', ',' and digits.">^[\s\d,\[\]]+$</validator>
+        </param>
+    </xml>
+    <!-- version of underlying tool (PEP 440) -->
+    <!-- citation should be updated with every underlying tool version -->
+    <!-- concept is not updated, and should only be used for referencing the idea of the software -->
+    <token name="@MUSPINSIM_VERSION@">2.2.1</token>
+    <token name="@MUSPINSIM_CITATION@">10.5281/zenodo.7733979</token>
+    <token name="@MUSPINSIM_CONCEPT@">10.5281/zenodo.6517626</token>
+    <token name="@PYMUONSUITE_VERSION@">0.3.0</token>
+    <token name="@PYMUONSUITE_CITATION@">10.5281/zenodo.8026711</token>
+    <token name="@PYMUONSUITE_CONCEPT@">10.5281/zenodo.7025643</token>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 06aabd70b869 muspinsim_combine.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/muspinsim_combine.xml Tue Jul 18 13:26:03 2023 +0000
[
@@ -0,0 +1,75 @@
+<tool id="muspinsim_combine" name="MuSpinSim Combine" version="@MUSPINSIM_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT">
+    <description>combine datafiles generated from MuSpinSim</description>
+    <macros>
+        <!-- Don't have MuSpinSim as dependency, but use that to version as need
+             to maintain compatibility with its output files -->
+        <!-- version of this tool wrapper (integer) -->
+        <token name="@WRAPPER_VERSION@">0</token>
+        <import>muon_macros.xml</import>
+    </macros>
+    <creator>
+        <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/>
+        <organization url="https://muon-spectroscopy-computational-project.github.io/index.html" name="The Muon Spectroscopy Computational Project"/>
+    </creator>
+    <requirements>
+        <!-- Use the same dependency versions as the current MuSpinSim has -->
+        <requirement type="package" version="1.22.0">numpy</requirement>
+        <requirement type="package" version="1.8.1">scipy</requirement>
+    </requirements>
+    <required_files>
+        <include type="literal" path="combine.py"/>
+    </required_files>
+    <command detect_errors="exit_code"><![CDATA[
+       python '${__tool_directory__}/combine.py' inputs.json
+    ]]></command>
+    <configfiles>
+        <inputs name="inputs" data_style="paths" filename="inputs.json"/>
+    </configfiles>
+    <inputs>
+        <param name="data_in" type="data" format="txt" multiple="true" label="Muspinsim Experiment Data (.dat)"/>
+        <param name="bins" type="integer" min="1" optional="true" label="Number of X Bins" help="Optional. If unset, then will use as many bins as there are x points in the smallest .dat file."/>
+    </inputs>
+    <outputs>
+        <data format="txt" name="data_out" from_work_dir="data_out.dat"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="data_in" value="low.dat"/>
+            <output name="data_out" ftype="txt" file="out_low.dat"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="data_in" value="low.dat,low.dat"/>
+            <output name="data_out" ftype="txt" file="out_low.dat"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="data_in" value="low.dat,high.dat"/>
+            <output name="data_out" ftype="txt" file="out_low_high.dat"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="data_in" value="low.dat,mid.dat,high.dat"/>
+            <output name="data_out" ftype="txt" file="out_all.dat"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="data_in" value="low.dat,mid.dat,high.dat"/>
+            <param name="bins" value="10"/>
+            <output name="data_out" ftype="txt" file="out_all_10.dat"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Utility tool for combining multiple MuSpinSim `.dat` files.
+
+        In cases where all input files have the same x values, the tool
+        simply takes the mean y value at each point. Otherwise, the data is
+        binned and the y values in each bin are averaged. The bins are
+        chosen to be equally spaced and cover the entire x range. Note that the
+        x points in the resultant `.dat` file will be taken as the centre of
+        each bin, and so generally will not align with any of the original
+        ranges.
+
+        In cases where there is an empty bin (for example when the ranges
+        covered by two files do not overlap), the output will be `nan`.
+    ]]></help>
+    <citations>
+        <citation type="doi">@MUSPINSIM_CITATION@</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 06aabd70b869 test-data/high.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/high.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,3 @@
+7.000000000000000000e-02 1.500000000000000000e-01
+8.000000000000000000e-02 1.000000000000000000e-01
+9.000000000000000000e-02 0.500000000000000000e-01
b
diff -r 000000000000 -r 06aabd70b869 test-data/low.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/low.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,3 @@
+0.000000000000000000e+00 5.000000000000000000e-01
+1.000000000000000000e-02 4.500000000000000000e-01
+2.000000000000000000e-02 4.000000000000000000e-01
b
diff -r 000000000000 -r 06aabd70b869 test-data/mid.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mid.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,10 @@
+0.500000000000000000e-02 2.500000000000000000e-01
+1.500000000000000000e-02 2.500000000000000000e-01
+2.500000000000000000e-02 2.500000000000000000e-01
+3.500000000000000000e-02 2.500000000000000000e-01
+4.500000000000000000e-02 2.500000000000000000e-01
+5.500000000000000000e-02 2.500000000000000000e-01
+6.500000000000000000e-02 2.500000000000000000e-01
+7.500000000000000000e-02 2.500000000000000000e-01
+8.500000000000000000e-02 2.500000000000000000e-01
+9.500000000000000000e-02 2.500000000000000000e-01
b
diff -r 000000000000 -r 06aabd70b869 test-data/out_all.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_all.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,3 @@
+1.583333333333333467e-02 3.500000000000000333e-01
+4.750000000000000056e-02 2.500000000000000000e-01
+7.916666666666666297e-02 1.857142857142857206e-01
b
diff -r 000000000000 -r 06aabd70b869 test-data/out_all_10.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_all_10.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,10 @@
+4.749999999999999882e-03 3.750000000000000000e-01
+1.424999999999999878e-02 3.499999999999999778e-01
+2.375000000000000028e-02 3.250000000000000111e-01
+3.325000000000000178e-02 2.500000000000000000e-01
+4.274999999999999634e-02 2.500000000000000000e-01
+5.224999999999999784e-02 2.500000000000000000e-01
+6.174999999999999933e-02 2.500000000000000000e-01
+7.125000000000000777e-02 2.000000000000000111e-01
+8.074999999999998845e-02 1.749999999999999889e-01
+9.024999999999999689e-02 1.499999999999999944e-01
b
diff -r 000000000000 -r 06aabd70b869 test-data/out_low.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_low.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,3 @@
+0.000000000000000000e+00 5.000000000000000000e-01
+1.000000000000000021e-02 4.500000000000000111e-01
+2.000000000000000042e-02 4.000000000000000222e-01
b
diff -r 000000000000 -r 06aabd70b869 test-data/out_low_high.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_low_high.dat Tue Jul 18 13:26:03 2023 +0000
b
@@ -0,0 +1,3 @@
+1.499999999999999944e-02 4.500000000000000111e-01
+4.499999999999999833e-02 nan
+7.499999999999999722e-02 9.999999999999999167e-02