changeset 4:c2d5bfef5b63 draft

planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_lcf commit 4814f53888643f1d3667789050914675fffb7d59
author muon-spectroscopy-computational-project
date Fri, 23 Aug 2024 14:10:44 +0000
parents 6bb927e155fc
children 90a69f15ab92
files common.py larch_lcf.py larch_lcf.xml test-data/lcf.zip
diffstat 4 files changed, 105 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/common.py	Thu Apr 11 09:02:19 2024 +0000
+++ b/common.py	Fri Aug 23 14:10:44 2024 +0000
@@ -1,3 +1,4 @@
+import re
 from typing import Iterable
 
 from larch.io import extract_athenagroup, read_athena
@@ -10,8 +11,6 @@
     group_keys = list(athena_group.keys())
     if key is None:
         key = group_keys[0]
-    else:
-        key = key.replace("-", "_")
 
     try:
         return extract_athenagroup(athena_group.groups[key])
@@ -19,7 +18,7 @@
         raise KeyError(f"{key} not in {group_keys}") from e
 
 
-def read_all_groups(dat_file: str, key: str = None) -> "dict[str, Group]":
+def read_all_groups(dat_file: str) -> "dict[str, Group]":
     # Cannot rely on do_ABC as _larch is None
     athena_group = read_athena(
         dat_file,
@@ -39,14 +38,20 @@
 
 
 def read_group(dat_file: str, key: str = None):
+    if key:  # swap spaces, hyphens and dots in the key for underscores
+        match_ = key.replace(" ", "_").replace("-", "_").replace(".", "_")
+    else:
+        match_ = None  # no/empty key: get_group falls back to first group
+
     # Cannot rely on do_ABC as _larch is None
     athena_group = read_athena(
         dat_file,
+        match=match_,  # NOTE(review): presumably limits which groups load
         do_preedge=False,
         do_bkg=False,
         do_fft=False,
     )
-    group = get_group(athena_group, key)
+    group = get_group(athena_group, match_)  # look up by the normalised key
     pre_edge_with_defaults(group=group)
     xftf_with_defaults(group=group)
     return group
@@ -168,3 +173,7 @@
 def read_groups(dat_files: "list[str]", key: str = None) -> Iterable[Group]:
     for dat_file in dat_files:
         yield read_group(dat_file=dat_file, key=key)
+
+
+def sorting_key(filename: str) -> str:  # last digit run in name, as text
+    return re.findall(r"\d+", filename)[-1]  # IndexError if it has no digits
--- a/larch_lcf.py	Thu Apr 11 09:02:19 2024 +0000
+++ b/larch_lcf.py	Fri Aug 23 14:10:44 2024 +0000
@@ -1,7 +1,8 @@
 import json
+import os
 import sys
 
-from common import read_group
+from common import read_group, sorting_key
 
 from larch.math.lincombo_fitting import get_label, lincombo_fit
 from larch.symboltable import Group
@@ -15,6 +16,7 @@
     fit_group: Group,
     x_limit_min: float,
     x_limit_max: float,
+    prj_id: str,
 ):
     formatted_label = ""
     for label, weight in fit_group.weights.items():
@@ -41,7 +43,7 @@
     plt.xlabel("Energy (eV)")
     plt.ylabel("normalised x$\mu$(E)")  # noqa: W605
     plt.legend()
-    plt.savefig("plot.png", format="png")
+    plt.savefig(f"plot/{prj_id}.png", format="png")
     plt.close("all")
 
 
@@ -52,12 +54,7 @@
         component_group.filename = get_label(component_group)
 
 
-if __name__ == "__main__":
-    # larch imports set this to an interactive backend, so need to change it
-    matplotlib.use("Agg")
-    prj_file = sys.argv[1]
-    input_values = json.load(open(sys.argv[2], "r", encoding="utf-8"))
-
+def main(prj_file: str, input_values: dict, prj_id: str = "plot"):
     group_to_fit = read_group(prj_file)
     set_label(group_to_fit, input_values["label"])
 
@@ -79,4 +76,29 @@
 
     x_limit_min = input_values["x_limit_min"]
     x_limit_max = input_values["x_limit_max"]
-    plot(group_to_fit, fit_group, x_limit_min, x_limit_max)
+    plot(group_to_fit, fit_group, x_limit_min, x_limit_max, prj_id)
+
+
+if __name__ == "__main__":
+    # larch imports set this to an interactive backend, so need to change it
+    matplotlib.use("Agg")
+    prj_file = sys.argv[1]
+    input_values = json.load(open(sys.argv[2], "r", encoding="utf-8"))
+
+    if input_values["execution"]["execution"] == "parallel":
+        main(prj_file, input_values)
+
+    else:
+        if os.path.isdir(prj_file):
+            # Sort the unzipped directory, all filenames should be zero-padded
+            paths = os.listdir(prj_file)
+            filepaths = [os.path.join(prj_file, p) for p in paths]
+            filepaths.sort(key=sorting_key)
+        else:
+            # DO NOT sort if we have multiple Galaxy datasets - the filenames
+            # are arbitrary but should be in order
+            filepaths = prj_file.split(",")
+
+        id_length = len(str(len(filepaths)))
+        for i, prj_file in enumerate(filepaths):
+            main(prj_file, input_values, str(i).zfill(id_length))
--- a/larch_lcf.xml	Thu Apr 11 09:02:19 2024 +0000
+++ b/larch_lcf.xml	Fri Aug 23 14:10:44 2024 +0000
@@ -2,7 +2,7 @@
     <description>perform linear combination fit on XAS data</description>
     <macros>
         <!-- version of underlying tool (PEP 440) -->
-        <token name="@TOOL_VERSION@">0.9.75</token>
+        <token name="@TOOL_VERSION@">0.9.80</token>
         <!-- version of this tool wrapper (integer) -->
         <token name="@WRAPPER_VERSION@">0</token>
         <!-- citation should be updated with every underlying tool version -->
@@ -22,14 +22,35 @@
         <include type="literal" path="common.py"/>
     </required_files>
     <command detect_errors="exit_code"><![CDATA[
-        python '${__tool_directory__}/larch_lcf.py' '$prj_file' '$inputs'
+        mkdir plot &&
+        #if $execution.execution=="zipped":
+            unzip '$execution.prj_file' &&
+            python '${__tool_directory__}/larch_lcf.py' prj '$inputs'
+        #else
+            python '${__tool_directory__}/larch_lcf.py' '$execution.prj_file' '$inputs'
+        #end if
     ]]></command>
     <configfiles>
         <inputs name="inputs" data_style="paths"/>
     </configfiles>
     <inputs>
         <param name="label" type="text" optional="true" label="Main data label" help="The label to use for the main data (target of the fit). If unset, the label will be taken from the Athena project metadata."/>
-        <param name="prj_file" type="data" format="prj" label="Athena project to fit" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format, to be fit."/>
+        <conditional name="execution" >  <!-- larch_lcf.py reads the selected value as input_values["execution"]["execution"] -->
+            <param name="execution" type="select" display="radio" label="Execution mode" help="Whether to execute: on individual Athena projects as parallel jobs, as one job with each project fit occurring in series, or as one job using a zipped input.">
+                <option value="parallel" selected="true">Parallel</option>
+                <option value="series">Series</option>
+                <option value="zipped">Zipped</option>
+            </param>
+            <when value="parallel">
+                <param name="prj_file" type="data" format="prj" label="Athena project file" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. If a collection of files is provided, these will be submitted and executed in parallel."/>
+            </when>
+            <when value="series">  <!-- larch_lcf.py splits this argument on commas to get the individual project paths -->
+                <param name="prj_file" type="data" format="prj" multiple="true" label="Athena project files" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. These will be submitted as a single job working in series, enabling transition search."/>
+            </when>
+            <when value="zipped">  <!-- the command unzips this archive and passes the directory name prj to larch_lcf.py -->
+                <param name="prj_file" type="data" format="zip" label="Zipped Athena outputs" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format, and zipped. These will be submitted as a single job working in series, enabling transition search."/>
+            </when>
+        </conditional>
         <repeat name="components" title="Fit components">
             <param name="label" type="text" optional="true" label="Component label" help="The label to use for this component. If unset, the label will be taken from the Athena project metadata."/>
             <param name="component_file" type="data" format="prj" label="Athena project to fit" help="Reference X-ray Absorption Fine Structure (XAFS) data, in Athena project format, to be used as components in the fit."/>
@@ -41,7 +62,13 @@
         <expand macro="plot_limits_energy"/>
     </inputs>
     <outputs>
-        <data name="plot" format="png" from_work_dir="plot.png"/>
+        <data name="plot" format="png" from_work_dir="plot/plot.png">  <!-- parallel mode only: the script's default prj_id "plot" makes it write plot/plot.png -->
+            <filter>execution["execution"]=="parallel"</filter>
+        </data>
+        <collection name="plot_collection" format="png" type="list" label="Larch LCF on series data ${on_string}">
+            <discover_datasets pattern="__name_and_ext__" directory="plot"/>  <!-- series and zipped modes: the script writes one plot/ID.png per project, ID being a zero-padded index -->
+            <filter>execution["execution"]!="parallel"</filter>
+        </collection>
     </outputs>
     <tests>
         <!-- 1 -->
@@ -62,6 +89,36 @@
                 <has_text text="Goodness of fit (rfactor): 0.532130%"/>
             </assert_stdout>
         </test>
+        <!-- 2: series, with a single input project so the output collection is expected to hold one plot -->
+        <test expect_num_outputs="1">
+            <param name="execution" value="series"/>
+            <param name="prj_file" value="PtSn_OCO_Abu_1_29204.5.prj"/>
+            <param name="component_file" value="Sn_foil_29200.prj"/>
+            <param name="component_file" value="SnO2_29206.prj"/>
+            <param name="energy_min" value="29190"/>
+            <param name="energy_max" value="29230"/>
+            <param name="x_limit_min" value="29190"/>
+            <param name="x_limit_max" value="29230"/>
+            <output_collection name="plot_collection" type="list" count="1"/>
+            <assert_stdout>
+                <has_text text="Goodness of fit (rfactor): 0.532130%"/>
+            </assert_stdout>
+        </test>
+        <!-- 3: zipped, where lcf.zip is expected to yield two plots in the output collection -->
+        <test expect_num_outputs="1">
+            <param name="execution" value="zipped"/>
+            <param name="prj_file" value="lcf.zip"/>
+            <param name="component_file" value="Sn_foil_29200.prj"/>
+            <param name="component_file" value="SnO2_29206.prj"/>
+            <param name="energy_min" value="29190"/>
+            <param name="energy_max" value="29230"/>
+            <param name="x_limit_min" value="29190"/>
+            <param name="x_limit_max" value="29230"/>
+            <output_collection name="plot_collection" type="list" count="2"/>
+            <assert_stdout>
+                <has_text text="Goodness of fit (rfactor): 0.532130%"/>
+            </assert_stdout>
+        </test>
     </tests>
     <help><![CDATA[
         Performs a Linear Combination Fit (LCF) on an Athena project, using a number of other projects as components to the fit.
Binary file test-data/lcf.zip has changed