changeset 0:2752b2dd7ad6 draft

planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_artemis commit 5be486890442dedfb327289d597e1c8110240735
author muon-spectroscopy-computational-project
date Tue, 14 Nov 2023 15:34:23 +0000
parents
children 84c8e04bc1a1
files common.py larch_artemis.py larch_artemis.xml macros.xml test-data/NaN.binary test-data/[FEFF_paths_of_test.inp].zip test-data/criteria_report.csv test-data/fit_report.txt test-data/gds.csv test-data/sp.csv test-data/test.prj test-data/test.zip
diffstat 12 files changed, 897 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/common.py	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,46 @@
+from typing import Iterable
+
+from larch.io import extract_athenagroup, read_athena
+from larch.io.athena_project import AthenaGroup
+from larch.symboltable import Group
+from larch.xafs import autobk, pre_edge, xftf
+
+
+def get_group(athena_group: AthenaGroup, key: str = None) -> Group:
+    """Extract a single group from an Athena project.
+
+    Args:
+        athena_group: Project object whose ``_athena_groups`` mapping holds
+            the stored groups.
+        key: Name of the group to extract. When ``None`` the first key of
+            the mapping is used.
+
+    Returns:
+        The result of ``extract_athenagroup`` for the chosen entry.
+    """
+    stored_groups = athena_group._athena_groups
+    if key is None:
+        # Default to the first group in the project's own key order.
+        key = [*stored_groups][0]
+    return extract_athenagroup(stored_groups[key])
+
+
+def read_group(dat_file: str, key: str = None, xftf_params: dict = None):
+    """Read an Athena project and run the processing chain on one group.
+
+    The selected group is passed to ``pre_edge`` with the background
+    parameters stored in the project, then to ``autobk`` and ``xftf``.
+
+    Args:
+        dat_file: Path of the Athena project file to read.
+        key: Name of the group to use; ``None`` selects the first group.
+        xftf_params: Keyword arguments forwarded to ``xftf``. When ``None``
+            ``xftf`` is called with its defaults.
+
+    Returns:
+        The processed group. When ``xftf_params`` was supplied it is also
+        recorded on the group as ``xftf_details.call_args``.
+    """
+    athena_group = read_athena(dat_file)
+    group = get_group(athena_group, key)
+    bkg_parameters = group.athena_params.bkg
+    pre_edge(
+        group,
+        e0=bkg_parameters.e0,
+        pre1=bkg_parameters.pre1,
+        pre2=bkg_parameters.pre2,
+        norm1=bkg_parameters.nor1,
+        norm2=bkg_parameters.nor2,
+        nnorm=bkg_parameters.nnorm,
+        make_flat=bkg_parameters.flatten,
+    )
+    autobk(group)
+    if xftf_params is None:
+        xftf(group)
+    else:
+        xftf(group, **xftf_params)
+        # Keep the explicit arguments on the group for later reference.
+        xftf_details = Group()
+        xftf_details.call_args = xftf_params
+        group.xftf_details = xftf_details
+    return group
+
+
+def read_groups(dat_files: "list[str]", key: str = None) -> Iterable[Group]:
+    """Lazily yield the processed group for each project file, in order.
+
+    Args:
+        dat_files: Paths of the Athena project files to read.
+        key: Group name passed on to ``read_group`` for every file.
+
+    Yields:
+        The value returned by ``read_group`` for each path.
+    """
+    yield from (read_group(dat_file=path, key=key) for path in dat_files)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/larch_artemis.py	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,386 @@
+import csv
+import faulthandler
+import gc
+import json
+import os
+import sys
+
+from common import get_group
+
+from larch.fitting import guess, param, param_group
+from larch.io import read_athena
+from larch.symboltable import Group
+from larch.xafs import (
+    FeffPathGroup,
+    FeffitDataSet,
+    TransformGroup,
+    autobk,
+    feffit,
+    feffit_report,
+    pre_edge,
+    xftf,
+)
+
+import matplotlib
+import matplotlib.pyplot as plt
+
+import numpy as np
+
+
+def read_csv_data(input_file, id_field="id"):
+    """Load a CSV file into a dict of rows keyed by an integer id column.
+
+    Args:
+        input_file: Path of the CSV file; its first row is used as the
+            header.
+        id_field: Name of the column whose value, converted with ``int``,
+            becomes the key of each row.
+
+    Returns:
+        A dict mapping each id to its row as produced by
+        ``csv.DictReader``. If the file does not exist a message is printed
+        and an empty dict is returned.
+    """
+    csv_data = {}
+    try:
+        # newline="" leaves newline handling to the csv module, as its
+        # documentation requires for file objects given to a reader.
+        with open(input_file, encoding="utf8", newline="") as csvfile:
+            reader = csv.DictReader(csvfile, skipinitialspace=True)
+            for row in reader:
+                csv_data[int(row[id_field])] = row
+    except FileNotFoundError:
+        print("The specified file does not exist")
+    return csv_data
+
+
+def calc_with_defaults(xafs_group: Group) -> Group:
+    """Run pre_edge, autobk and xftf on the group with default arguments.
+
+    The three routines are applied in that order to the group that was
+    passed in, and that same group is returned.
+    """
+    for larch_step in (pre_edge, autobk, xftf):
+        larch_step(xafs_group)
+    return xafs_group
+
+
+def dict_to_gds(data_dict):
+    """Build a parameter group from parsed GDS rows.
+
+    Args:
+        data_dict: Mapping of row id to a row dict with the keys ``name``,
+            ``value``, ``expr`` and ``vary``.
+
+    Returns:
+        A ``param_group`` carrying one attribute per row, named after the
+        row's ``name``.
+    """
+    gds_group = param_group()
+    for par_idx in data_dict:
+        row = data_dict[par_idx]
+        name = row["name"]
+        try:
+            value = float(row["value"])
+        except ValueError:
+            # Fall back to zero when the value cannot be parsed as a float.
+            value = 0.00
+        expr = row["expr"]
+        vary = str(row["vary"]).strip().capitalize() == "True"
+        # A varying row is equivalent to a guess parameter in Demeter,
+        # anything else is equivalent to a defined parameter.
+        make_parameter = guess if vary else param
+        parameter = make_parameter(
+            name=name, value=value, vary=vary, expr=expr
+        )
+        if parameter is not None:
+            setattr(gds_group, name, parameter)
+    return gds_group
+
+
+def plot_rmr(path: str, data_set, rmin, rmax):
+    """Save a PNG comparing data and fit in R space.
+
+    ``chir_mag`` and ``chir_re`` are drawn against ``r`` for both
+    ``data_set.data`` (blue) and ``data_set.model`` (red), and the fit
+    range between ``rmin`` and ``rmax`` is shaded.
+
+    Args:
+        path: Destination of the PNG file.
+        data_set: Object exposing ``data`` and ``model`` groups.
+        rmin: Lower edge of the shaded band.
+        rmax: Upper edge of the shaded band; also used as its vertical
+            half-height.
+    """
+    plt.figure()
+    curves = ((data_set.data, "b", "expt."), (data_set.model, "r", "fit"))
+    for curve, colour, legend_label in curves:
+        # Only the real part carries the legend entry for each colour.
+        plt.plot(curve.r, curve.chir_mag, color=colour)
+        plt.plot(curve.r, curve.chir_re, color=colour, label=legend_label)
+    plt.ylabel(
+        "Magnitude of Fourier Transform of "
+        r"$k^2 \cdot \chi$/$\mathrm{\AA}^{-3}$"
+    )
+    plt.xlabel(r"Radial distance/$\mathrm{\AA}$")
+    plt.xlim(0, 5)
+
+    band_x = [rmin, rmin, rmax, rmax]
+    band_y = [-rmax, rmax, rmax, -rmax]
+    plt.fill(band_x, band_y, color="g", alpha=0.1)
+    plt.text(rmax - 0.65, -rmax + 0.5, "fit range")
+    plt.legend()
+    plt.savefig(path, format="png")
+    plt.close("all")
+
+
+def plot_chikr(path: str, data_set, rmin, rmax, kmin, kmax):
+    """Save a two-panel PNG comparing data and fit in k space and R space.
+
+    The left panel shows ``chi * k**2`` against ``k`` and the right panel
+    shows ``chir_mag`` against ``r``, each for ``data_set.data`` (blue) and
+    ``data_set.model`` (red). The fit range is shaded on both panels.
+
+    Args:
+        path: Destination of the PNG file.
+        data_set: Object exposing ``data`` and ``model`` groups with ``k``,
+            ``chi``, ``r`` and ``chir_mag`` arrays.
+        rmin: Lower edge of the shaded band on the R-space panel.
+        rmax: Upper edge of the shaded band on the R-space panel; also used
+            as the vertical half-height of both shaded bands.
+        kmin: Lower edge of the shaded band on the k-space panel.
+        kmax: Upper edge of the shaded band on the k-space panel.
+    """
+    fig = plt.figure(figsize=(16, 4))
+    ax1 = fig.add_subplot(121)
+    ax2 = fig.add_subplot(122)
+    ax1.plot(
+        data_set.data.k,
+        data_set.data.chi * data_set.data.k**2,
+        color="b",
+        label="expt.",
+    )
+    # Weight the model by its own k grid so that chi and k always pair up,
+    # even if the model and the data are not sampled on the same grid.
+    ax1.plot(
+        data_set.model.k,
+        data_set.model.chi * data_set.model.k**2,
+        color="r",
+        label="fit",
+    )
+    ax1.set_xlim(0, 15)
+    ax1.set_xlabel(r"$k (\mathrm{\AA})^{-1}$")
+    ax1.set_ylabel(r"$k^2$ $\chi (k)(\mathrm{\AA})^{-2}$")
+
+    ax1.fill(
+        [kmin, kmin, kmax, kmax],
+        [-rmax, rmax, rmax, -rmax],
+        color="g",
+        alpha=0.1,
+    )
+    ax1.text(kmax - 1.65, -rmax + 0.5, "fit range")
+    ax1.legend()
+
+    ax2.plot(data_set.data.r, data_set.data.chir_mag, color="b", label="expt.")
+    ax2.plot(data_set.model.r, data_set.model.chir_mag, color="r", label="fit")
+    ax2.set_xlim(0, 5)
+    ax2.set_xlabel(r"$R(\mathrm{\AA})$")
+    ax2.set_ylabel(r"$|\chi(R)|(\mathrm{\AA}^{-3})$")
+    ax2.legend(loc="upper right")
+
+    ax2.fill(
+        [rmin, rmin, rmax, rmax],
+        [-rmax, rmax, rmax, -rmax],
+        color="g",
+        alpha=0.1,
+    )
+    ax2.text(rmax - 0.65, -rmax + 0.5, "fit range")
+    fig.savefig(path, format="png")
+    plt.close("all")
+
+
+def read_gds(gds_file):
+    """Read a GDS CSV file and return the matching parameter group.
+
+    The rows come from ``read_csv_data`` and are converted with
+    ``dict_to_gds``.
+    """
+    return dict_to_gds(read_csv_data(gds_file))
+
+
+def read_selected_paths_list(file_name):
+    """Create one ``FeffPathGroup`` per row of a selected-paths CSV file.
+
+    Args:
+        file_name: Path of the CSV file; each row supplies ``filename``,
+            ``label``, ``s02``, ``e0``, ``sigma2`` and ``deltar``.
+
+    Returns:
+        The path groups as a list, in the order the rows were read.
+    """
+    selected_paths = []
+    for row in read_csv_data(file_name).values():
+        feff_file = row["filename"]
+        print(f"Reading selected path for file {feff_file}")
+        selected_paths.append(
+            FeffPathGroup(
+                filename=feff_file,
+                label=row["label"],
+                s02=row["s02"],
+                e0=row["e0"],
+                sigma2=row["sigma2"],
+                deltar=row["deltar"],
+            )
+        )
+    return selected_paths
+
+
+def run_fit(data_group, gds, selected_paths, fv):
+    """Run ``feffit`` for one data group.
+
+    Args:
+        data_group: Group holding the data to fit.
+        gds: Parameter group passed to ``feffit``.
+        selected_paths: Path list given to the ``FeffitDataSet``.
+        fv: Mapping with the transform settings ``fitspace``, ``kmin``,
+            ``kmax``, ``kweight``, ``dk``, ``window``, ``rmin`` and ``rmax``.
+
+    Returns:
+        A tuple of the ``FeffitDataSet`` and the value returned by
+        ``feffit``.
+    """
+    transform_keys = (
+        "fitspace",
+        "kmin",
+        "kmax",
+        "kweight",
+        "dk",
+        "window",
+        "rmin",
+        "rmax",
+    )
+    # The transform group prepares the fit space from the user's settings.
+    transform = TransformGroup(**{name: fv[name] for name in transform_keys})
+    data_set = FeffitDataSet(
+        data=data_group, pathlist=selected_paths, transform=transform
+    )
+    return data_set, feffit(gds, data_set)
+
+
+def main(
+    prj_file: str,
+    gds_file: str,
+    sp_file: str,
+    fit_vars: dict,
+    plot_graph: bool,
+    series_id: str = "",
+) -> Group:
+    """Fit one Athena project and write its report and optional plots.
+
+    Args:
+        prj_file: Path of the Athena project file.
+        gds_file: Path of the GDS parameters CSV file.
+        sp_file: Path of the selected-paths CSV file.
+        fit_vars: Transform settings for the fit; ``rmin``, ``rmax``,
+            ``kmin`` and ``kmax`` are also used to shade the plots.
+        plot_graph: Whether to write the RMR and ChiKR plots.
+        series_id: Suffix inserted into the output file names.
+
+    Returns:
+        The value returned by ``feffit`` for this project.
+    """
+    report_path = f"report/fit_report{series_id}.txt"
+    rmr_path = f"rmr/rmr{series_id}.png"
+    chikr_path = f"chikr/chikr{series_id}.png"
+
+    athena_project = read_athena(prj_file)
+    athena_group = get_group(athena_project)
+    # calc_with_defaults will hang indefinitely (>6 hours recorded) if the
+    # data contains any NaNs - consider adding an early error here if this is
+    # not fixed in Larch?
+    data_group = calc_with_defaults(athena_group)
+
+    print(f"Fitting project from file {data_group.filename}")
+
+    gds = read_gds(gds_file)
+    selected_paths = read_selected_paths_list(sp_file)
+    dset, out = run_fit(data_group, gds, selected_paths, fit_vars)
+
+    fit_report = feffit_report(out)
+    # The tool wrapper creates the output directories before calling the
+    # script; create them here as well so the function also works without
+    # that step.
+    os.makedirs(os.path.dirname(report_path), exist_ok=True)
+    with open(report_path, "w") as fit_report_file:
+        fit_report_file.write(fit_report)
+
+    if plot_graph:
+        for plot_path in (rmr_path, chikr_path):
+            os.makedirs(os.path.dirname(plot_path), exist_ok=True)
+        plot_rmr(rmr_path, dset, fit_vars["rmin"], fit_vars["rmax"])
+        plot_chikr(
+            chikr_path,
+            dset,
+            fit_vars["rmin"],
+            fit_vars["rmax"],
+            fit_vars["kmin"],
+            fit_vars["kmax"],
+        )
+    return out
+
+
+def check_threshold(
+    series_id: str,
+    threshold: float,
+    variable: str,
+    value: float,
+    early_stopping: bool = False,
+):
+    """Print a message when the magnitude of a value exceeds a threshold.
+
+    Args:
+        series_id: Identifier of the project, used in the message.
+        threshold: Limit compared against ``abs(value)``.
+        variable: Name of the variable, used in the message.
+        value: Value to check.
+        early_stopping: Selects the ERROR message instead of the WARNING
+            message and is returned when the threshold is exceeded.
+
+    Returns:
+        ``early_stopping`` if ``abs(value) > threshold``, otherwise
+        ``False``.
+    """
+    if not abs(value) > threshold:
+        return False
+
+    message = (
+        "ERROR: Stopping series fit after project "
+        f"{series_id} as {variable} > {threshold}"
+        if early_stopping
+        else f"WARNING: Project {series_id} has {variable} > {threshold}"
+    )
+    print(message)
+    return early_stopping
+
+
+def series_execution(
+    filepaths: "list[str]",
+    gds_file: str,
+    sp_file: str,
+    fit_vars: dict,
+    plot_graph: bool,
+    report_criteria: "list[dict]",
+    stop_on_error: bool,
+) -> "list[list[str]]":
+    """Fit several projects one after another and collect report criteria.
+
+    Args:
+        filepaths: Project files to fit, in execution order.
+        gds_file: Path of the GDS parameters CSV file.
+        sp_file: Path of the selected-paths CSV file.
+        fit_vars: Transform settings passed to every fit.
+        plot_graph: Whether to write plots for every fit.
+        report_criteria: One dict per criterion, each with a ``variable``
+            name and an ``action`` dict as consumed by ``parse_row``.
+        stop_on_error: Whether a ``ValueError`` raised by a fit ends the
+            series instead of moving on to the next project.
+
+    Returns:
+        A list of rows. The first row holds the right-aligned variable
+        names; each following row holds the values for one attempted
+        project, or NaNs when its fit raised ``ValueError``.
+    """
+    # Zero-pad the ids so that the output files sort in execution order.
+    id_length = len(str(len(filepaths)))
+    stop = False
+    rows = [[f"{c['variable']:>12s}" for c in report_criteria]]
+    for series_index, series_file in enumerate(filepaths):
+        series_id = str(series_index).zfill(id_length)
+        try:
+            out = main(
+                series_file,
+                gds_file,
+                sp_file,
+                fit_vars,
+                plot_graph,
+                f"_{series_id}",
+            )
+        except ValueError as e:
+            # np.nan is the spelling that exists in every NumPy release.
+            rows.append([np.nan for _ in report_criteria])
+            if stop_on_error:
+                print(
+                    f"ERROR: fitting failed for {series_id}"
+                    f" due to following error, stopping:\n{e}"
+                )
+                break
+            else:
+                print(
+                    f"WARNING: fitting failed for {series_id} due to following"
+                    f" error, continuing to next project:\n{e}"
+                )
+                continue
+
+        row = []
+        for criterium in report_criteria:
+            # Evaluate every criterion even after one has requested a stop,
+            # so that the row is always complete.
+            stop = parse_row(series_id, out, row, criterium) or stop
+        rows.append(row)
+
+        gc.collect()
+
+        if stop:
+            break
+
+    return rows
+
+
+def parse_row(series_id: str, group: Group, row: "list[str]", criterium: dict):
+    """Append one criterion's value to a row and apply its action.
+
+    The variable is first looked up as an attribute of ``group`` and, if
+    that fails, as an entry of ``group.params``.
+
+    Args:
+        series_id: Identifier of the project, used in printed messages.
+        group: Fit result to read the variable from.
+        row: List that receives the formatted value; modified in place.
+        criterium: Dict with the ``variable`` name and an ``action`` dict
+            holding the ``action`` name and, for ``stop`` and ``warn``, a
+            ``threshold``.
+
+    Returns:
+        The result of ``check_threshold`` for the ``stop`` and ``warn``
+        actions, ``False`` for any other action.
+    """
+    action_settings = criterium["action"]
+    action = action_settings["action"]
+    variable = criterium["variable"]
+    try:
+        value = group.__getattribute__(variable)
+    except AttributeError:
+        value = group.params[variable].value
+
+    row.append(f"{value:>12f}")
+    if action not in ("stop", "warn"):
+        return False
+    # Only the "stop" action may end the series early.
+    return check_threshold(
+        series_id,
+        action_settings["threshold"],
+        variable,
+        value,
+        action == "stop",
+    )
+
+
+if __name__ == "__main__":
+    # Command line: <project(s)> <gds csv> <selected paths csv> <inputs json>
+    faulthandler.enable()
+    # larch imports set this to an interactive backend, so need to change it
+    matplotlib.use("Agg")
+
+    prj_file = sys.argv[1]
+    gds_file = sys.argv[2]
+    sp_file = sys.argv[3]
+    # Close the inputs file as soon as it has been parsed.
+    with open(sys.argv[4], "r", encoding="utf-8") as inputs_file:
+        input_values = json.load(inputs_file)
+    fit_vars = input_values["fit_vars"]
+    plot_graph = input_values["plot_graph"]
+
+    if input_values["execution"]["execution"] == "parallel":
+        main(prj_file, gds_file, sp_file, fit_vars, plot_graph)
+
+    else:
+        if os.path.isdir(prj_file):
+            # Sort the unzipped directory, all filenames should be zero-padded
+            filepaths = [
+                os.path.join(prj_file, p) for p in os.listdir(prj_file)
+            ]
+            filepaths.sort()
+        else:
+            # DO NOT sort if we have multiple Galaxy datasets - the filenames
+            # are arbitrary but should be in order
+            filepaths = prj_file.split(",")
+
+        rows = series_execution(
+            filepaths,
+            gds_file,
+            sp_file,
+            fit_vars,
+            plot_graph,
+            input_values["execution"]["report_criteria"],
+            input_values["execution"]["stop_on_error"],
+        )
+        # Only write the report when at least one criterion was requested.
+        if len(rows[0]) > 0:
+            # newline="" leaves line endings to the csv writer, as the csv
+            # module documentation requires.
+            with open("criteria_report.csv", "w", newline="") as f:
+                writer = csv.writer(f)
+                writer.writerows(rows)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/larch_artemis.xml	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,323 @@
+<tool id="larch_artemis" name="Larch Artemis" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT">
+    <description>generate Artemis projects from XAFS data</description>
+    <macros>
+        <!-- version of underlying tool (PEP 440) -->
+        <token name="@TOOL_VERSION@">0.9.71</token>
+        <!-- version of this tool wrapper (integer) -->
+        <token name="@WRAPPER_VERSION@">0</token>
+        <!-- citation should be updated with every underlying tool version -->
+        <!-- typical fields to update are version, month, year, and doi -->
+        <token name="@TOOL_CITATION@">10.1088/1742-6596/430/1/012007</token>
+        <xml name="series_options">
+            <param name="stop_on_error" type="boolean" label="Stop on error" help="Whether to stop gracefully or continue with subsequent projects if an error is encountered when fitting."/>
+            <repeat name="report_criteria" title="Report Criteria">
+                <param name="variable" type="text" label="Variable" help="Name of the variable that appears in the statistics or variables sections of the report."/>
+                <conditional name="action">
+                    <param name="action" type="select" display="radio" label="Action" help="What to do with the named variable.">
+                        <option value="log" selected="true">Log</option>
+                        <option value="warn">Warn</option>
+                        <option value="stop">Early stopping</option>
+                    </param>
+                    <when value="log"/>
+                    <when value="warn">
+                        <param name="threshold" type="float" value="0.0" min="0.0" label="Warning threshold" help="In addition to logging, a warning will be printed if the absolute value of this variable goes above this threshold."/>
+                    </when>
+                    <when value="stop">
+                        <param name="threshold" type="float" value="0.0" min="0.0" label="Early stopping threshold" help="In addition to logging, execution will stop if the absolute value of this variable goes above this threshold."/>
+                    </when>
+                </conditional>
+            </repeat>
+        </xml>
+        <import>macros.xml</import>
+    </macros>
+    <creator>
+        <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/>
+    </creator>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">xraylarch</requirement>
+        <requirement type="package" version="3.5.2">matplotlib</requirement>
+        <requirement type="package" version="3.0">zip</requirement>
+    </requirements>
+    <required_files>
+        <include type="literal" path="larch_artemis.py"/>
+    </required_files>
+    <command detect_errors="exit_code"><![CDATA[
+        unzip '$feff_paths'
+        && mkdir report rmr chikr
+        #if $execution.execution=="zipped":
+            && unzip '$execution.prj_file'
+            && python '${__tool_directory__}/larch_artemis.py' prj '$gds_file' '$sp_file' '$inputs'
+        #else
+            && python '${__tool_directory__}/larch_artemis.py' '$execution.prj_file' '$gds_file' '$sp_file' '$inputs'
+        #end if
+        #if $zip_outputs:
+            && zip out_zip.zip report/* rmr/* chikr/* >/dev/null
+        #end if
+    ]]></command>
+    <configfiles>
+        <inputs name="inputs"/>
+    </configfiles>
+    <inputs>
+        <conditional name="execution" >
+            <param name="execution" type="select" display="radio" label="Execution mode" help="Whether to execute: on individual Athena projects as parallel jobs, as one job with each project fit occurring in series, or as one job using a zipped input.">
+                <option value="parallel" selected="true">Parallel</option>
+                <option value="series">Series</option>
+                <option value="zipped">Zipped</option>
+            </param>
+            <when value="parallel">
+                <param name="prj_file" type="data" format="prj" label="Athena project file" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. If a collection of files is provided, these will be submitted and executed in parallel."/>
+            </when>
+            <when value="series">
+                <param name="prj_file" type="data" format="prj" multiple="true" label="Athena project files" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. These will be submitted as a single job working in series, enabling early stopping criteria."/>
+                <expand macro="series_options"/>
+            </when>
+            <when value="zipped">
+                <param name="prj_file" type="data" format="zip" label="Zipped Athena outputs" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format, and zipped. These will be submitted as a single job working in series, enabling early stopping criteria."/>
+                <expand macro="series_options"/>
+            </when>
+        </conditional>
+        <param name="feff_paths" type="data" format="zip" label="FEFF paths file" help="Zipped directory of the FEFF paths."/>
+        <param name="gds_file" type="data" format="gds" label="GDS parameters file" help="File defining the fitting parameters as a `guess` (to be varied in the fit), `def` (defined by an expression evaluated throughout fitting) or `set` (evaluated at the start of fitting, then left unchanged)."/>
+        <param name="sp_file" type="data" format="sp" label="SP parameters file" help="File defining the scattering paths."/>
+        <section name="fit_vars" title="Fitting Variables">
+            <param argument="fitspace" type="select" help="Space in which the fitting is performed.">
+                <option value="r" selected="true">Real</option>
+                <option value="k">Unfiltered k-space</option>
+                <option value="q">Filtered k-space</option>
+                <option value="w">Wavelet transform</option>
+            </param>
+            <expand macro="xftf_params"/>
+        </section>
+        <param name="plot_graph" type="boolean" label="Plot graph" help="Whether to plot the data."/>
+        <param name="zip_outputs" type="boolean" label="Zip outputs" help="Whether to zip all outputs into one dataset."/>
+    </inputs>
+    <outputs>
+        <data name="out_zip" format="zip" from_work_dir="out_zip.zip" label="Zipped Artemis outputs on ${on_string}">
+            <filter>zip_outputs</filter>
+        </data>
+        <data name="fit_report" format="feffit" from_work_dir="report/fit_report.txt" label="Fit report on ${on_string}">
+            <filter>execution["execution"]=="parallel"</filter>
+            <filter>not zip_outputs</filter>
+        </data>
+        <data name="rmr" format="png" from_work_dir="rmr/rmr.png" label="RMR plot on ${on_string}">
+            <filter>execution["execution"]=="parallel"</filter>
+            <filter>plot_graph</filter>
+            <filter>not zip_outputs</filter>
+        </data>
+        <data name="chikr" format="png" from_work_dir="chikr/chikr.png" label="ChiKR plot on ${on_string}">
+            <filter>execution["execution"]=="parallel"</filter>
+            <filter>plot_graph</filter>
+            <filter>not zip_outputs</filter>
+        </data>
+        <collection name="fit_report_collection" format="feffit" type="list" label="Fit reports on ${on_string}">
+            <discover_datasets pattern="__name_and_ext__" directory="report"/>
+            <filter>execution["execution"]!="parallel"</filter>
+            <filter>not zip_outputs</filter>
+        </collection>
+        <collection name="rmr_collection" format="png" type="list" label="RMR plots on ${on_string}">
+            <discover_datasets pattern="__name_and_ext__" directory="rmr"/>
+            <filter>execution["execution"]!="parallel"</filter>
+            <filter>plot_graph</filter>
+            <filter>not zip_outputs</filter>
+        </collection>
+        <collection name="chikr_collection" format="png" type="list" label="ChiKR plots on ${on_string}">
+            <discover_datasets pattern="__name_and_ext__" directory="chikr"/>
+            <filter>execution["execution"]!="parallel"</filter>
+            <filter>plot_graph</filter>
+            <filter>not zip_outputs</filter>
+        </collection>
+        <data name="criteria_report" format="csv" from_work_dir="criteria_report.csv" label="Selected criteria from ${on_string}">
+            <filter>execution["execution"]!="parallel"</filter>
+            <filter>len(execution["report_criteria"])>0</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Parallel, no plot -->
+        <test expect_num_outputs="1">
+            <param name="execution" value="parallel"/>
+            <param name="prj_file" value="test.prj"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <output name="fit_report" file="fit_report.txt" compare="re_match"/>
+        </test>
+        <!-- Parallel, plot -->
+        <test expect_num_outputs="3">
+            <param name="execution" value="parallel"/>
+            <param name="prj_file" value="test.prj"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <param name="plot_graph" value="true"/>
+            <output name="fit_report" file="fit_report.txt" compare="re_match"/>
+            <output name="rmr">
+                <assert_contents>
+                    <has_size value="55000" delta="1000"/>
+                </assert_contents>
+            </output>
+            <output name="chikr">
+                <assert_contents>
+                    <has_size value="65000" delta="1000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Series, no plot -->
+        <test expect_num_outputs="1">
+            <param name="execution" value="series"/>
+            <param name="prj_file" value="test.prj,test.prj"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <output_collection name="fit_report_collection" type="list" count="2"/>
+        </test>
+        <!-- Series, plot -->
+        <test expect_num_outputs="3">
+            <param name="execution" value="series"/>
+            <param name="prj_file" value="test.prj"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <param name="plot_graph" value="true"/>
+            <output_collection name="fit_report_collection" type="list" count="1"/>
+            <output_collection name="rmr_collection" type="list" count="1"/>
+            <output_collection name="chikr_collection" type="list" count="1"/>
+        </test>
+        <!-- Series, plot, continue on error -->
+        <!-- NB: Using NaNs to force an error now causes an error at an earlier stage where it is not caught, so cannot use these tests -->
+        <!-- <test expect_num_outputs="3">
+            <param name="execution" value="series"/>
+            <param name="prj_file" value="NaN.binary,test.prj"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <param name="plot_graph" value="true"/>
+            <output_collection name="fit_report_collection" type="list" count="1"/>
+            <output_collection name="rmr_collection" type="list" count="1"/>
+            <output_collection name="chikr_collection" type="list" count="1"/>
+        </test> -->
+        <!-- Series, plot, stop on error -->
+        <!-- NB: Using NaNs to force an error now causes an error at an earlier stage where it is not caught, so cannot use these tests -->
+        <!-- <test expect_num_outputs="3">
+            <param name="execution" value="series"/>
+            <param name="prj_file" value="NaN.binary,test.prj"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <param name="plot_graph" value="true"/>
+            <output_collection name="fit_report_collection" type="list" count="0"/>
+            <output_collection name="rmr_collection" type="list" count="0"/>
+            <output_collection name="chikr_collection" type="list" count="0"/>
+        </test> -->
+        <!-- Series, early stopping -->
+        <test expect_num_outputs="2">
+            <param name="execution" value="series"/>
+            <param name="prj_file" value="test.prj,test.prj"/>
+            <param name="variable" value="rfactor"/>
+            <param name="action" value="stop"/>
+            <param name="variable" value="alpha"/>
+            <param name="action" value="log"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <output_collection name="fit_report_collection" type="list" count="1"/>
+            <output name="criteria_report" file="criteria_report.csv" compare="re_match"/>
+        </test>
+        <!-- Zipped -->
+        <test expect_num_outputs="1">
+            <param name="execution" value="zipped"/>
+            <param name="prj_file" value="test.zip"/>
+            <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/>
+            <param name="gds_file" value="gds.csv"/>
+            <param name="sp_file" value="sp.csv"/>
+            <param name="fitspace" value="r"/>
+            <param name="kmin" value="3"/>
+            <param name="kmax" value="14"/>
+            <param name="kweight" value="2"/>
+            <param name="dk" value="1"/>
+            <param name="window" value="hanning"/>
+            <param name="rmin" value="1.4"/>
+            <param name="rmax" value="3.0"/>
+            <param name="plot_graph" value="true"/>
+            <param name="zip_outputs" value="true"/>
+            <output name="out_zip">
+                <assert_contents>
+                    <has_size value="230000" delta="500"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Using Larch, perform fitting on an Athena project file, originally from the input X-ray Absorption Fine Structure (XAFS) data file.
+
+        Optionally, plot the xμ data along with RMR and ChiKR plots for visual inspection of the fit. 
+
+        By default, if multiple files or a collection is provided then these jobs will be executed independently, in parallel.
+        
+        Alternatively, series execution will submit a single job with each file being executed in order as part of the job.
+        A single zip file containing multiple Athena projects can also be extracted and run in series.
+        If a fit is unsuccessful and results in an error, the job can either be stopped there or the fit attempted for the remaining projects.
+        Criteria of interest from each individual report can be collected and logged.
+        Additionally, thresholds can be set, with either warnings being printed or the job stopping early if these are exceeded by the criteria of interest.
+        Parameters in both the "Statistics" and "Variables" sections of the report are supported.
+    ]]></help>
+    <citations>
+        <citation type="doi">@TOOL_CITATION@</citation>
+        <citation type="doi">10.1107/S0909049505012719</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,26 @@
+<macros>
+    <xml name="energy_limits">
+        <param name="energy_format" type="select" display="radio" label="Energy limits" help="Whether to limit the energy relative to the absorption edge or with absolute values.">
+            <option value="relative" selected="true">Relative</option>
+            <option value="absolute">Absolute</option>
+        </param>
+        <param name="energy_min" type="float" label="Minimum energy (eV)" optional="true" help="If set, data will be cropped below this value in electron volts."/>
+        <param name="energy_max" type="float" label="Maximum energy (eV)" optional="true" help="If set, data will be cropped above this value in electron volts."/>
+    </xml>
+    <xml name="xftf_params">
+        <param argument="kmin" type="float" value="0" min="0.0" help="Minimum k value."/>
+        <param argument="kmax" type="float" value="20" min="0.0" help="Maximum k value."/>
+        <param argument="kweight" type="float" value="2" help="Exponent for weighting spectra by raising k to this power."/>
+        <param argument="dk" type="float" value="4" help="Tapering parameter for Fourier Transform window."/>
+        <param argument="window" type="select" help="Fourier Transform window type.">
+            <option value="hanning">Hanning (cosine-squared taper)</option>
+            <option value="parzen">Parzen (linear taper)</option>
+            <option value="welch">Welch (quadratic taper)</option>
+            <option value="gaussian">Gaussian function window</option>
+            <option value="sine">Sine function window</option>
+            <option value="kaiser" selected="true">Kaiser-Bessel function-derived window</option>
+        </param>
+        <param argument="rmin" type="float" value="0.0" min="0.0" help="Minimum radial distance."/>
+        <param argument="rmax" type="float" value="10.0" min="0.0" help="Maximum radial distance."/>
+    </xml>
+</macros>
\ No newline at end of file
Binary file test-data/NaN.binary has changed
Binary file test-data/[FEFF_paths_of_test.inp].zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/criteria_report.csv	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,2 @@
+       alpha,     rfactor
+    0\.34\d{4},    0\.59\d{4}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fit_report.txt	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,101 @@
+=================== FEFFIT RESULTS ====================
+\[\[Statistics\]\]
+   nvarys, npts       =  6, 104
+   n_independent      =  12\.205
+   chi_square         =  [\d\.]{10}
+   reduced chi_square =  [\d\.]{10}
+   r-factor           =  [\d\.]{10}
+   Akaike info crit   =  [\d\.]{10}
+   Bayesian info crit =  [\d\.]{10}
+
+\[\[Data\]\]
+   fit space          = 'r'
+   r-range            = 1\.400, 3\.000
+   k-range            = 3\.000, 14\.000
+   k window, dk       = 'hanning', 1\.000
+   paths used in fit  = \['feff/feff0001\.dat', 'feff/feff0002\.dat', 'feff/feff0003\.dat', 'feff/feff0004\.dat'\]
+   k-weight           = 2
+   epsilon_k          = Array\(mean=5\.19133e-4, std=4\.56760e-4\)
+   epsilon_r          = [\d\.]{10}
+   n_independent      = 12\.205
+
+\[\[Variables\]\]
+   alpha          = [-\s][\d\.]{10} \+/- [\d\.]{10}   \(init=  1\.00000e-7\)
+   amp            = [-\s][\d\.]{10} \+/- [\d\.]{10}   \(init=  1\.00000000\)
+   enot           = [-\s][\d\.]{10} \+/- [\d\.]{10}   \(init=  1\.00000e-7\)
+   ss             = [-\s][\d\.]{10} \+/- [\d\.]{10}   \(init=  0\.00300000\)
+   ss2            = [-\s][\d\.]{10} \+/- [\d\.]{10}   \(init=  0\.00300000\)
+   ss3            = [-\s][\d\.]{10} \+/- [\d\.]{10}  = 'ss2'
+   ssfe           = [-\s][\d\.]{10} \+/- [\d\.]{10}   \(init=  0\.00300000\)
+
+\[\[Correlations\]\]    \(unreported correlations are <  0\.100\)
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+   \w+, \w+\s+= [\s\-]0\.\d{3}
+
+
+\[\[Paths\]\]
+ = Path 'S1' = Rh K Edge
+    feffdat file = feff/feff0001\.dat, from feff run 'feff'
+    geometry  atom      x        y        z      ipot
+              Rh       0\.0000,  0\.0000,  0\.0000  0 \(absorber\)
+               C      -0\.7410,  0\.2885, -1\.7419  3
+     reff   =  [\d\.]{10}
+     degen  =  1\.00000000
+     n\*s02  = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'amp'
+     e0     = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'enot'
+     r      = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'reff \+ alpha\*reff'
+     deltar = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'alpha\*reff'
+     sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'ss'
+
+ = Path 'S2' = Rh K Edge
+    feffdat file = feff/feff0002\.dat, from feff run 'feff'
+    geometry  atom      x        y        z      ipot
+              Rh       0\.0000,  0\.0000,  0\.0000  0 \(absorber\)
+               C       1\.4414,  0\.4279,  1\.2965  3
+     reff   =  [\d\.]{10}
+     degen  =  1\.00000000
+     n\*s02  = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'amp'
+     e0     = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'enot'
+     r      = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'reff \+ alpha\*reff'
+     deltar = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'alpha\*reff'
+     sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'ss2'
+
+ = Path 'S3' = Rh K Edge
+    feffdat file = feff/feff0003\.dat, from feff run 'feff'
+    geometry  atom      x        y        z      ipot
+              Rh       0\.0000,  0\.0000,  0\.0000  0 \(absorber\)
+               C      -1\.6586, -0\.1094,  1\.2084  3
+     reff   =  [\d\.]{10}
+     degen  =  1\.00000000
+     n\*s02  = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'amp'
+     e0     = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'enot'
+     r      = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'reff \+ alpha\*reff'
+     deltar = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'alpha\*reff'
+     sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'ss3'
+
+ = Path 'Fe' = Rh K Edge
+    feffdat file = feff/feff0004\.dat, from feff run 'feff'
+    geometry  atom      x        y        z      ipot
+              Rh       0\.0000,  0\.0000,  0\.0000  0 \(absorber\)
+               C       0\.6043, -2\.0001,  0\.0975  3
+     reff   =  [\d\.]{10}
+     degen  =  1\.00000000
+     n\*s02  = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'amp'
+     e0     = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'enot'
+     r      = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'reff \+ alpha\*reff'
+     deltar = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'alpha\*reff'
+     sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10}  := 'ssfe'
+
+=======================================================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gds.csv	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,8 @@
+id,name,value,expr,vary
+1,alpha,1e-07,,True
+2,amp,1.0,,True
+3,enot,1e-07,,True
+4,ss,0.003,,True
+5,ss2,0.003,,True
+6,ss3,0.003,ss2,False
+7,ssfe,0.003,,True
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sp.csv	Tue Nov 14 15:34:23 2023 +0000
@@ -0,0 +1,5 @@
+id,filename,label,s02,e0,sigma2,deltar
+1,feff/feff0001.dat,S1,amp,enot,ss,alpha*reff
+2,feff/feff0002.dat,S2,amp,enot,ss2,alpha*reff
+3,feff/feff0003.dat,S3,amp,enot,ss3,alpha*reff
+4,feff/feff0004.dat,Fe,amp,enot,ssfe,alpha*reff
\ No newline at end of file
Binary file test-data/test.prj has changed
Binary file test-data/test.zip has changed