Mercurial > repos > muon-spectroscopy-computational-project > larch_artemis
changeset 0:2752b2dd7ad6 draft
planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_artemis commit 5be486890442dedfb327289d597e1c8110240735
author | muon-spectroscopy-computational-project |
---|---|
date | Tue, 14 Nov 2023 15:34:23 +0000 |
parents | |
children | 84c8e04bc1a1 |
files | common.py larch_artemis.py larch_artemis.xml macros.xml test-data/NaN.binary test-data/[FEFF_paths_of_test.inp].zip test-data/criteria_report.csv test-data/fit_report.txt test-data/gds.csv test-data/sp.csv test-data/test.prj test-data/test.zip |
diffstat | 12 files changed, 897 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Source: b/common.py (changeset 0:2752b2dd7ad6, Tue Nov 14 15:34:23 2023 +0000)
"""Helpers shared by the Larch Galaxy tools for loading Athena projects."""
from typing import Iterable

from larch.io import extract_athenagroup, read_athena
from larch.io.athena_project import AthenaGroup
from larch.symboltable import Group
from larch.xafs import autobk, pre_edge, xftf


def get_group(athena_group: AthenaGroup, key: str = None) -> Group:
    """Extract one group from an Athena project.

    When ``key`` is None the first key of ``athena_group._athena_groups``
    is used; otherwise the entry stored under ``key`` is extracted.
    """
    available = athena_group._athena_groups
    selected = list(available.keys())[0] if key is None else key
    return extract_athenagroup(available[selected])


def read_group(dat_file: str, key: str = None, xftf_params: dict = None):
    """Read an Athena project and run pre_edge, autobk and xftf on a group.

    The pre-edge step reuses the background parameters stored on the
    group (``group.athena_params.bkg``). If ``xftf_params`` is given it is
    passed to ``xftf`` as keyword arguments and recorded on
    ``group.xftf_details.call_args``; otherwise ``xftf`` is called with no
    extra arguments.
    """
    group = get_group(read_athena(dat_file), key)
    bkg = group.athena_params.bkg
    fft_params = group.athena_params.fft
    print(fft_params)
    print(fft_params.__dict__)

    # Map the stored Athena background settings onto pre_edge's keywords.
    pre_edge_kwargs = {
        "e0": bkg.e0,
        "pre1": bkg.pre1,
        "pre2": bkg.pre2,
        "norm1": bkg.nor1,
        "norm2": bkg.nor2,
        "nnorm": bkg.nnorm,
        "make_flat": bkg.flatten,
    }
    pre_edge(group, **pre_edge_kwargs)
    autobk(group)

    if xftf_params is not None:
        print(xftf_params)
        xftf(group, **xftf_params)
        # Keep a record of the arguments that were used for the transform.
        details = Group()
        details.call_args = xftf_params
        group.xftf_details = details
        return group

    xftf(group)
    return group


def read_groups(dat_files: "list[str]", key: str = None) -> Iterable[Group]:
    """Lazily yield the processed group for each file in ``dat_files``."""
    yield from (read_group(dat_file=path, key=key) for path in dat_files)
# Source: b/larch_artemis.py (changeset 0:2752b2dd7ad6, Tue Nov 14 15:34:23 2023 +0000)
"""Fit FEFF scattering paths to Athena projects with Larch.

Command-line usage (see the ``__main__`` section at the bottom)::

    python larch_artemis.py <prj_file> <gds_file> <sp_file> <inputs.json>

``prj_file`` is a single Athena project, a directory of projects, or a
comma-separated list of project paths. Outputs are written to the
``report/``, ``rmr/`` and ``chikr/`` directories, which must already exist.
"""
import csv
import faulthandler
import gc
import json
import os
import sys

from common import get_group

from larch.fitting import guess, param, param_group
from larch.io import read_athena
from larch.symboltable import Group
from larch.xafs import (
    FeffPathGroup,
    FeffitDataSet,
    TransformGroup,
    autobk,
    feffit,
    feffit_report,
    pre_edge,
    xftf,
)

import matplotlib
import matplotlib.pyplot as plt

import numpy as np


def read_csv_data(input_file, id_field="id"):
    """Read a CSV file into a dict of rows keyed by the integer ``id_field``.

    A missing file is reported on stdout and results in an empty dict
    rather than an exception.
    """
    csv_data = {}
    try:
        with open(input_file, encoding="utf8") as csvfile:
            reader = csv.DictReader(csvfile, skipinitialspace=True)
            for row in reader:
                csv_data[int(row[id_field])] = row
    except FileNotFoundError:
        print("The specified file does not exist")
    return csv_data


def calc_with_defaults(xafs_group: Group) -> Group:
    """Calculate pre_edge, background and xftf with default arguments."""
    pre_edge(xafs_group)
    autobk(xafs_group)
    xftf(xafs_group)
    return xafs_group


def dict_to_gds(data_dict):
    """Build a Larch parameter group from rows of a GDS CSV file.

    Each row supplies ``name``, ``value``, ``expr`` and ``vary``. A value
    that cannot be parsed as a float falls back to 0.0. Rows whose ``vary``
    column reads "True" (any case, surrounding spaces ignored) become
    ``guess`` parameters, all others become ``param`` parameters.
    """
    dgs_group = param_group()
    for row in data_dict.values():
        gds_name = row["name"]
        try:
            gds_val = float(row["value"])
        except (TypeError, ValueError):
            # Blank or non-numeric value (typically an expression-only row).
            gds_val = 0.0
        gds_expr = row["expr"]
        gds_vary = str(row["vary"]).strip().capitalize() == "True"
        # guess ~ a "guess" parameter in Demeter, param ~ a defined one.
        make_parameter = guess if gds_vary else param
        one_par = make_parameter(
            name=gds_name, value=gds_val, vary=gds_vary, expr=gds_expr
        )
        setattr(dgs_group, gds_name, one_par)
    return dgs_group


def plot_rmr(path: str, data_set, rmin, rmax):
    """Save a PNG of magnitude and real part of chi(R) for data and fit.

    The fitted R range ``[rmin, rmax]`` is shaded in green.
    """
    plt.figure()
    plt.plot(data_set.data.r, data_set.data.chir_mag, color="b")
    plt.plot(data_set.data.r, data_set.data.chir_re, color="b", label="expt.")
    plt.plot(data_set.model.r, data_set.model.chir_mag, color="r")
    plt.plot(data_set.model.r, data_set.model.chir_re, color="r", label="fit")
    plt.ylabel(
        "Magnitude of Fourier Transform of "
        r"$k^2 \cdot \chi$/$\mathrm{\AA}^{-3}$"
    )
    plt.xlabel(r"Radial distance/$\mathrm{\AA}$")
    plt.xlim(0, 5)

    # rmax doubles as the vertical half-extent of the shaded rectangle.
    plt.fill(
        [rmin, rmin, rmax, rmax],
        [-rmax, rmax, rmax, -rmax],
        color="g",
        alpha=0.1,
    )
    plt.text(rmax - 0.65, -rmax + 0.5, "fit range")
    plt.legend()
    plt.savefig(path, format="png")
    plt.close("all")


def plot_chikr(path: str, data_set, rmin, rmax, kmin, kmax):
    """Save a two-panel PNG: k^2-weighted chi(k) and |chi(R)|, data vs fit.

    The fitted k range and R range are shaded in green on the respective
    panels.
    """
    fig = plt.figure(figsize=(16, 4))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    ax1.plot(
        data_set.data.k,
        data_set.data.chi * data_set.data.k**2,
        color="b",
        label="expt.",
    )
    # Weight the model by its own k grid: it is plotted against model.k, so
    # using data.k here would break if the two grids ever differ.
    ax1.plot(
        data_set.model.k,
        data_set.model.chi * data_set.model.k**2,
        color="r",
        label="fit",
    )
    ax1.set_xlim(0, 15)
    ax1.set_xlabel(r"$k (\mathrm{\AA})^{-1}$")
    ax1.set_ylabel(r"$k^2$ $\chi (k)(\mathrm{\AA})^{-2}$")

    # rmax is reused as the vertical half-extent of the shaded rectangle.
    ax1.fill(
        [kmin, kmin, kmax, kmax],
        [-rmax, rmax, rmax, -rmax],
        color="g",
        alpha=0.1,
    )
    ax1.text(kmax - 1.65, -rmax + 0.5, "fit range")
    ax1.legend()

    ax2.plot(data_set.data.r, data_set.data.chir_mag, color="b", label="expt.")
    ax2.plot(data_set.model.r, data_set.model.chir_mag, color="r", label="fit")
    ax2.set_xlim(0, 5)
    ax2.set_xlabel(r"$R(\mathrm{\AA})$")
    ax2.set_ylabel(r"$|\chi(R)|(\mathrm{\AA}^{-3})$")
    ax2.legend(loc="upper right")

    ax2.fill(
        [rmin, rmin, rmax, rmax],
        [-rmax, rmax, rmax, -rmax],
        color="g",
        alpha=0.1,
    )
    ax2.text(rmax - 0.65, -rmax + 0.5, "fit range")
    fig.savefig(path, format="png")
    plt.close("all")


def read_gds(gds_file):
    """Read a GDS CSV file and return the corresponding parameter group."""
    return dict_to_gds(read_csv_data(gds_file))


def read_selected_paths_list(file_name):
    """Read the selected-paths CSV and build one FeffPathGroup per row.

    Each row supplies ``filename``, ``label``, ``s02``, ``e0``, ``sigma2``
    and ``deltar``, which are passed through unchanged.
    """
    sp_list = []
    for row in read_csv_data(file_name).values():
        filename = row["filename"]
        print(f"Reading selected path for file {filename}")
        sp_list.append(
            FeffPathGroup(
                filename=filename,
                label=row["label"],
                s02=row["s02"],
                e0=row["e0"],
                sigma2=row["sigma2"],
                deltar=row["deltar"],
            )
        )
    return sp_list


def run_fit(data_group, gds, selected_paths, fv):
    """Run feffit on ``data_group`` and return ``(dataset, fit_result)``.

    ``fv`` must provide the keys ``fitspace``, ``kmin``, ``kmax``,
    ``kweight``, ``dk``, ``window``, ``rmin`` and ``rmax``.
    """
    # create the transform group (prepare the fit space).
    trans = TransformGroup(
        fitspace=fv["fitspace"],
        kmin=fv["kmin"],
        kmax=fv["kmax"],
        kweight=fv["kweight"],
        dk=fv["dk"],
        window=fv["window"],
        rmin=fv["rmin"],
        rmax=fv["rmax"],
    )

    dset = FeffitDataSet(
        data=data_group, pathlist=selected_paths, transform=trans
    )

    out = feffit(gds, dset)
    return dset, out


def main(
    prj_file: str,
    gds_file: str,
    sp_file: str,
    fit_vars: dict,
    plot_graph: bool,
    series_id: str = "",
) -> Group:
    """Fit one Athena project and write its report (and optional plots).

    The report is written to ``report/fit_report{series_id}.txt`` and, when
    ``plot_graph`` is true, plots to ``rmr/rmr{series_id}.png`` and
    ``chikr/chikr{series_id}.png``. Returns the feffit result.
    """
    report_path = f"report/fit_report{series_id}.txt"
    rmr_path = f"rmr/rmr{series_id}.png"
    chikr_path = f"chikr/chikr{series_id}.png"

    athena_project = read_athena(prj_file)
    athena_group = get_group(athena_project)
    # calc_with_defaults will hang indefinitely (>6 hours recorded) if the
    # data contains any NaNs - consider adding an early error here if this is
    # not fixed in Larch?
    data_group = calc_with_defaults(athena_group)

    print(f"Fitting project from file {data_group.filename}")

    gds = read_gds(gds_file)
    selected_paths = read_selected_paths_list(sp_file)
    dset, out = run_fit(data_group, gds, selected_paths, fit_vars)

    fit_report = feffit_report(out)
    with open(report_path, "w") as fit_report_file:
        fit_report_file.write(fit_report)

    if plot_graph:
        plot_rmr(rmr_path, dset, fit_vars["rmin"], fit_vars["rmax"])
        plot_chikr(
            chikr_path,
            dset,
            fit_vars["rmin"],
            fit_vars["rmax"],
            fit_vars["kmin"],
            fit_vars["kmax"],
        )
    return out


def check_threshold(
    series_id: str,
    threshold: float,
    variable: str,
    value: float,
    early_stopping: bool = False,
):
    """Report when ``abs(value)`` exceeds ``threshold``.

    Prints an error (if ``early_stopping``) or a warning, and returns True
    only when the threshold is exceeded and ``early_stopping`` is set.
    """
    if abs(value) <= threshold:
        return False

    if early_stopping:
        message = (
            "ERROR: Stopping series fit after project "
            f"{series_id} as {variable} > {threshold}"
        )
    else:
        message = f"WARNING: Project {series_id} has {variable} > {threshold}"

    print(message)
    return early_stopping


def series_execution(
    filepaths: "list[str]",
    gds_file: str,
    sp_file: str,
    fit_vars: dict,
    plot_graph: bool,
    report_criteria: "list[dict]",
    stop_on_error: bool,
) -> "list[list[str]]":
    """Fit each project in order and collect the requested criteria.

    Returns a list of rows: a header row of variable names followed by one
    row per attempted project. A project whose fit raises ``ValueError``
    contributes a row of NaNs; the series then stops or continues
    according to ``stop_on_error``. The series also stops after any
    project that trips a "stop" criterion.
    """
    # NOTE: report_criteria is taken from the argument. It used to be
    # re-read from a module-level ``input_values`` that only exists when
    # this file is run as a script.
    id_length = len(str(len(filepaths)))
    stop = False
    rows = [[f"{c['variable']:>12s}" for c in report_criteria]]
    for series_index, series_file in enumerate(filepaths):
        series_id = str(series_index).zfill(id_length)
        try:
            out = main(
                series_file,
                gds_file,
                sp_file,
                fit_vars,
                plot_graph,
                f"_{series_id}",
            )
        except ValueError as e:
            # np.nan (not np.NaN, which NumPy 2.0 removed).
            rows.append([np.nan for _ in report_criteria])
            if stop_on_error:
                print(
                    f"ERROR: fitting failed for {series_id}"
                    f" due to following error, stopping:\n{e}"
                )
                break
            print(
                f"WARNING: fitting failed for {series_id} due to following"
                f" error, continuing to next project:\n{e}"
            )
            continue

        row = []
        for criterium in report_criteria:
            # Evaluate every criterion so the row is complete even when an
            # earlier one has already requested a stop.
            stop = parse_row(series_id, out, row, criterium) or stop
        rows.append(row)

        gc.collect()

        if stop:
            break

    return rows


def parse_row(series_id: str, group: Group, row: "list[str]", criterium: dict):
    """Append one criterion's formatted value to ``row`` and check it.

    The value is read from an attribute of ``group`` named by
    ``criterium["variable"]``, falling back to ``group.params[...].value``.
    Returns True when the criterion's action is "stop" and its threshold is
    exceeded, otherwise False.
    """
    action = criterium["action"]["action"]
    variable = criterium["variable"]
    try:
        value = getattr(group, variable)
    except AttributeError:
        value = group.params[variable].value

    row.append(f"{value:>12f}")
    if action in ("stop", "warn"):
        return check_threshold(
            series_id,
            criterium["action"]["threshold"],
            variable,
            value,
            action == "stop",
        )

    return False


if __name__ == "__main__":
    faulthandler.enable()
    # larch imports set this to an interactive backend, so need to change it
    matplotlib.use("Agg")

    prj_file = sys.argv[1]
    gds_file = sys.argv[2]
    sp_file = sys.argv[3]
    with open(sys.argv[4], "r", encoding="utf-8") as inputs_file:
        input_values = json.load(inputs_file)
    fit_vars = input_values["fit_vars"]
    plot_graph = input_values["plot_graph"]

    if input_values["execution"]["execution"] == "parallel":
        main(prj_file, gds_file, sp_file, fit_vars, plot_graph)

    else:
        if os.path.isdir(prj_file):
            # Sort the unzipped directory, all filenames should be zero-padded
            filepaths = sorted(
                os.path.join(prj_file, p) for p in os.listdir(prj_file)
            )
        else:
            # DO NOT sort if we have multiple Galaxy datasets - the filenames
            # are arbitrary but should be in order
            filepaths = prj_file.split(",")

        rows = series_execution(
            filepaths,
            gds_file,
            sp_file,
            fit_vars,
            plot_graph,
            input_values["execution"]["report_criteria"],
            input_values["execution"]["stop_on_error"],
        )
        # Only write the report when at least one criterion was requested.
        if rows[0]:
            # newline="" lets the csv module control line endings itself.
            with open("criteria_report.csv", "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerows(rows)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/larch_artemis.xml Tue Nov 14 15:34:23 2023 +0000 @@ -0,0 +1,323 @@ +<tool id="larch_artemis" name="Larch Artemis" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT"> + <description>generate Artemis projects from XAFS data</description> + <macros> + <!-- version of underlying tool (PEP 440) --> + <token name="@TOOL_VERSION@">0.9.71</token> + <!-- version of this tool wrapper (integer) --> + <token name="@WRAPPER_VERSION@">0</token> + <!-- citation should be updated with every underlying tool version --> + <!-- typical fields to update are version, month, year, and doi --> + <token name="@TOOL_CITATION@">10.1088/1742-6596/430/1/012007</token> + <xml name="series_options"> + <param name="stop_on_error" type="boolean" label="Stop on error" help="Whether to stop gracefully or continue with subsequent projects if an error is encountered when fitting."/> + <repeat name="report_criteria" title="Report Criteria"> + <param name="variable" type="text" label="Variable" help="Name of the variable of that appears in the statistics or variables sections of the report."/> + <conditional name="action"> + <param name="action" type="select" display="radio" label="Action" help="What to do with the named variable."> + <option value="log" selected="true">Log</option> + <option value="warn">Warn</option> + <option value="stop">Early stopping</option> + </param> + <when value="log"/> + <when value="warn"> + <param name="threshold" type="float" value="0.0" min="0.0" label="Warning threshold" help="In addition to logging, a warning will be printed if the absolute value of this variable goes above this threshold."/> + </when> + <when value="stop"> + <param name="threshold" type="float" value="0.0" min="0.0" label="Early stopping threshold" help="In addition to logging, execution will stop if the absolute value of this variable goes above this threshold."/> + </when> + </conditional> + 
</repeat> + </xml> + <import>macros.xml</import> + </macros> + <creator> + <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/> + </creator> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">xraylarch</requirement> + <requirement type="package" version="3.5.2">matplotlib</requirement> + <requirement type="package" version="3.0">zip</requirement> + </requirements> + <required_files> + <include type="literal" path="larch_artemis.py"/> + </required_files> + <command detect_errors="exit_code"><![CDATA[ + unzip '$feff_paths' + && mkdir report rmr chikr + #if $execution.execution=="zipped": + && unzip '$execution.prj_file' + && python '${__tool_directory__}/larch_artemis.py' prj '$gds_file' '$sp_file' '$inputs' + #else + && python '${__tool_directory__}/larch_artemis.py' '$execution.prj_file' '$gds_file' '$sp_file' '$inputs' + #end if + #if $zip_outputs: + && zip out_zip.zip report/* rmr/* chikr/* >/dev/null + #end if + ]]></command> + <configfiles> + <inputs name="inputs"/> + </configfiles> + <inputs> + <conditional name="execution" > + <param name="execution" type="select" display="radio" label="Execution mode" help="Whether to execute: on individual Athena projects as parallel jobs, as one job with each project fit occurring in series, or as one job using a zipped input."> + <option value="parallel" selected="true">Parallel</option> + <option value="series">Series</option> + <option value="zipped">Zipped</option> + </param> + <when value="parallel"> + <param name="prj_file" type="data" format="prj" label="Athena project file" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. 
If a collection of files is provided, these will be submitted and executed in parallel."/> + </when> + <when value="series"> + <param name="prj_file" type="data" format="prj" multiple="true" label="Athena project files" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. These will be submitted as a single job working in series, enabling early stopping criteria."/> + <expand macro="series_options"/> + </when> + <when value="zipped"> + <param name="prj_file" type="data" format="zip" label="Zipped Athena outputs" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format, and zipped. These will be submitted as a single job working in series, enabling early stopping criteria."/> + <expand macro="series_options"/> + </when> + </conditional> + <param name="feff_paths" type="data" format="zip" label="FEFF paths file" help="Zipped directory of the FEFF paths."/> + <param name="gds_file" type="data" format="gds" label="GDS parameters file" help="File defining the fitting parameters as a `guess` (to be varied in the fit), `def` (defined by an expression evaluated throughout fitting) or `set` (evaluated at the start of fitting, then left unchanged)."/> + <param name="sp_file" type="data" format="sp" label="SP parameters file" help="File defining the scattering paths."/> + <section name="fit_vars" title="Fitting Variables"> + <param argument="fitspace" type="select" help="Space in which the fitting is performed."> + <option value="r" selected="true">Real</option> + <option value="k">Unfiltered k-space</option> + <option value="q">Filtered k-space</option> + <option value="w">Wavelet transform</option> + </param> + <expand macro="xftf_params"/> + </section> + <param name="plot_graph" type="boolean" label="Plot graph" help="Whether to plot the data."/> + <param name="zip_outputs" type="boolean" label="Zip outputs" help="Whether to zip all outputs into one dataset."/> + </inputs> + <outputs> + <data name="out_zip" 
format="zip" from_work_dir="out_zip.zip" label="Zipped Artemis outputs on ${on_string}"> + <filter>zip_outputs</filter> + </data> + <data name="fit_report" format="feffit" from_work_dir="report/fit_report.txt" label="Fit report on ${on_string}"> + <filter>execution["execution"]=="parallel"</filter> + <filter>not zip_outputs</filter> + </data> + <data name="rmr" format="png" from_work_dir="rmr/rmr.png" label="RMR plot on ${on_string}"> + <filter>execution["execution"]=="parallel"</filter> + <filter>plot_graph</filter> + <filter>not zip_outputs</filter> + </data> + <data name="chikr" format="png" from_work_dir="chikr/chikr.png" label="ChiKR plot on ${on_string}"> + <filter>execution["execution"]=="parallel"</filter> + <filter>plot_graph</filter> + <filter>not zip_outputs</filter> + </data> + <collection name="fit_report_collection" format="feffit" type="list" label="Fit reports on ${on_string}"> + <discover_datasets pattern="__name_and_ext__" directory="report"/> + <filter>execution["execution"]!="parallel"</filter> + <filter>not zip_outputs</filter> + </collection> + <collection name="rmr_collection" format="png" type="list" label="RMR plots on ${on_string}"> + <discover_datasets pattern="__name_and_ext__" directory="rmr"/> + <filter>execution["execution"]!="parallel"</filter> + <filter>plot_graph</filter> + <filter>not zip_outputs</filter> + </collection> + <collection name="chikr_collection" format="png" type="list" label="ChiKR plots on ${on_string}"> + <discover_datasets pattern="__name_and_ext__" directory="chikr"/> + <filter>execution["execution"]!="parallel"</filter> + <filter>plot_graph</filter> + <filter>not zip_outputs</filter> + </collection> + <data name="criteria_report" format="csv" from_work_dir="criteria_report.csv" label="Selected criteria from ${on_string}"> + <filter>execution["execution"]!="parallel"</filter> + <filter>len(execution["report_criteria"])>0</filter> + </data> + </outputs> + <tests> + <!-- Parallel, no plot --> + <test 
expect_num_outputs="1"> + <param name="execution" value="parallel"/> + <param name="prj_file" value="test.prj"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <output name="fit_report" file="fit_report.txt" compare="re_match"/> + </test> + <!-- Parallel, plot --> + <test expect_num_outputs="3"> + <param name="execution" value="parallel"/> + <param name="prj_file" value="test.prj"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <param name="plot_graph" value="true"/> + <output name="fit_report" file="fit_report.txt" compare="re_match"/> + <output name="rmr"> + <assert_contents> + <has_size value="55000" delta="1000"/> + </assert_contents> + </output> + <output name="chikr"> + <assert_contents> + <has_size value="65000" delta="1000"/> + </assert_contents> + </output> + </test> + <!-- Series, no plot --> + <test expect_num_outputs="1"> + <param name="execution" value="series"/> + <param name="prj_file" value="test.prj,test.prj"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" 
value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <output_collection name="fit_report_collection" type="list" count="2"/> + </test> + <!-- Series, plot --> + <test expect_num_outputs="3"> + <param name="execution" value="series"/> + <param name="prj_file" value="test.prj"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <param name="plot_graph" value="true"/> + <output_collection name="fit_report_collection" type="list" count="1"/> + <output_collection name="rmr_collection" type="list" count="1"/> + <output_collection name="chikr_collection" type="list" count="1"/> + </test> + <!-- Series, plot, continue on error --> + <!-- NB: Using NaNs to force an error now causes an error at an earlier stage where it is not caught, so cannot use these tests --> + <!-- <test expect_num_outputs="3"> + <param name="execution" value="series"/> + <param name="prj_file" value="NaN.binary,test.prj"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <param name="plot_graph" value="true"/> + <output_collection name="fit_report_collection" type="list" count="1"/> + <output_collection name="rmr_collection" type="list" 
count="1"/> + <output_collection name="chikr_collection" type="list" count="1"/> + </test> --> + <!-- Series, plot, stop on error --> + <!-- NB: Using NaNs to force an error now causes an error at an earlier stage where it is not caught, so cannot use these tests --> + <!-- <test expect_num_outputs="3"> + <param name="execution" value="series"/> + <param name="prj_file" value="NaN.binary,test.prj"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <param name="plot_graph" value="true"/> + <output_collection name="fit_report_collection" type="list" count="0"/> + <output_collection name="rmr_collection" type="list" count="0"/> + <output_collection name="chikr_collection" type="list" count="0"/> + </test> --> + <!-- Series, early stopping --> + <test expect_num_outputs="2"> + <param name="execution" value="series"/> + <param name="prj_file" value="test.prj,test.prj"/> + <param name="variable" value="rfactor"/> + <param name="action" value="stop"/> + <param name="variable" value="alpha"/> + <param name="action" value="log"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <output_collection name="fit_report_collection" type="list" count="1"/> + <output name="criteria_report" file="criteria_report.csv" 
compare="re_match"/> + </test> + <!-- Zipped --> + <test expect_num_outputs="1"> + <param name="execution" value="zipped"/> + <param name="prj_file" value="test.zip"/> + <param name="feff_paths" value="[FEFF_paths_of_test.inp].zip"/> + <param name="gds_file" value="gds.csv"/> + <param name="sp_file" value="sp.csv"/> + <param name="fitspace" value="r"/> + <param name="kmin" value="3"/> + <param name="kmax" value="14"/> + <param name="kweight" value="2"/> + <param name="dk" value="1"/> + <param name="window" value="hanning"/> + <param name="rmin" value="1.4"/> + <param name="rmax" value="3.0"/> + <param name="plot_graph" value="true"/> + <param name="zip_outputs" value="true"/> + <output name="out_zip"> + <assert_contents> + <has_size value="230000" delta="500"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ + Using Larch, perform fitting on an Athena project file, originally from the input X-ray Absorption Fine Structure (XAFS) data file. + + Optionally, plot the xμ data along with RMR and ChiKR plots for visual inspection of the fit. + + By default, if multiple files or a collection is provided then these jobs will be executed independently, in parallel. + + Alternatively, series execution will submit a single job with each file being executed in order as part of the job. + A single zip file containing multiple Athena projects can also be extracted and run in series. + If a fit is unsuccessful and results in an error, the job can either be stopped there or the fit attempted for the remaining projects. + Criteria of interest from each individual report can be collected and logged. + Additionally, thresholds can be set, with either warnings being printed or the job stopping early if these are exceeded by the criteria of interest. + Parameters in both the "Statistics" and "Variables" sections of the report are supported. 
+ ]]></help> + <citations> + <citation type="doi">@TOOL_CITATION@</citation> + <citation type="doi">10.1107/S0909049505012719</citation> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Nov 14 15:34:23 2023 +0000 @@ -0,0 +1,26 @@ +<macros> + <xml name="energy_limits"> + <param name="energy_format" type="select" display="radio" label="Energy limits" help="Whether to limit the energy relative to the absorption edge or with absolute values."> + <option value="relative" selected="true">Relative</option> + <option value="absolute">Absolute</option> + </param> + <param name="energy_min" type="float" label="Minimum energy (eV)" optional="true" help="If set, data will be cropped below this value in electron volts."/> + <param name="energy_max" type="float" label="Maximum energy (eV)" optional="true" help="If set, data will be cropped above this value in electron volts."/> + </xml> + <xml name="xftf_params"> + <param argument="kmin" type="float" value="0" min="0.0" help="Minimum k value."/> + <param argument="kmax" type="float" value="20" min="0.0" help="Maximum k value."/> + <param argument="kweight" type="float" value="2" help="Exponent for weighting spectra by raising k to this power."/> + <param argument="dk" type="float" value="4" help="Tapering parameter for Fourier Transform window."/> + <param argument="window" type="select" help="Fourier Transform window type."> + <option value="hanning">Hanning (cosine-squared taper)</option> + <option value="parzen">Parzen (linear taper)</option> + <option value="welch">Welch (quadratic taper)</option> + <option value="gaussian">Gaussian function window</option> + <option value="sine">Sine function window</option> + <option value="kaiser" selected="true">Kaiser-Bessel function-derived window</option> + </param> + <param argument="rmin" type="float" value="0.0" min="0.0" help="Minimum radial distance."/> + <param argument="rmax" type="float" value="10.0" min="0.0" help="Maximum radial distance."/> + </xml> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/criteria_report.csv Tue Nov 14 15:34:23 2023 +0000 @@ -0,0 +1,2 @@ + alpha, rfactor + 0\.34\d{4}, 0\.59\d{4}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fit_report.txt Tue Nov 14 15:34:23 2023 +0000 @@ -0,0 +1,101 @@ +=================== FEFFIT RESULTS ==================== +\[\[Statistics\]\] + nvarys, npts = 6, 104 + n_independent = 12\.205 + chi_square = [\d\.]{10} + reduced chi_square = [\d\.]{10} + r-factor = [\d\.]{10} + Akaike info crit = [\d\.]{10} + Bayesian info crit = [\d\.]{10} + +\[\[Data\]\] + fit space = 'r' + r-range = 1\.400, 3\.000 + k-range = 3\.000, 14\.000 + k window, dk = 'hanning', 1\.000 + paths used in fit = \['feff/feff0001\.dat', 'feff/feff0002\.dat', 'feff/feff0003\.dat', 'feff/feff0004\.dat'\] + k-weight = 2 + epsilon_k = Array\(mean=5\.19133e-4, std=4\.56760e-4\) + epsilon_r = [\d\.]{10} + n_independent = 12\.205 + +\[\[Variables\]\] + alpha = [-\s][\d\.]{10} \+/- [\d\.]{10} \(init= 1\.00000e-7\) + amp = [-\s][\d\.]{10} \+/- [\d\.]{10} \(init= 1\.00000000\) + enot = [-\s][\d\.]{10} \+/- [\d\.]{10} \(init= 1\.00000e-7\) + ss = [-\s][\d\.]{10} \+/- [\d\.]{10} \(init= 0\.00300000\) + ss2 = [-\s][\d\.]{10} \+/- [\d\.]{10} \(init= 0\.00300000\) + ss3 = [-\s][\d\.]{10} \+/- [\d\.]{10} = 'ss2' + ssfe = [-\s][\d\.]{10} \+/- [\d\.]{10} \(init= 0\.00300000\) + +\[\[Correlations\]\] \(unreported correlations are < 0\.100\) + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + \w+, \w+\s+= [\s\-]0\.\d{3} + + +\[\[Paths\]\] + = Path 'S1' = Rh K Edge + feffdat file = feff/feff0001\.dat, from feff run 'feff' + geometry atom x y z ipot + Rh 0\.0000, 0\.0000, 0\.0000 0 \(absorber\) + C -0\.7410, 0\.2885, -1\.7419 3 + reff = [\d\.]{10} + degen = 1\.00000000 + n\*s02 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'amp' + e0 
= [-\s][\d\.]{10} \+/- [\d\.]{10} := 'enot' + r = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'reff \+ alpha\*reff' + deltar = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'alpha\*reff' + sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'ss' + + = Path 'S2' = Rh K Edge + feffdat file = feff/feff0002\.dat, from feff run 'feff' + geometry atom x y z ipot + Rh 0\.0000, 0\.0000, 0\.0000 0 \(absorber\) + C 1\.4414, 0\.4279, 1\.2965 3 + reff = [\d\.]{10} + degen = 1\.00000000 + n\*s02 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'amp' + e0 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'enot' + r = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'reff \+ alpha\*reff' + deltar = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'alpha\*reff' + sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'ss2' + + = Path 'S3' = Rh K Edge + feffdat file = feff/feff0003\.dat, from feff run 'feff' + geometry atom x y z ipot + Rh 0\.0000, 0\.0000, 0\.0000 0 \(absorber\) + C -1\.6586, -0\.1094, 1\.2084 3 + reff = [\d\.]{10} + degen = 1\.00000000 + n\*s02 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'amp' + e0 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'enot' + r = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'reff \+ alpha\*reff' + deltar = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'alpha\*reff' + sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'ss3' + + = Path 'Fe' = Rh K Edge + feffdat file = feff/feff0004\.dat, from feff run 'feff' + geometry atom x y z ipot + Rh 0\.0000, 0\.0000, 0\.0000 0 \(absorber\) + C 0\.6043, -2\.0001, 0\.0975 3 + reff = [\d\.]{10} + degen = 1\.00000000 + n\*s02 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'amp' + e0 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'enot' + r = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'reff \+ alpha\*reff' + deltar = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'alpha\*reff' + sigma2 = [-\s][\d\.]{10} \+/- [\d\.]{10} := 'ssfe' + +======================================================= \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gds.csv Tue Nov 14 15:34:23 2023 +0000 @@ -0,0 +1,8 @@ +id,name,value,expr,vary +1,alpha,1e-07,,True +2,amp,1.0,,True +3,enot,1e-07,,True +4,ss,0.003,,True +5,ss2,0.003,,True +6,ss3,0.003,ss2,False +7,ssfe,0.003,,True \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sp.csv Tue Nov 14 15:34:23 2023 +0000 @@ -0,0 +1,5 @@ +id,filename,label,s02,e0,sigma2,deltar +1,feff/feff0001.dat,S1,amp,enot,ss,alpha*reff +2,feff/feff0002.dat,S2,amp,enot,ss2,alpha*reff +3,feff/feff0003.dat,S3,amp,enot,ss3,alpha*reff +4,feff/feff0004.dat,Fe,amp,enot,ssfe,alpha*reff \ No newline at end of file