Mercurial > repos > muon-spectroscopy-computational-project > larch_lcf
changeset 4:c2d5bfef5b63 draft
planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_lcf commit 4814f53888643f1d3667789050914675fffb7d59
author | muon-spectroscopy-computational-project |
---|---|
date | Fri, 23 Aug 2024 14:10:44 +0000 |
parents | 6bb927e155fc |
children | 90a69f15ab92 |
files | common.py larch_lcf.py larch_lcf.xml test-data/lcf.zip |
diffstat | 4 files changed, 105 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/common.py Thu Apr 11 09:02:19 2024 +0000 +++ b/common.py Fri Aug 23 14:10:44 2024 +0000 @@ -1,3 +1,4 @@ +import re from typing import Iterable from larch.io import extract_athenagroup, read_athena @@ -10,8 +11,6 @@ group_keys = list(athena_group.keys()) if key is None: key = group_keys[0] - else: - key = key.replace("-", "_") try: return extract_athenagroup(athena_group.groups[key]) @@ -19,7 +18,7 @@ raise KeyError(f"{key} not in {group_keys}") from e -def read_all_groups(dat_file: str, key: str = None) -> "dict[str, Group]": +def read_all_groups(dat_file: str) -> "dict[str, Group]": # Cannot rely on do_ABC as _larch is None athena_group = read_athena( dat_file, @@ -39,14 +38,20 @@ def read_group(dat_file: str, key: str = None): + if key: + match_ = key.replace(" ", "_").replace("-", "_").replace(".", "_") + else: + match_ = None + # Cannot rely on do_ABC as _larch is None athena_group = read_athena( dat_file, + match=match_, do_preedge=False, do_bkg=False, do_fft=False, ) - group = get_group(athena_group, key) + group = get_group(athena_group, match_) pre_edge_with_defaults(group=group) xftf_with_defaults(group=group) return group @@ -168,3 +173,7 @@ def read_groups(dat_files: "list[str]", key: str = None) -> Iterable[Group]: for dat_file in dat_files: yield read_group(dat_file=dat_file, key=key) + + +def sorting_key(filename: str) -> str: + return re.findall(r"\d+", filename)[-1]
--- a/larch_lcf.py Thu Apr 11 09:02:19 2024 +0000 +++ b/larch_lcf.py Fri Aug 23 14:10:44 2024 +0000 @@ -1,7 +1,8 @@ import json +import os import sys -from common import read_group +from common import read_group, sorting_key from larch.math.lincombo_fitting import get_label, lincombo_fit from larch.symboltable import Group @@ -15,6 +16,7 @@ fit_group: Group, x_limit_min: float, x_limit_max: float, + prj_id: str, ): formatted_label = "" for label, weight in fit_group.weights.items(): @@ -41,7 +43,7 @@ plt.xlabel("Energy (eV)") plt.ylabel("normalised x$\mu$(E)") # noqa: W605 plt.legend() - plt.savefig("plot.png", format="png") + plt.savefig(f"plot/{prj_id}.png", format="png") plt.close("all") @@ -52,12 +54,7 @@ component_group.filename = get_label(component_group) -if __name__ == "__main__": - # larch imports set this to an interactive backend, so need to change it - matplotlib.use("Agg") - prj_file = sys.argv[1] - input_values = json.load(open(sys.argv[2], "r", encoding="utf-8")) - +def main(prj_file: str, input_values: dict, prj_id: str = "plot"): group_to_fit = read_group(prj_file) set_label(group_to_fit, input_values["label"]) @@ -79,4 +76,29 @@ x_limit_min = input_values["x_limit_min"] x_limit_max = input_values["x_limit_max"] - plot(group_to_fit, fit_group, x_limit_min, x_limit_max) + plot(group_to_fit, fit_group, x_limit_min, x_limit_max, prj_id) + + +if __name__ == "__main__": + # larch imports set this to an interactive backend, so need to change it + matplotlib.use("Agg") + prj_file = sys.argv[1] + input_values = json.load(open(sys.argv[2], "r", encoding="utf-8")) + + if input_values["execution"]["execution"] == "parallel": + main(prj_file, input_values) + + else: + if os.path.isdir(prj_file): + # Sort the unzipped directory, all filenames should be zero-padded + paths = os.listdir(prj_file) + filepaths = [os.path.join(prj_file, p) for p in paths] + filepaths.sort(key=sorting_key) + else: + # DO NOT sort if we have multiple Galaxy datasets - the filenames + # are arbitrary but should be in order + filepaths = prj_file.split(",") + + id_length = len(str(len(filepaths))) + for i, prj_file in enumerate(filepaths): + main(prj_file, input_values, str(i).zfill(id_length))
--- a/larch_lcf.xml Thu Apr 11 09:02:19 2024 +0000 +++ b/larch_lcf.xml Fri Aug 23 14:10:44 2024 +0000 @@ -2,7 +2,7 @@ <description>perform linear combination fit on XAS data</description> <macros> <!-- version of underlying tool (PEP 440) --> - <token name="@TOOL_VERSION@">0.9.75</token> + <token name="@TOOL_VERSION@">0.9.80</token> <!-- version of this tool wrapper (integer) --> <token name="@WRAPPER_VERSION@">0</token> <!-- citation should be updated with every underlying tool version --> @@ -22,14 +22,35 @@ <include type="literal" path="common.py"/> </required_files> <command detect_errors="exit_code"><![CDATA[ - python '${__tool_directory__}/larch_lcf.py' '$prj_file' '$inputs' + mkdir plot && + #if $execution.execution=="zipped": + unzip '$execution.prj_file' && + python '${__tool_directory__}/larch_lcf.py' prj '$inputs' + #else + python '${__tool_directory__}/larch_lcf.py' '$execution.prj_file' '$inputs' + #end if ]]></command> <configfiles> <inputs name="inputs" data_style="paths"/> </configfiles> <inputs> <param name="label" type="text" optional="true" label="Main data label" help="The label to use for the main data (target of the fit). If unset, the label will be taken from the Athena project metadata."/> - <param name="prj_file" type="data" format="prj" label="Athena project to fit" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format, to be fit."/> + <conditional name="execution" > + <param name="execution" type="select" display="radio" label="Execution mode" help="Whether to execute: on individual Athena projects as parallel jobs, as one job with each project fit occurring in series, or as one job using a zipped input."> + <option value="parallel" selected="true">Parallel</option> + <option value="series">Series</option> + <option value="zipped">Zipped</option> + </param> + <when value="parallel"> + <param name="prj_file" type="data" format="prj" label="Athena project file" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. If a collection of files is provided, these will be submitted and executed in parallel."/> + </when> + <when value="series"> + <param name="prj_file" type="data" format="prj" multiple="true" label="Athena project files" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format. These will be submitted as a single job working in series, enabling transition search."/> + </when> + <when value="zipped"> + <param name="prj_file" type="data" format="zip" label="Zipped Athena outputs" help="Normalised X-ray Absorption Fine Structure (XAFS) data, in Athena project format, and zipped. These will be submitted as a single job working in series, enabling transition search."/> + </when> + </conditional> <repeat name="components" title="Fit components"> <param name="label" type="text" optional="true" label="Component label" help="The label to use for this component. If unset, the label will be taken from the Athena project metadata."/> <param name="component_file" type="data" format="prj" label="Athena project to fit" help="Reference X-ray Absorption Fine Structure (XAFS) data, in Athena project format, to be used as components in the fit."/> @@ -41,7 +62,13 @@ <expand macro="plot_limits_energy"/> </inputs> <outputs> - <data name="plot" format="png" from_work_dir="plot.png"/> + <data name="plot" format="png" from_work_dir="plot/plot.png"> + <filter>execution["execution"]=="parallel"</filter> + </data> + <collection name="plot_collection" format="png" type="list" label="Larch LCF on series data ${on_string}"> + <discover_datasets pattern="__name_and_ext__" directory="plot"/> + <filter>execution["execution"]!="parallel"</filter> + </collection> </outputs> <tests> <!-- 1 --> @@ -62,6 +89,36 @@ <has_text text="Goodness of fit (rfactor): 0.532130%"/> </assert_stdout> </test> + <!-- 2: series --> + <test expect_num_outputs="1"> + <param name="execution" value="series"/> + <param name="prj_file" value="PtSn_OCO_Abu_1_29204.5.prj"/> + <param name="component_file" value="Sn_foil_29200.prj"/> + <param name="component_file" value="SnO2_29206.prj"/> + <param name="energy_min" value="29190"/> + <param name="energy_max" value="29230"/> + <param name="x_limit_min" value="29190"/> + <param name="x_limit_max" value="29230"/> + <output_collection name="plot_collection" type="list" count="1"/> + <assert_stdout> + <has_text text="Goodness of fit (rfactor): 0.532130%"/> + </assert_stdout> + </test> + <!-- 3: zipped --> + <test expect_num_outputs="1"> + <param name="execution" value="zipped"/> + <param name="prj_file" value="lcf.zip"/> + <param name="component_file" value="Sn_foil_29200.prj"/> + <param name="component_file" value="SnO2_29206.prj"/> + <param name="energy_min" value="29190"/> + <param name="energy_max" value="29230"/> + <param name="x_limit_min" value="29190"/> + <param name="x_limit_max" value="29230"/> + <output_collection name="plot_collection" type="list" count="2"/> + <assert_stdout> + <has_text text="Goodness of fit (rfactor): 0.532130%"/> + </assert_stdout> + </test> </tests> <help><![CDATA[ Performs a Linear Combination Fit (LCF) on an Athena project, using a number of other projects as components to the fit.