Mercurial > repos > muon-spectroscopy-computational-project > larch_criteria_report
changeset 0:aa9cb2b42741 draft default tip
planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_criteria_report commit 5be486890442dedfb327289d597e1c8110240735
author | muon-spectroscopy-computational-project |
---|---|
date | Tue, 14 Nov 2023 15:34:55 +0000 |
parents | |
children | |
files | larch_criteria_report.py larch_criteria_report.xml test-data/0.txt test-data/1.txt test-data/artemis_outputs.zip test-data/criteria_report.csv test-data/criteria_report_alpha.csv test-data/criteria_report_zipped.csv |
diffstat | 8 files changed, 306 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Tabulate and plot criteria from Larch Artemis (FEFFIT) fit reports.

Usage::

    python larch_criteria_report.py <input_data> <inputs.json>

``input_data`` is either a single CSV file of already tabulated criteria,
a directory of fit reports, or a comma separated list of fit report paths.
``inputs.json`` holds the tool parameters; when its ``"format"`` entry
contains ``"report_criteria"`` the fit reports are scraped into
``criteria_report.csv``, otherwise ``input_data`` is read as CSV. In both
cases one PNG per criterion is written to the ``plots`` directory, which
must already exist.
"""
import csv
import json
import os
import sys
from typing import Iterable

import matplotlib.pyplot as plt

import numpy as np


def plot(variable: str, column: Iterable[float]):
    """Plot one criterion against dataset number and save it as a PNG.

    :param variable: Name of the criterion. Surrounding whitespace is
        stripped before it is used as the y-axis label and file name.
    :param column: Values of the criterion, one per dataset, in order.

    The image is written to ``plots/<variable>.png``.
    """
    variable_stripped = variable.strip()
    path = f"plots/{variable_stripped}.png"
    # Materialise once: the argument is only promised to be iterable, but
    # its length is needed for the x-axis limit.
    values = list(column)
    figure = plt.figure(figsize=(8, 4))
    try:
        plt.plot(values)
        plt.xlim((0, len(values)))
        # Dataset numbers are integers, so force integer tick labels.
        ticks, _ = plt.xticks()
        plt.xticks(np.array(ticks).astype("int"))
        plt.xlabel("Dataset number")
        plt.ylabel(variable_stripped)
        plt.savefig(path, format="png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # one figure is created per criterion, so release it here.
        plt.close(figure)


def load(filepath: str) -> "list[list[str|float]]":
    """Read a criteria CSV into columns.

    :param filepath: Path of a CSV file whose first row is the header and
        whose remaining rows hold numeric values.
    :return: One list per column; element 0 is the header text exactly as
        it appears in the file, the remaining elements are floats.
    :raises ValueError: If a value cannot be converted to ``float``.
    :raises IndexError: If a row has more fields than the header.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(filepath, newline="") as f:
        reader = csv.reader(f)
        header = next(reader)
        columns = [[h] for h in header]
        for row in reader:
            for i, value in enumerate(row):
                columns[i].append(float(value))

    return columns


def parse_reports(
    input_data: str, report_criteria=None
) -> "dict[str, list[float]]":
    """Scrape criteria from fit reports and write ``criteria_report.csv``.

    :param input_data: Either a directory containing fit reports or a
        comma separated list of fit report paths.
    :param report_criteria: List of dicts, each with a ``"variable"`` key
        naming a criterion to extract. When omitted, the module level
        ``input_values["format"]["report_criteria"]`` is used, as set up
        when this file is run as a script.
    :return: Mapping of criterion name to its values, one per report.
    :raises RuntimeError: If a report lacks one of the criteria.
    """
    if report_criteria is None:
        # Backward compatible fallback on the global set by the script.
        report_criteria = input_values["format"]["report_criteria"]

    data = {c["variable"]: [] for c in report_criteria}
    headers = list(data)
    with open("criteria_report.csv", "w", newline="") as f_out:
        writer = csv.writer(f_out)
        writer.writerow([f"{h:>12s}" for h in headers])

        if os.path.isdir(input_data):
            # NOTE(review): plain string sort, so "10.txt" sorts before
            # "2.txt" - confirm the report names are zero padded.
            input_files = sorted(
                os.path.join(input_data, f) for f in os.listdir(input_data)
            )
        else:
            input_files = input_data.split(",")

        for input_file in input_files:
            writer.writerow(parse_row(data, headers, input_file))

    return data


def parse_row(
    data: "dict[str, list[float]]", headers: "list[str]", input_file: str
) -> "list[str]":
    """Extract one value per criterion from a single fit report.

    A line is a candidate when its first whitespace separated word is one
    of ``headers``; the third word is then taken as the value (lines look
    like ``alpha = -0.05372917 +/- ...``). Reading stops as soon as every
    criterion has a value. If a criterion occurs more than once before
    that point, the most recent occurrence is the one reported.

    :param data: Mapping of criterion name to accumulated values. Exactly
        one value per criterion is appended, and only if the whole row
        was found, so all columns keep the same length.
    :param headers: Names of the criteria, in output column order.
    :param input_file: Path of the fit report to read.
    :return: The values as right-aligned, 12 character wide strings, in
        the order of ``headers``.
    :raises RuntimeError: If the file ends before every criterion has
        been found.
    :raises ValueError: If a matched value is not a valid float.
    """
    formatted = {}
    numeric = {}
    with open(input_file) as f_in:
        for line in f_in:
            words = line.split()
            # Not all lines will have potential variables/values
            if len(words) < 3:
                continue

            variable, value = words[0], words[2]
            if variable not in headers:
                continue

            numeric[variable] = float(value)
            formatted[variable] = f"{value:>12s}"
            if len(formatted) == len(headers):
                # Commit to the shared columns only once the row is
                # complete, so repeated names cannot add extra entries.
                for name in headers:
                    data[name].append(numeric[name])
                return [formatted[name] for name in headers]

    # Only reach here if we run out of lines without finding a value for each
    # variable
    row = [formatted.get(name) for name in headers]
    raise RuntimeError(
        f"One or more criteria missing, was looking for {headers} but found "
        f"{row}"
    )


if __name__ == "__main__":
    input_data = sys.argv[1]
    with open(sys.argv[2], "r", encoding="utf-8") as f_json:
        input_values = json.load(f_json)

    if "report_criteria" in input_values["format"]:
        data = parse_reports(
            input_data, input_values["format"]["report_criteria"]
        )
        for variable, column in data.items():
            plot(variable, column)
    else:
        columns = load(input_data)
        for column in columns:
            plot(column[0], column[1:])
<!-- Galaxy tool wrapper for larch_criteria_report.py: tabulates and plots criteria from Larch Artemis fit reports. -->
<tool id="larch_criteria_report" name="Larch Criteria Report" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT">
    <description>generate reports on Artemis fitting from XAFS data</description>
    <macros>
        <!-- version of underlying tool (PEP 440) -->
        <token name="@TOOL_VERSION@">3.5.2</token>
        <!-- version of this tool wrapper (integer) -->
        <token name="@WRAPPER_VERSION@">0</token>
        <!-- citation should be updated with every underlying tool version -->
        <!-- typical fields to update are version, month, year, and doi -->
        <token name="@TOOL_CITATION@">10.1109/MCSE.2007.55</token>
    </macros>
    <creator>
        <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/>
    </creator>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">matplotlib</requirement>
        <!-- NOTE(review): the command below calls `unzip`; confirm that this package provides it in the tool environment -->
        <requirement type="package" version="3.0">zip</requirement>
    </requirements>
    <required_files>
        <include type="literal" path="larch_criteria_report.py"/>
    </required_files>
    <!--
        The script writes one PNG per criterion into ./plots, so the directory is created first.
        For zipped input the archive is extracted and the script is pointed at the extracted
        "report" directory instead of the archive itself.
        The shell variable is quoted on expansion so that paths containing whitespace survive word splitting.
    -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir plots
        && input_file='$format.input_data'
        #if $format.format == 'zipped':
        && unzip "\$input_file"
        && input_file=report
        #end if
        && python '${__tool_directory__}/larch_criteria_report.py' "\$input_file" '$inputs'
    ]]></command>
    <configfiles>
        <!-- Tool parameters serialised to JSON; passed to the script as its second argument -->
        <inputs name="inputs"/>
    </configfiles>
    <inputs>
        <conditional name="format" >
            <param name="format" type="select" display="radio" label="Input format" help="Format of input file(s).">
                <option value="csv" selected="true">Single CSV</option>
                <option value="collection">Multiple FEFFIT reports</option>
                <option value="zipped">Zipped FEFFIT reports</option>
            </param>
            <when value="csv">
                <param name="input_data" type="data" format="csv" label="Criteria report" help="Report summarising logged criteria from an execution of Larch Artemis."/>
            </when>
            <when value="collection">
                <param name="input_data" type="data" format="feffit" multiple="true" label="Fit reports" help="Individual fit reports from independent executions of Larch Artemis."/>
                <repeat name="report_criteria" title="Report Criteria">
                    <param name="variable" type="text" label="Variable" help="Name of the variable that appears in the statistics or variables sections of the report."/>
                </repeat>
            </when>
            <when value="zipped">
                <param name="input_data" type="data" format="zip" label="Zipped Artemis outputs" help="Zipped fit reports from series executions of Larch Artemis."/>
                <repeat name="report_criteria" title="Report Criteria">
                    <param name="variable" type="text" label="Variable" help="Name of the variable that appears in the statistics or variables sections of the report."/>
                </repeat>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The tabulated CSV is only produced when the input was not already a CSV -->
        <data name="criteria_report" format="csv" from_work_dir="criteria_report.csv" label="Tabulated criteria from ${on_string}">
            <filter>format["format"]!="csv"</filter>
        </data>
        <collection name="plots" format="png" type="list" label="Plots of criteria from ${on_string}">
            <discover_datasets pattern="__name_and_ext__" directory="plots"/>
        </collection>
    </outputs>
    <tests>
        <!-- From CSV -->
        <test expect_num_outputs="1">
            <param name="input_data" value="criteria_report.csv"/>
            <output_collection name="plots" type="list" count="2"/>
        </test>
        <!-- From TXT -->
        <test expect_num_outputs="2">
            <param name="format" value="collection"/>
            <param name="input_data" value="0.txt,1.txt"/>
            <param name="variable" value="alpha"/>
            <output name="criteria_report" file="criteria_report_alpha.csv"/>
            <output_collection name="plots" type="list" count="1"/>
        </test>
        <!-- From ZIP -->
        <test expect_num_outputs="2">
            <param name="format" value="zipped"/>
            <param name="input_data" value="artemis_outputs.zip"/>
            <param name="variable" value="alpha"/>
            <output name="criteria_report" file="criteria_report_zipped.csv"/>
            <output_collection name="plots" type="list" count="1"/>
        </test>
    </tests>
    <help><![CDATA[
        Plot criteria of interest from multiple Artemis fittings, for example where each dataset represents a sequential point in time.

        Either accepts a CSV summary from series execution (with criteria already selected), a zipped directory, or multiple datasets/a collection of fit reports from parallel execution.
        In the latter cases, the criteria of interest need to be specified.
    ]]></help>
    <citations>
        <citation type="doi">@TOOL_CITATION@</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/0.txt Tue Nov 14 15:34:55 2023 +0000 @@ -0,0 +1,48 @@ +=================== FEFFIT RESULTS ==================== +[[Statistics]] + nvarys, npts = 4, 650 + n_independent = 128.324 + chi_square = 388.409614 + reduced chi_square = 3.12417358 + r-factor = 0.91706729 + Akaike info crit = 150.119104 + Bayesian info crit = 161.537336 + +[[Data]] + fit space = 'r' + r-range = 0.000, 10.000 + k-range = 0.000, 20.000 + k window, dk = 'kaiser', 4.000 + paths used in fit = ['feff/feff0001.dat'] + k-weight = 2 + epsilon_k = Array(mean=0.00569908, std=5.9337e-04) + epsilon_r = 0.28759079 + n_independent = 128.324 + +[[Variables]] + alpha = -0.05372917 +/- 0.01466662 (init= 1.0000e-07) + amp = 1.25665640 +/- 1.15241865 (init= 1.00000000) + enot = -42.2811582 +/- 23.5707953 (init= 1.0000e-07) + scrh1 = -0.00395100 +/- 0.00234364 (init= 0.00300000) + +[[Correlations]] (unreported correlations are < 0.100) + amp, scrh1 = 0.944 + alpha, enot = 0.916 + enot, scrh1 = -0.151 + amp, enot = -0.111 + +[[Paths]] + = Path 'C.Rh.1' = Rh K Edge + feffdat file = feff/feff0001.dat, from feff run 'feff' + geometry atom x y z ipot + Rh 0.0000, 0.0000, 0.0000 0 (absorber) + C 0.7410, 0.2885, 1.7419 1 + reff = 1.91490000 + degen = 1.00000000 + n*s02 = 1.25665640 +/- 1.15241865 := 'amp' + e0 = -42.2811582 +/- 23.5707953 := 'enot' + r = 1.81201400 +/- 0.02808511 := 'reff + alpha*reff' + deltar = -0.10288600 +/- 0.02808511 := 'alpha*reff' + sigma2 = -0.00395100 +/- 0.00234364 := 'scrh1' + +======================================================= \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.txt Tue Nov 14 15:34:55 2023 +0000 @@ -0,0 +1,48 @@ +=================== FEFFIT RESULTS ==================== +[[Statistics]] + nvarys, npts = 4, 650 + n_independent = 128.324 + chi_square = 2051.40364 + reduced chi_square = 16.5004697 + r-factor = 0.98287213 + Akaike info crit = 363.678274 + Bayesian info crit = 375.096506 + +[[Data]] + fit space = 'r' + r-range = 0.000, 10.000 + k-range = 0.000, 20.000 + k window, dk = 'kaiser', 4.000 + paths used in fit = ['feff/feff0001.dat'] + k-weight = 2 + epsilon_k = Array(mean=0.00212337, std=5.2471e-04) + epsilon_r = 0.10715072 + n_independent = 128.324 + +[[Variables]] + alpha = -0.04411765 +/- 0.03125420 (init= 1.0000e-07) + amp = 0.84987801 +/- 1.49776699 (init= 1.00000000) + enot = -44.0066655 +/- 42.3148172 (init= 1.0000e-07) + scrh1 = -0.00232543 +/- 0.00518279 (init= 0.00300000) + +[[Correlations]] (unreported correlations are < 0.100) + amp, scrh1 = 0.922 + alpha, enot = 0.890 + enot, scrh1 = -0.121 + amp, enot = -0.119 + +[[Paths]] + = Path 'C.Rh.1' = Rh K Edge + feffdat file = feff/feff0001.dat, from feff run 'feff' + geometry atom x y z ipot + Rh 0.0000, 0.0000, 0.0000 0 (absorber) + C 0.7410, 0.2885, 1.7419 1 + reff = 1.91490000 + degen = 1.00000000 + n*s02 = 0.84987801 +/- 1.49776699 := 'amp' + e0 = -44.0066655 +/- 42.3148172 := 'enot' + r = 1.83041912 +/- 0.05984867 := 'reff + alpha*reff' + deltar = -0.08448088 +/- 0.05984867 := 'alpha*reff' + sigma2 = -0.00232543 +/- 0.00518279 := 'scrh1' + +======================================================= \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/criteria_report.csv Tue Nov 14 15:34:55 2023 +0000 @@ -0,0 +1,3 @@ + alpha, r-factor + -0.05372917, 0.91706729 + -0.04411765, 0.98287213