changeset 0:aa9cb2b42741 draft default tip

planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_criteria_report commit 5be486890442dedfb327289d597e1c8110240735
author muon-spectroscopy-computational-project
date Tue, 14 Nov 2023 15:34:55 +0000
parents
children
files larch_criteria_report.py larch_criteria_report.xml test-data/0.txt test-data/1.txt test-data/artemis_outputs.zip test-data/criteria_report.csv test-data/criteria_report_alpha.csv test-data/criteria_report_zipped.csv
diffstat 8 files changed, 306 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/larch_criteria_report.py	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,103 @@
+import csv
+import json
+import os
+import sys
+from typing import Iterable
+
+import matplotlib.pyplot as plt
+
+import numpy as np
+
+
+# Save a line plot of one criterion to ``plots/<variable>.png``.
+#
+# variable: column name; surrounding whitespace is stripped before it is used
+#     for the file name and the y-axis label.
+# column: the values to plot, drawn against their position in the sequence.
+#     ``len(column)`` is called below, so a sized sequence (e.g. a list) is
+#     needed even though the annotation says ``Iterable``.
+#
+# The ``plots`` directory is not created here and must already exist.
+# NOTE(review): the figure opened here is never closed in this function, so
+# figures presumably accumulate across calls - confirm whether a
+# ``plt.close()`` after saving is wanted.
+def plot(variable: str, column: Iterable[float]):
+    variable_stripped = variable.strip()
+    path = f"plots/{variable_stripped}.png"
+    plt.figure(figsize=(8, 4))
+    plt.plot(column)
+    plt.xlim((0, len(column)))
+    # Read back the automatically chosen x ticks and re-apply them cast to
+    # int; presumably so the "Dataset number" axis shows whole numbers only.
+    ticks, _ = plt.xticks()
+    plt.xticks(np.array(ticks).astype("int"))
+    plt.xlabel("Dataset number")
+    plt.ylabel(variable_stripped)
+    plt.savefig(path, format="png")
+
+
+# Read a criteria CSV and return it column-wise.
+#
+# filepath: path of a CSV file whose first row holds the column headers.
+#
+# Returns one list per header cell: element 0 is the header text exactly as
+# read (not stripped; ``plot`` strips it later) and the remaining elements
+# are that column's values converted with ``float``.
+#
+# Every cell after the header row goes through ``float``, so a non-numeric
+# cell raises ValueError, and a row with more cells than the header raises
+# IndexError on ``columns[i]``.
+def load(filepath: str) -> "list[list[str|float]]":
+    with open(filepath) as f:
+        reader = csv.reader(f)
+        header = next(reader)
+        # Seed each column with its header so name and values travel together.
+        columns = [[h] for h in header]
+        for row in reader:
+            for i, value in enumerate(row):
+                columns[i].append(float(value))
+
+    return columns
+
+
+# Collect the requested criteria from several report files, write them to
+# ``criteria_report.csv`` in the working directory and return them by column.
+#
+# input_data: either the path of a directory (every entry in it is parsed)
+#     or a comma-separated list of file paths.
+#
+# Returns a dict mapping each requested variable name to the list of floats
+# that ``parse_row`` appended for it.
+#
+# NOTE(review): the criteria come from the module-level ``input_values``,
+# which is only assigned in the ``__main__`` section, not from a parameter;
+# calling this function from another module would raise NameError.
+def parse_reports(input_data: str) -> "dict[str, list[float]]":
+    # Need to scrape variables from individual files
+    report_criteria = input_values["format"]["report_criteria"]
+    # Keyed by variable name, so a name requested twice yields one column.
+    data = {c["variable"]: [] for c in report_criteria}
+    headers = list(data.keys())
+    with open("criteria_report.csv", "w") as f_out:
+        writer = csv.writer(f_out)
+        # Header cells are right-aligned to 12 characters, the same width
+        # parse_row uses for the value cells.
+        writer.writerow([f"{h:>12s}" for h in headers])
+
+        if os.path.isdir(input_data):
+            input_files = [
+                os.path.join(input_data, f) for f in os.listdir(input_data)
+            ]
+            # Plain string sort of the paths: names are ordered
+            # lexicographically, so e.g. "10" sorts before "2".
+            input_files.sort()
+        else:
+            input_files = input_data.split(",")
+
+        for input_file in input_files:
+            # parse_row also appends the parsed floats to ``data`` in place.
+            row = parse_row(data, headers, input_file)
+            writer.writerow(row)
+
+    return data
+
+
+# Scan one report file for the requested variables and build its CSV row.
+#
+# data: per-variable value lists; mutated in place, the float of every
+#     matched value is appended to ``data[variable]``.
+# headers: variable names to look for; also fixes the order of the row.
+# input_file: path of the report to read.
+#
+# Returns the row as strings right-aligned to 12 characters, as soon as
+# every header has a value. Raises RuntimeError if the end of the file is
+# reached first.
+#
+# A line matches when its first whitespace-separated word is a header; the
+# third word is taken as the value (``name = value ...``). Only the first
+# word is compared, so multi-word names such as "reduced chi_square" can
+# never match.
+# NOTE(review): a variable matched on more than one line before the row is
+# complete overwrites its row cell but is appended to ``data`` each time
+# (``n_independent`` occurs twice in the bundled test reports), which would
+# leave the columns in ``data`` with different lengths - confirm intent.
+# NOTE(review): only IndexError is caught; a matched line whose third word
+# is not numeric raises ValueError from ``float``.
+def parse_row(
+    data: "dict[str, list[float]]", headers: "list[str]", input_file: str
+) -> "list[str]":
+    row = [None] * len(headers)
+    with open(input_file) as f_in:
+        line = f_in.readline()
+        while line:
+            words = line.split()
+            try:
+                variable = words[0]
+                value = words[2]
+                if variable in headers:
+                    row[headers.index(variable)] = f"{value:>12s}"
+                    data[variable].append(float(value))
+                    # Cells are None until filled and non-empty strings
+                    # afterwards, so all(row) means every header was found.
+                    if all(row):
+                        return row
+            except IndexError:
+                # Lines with fewer than three words (blank lines, section
+                # titles, separators) cannot hold "name = value", so they
+                # are skipped.
+                pass
+
+            line = f_in.readline()
+
+    # Only reach here if we run out of lines without finding a value for each
+    # variable
+    raise RuntimeError(
+        f"One or more criteria missing, was looking for {headers} but found "
+        f"{row}"
+    )
+
+
+# Script entry point.
+#   argv[1]: the input data - a CSV path, a directory of reports, or a
+#            comma-separated list of report paths.
+#   argv[2]: path of a JSON file holding the tool's input values.
+if __name__ == "__main__":
+    input_data = sys.argv[1]
+    # Assigned at module level on purpose: parse_reports reads
+    # ``input_values`` as a global.
+    # NOTE(review): the handle returned by open() is not explicitly closed.
+    input_values = json.load(open(sys.argv[2], "r", encoding="utf-8"))
+
+    # "report_criteria" present: scrape the criteria from individual reports
+    # (this also writes criteria_report.csv). Absent: the input is already a
+    # CSV of criteria and is only plotted.
+    if "report_criteria" in input_values["format"]:
+        data = parse_reports(input_data)
+        for variable, column in data.items():
+            plot(variable, column)
+    else:
+        columns = load(input_data)
+        for column in columns:
+            # column[0] is the header text, the rest are the values.
+            plot(column[0], column[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/larch_criteria_report.xml	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,98 @@
+<tool id="larch_criteria_report" name="Larch Criteria Report" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT">
+    <description>generate reports on Artemis fitting from XAFS data</description>
+    <macros>
+        <!-- version of underlying tool (PEP 440) -->
+        <token name="@TOOL_VERSION@">3.5.2</token>
+        <!-- version of this tool wrapper (integer) -->
+        <token name="@WRAPPER_VERSION@">0</token>
+        <!-- citation should be updated with every underlying tool version -->
+        <!-- typical fields to update are version, month, year, and doi -->
+        <token name="@TOOL_CITATION@">10.1109/MCSE.2007.55</token>
+    </macros>
+    <creator>
+        <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/>
+    </creator>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">matplotlib</requirement>
+        <requirement type="package" version="3.0">zip</requirement>
+    </requirements>
+    <required_files>
+        <include type="literal" path="larch_criteria_report.py"/>
+    </required_files>
+    <command detect_errors="exit_code"><![CDATA[
+        mkdir plots
+        && input_file='$format.input_data'
+        #if $format.format == 'zipped':
+            && unzip \$input_file
+            && input_file=report
+        #end if
+        && python '${__tool_directory__}/larch_criteria_report.py' \$input_file '$inputs'
+    ]]></command>
+    <configfiles>
+        <inputs name="inputs"/>
+    </configfiles>
+    <inputs>
+        <conditional name="format" >
+            <param name="format" type="select" display="radio" label="Input format" help="Format of input file(s).">
+                <option value="csv" selected="true">Single CSV</option>
+                <option value="collection">Multiple FEFFIT reports</option>
+                <option value="zipped">Zipped FEFFIT reports</option>
+            </param>
+            <when value="csv">
+                <param name="input_data" type="data" format="csv" label="Criteria report" help="Report summarising logged criteria from an execution of Larch Artemis."/>
+            </when>
+            <when value="collection">
+                <param name="input_data" type="data" format="feffit" multiple="true" label="Fit reports" help="Individual fit reports from independent executions of Larch Artemis."/>
+                <repeat name="report_criteria" title="Report Criteria">
+                    <param name="variable" type="text" label="Variable" help="Name of the variable that appears in the statistics or variables sections of the report."/>
+                </repeat>
+            </when>
+            <when value="zipped">
+                <param name="input_data" type="data" format="zip" label="Zipped Artemis outputs" help="Zipped fit reports from series executions of Larch Artemis."/>
+                <repeat name="report_criteria" title="Report Criteria">
+                    <param name="variable" type="text" label="Variable" help="Name of the variable that appears in the statistics or variables sections of the report."/>
+                </repeat>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="criteria_report" format="csv" from_work_dir="criteria_report.csv" label="Tabulated criteria from ${on_string}">
+            <filter>format["format"]!="csv"</filter>
+        </data>
+        <collection name="plots" format="png" type="list" label="Plots of criteria from ${on_string}">
+            <discover_datasets pattern="__name_and_ext__" directory="plots"/>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- From CSV -->
+        <test expect_num_outputs="1">
+            <param name="input_data" value="criteria_report.csv"/>
+            <output_collection name="plots" type="list" count="2"/>
+        </test>
+        <!-- From TXT -->
+        <test expect_num_outputs="2">
+            <param name="format" value="collection"/>
+            <param name="input_data" value="0.txt,1.txt"/>
+            <param name="variable" value="alpha"/>
+            <output name="criteria_report" file="criteria_report_alpha.csv"/>
+            <output_collection name="plots" type="list" count="1"/>
+        </test>
+        <!-- From ZIP -->
+        <test expect_num_outputs="2">
+            <param name="format" value="zipped"/>
+            <param name="input_data" value="artemis_outputs.zip"/>
+            <param name="variable" value="alpha"/>
+            <output name="criteria_report" file="criteria_report_zipped.csv"/>
+            <output_collection name="plots" type="list" count="1"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Plot criteria of interest from multiple Artemis fittings, for example where each dataset represents a sequential point in time.
+
+        Accepts a CSV summary from series execution (with criteria already selected), a zipped directory of fit reports, or multiple datasets/a collection of fit reports from parallel execution.
+        In the latter two cases, the criteria of interest need to be specified.
+    ]]></help>
+    <citations>
+        <citation type="doi">@TOOL_CITATION@</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/0.txt	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,48 @@
+=================== FEFFIT RESULTS ====================
+[[Statistics]]
+   nvarys, npts       =  4, 650
+   n_independent      =  128.324
+   chi_square         =  388.409614
+   reduced chi_square =  3.12417358
+   r-factor           =  0.91706729
+   Akaike info crit   =  150.119104
+   Bayesian info crit =  161.537336
+ 
+[[Data]]
+   fit space          = 'r'
+   r-range            = 0.000, 10.000
+   k-range            = 0.000, 20.000
+   k window, dk       = 'kaiser', 4.000
+   paths used in fit  = ['feff/feff0001.dat']
+   k-weight           = 2
+   epsilon_k          = Array(mean=0.00569908, std=5.9337e-04)
+   epsilon_r          = 0.28759079
+   n_independent      = 128.324
+ 
+[[Variables]]
+   alpha          = -0.05372917 +/- 0.01466662   (init=  1.0000e-07)
+   amp            =  1.25665640 +/- 1.15241865   (init=  1.00000000)
+   enot           = -42.2811582 +/- 23.5707953   (init=  1.0000e-07)
+   scrh1          = -0.00395100 +/- 0.00234364   (init=  0.00300000)
+ 
+[[Correlations]]    (unreported correlations are <  0.100)
+   amp, scrh1           =  0.944
+   alpha, enot          =  0.916
+   enot, scrh1          = -0.151
+   amp, enot            = -0.111
+ 
+[[Paths]]
+ = Path 'C.Rh.1' = Rh K Edge
+    feffdat file = feff/feff0001.dat, from feff run 'feff'
+    geometry  atom      x        y        z      ipot
+              Rh       0.0000,  0.0000,  0.0000  0 (absorber)
+               C       0.7410,  0.2885,  1.7419  1
+     reff   =  1.91490000
+     degen  =  1.00000000
+     n*s02  =  1.25665640 +/- 1.15241865  := 'amp'
+     e0     = -42.2811582 +/- 23.5707953  := 'enot'
+     r      =  1.81201400 +/- 0.02808511  := 'reff + alpha*reff'
+     deltar = -0.10288600 +/- 0.02808511  := 'alpha*reff'
+     sigma2 = -0.00395100 +/- 0.00234364  := 'scrh1'
+
+=======================================================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.txt	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,48 @@
+=================== FEFFIT RESULTS ====================
+[[Statistics]]
+   nvarys, npts       =  4, 650
+   n_independent      =  128.324
+   chi_square         =  2051.40364
+   reduced chi_square =  16.5004697
+   r-factor           =  0.98287213
+   Akaike info crit   =  363.678274
+   Bayesian info crit =  375.096506
+ 
+[[Data]]
+   fit space          = 'r'
+   r-range            = 0.000, 10.000
+   k-range            = 0.000, 20.000
+   k window, dk       = 'kaiser', 4.000
+   paths used in fit  = ['feff/feff0001.dat']
+   k-weight           = 2
+   epsilon_k          = Array(mean=0.00212337, std=5.2471e-04)
+   epsilon_r          = 0.10715072
+   n_independent      = 128.324
+ 
+[[Variables]]
+   alpha          = -0.04411765 +/- 0.03125420   (init=  1.0000e-07)
+   amp            =  0.84987801 +/- 1.49776699   (init=  1.00000000)
+   enot           = -44.0066655 +/- 42.3148172   (init=  1.0000e-07)
+   scrh1          = -0.00232543 +/- 0.00518279   (init=  0.00300000)
+ 
+[[Correlations]]    (unreported correlations are <  0.100)
+   amp, scrh1           =  0.922
+   alpha, enot          =  0.890
+   enot, scrh1          = -0.121
+   amp, enot            = -0.119
+ 
+[[Paths]]
+ = Path 'C.Rh.1' = Rh K Edge
+    feffdat file = feff/feff0001.dat, from feff run 'feff'
+    geometry  atom      x        y        z      ipot
+              Rh       0.0000,  0.0000,  0.0000  0 (absorber)
+               C       0.7410,  0.2885,  1.7419  1
+     reff   =  1.91490000
+     degen  =  1.00000000
+     n*s02  =  0.84987801 +/- 1.49776699  := 'amp'
+     e0     = -44.0066655 +/- 42.3148172  := 'enot'
+     r      =  1.83041912 +/- 0.05984867  := 'reff + alpha*reff'
+     deltar = -0.08448088 +/- 0.05984867  := 'alpha*reff'
+     sigma2 = -0.00232543 +/- 0.00518279  := 'scrh1'
+
+=======================================================
\ No newline at end of file
Binary file test-data/artemis_outputs.zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/criteria_report.csv	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,3 @@
+       alpha,    r-factor
+ -0.05372917,  0.91706729
+ -0.04411765,  0.98287213
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/criteria_report_alpha.csv	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,3 @@
+       alpha
+ -0.05372917
+ -0.04411765
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/criteria_report_zipped.csv	Tue Nov 14 15:34:55 2023 +0000
@@ -0,0 +1,3 @@
+       alpha
+ -0.01985413
+ -0.02473746