view larch_select_paths.xml @ 1:7fdca938d90c draft

planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_select_paths commit 1cf6d7160497ba58fe16a51f00d088a20934eba6
author muon-spectroscopy-computational-project
date Wed, 06 Dec 2023 13:04:15 +0000
parents 2e827836f0ad
children 200534149c4d
line wrap: on
line source

<tool id="larch_select_paths" name="Larch Select Paths" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT">
    <!-- The version attribute above is assembled from the @TOOL_VERSION@ and @WRAPPER_VERSION@ tokens defined in the macros element below -->
    <description>select FEFF paths for XAFS data</description>
    <macros>
        <!-- version of underlying tool (PEP 440) -->
        <token name="@TOOL_VERSION@">0.9.71</token>
        <!-- version of this tool wrapper (integer) -->
        <token name="@WRAPPER_VERSION@">1</token>
        <!-- citation should be updated with every underlying tool version -->
        <!-- typical fields to update are version, month, year, and doi -->
        <token name="@TOOL_CITATION@">10.1088/1742-6596/430/1/012007</token>
        <!-- Per-variable parameters: "name" and "expr" are expanded in the per-path override sections, "vary" in every variable section -->
        <xml name="name">
            <param name="name" type="text" optional="true" label="Name" help="The name of the variable should be unique, and can be used in expressions for other paths. If name is set, will overwrite the default behaviour for this variable."/>
        </xml>
        <xml name="expr">
            <param name="expr" type="text" optional="true" label="Expression" help="If set, the variable will be 'Defined' by the expression. This can include other variable names, for example in order to set two paths to use the same variable."/>
        </xml>
        <xml name="vary">
            <param name="vary" type="boolean" checked="true" label="Vary" help="If True, the initial 'Guess' will be optimised in the fitting. If False, the value will be 'Set' instead and not optimised."/>
        </xml>
        <!-- Path filters shared by the "criteria" and "combinations" selection methods; all are optional -->
        <xml name="max_number">
            <param name="max_number" type="integer" min="1" optional="true" label="Maximum number of paths" help="Will select (up to) this many paths, ordered by ascending path length, subject to criteria below. If unset, will select all that meet the criteria."/>
        </xml>
        <xml name="max_path_length">
            <param name="max_path_length" type="float" min="0" optional="true" label="Maximum path length" help="Exclude paths with lengths greater than this (expressed in Angstrom). If unset, will not restrict based on path length."/>
        </xml>
        <xml name="min_amplitude_ratio">
            <param name="min_amplitude_ratio" type="float" min="0" max="100" optional="true" label="Minimum amplitude ratio (%)" help="Exclude paths with an amplitude ratio below this value (expressed as a percentage). If unset, will not restrict based on amplitude ratio."/>
        </xml>
        <xml name="max_degeneracy">
            <param name="max_degeneracy" type="integer" min="1" optional="true" label="Maximum degeneracy" help="Exclude paths with degeneracy above this value. If unset, will not restrict based on degeneracy."/>
        </xml>
    </macros>
    <!-- Person credited as the creator of this tool -->
    <creator>
        <person
            givenName="Patrick"
            familyName="Austin"
            url="https://github.com/patrick-austin"
            identifier="https://orcid.org/0000-0002-6279-7823"/>
    </creator>
    <!-- The Python script invoked by the command is the only file listed here -->
    <required_files>
        <include path="larch_select_paths.py" type="literal"/>
    </required_files>
    <!-- Creates the "sp" directory (the one scanned by the sp_collection output), then runs the script on the inputs config file -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir sp
        && python '${__tool_directory__}/larch_select_paths.py' '$inputs'
    ]]></command>
    <!-- Tool parameters are written to a config file that the command receives as $inputs; datasets are referenced by path -->
    <configfiles>
        <inputs data_style="paths" name="inputs"/>
    </configfiles>
    <inputs>
        <!-- Default GDS settings: one sub-section per EXAFS variable, each with a common/distinct switch, an initial value and a vary flag -->
        <section name="variables" title="GDS variable defaults" expanded="false"
                 help="Define default values for variables in the EXAFS equation to use for the paths selected below.">
            <section name="s02" title="S02: passive electron reduction factor" expanded="false">
                <param name="is_common" type="boolean" checked="true"
                       label="Common to all paths"
                       help="If set, a single variable 's02' will be used for all paths. Otherwise, each path has a distinct variable."/>
                <param name="value" type="float" value="1.0" min="0.0" max="1.0"
                       label="Value"
                       help="The initial value for 's02'. This is typically between 0.7 and 1.0."/>
                <expand macro="vary"/>
            </section>
            <section name="e0" title="E0: energy shift" expanded="false">
                <param name="is_common" type="boolean" checked="true"
                       label="Common to all paths"
                       help="If set, a single variable 'e0' will be used for all paths. Otherwise, each path has a distinct variable."/>
                <param name="value" type="float" value="0.0"
                       label="Value"
                       help="The initial value for 'e0'. This should be close to zero, as it represents the difference in the absorption edge positions between simulation and experiment."/>
                <expand macro="vary"/>
            </section>
            <section name="deltar" title="Delta R: change in path length" expanded="false">
                <param name="is_common" type="boolean" checked="true"
                       label="Common to all paths"
                       help="If set, a single variable 'alpha*reff' will be used for all paths (where 'reff' is the effective path length). Otherwise, each path has a distinct variable."/>
                <param name="value" type="float" value="0.0"
                       label="Value"
                       help="The initial value for 'alpha'. This should be close to zero, as it represents the difference in the FEFF path and fitted (experimental) path length."/>
                <expand macro="vary"/>
            </section>
            <section name="sigma2" title="Sigma2: mean squared displacement" expanded="false">
                <param name="is_common" type="boolean" checked="true"
                       label="Common to all paths"
                       help="If set, a single variable 'sigma2' will be used for all paths. Otherwise, each path has a distinct variable."/>
                <param name="value" type="float" value="0.003"
                       label="Value"
                       help="The initial value for 'sigma2'. This should be small, but non-zero, as atomic vibrations will result in variation in exact positions."/>
                <expand macro="vary"/>
            </section>
        </section>
        <!-- One entry per FEFF run; at least one is required (min="1") -->
        <repeat name="feff_outputs" title="Distinct FEFF outputs" min="1" help="If FEFF has been used on multiple structures, the paths from each can be merged into one input to provide to Larch Artemis. If only one entry is provided, then the zipped directory of paths will be unchanged.">
            <param name="label" type="text" optional="true" label="Label" help="Short label to use when merging different FEFF outputs to ensure they remain distinct. Not required if only providing a single output, and will default to the index in this list."/>
            <param name="paths_zip" type="data" format="zip" label="Zipped paths directory" help="Zipped directory containing the actual path files output by FEFF."/>
            <param name="paths_file" type="data" format="feff" label="Paths file" help="CSV file detailing the possible scattering paths. Note that rows with '1' in the 'select' column will be selected with default values for their parameters automatically."/>
            <!-- The chosen selection method decides which of the four parameter groups below is shown -->
            <conditional name="selection">
                <param name="selection" type="select" label="Selection method">
                    <option value="all" selected="true">All paths</option>
                    <option value="criteria">Criteria</option>
                    <option value="combinations">Combinations</option>
                    <option value="manual">Manual</option>
                </param>
                <!-- All paths: optional per-path overrides of the default variables -->
                <when value="all">
                    <repeat name="paths" title="Define path variables" help="Overwrite the default variables defined above for chosen paths.">
                        <param name="id" type="integer" value="1" min="1" label="Path ID" help="Numerical id of a path to select, this appears at the end of the label and filename in the path summary CSV."/>
                        <section name="s02" expanded="false" title="S02: passive electron reduction factor">
                            <expand macro="name"/>
                            <param name="value" type="float" value="1.0" min="0.0" max="1.0" label="Value" help="The initial value for 's02'. This is typically between 0.7 and 1.0."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                        <section name="e0" expanded="false" title="E0: energy shift">
                            <expand macro="name"/>
                            <param name="value" type="float" value="0.0" label="Value" help="The initial value for 'e0'. This should be close to zero, as it represents the difference in the absorption edge positions between simulation and experiment."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                        <section name="deltar" expanded="false" title="Delta R: change in path length">
                            <expand macro="name"/>
                            <param name="value" type="float" value="0.0" label="Value" help="The initial value for 'alpha'. This should be close to zero, as it represents the difference in the FEFF path and fitted (experimental) path length."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                        <section name="sigma2" expanded="false" title="Sigma2: mean squared displacement">
                            <expand macro="name"/>
                            <param name="value" type="float" value="0.003" label="Value" help="The initial value for 'sigma2'. This should be small, but non-zero, as atomic vibrations will result in variation in exact positions."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                    </repeat>
                </when>
                <!-- Criteria: the four optional path filters defined as macros -->
                <when value="criteria">
                    <expand macro="max_number"/>
                    <expand macro="max_path_length"/>
                    <expand macro="min_amplitude_ratio"/>
                    <expand macro="max_degeneracy"/>
                </when>
                <!-- Combinations: combination size bounds plus the same four optional path filters -->
                <when value="combinations">
                    <param name="min_combination_size" type="integer" value="1" min="1" label="Minimum combination size" help="Each combination will have at least this many paths. If fewer than this many paths match the below criteria, a single combination with all paths will be generated."/>
                    <!-- min="1" mirrors the lower bound on min_combination_size, so a zero or negative maximum is rejected in the form -->
                    <param name="max_combination_size" type="integer" min="1" optional="true" label="Maximum combination size" help="Each combination will have at most this many paths. Note this is distinct from the number of paths to consider, below. One might want to consider combinations of the 5 shortest paths (set using 'Maximum number of paths'), but have no more than 3 paths in each combination (set using this)."/>
                    <expand macro="max_number"/>
                    <expand macro="max_path_length"/>
                    <expand macro="min_amplitude_ratio"/>
                    <expand macro="max_degeneracy"/>
                </when>
                <!-- Manual: paths are picked by id, each with optional variable overrides -->
                <when value="manual">
                    <repeat name="paths" title="Select paths" help="Identify paths to use in the fitting by their id, and optionally define their variables. This will overwrite any defaults set above.">
                        <param name="id" type="integer" value="1" min="1" label="Path ID" help="Numerical id of a path to select, this appears at the end of the label and filename in the path summary CSV."/>
                        <section name="s02" expanded="false" title="S02: passive electron reduction factor">
                            <expand macro="name"/>
                            <param name="value" type="float" value="1.0" min="0.0" max="1.0" label="Value" help="The initial value for 's02'. This is typically between 0.7 and 1.0."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                        <section name="e0" expanded="false" title="E0: energy shift">
                            <expand macro="name"/>
                            <param name="value" type="float" value="0.0" label="Value" help="The initial value for 'e0'. This should be close to zero, as it represents the difference in the absorption edge positions between simulation and experiment."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                        <section name="deltar" expanded="false" title="Delta R: change in path length">
                            <expand macro="name"/>
                            <param name="value" type="float" value="0.0" label="Value" help="The initial value for 'alpha'. This should be close to zero, as it represents the difference in the FEFF path and fitted (experimental) path length."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                        <section name="sigma2" expanded="false" title="Sigma2: mean squared displacement">
                            <expand macro="name"/>
                            <param name="value" type="float" value="0.003" label="Value" help="The initial value for 'sigma2'. This should be small, but non-zero, as atomic vibrations will result in variation in exact positions."/>
                            <expand macro="expr"/>
                            <expand macro="vary"/>
                        </section>
                    </repeat>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <!-- Merged zip: only produced when more than one FEFF output is supplied -->
        <data name="merged_directories" format="zip"
              from_work_dir="merged.zip"
              label="Merged directories from ${on_string}">
            <filter><![CDATA[len(feff_outputs) > 1]]></filter>
        </data>
        <!-- GDS table: has no filter, so it is always produced -->
        <data name="gds_csv" format="gds"
              from_work_dir="gds.csv"
              label="GDS values for ${on_string}"/>
        <!-- Single selected-paths file: produced unless some FEFF output uses the "combinations" method -->
        <data name="sp_csv" format="sp"
              from_work_dir="sp.csv"
              label="Selected paths for ${on_string}">
            <filter><![CDATA[not any([f["selection"]["selection"] == "combinations" for f in feff_outputs])]]></filter>
        </data>
        <!-- Collection of selected-paths files discovered in the "sp" directory: produced when some FEFF output uses "combinations" -->
        <collection name="sp_collection" type="list" format="sp"
                    label="Selected path combinations for ${on_string}">
            <discover_datasets directory="sp" pattern="__name_and_ext__"/>
            <filter><![CDATA[any([f["selection"]["selection"] == "combinations" for f in feff_outputs])]]></filter>
        </collection>
    </outputs>
    <tests>
        <!-- Test defaults for CSV with select_all: no selection method is given, so the default "All paths" option applies; only the GDS and selected-paths CSVs are expected -->
        <test expect_num_outputs="2">
            <param name="paths_zip" value="FEFF_paths.zip"/>
            <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            <output name="gds_csv" file="gds_default.csv"/>
            <output name="sp_csv" file="sp_default.csv"/>
        </test>
        <!-- Test defaults for CSV with some selected rows: manual selection with no path entries given in the test, compared against sp_select_all_false.csv -->
        <test expect_num_outputs="2">
            <param name="paths_zip" value="FEFF_paths.zip"/>
            <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            <param name="selection" value="manual"/>
            <output name="gds_csv" file="gds_default.csv"/>
            <output name="sp_csv" file="sp_select_all_false.csv"/>
        </test>
        <!-- Test selected paths without custom GDS: manual selection of path id 3, with the GDS output still expected to match the defaults -->
        <test expect_num_outputs="2">
            <param name="paths_zip" value="FEFF_paths.zip"/>
            <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            <param name="selection" value="manual"/>
            <param name="id" value="3"/>
            <output name="gds_csv" file="gds_default.csv"/>
            <output name="sp_csv" file="sp_include_path_3.csv"/>
        </test>
        <!-- Test selected paths with custom name but no GDS entry: manual selection of path id 3 with only the sigma2 name set -->
        <!-- NOTE(review): unlike the following test, the sigma2 section here is not nested inside a repeat named "paths"; confirm it is matched to the intended per-path parameter -->
        <test expect_num_outputs="2">
            <param name="paths_zip" value="FEFF_paths.zip"/>
            <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            <param name="selection" value="manual"/>
            <param name="id" value="3"/>
            <section name="sigma2">
                <param name="name" value="custom_name"/>
            </section>
            <output name="gds_csv" file="gds_include_path_3_custom_name.csv"/>
            <output name="sp_csv" file="sp_include_path_3_custom_name.csv"/>
        </test>
        <!-- Test selected paths with custom GDS: path id 3 with sigma2 given a custom name, value 0.005, an empty expression and vary switched off -->
        <test expect_num_outputs="2">
            <param name="paths_zip" value="FEFF_paths.zip"/>
            <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            <param name="selection" value="manual"/>
            <param name="id" value="3"/>
            <repeat name="paths">
                <section name="sigma2">
                    <param name="name" value="custom_name"/>
                    <param name="value" value="0.005"/>
                    <param name="expr" value=""/>
                    <param name="vary" value="false"/>
                </section>
            </repeat>
            <output name="gds_csv" file="gds_include_path_3_custom_name_value.csv"/>
            <output name="sp_csv" file="sp_include_path_3_custom_name.csv"/>
        </test>
        <!-- Test changing default GDS values: overrides value and vary for s02, e0 and deltar in the "variables" section; sigma2 is left at its defaults -->
        <test expect_num_outputs="2">
            <section name="variables">
                <section name="s02">
                    <param name="value" value="0.1"/>
                    <param name="vary" value="false"/>
                </section>
                <section name="e0">
                    <param name="value" value="0.1"/>
                    <param name="vary" value="true"/>
                </section>
                <section name="deltar">
                    <param name="value" value="10"/>
                    <param name="vary" value="false"/>
                </section>
            </section>
            <param name="paths_zip" value="FEFF_paths.zip"/>
            <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            <param name="selection" value="manual"/>
            <output name="gds_csv" file="gds_altered_defaults.csv"/>
            <output name="sp_csv" file="sp_select_all_false.csv"/>
        </test>
        <!-- Test merging defaults: the same FEFF output is supplied twice with default settings, so a third output (the merged zip, checked by size only) is expected -->
        <test expect_num_outputs="3">
            <repeat name="feff_outputs">
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            </repeat>
            <repeat name="feff_outputs">
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
            </repeat>
            <output name="merged_directories">
                <assert_contents>
                    <has_size value="206000" delta="100"/>
                </assert_contents>
            </output>
            <output name="gds_csv" file="gds_default.csv"/>
            <output name="sp_csv" file="sp_merge_default.csv"/>
        </test>
        <!-- Test merging custom arguments: two labelled FEFF outputs ("primary" and "secondary"), each manually selecting path id 3 with a distinct custom sigma2 name -->
        <test expect_num_outputs="3">
            <repeat name="feff_outputs">
                <param name="label" value="primary"/>
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
                <conditional name="selection">
                    <param name="selection" value="manual"/>
                    <repeat name="paths">
                        <param name="id" value="3"/>
                        <section name="sigma2">
                            <param name="name" value="custom_name_1"/>
                        </section>
                    </repeat>
                </conditional>
            </repeat>
            <repeat name="feff_outputs">
                <param name="label" value="secondary"/>
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
                <conditional name="selection">
                    <param name="selection" value="manual"/>
                    <repeat name="paths">
                        <param name="id" value="3"/>
                        <section name="sigma2">
                            <param name="name" value="custom_name_2"/>
                        </section>
                    </repeat>
                </conditional>
            </repeat>
            <output name="merged_directories">
                <assert_contents>
                    <has_size value="206500" delta="100"/>
                </assert_contents>
            </output>
            <output name="gds_csv" file="gds_merge_custom.csv"/>
            <output name="sp_csv" file="sp_merge_custom.csv"/>
        </test>
        <!-- Test for criteria based selection: a single FEFF output filtered by minimum amplitude ratio (20) and maximum degeneracy (4); the other criteria are left unset -->
        <test expect_num_outputs="2">
            <repeat name="feff_outputs">
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
                <conditional name="selection">
                    <param name="selection" value="criteria"/>
                    <param name="min_amplitude_ratio" value="20"/>
                    <param name="max_degeneracy" value="4"/>
                </conditional>
            </repeat>
            <output name="gds_csv" file="gds_default.csv"/>
            <output name="sp_csv" file="sp_criteria.csv"/>
        </test>
        <!-- Test for combinations based selection: two FEFF outputs both using "combinations", so the three expected outputs are the merged zip, the GDS CSV and the sp_collection list (no single sp_csv) -->
        <test expect_num_outputs="3">
            <!-- Should result in 4 + 6 + 4 + 1 = 15 combinations -->
            <repeat name="feff_outputs">
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
                <conditional name="selection">
                    <param name="selection" value="combinations"/>
                    <param name="min_amplitude_ratio" value="20"/>
                </conditional>
            </repeat>
            <!-- Should result in 3 combinations -->
            <repeat name="feff_outputs">
                <param name="paths_zip" value="FEFF_paths.zip"/>
                <param name="paths_file" value="[CSV_summary_of_1564889.cif].csv"/>
                <conditional name="selection">
                    <param name="selection" value="combinations"/>
                    <param name="min_combination_size" value="2"/>
                    <param name="max_combination_size" value="2"/>
                    <param name="max_number" value="3"/>
                </conditional>
            </repeat>
            <output name="merged_directories">
                <assert_contents>
                    <has_size value="206000" delta="1000"/>
                </assert_contents>
            </output>
            <output name="gds_csv" file="gds_default.csv"/>
            <!-- Should get 15 * 3 = 45 combinations in total -->
            <output_collection name="sp_collection" type="list" count="45"/>
        </test>
    </tests>
    <help><![CDATA[
        Select FEFF scattering paths to use in the fitting process.

        If paths from multiple different FEFF outputs are of interest (for example, corresponding to different structural files), then additional FEFF outputs can be added.
        Each requires its own zip directory and path summary CSV, and any custom GDS parameters will be uniquely labelled with the label provided or a numerical default.
        In this case the zipped directories will also be merged into one output containing all paths and associated files.

        If only one set of FEFF outputs is provided, labelling is not required and the existing zip file can be used as the input to Larch Artemis.

        If the selection method "All paths" is chosen, or an individual row in the CSV has ``select`` set to ``1``, the corresponding paths will be used automatically with the default values defined.
        This can be useful when many paths are needed and using the UI would be cumbersome.

        It is also possible to manually check and further modify the GDS and SP output CSVs to ensure the values are suitable, as an alternative to re-running this tool.
    ]]></help>
    <citations>
        <!-- DOI supplied by the @TOOL_CITATION@ token defined in the macros element -->
        <citation type="doi">@TOOL_CITATION@</citation>
        <!-- NOTE(review): second, hard-coded DOI; SOURCE does not say which publication it refers to, so confirm it is still the intended reference -->
        <citation type="doi">10.1107/S0909049505012719</citation>
    </citations>
</tool>