<!-- Source: Mercurial > repos > galaxyp > openms_mapalignerspectrum -->

<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [Map Alignment]-->
<tool id="MapAlignerSpectrum" name="MapAlignerSpectrum" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <!-- One-line summary of what the tool does. -->
  <description>Corrects retention time distortions between maps by spectrum alignment</description>
  <macros>
    <!-- @EXECUTABLE@ is substituted in the command section and in the stdout assertion of the test. -->
    <token name="@EXECUTABLE@">MapAlignerSpectrum</token>
    <!-- Shared definitions. Presumably supplies the macros expanded in this file (requirements, stdio,
         adv_opts_macro, list_string_san, references) and the remaining @...@ tokens; verify against macros.xml. -->
    <import>macros.xml</import>
  </macros>
  <!-- Both macros below come from the import above; their content is not visible in this file. -->
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <!-- Cheetah template for the job command line. It
       1. copies the input dataset(s) into the in_cond.in directory under sanitized names,
       2. creates one numbered sub-directory per input for each requested collection output,
       3. writes the default CTD file, fills it via fill_ctd.py and runs @EXECUTABLE@ with it,
       4. strips the file extension from the produced files and moves the CTD file if requested.
       NOTE(review): the out and trafo_out branches enumerate the in parameter, which is a single
       dataset when in_select is "yes"; confirm that batch mode works together with these outputs. -->
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
mkdir in_cond.in &&
#if $in_cond.in_select == "no"
mkdir ${' '.join(["'in_cond.in/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
${' '.join(["cp '%s' 'in_cond.in/%s/%s.%s' && " % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
cp '$in_cond.in' 'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)' &&
#end if
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir out &&
  mkdir ${' '.join(["'out/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir trafo_out &&
  mkdir ${' '.join(["'trafo_out/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
#end if

## Main program call

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in
#if $in_cond.in_select == "no"
${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)'
#end if
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -out
  ${' '.join(["'out/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext("mzml")) for i, f in enumerate($in_cond.in) if f])}
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -trafo_out
  ${' '.join(["'trafo_out/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext("trafoxml")) for i, f in enumerate($in_cond.in) if f])}
#end if
## Capture the tool output in the stdout dataset when no optional output is selected.
## The string form of an empty selection is assumed to be "None", matching the
## "OPTIONAL_OUTPUTS is None" filter of the stdout output. The former test on the length of
## the split list could never be true because split always returns at least one element.
#if str($OPTIONAL_OUTPUTS) == "None"
  | tee '$stdout'
#end if

## Postprocessing
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  ${' '.join(["&& mv -n 'out/%(bn)s/%(id)s.%(gext)s' 'out/%(bn)s/%(id)s'"%{"bn": i, "id": re.sub('[^\w\-_]', '_', f.element_identifier), "gext": $gxy2omsext("mzml")} for i, f in enumerate($in_cond.in) if f])}
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  ${' '.join(["&& mv -n 'trafo_out/%(bn)s/%(id)s.%(gext)s' 'trafo_out/%(bn)s/%(id)s'"%{"bn": i, "id": re.sub('[^\w\-_]', '_', f.element_identifier), "gext": $gxy2omsext("trafoxml")} for i, f in enumerate($in_cond.in) if f])}
#end if
## Same membership test as for the other flags, for consistency.
#if "ctd_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <configfiles>
    <!-- Referenced as '$args_json' in the command section, where it is handed to fill_ctd.py;
         presumably a JSON rendering of the tool inputs: confirm against the Galaxy documentation. -->
    <inputs name="args_json" data_style="paths"/>
    <!-- Fixed settings handed to fill_ctd.py as '$hardcoded_json'. The dollar sign is backslash-escaped,
         presumably so that the template engine leaves the GALAXY_SLOTS shell expansion untouched: confirm. -->
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <!-- Input selection: "no" offers a multi-dataset input handled by one job,
         "yes" offers a single-dataset input (one job per dataset). -->
    <conditional name="in_cond">
      <param name="in_select" type="select" label="Run tool in batch mode for -in">
        <option value="no">No: process all datasets jointly</option>
        <option value="yes">Yes: process each dataset in an independent job</option>
      </param>
      <when value="no">
        <param argument="-in" type="data" format="mzml" multiple="true" label="Input files to align (all must have the same file type)" help="select mzml data set(s)"/>
      </when>
      <when value="yes">
        <param argument="-in" type="data" format="mzml" label="Input files to align (all must have the same file type)" help="select mzml data set(s)"/>
      </when>
    </conditional>
    <!-- Parameters of the spectrum alignment algorithm (passed as -algorithm:* options). -->
    <section name="algorithm" title="Algorithm parameters section" help="" expanded="false">
      <param name="gapcost" argument="-algorithm:gapcost" type="float" min="0.0" value="1.0" label="This parameter stands for the cost of opening a gap in the alignment" help="A gap means that one spectrum can not be aligned directly to another spectrum in the map. This happens when the similarity of both spectra is too low or even not present. Imagine it as an insert or delete of the spectrum in the map (similar to sequence alignment). The gap is necessary for aligning: if we open a gap, there is a possibility that another spectrum can be correctly aligned with a higher score than before without the gap. But opening a gap is a negative event and needs to carry a punishment, so a gap should only be opened if the benefits outweigh the downsides. The parameter is to be given as a positive number; the implementation converts it to a negative number"/>
      <param name="affinegapcost" argument="-algorithm:affinegapcost" type="float" min="0.0" value="0.5" label="This parameter controls the cost of extending an already open gap" help="The idea behind the affine gapcost rests on the assumption that it is better to get a long stretch of connected gaps than to have a structure of gaps interspersed with matches (gap match gap match etc.). Therefore the punishment for the extension of a gap generally should be lower than the normal gapcost. If the result of the alignment shows high compression, it is a good idea to lower either the affine gapcost or the gap opening cost"/>
      <param name="cutoff_score" argument="-algorithm:cutoff_score" type="float" min="0.0" max="1.0" value="0.7" label="This parameter defines the threshold used to filter spectra; these spectra are high potential candidates for deciding the interval of a sub-alignment" help="Only those pairs of spectra are selected which have a score higher than or equal to the threshold"/>
      <param name="bucketsize" argument="-algorithm:bucketsize" type="integer" min="1" value="100" label="Defines the number of buckets" help="It is a quantization of the interval of those points which define the main alignment (match points). These points have to be filtered to reduce the number of points for calculating a smoother spline curve"/>
      <param name="anchorpoints" argument="-algorithm:anchorpoints" type="integer" min="1" max="100" value="100" label="Defines the percentage of match points which are selected from one bucket" help="The high score pairs are previously selected. The reduction of match points helps to get a smoother spline curve"/>
      <param name="mismatchscore" argument="-algorithm:mismatchscore" type="float" max="0.0" value="-5.0" label="Defines the score of two spectra if they have no similarity to each other" help=""/>
      <param name="scorefunction" argument="-algorithm:scorefunction" type="select" label="The score function is the core of an alignment" help="The success of an alignment depends mostly on the selected score function. The score function returns the similarity of two spectra. The score later influences the way of possible traceback. There are multiple spectra similarity scores available">
        <option value="SteinScottImproveScore" selected="true">SteinScottImproveScore</option>
        <option value="ZhangSimilarityScore">ZhangSimilarityScore</option>
        <expand macro="list_string_san" name="scorefunction"/>
      </param>
    </section>
    <section name="model" title="Options to control the modeling of retention time transformations from data" help="" expanded="false">
      <!-- Choice of the retention time transformation model; "interpolated" is preselected. -->
      <param name="type"
             argument="-model:type"
             type="select"
             label="Type of model"
             help="">
        <option value="linear">linear</option>
        <option value="b_spline">b_spline</option>
        <option value="lowess">lowess</option>
        <option value="interpolated" selected="true">interpolated</option>
        <expand macro="list_string_san" name="type"/>
      </param>
      <!-- Options of the 'linear' model (passed as -model:linear:* options). -->
      <section name="linear" title="Parameters for 'linear' model" help="" expanded="false">
        <!-- The description is kept whole in the label: splitting it at the period of "vs." left a truncated label. -->
        <param name="symmetric_regression" argument="-model:linear:symmetric_regression" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Perform linear regression on 'y - x' vs. 'y + x', instead of on 'y' vs. 'x'" help=""/>
        <param name="x_weight" argument="-model:linear:x_weight" type="select" label="Weight x values" help="">
          <option value="1/x">1/x</option>
          <option value="1/x2">1/x2</option>
          <option value="ln(x)">ln(x)</option>
          <option value="x" selected="true">x</option>
          <expand macro="list_string_san" name="x_weight"/>
        </param>
        <param name="y_weight" argument="-model:linear:y_weight" type="select" label="Weight y values" help="">
          <option value="1/y">1/y</option>
          <option value="1/y2">1/y2</option>
          <option value="ln(y)">ln(y)</option>
          <option value="y" selected="true">y</option>
          <expand macro="list_string_san" name="y_weight"/>
        </param>
        <param name="x_datum_min" argument="-model:linear:x_datum_min" type="float" value="1e-15" label="Minimum x value" help=""/>
        <param name="x_datum_max" argument="-model:linear:x_datum_max" type="float" value="1000000000000000.0" label="Maximum x value" help=""/>
        <param name="y_datum_min" argument="-model:linear:y_datum_min" type="float" value="1e-15" label="Minimum y value" help=""/>
        <param name="y_datum_max" argument="-model:linear:y_datum_max" type="float" value="1000000000000000.0" label="Maximum y value" help=""/>
      </section>
      <!-- Options of the 'b_spline' model (passed as -model:b_spline:* options). -->
      <section name="b_spline"
               title="Parameters for 'b_spline' model"
               help=""
               expanded="false">
        <param name="wavelength"
               argument="-model:b_spline:wavelength"
               type="float"
               min="0.0"
               value="0.0"
               label="Determines the amount of smoothing by setting the number of nodes for the B-spline"
               help="The number is chosen so that the spline approximates a low-pass filter with this cutoff wavelength. The wavelength is given in the same units as the data; a higher value means more smoothing. '0' sets the number of nodes to twice the number of input points"/>
        <param name="num_nodes"
               argument="-model:b_spline:num_nodes"
               type="integer"
               min="0"
               value="5"
               label="Number of nodes for B-spline fitting"
               help="Overrides 'wavelength' if set (to two or greater). A lower value means more smoothing"/>
        <param name="extrapolate"
               argument="-model:b_spline:extrapolate"
               type="select"
               label="Method to use for extrapolation beyond the original data range"
               help="'linear': Linear extrapolation using the slope of the B-spline at the corresponding endpoint. 'b_spline': Use the B-spline (as for interpolation). 'constant': Use the constant value of the B-spline at the corresponding endpoint. 'global_linear': Use a linear fit through the data (which will most probably introduce discontinuities at the ends of the data range)">
          <option value="linear" selected="true">linear</option>
          <option value="b_spline">b_spline</option>
          <option value="constant">constant</option>
          <option value="global_linear">global_linear</option>
          <expand macro="list_string_san" name="extrapolate"/>
        </param>
        <param name="boundary_condition"
               argument="-model:b_spline:boundary_condition"
               type="integer"
               min="0"
               max="2"
               value="2"
               label="Boundary condition at B-spline endpoints: 0 (value zero), 1 (first derivative zero) or 2 (second derivative zero)"
               help=""/>
      </section>
      <!-- Options of the 'lowess' model (passed as -model:lowess:* options). -->
      <section name="lowess" title="Parameters for 'lowess' model" help="" expanded="false">
        <param name="span" argument="-model:lowess:span" type="float" min="0.0" max="1.0" value="0.666666666666667" label="Fraction of datapoints (f) to use for each local regression (determines the amount of smoothing)" help="Choosing this parameter in the range .2 to .8 usually results in a good fit"/>
        <param name="num_iterations" argument="-model:lowess:num_iterations" type="integer" min="0" value="3" label="Number of robustifying iterations for lowess fitting" help=""/>
        <!-- Label and help re-split so the label no longer ends inside an open parenthesis. -->
        <param name="delta" argument="-model:lowess:delta" type="float" value="-1.0" label="Nonnegative parameter which may be used to save computations" help="Recommended value is 0.01 of the range of the input, e.g. for data ranging from 1000 seconds to 2000 seconds, it could be set to 10. Setting a negative value will automatically do this"/>
        <param name="interpolation_type" argument="-model:lowess:interpolation_type" type="select" label="Method to use for interpolation between datapoints computed by lowess" help="'linear': Linear interpolation. 'cspline': Use the cubic spline for interpolation. 'akima': Use an akima spline for interpolation">
          <option value="linear">linear</option>
          <option value="cspline" selected="true">cspline</option>
          <option value="akima">akima</option>
          <expand macro="list_string_san" name="interpolation_type"/>
        </param>
        <param name="extrapolation_type" argument="-model:lowess:extrapolation_type" type="select" label="Method to use for extrapolation outside the data range" help="'two-point-linear': Uses a line through the first and last point to extrapolate. 'four-point-linear': Uses a line through the first and second point to extrapolate in front and a line through the last and second-to-last point in the end. 'global-linear': Uses a linear regression to fit a line through all data points and use it for interpolation">
          <option value="two-point-linear">two-point-linear</option>
          <option value="four-point-linear" selected="true">four-point-linear</option>
          <option value="global-linear">global-linear</option>
          <expand macro="list_string_san" name="extrapolation_type"/>
        </param>
      </section>
      <!-- Options of the 'interpolated' model (passed as -model:interpolated:* options). -->
      <section name="interpolated" title="Parameters for 'interpolated' model" help="" expanded="false">
        <param name="interpolation_type" argument="-model:interpolated:interpolation_type" type="select" label="Type of interpolation to apply" help="">
          <option value="linear">linear</option>
          <option value="cspline" selected="true">cspline</option>
          <option value="akima">akima</option>
          <expand macro="list_string_san" name="interpolation_type"/>
        </param>
        <!-- Short label in line with interpolation_type; the per-option explanation lives in the help text. -->
        <param name="extrapolation_type" argument="-model:interpolated:extrapolation_type" type="select" label="Type of extrapolation to apply" help="two-point-linear: use the first and last data point to build a single linear model, four-point-linear: build two linear models on both ends using the first two / last two points, global-linear: use all points to build a single linear model. Note that global-linear may not be continuous at the border">
          <option value="two-point-linear" selected="true">two-point-linear</option>
          <option value="four-point-linear">four-point-linear</option>
          <option value="global-linear">global-linear</option>
          <expand macro="list_string_san" name="extrapolation_type"/>
        </param>
      </section>
    </section>
    <!-- Parameters handed to the adv_opts_macro macro: the force switch and the hidden test flag. -->
    <expand macro="adv_opts_macro">
      <param argument="-force"
             type="boolean"
             truevalue="true"
             falsevalue="false"
             checked="false"
             label="Overrides tool-specific checks"
             help=""/>
      <param argument="-test"
             type="hidden"
             value="False"
             label="Enables the test mode (needed for internal use only)"
             help=""
             optional="true">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- Multi-select of the outputs to produce; its values are tested in the command template and in the output filters. -->
    <param name="OPTIONAL_OUTPUTS"
           type="select"
           optional="true"
           multiple="true"
           label="Optional outputs">
      <option value="out_FLAG">out (Output files (same file type as 'in'))</option>
      <option value="trafo_out_FLAG">trafo_out (Transformation output files)</option>
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <!-- Each output is gated by a filter on OPTIONAL_OUTPUTS; stdout is only kept when nothing is selected. -->
  <outputs>
    <!-- Aligned maps, collected from the out directory. -->
    <collection name="out"
                type="list"
                label="${tool.name} on ${on_string}: out">
      <filter>OPTIONAL_OUTPUTS is not None and "out_FLAG" in OPTIONAL_OUTPUTS</filter>
      <discover_datasets pattern="__name__"
                         directory="out"
                         recurse="true"
                         format="mzml"/>
    </collection>
    <!-- Transformation files, collected from the trafo_out directory. -->
    <collection name="trafo_out"
                type="list"
                label="${tool.name} on ${on_string}: trafo_out">
      <filter>OPTIONAL_OUTPUTS is not None and "trafo_out_FLAG" in OPTIONAL_OUTPUTS</filter>
      <discover_datasets pattern="__name__"
                         directory="trafo_out"
                         recurse="true"
                         format="trafoxml"/>
    </collection>
    <data name="stdout"
          format="txt"
          label="${tool.name} on ${on_string}: stdout">
      <filter>OPTIONAL_OUTPUTS is None</filter>
    </data>
    <data name="ctd_out"
          format="xml"
          label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests>
    <!-- TOPP_MapAlignerSpectrum_1 -->
    <test expect_num_outputs="2">
      <!-- Inputs and requested outputs -->
      <conditional name="in_cond">
        <param name="in" value="MapAlignerSpectrum_1_input1.mzML,MapAlignerSpectrum_1_input2.mzML,MapAlignerSpectrum_1_input3.mzML"/>
      </conditional>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <!-- Tool parameters -->
      <section name="algorithm">
        <param name="gapcost" value="1.0"/>
        <param name="affinegapcost" value="0.5"/>
        <param name="cutoff_score" value="0.7"/>
        <param name="bucketsize" value="100"/>
        <param name="anchorpoints" value="100"/>
        <param name="mismatchscore" value="-5.0"/>
        <param name="scorefunction" value="SteinScottImproveScore"/>
      </section>
      <section name="model">
        <param name="type" value="interpolated"/>
        <section name="linear">
          <param name="symmetric_regression" value="false"/>
          <param name="x_weight" value="x"/>
          <param name="y_weight" value="y"/>
          <param name="x_datum_min" value="1e-15"/>
          <param name="x_datum_max" value="1000000000000000.0"/>
          <param name="y_datum_min" value="1e-15"/>
          <param name="y_datum_max" value="1000000000000000.0"/>
        </section>
        <section name="b_spline">
          <param name="wavelength" value="0.0"/>
          <param name="num_nodes" value="5"/>
          <param name="extrapolate" value="linear"/>
          <param name="boundary_condition" value="2"/>
        </section>
        <section name="lowess">
          <param name="span" value="0.666666666666667"/>
          <param name="num_iterations" value="3"/>
          <param name="delta" value="-1.0"/>
          <param name="interpolation_type" value="cspline"/>
          <param name="extrapolation_type" value="four-point-linear"/>
        </section>
        <section name="interpolated">
          <param name="interpolation_type" value="cspline"/>
          <param name="extrapolation_type" value="two-point-linear"/>
        </section>
      </section>
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <!-- Expected results: three aligned maps, a well-formed CTD file and the timing line on stdout -->
      <output_collection name="out" count="3"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
  </tests>
  <help><![CDATA[Corrects retention time distortions between maps by spectrum alignment.


For more information, visit https://openms.de/doxygen/release/3.1.0/html/TOPP_MapAlignerSpectrum.html]]></help>
  <expand macro="references"/>
</tool>
<!-- Repository changeset metadata:
author: galaxyp
date: Fri, 14 Jun 2024 21:40:27 +0000
parents: 085863ba8f0e
children:
-->