view MapAlignerPoseClustering.xml @ 10:246847b3a188 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 3d1e5f37fd16524a415f707772eeb7ead848c5e3
author galaxyp
date Thu, 01 Dec 2022 19:16:48 +0000
parents d17db97d139f
children
line wrap: on
line source

<?xml version='1.0' encoding='UTF-8'?>
<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [Map Alignment]-->
<tool id="MapAlignerPoseClustering" name="MapAlignerPoseClustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <description>Corrects retention time distortions between maps using a pose clustering approach.</description>
  <macros>
    <token name="@EXECUTABLE@">MapAlignerPoseClustering</token>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
## Stage every input under in_cond.in/ through symlinks named after the
## sanitized element identifier plus the OpenMS file extension.
mkdir in_cond.in &&
#if $in_cond.in_select == "no"
mkdir ${' '.join(["'in_cond.in/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
${' '.join(["ln -s '%s' 'in_cond.in/%s/%s.%s' && " % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
ln -s '$in_cond.in' 'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)' &&
#end if
## One numbered sub-directory per input is created for each requested output.
## NOTE(review): these loops enumerate in_cond.in, which is a single dataset
## (not a list) when batch mode is "yes" - confirm that path works.
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir out &&
  mkdir ${' '.join(["'out/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir trafo_out &&
  mkdir ${' '.join(["'trafo_out/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
#end if
#if $reference.file:
  mkdir reference.file &&
  ln -s '$reference.file' 'reference.file/${re.sub("[^\w\-_]", "_", $reference.file.element_identifier)}.$gxy2omsext($reference.file.ext)' &&
#end if

## Main program call
## Write the default CTD, fill it from the two JSON config files, then run the
## tool with that CTD; file arguments are passed on the command line.

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in
#if $in_cond.in_select == "no"
${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)'
#end if
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -out
  ${' '.join(["'out/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), f.ext) for i, f in enumerate($in_cond.in) if f])}
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -trafo_out
  ${' '.join(["'trafo_out/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext("trafoxml")) for i, f in enumerate($in_cond.in) if f])}
#end if
#if $reference.file:
  -reference:file
  'reference.file/${re.sub("[^\w\-_]", "_", $reference.file.element_identifier)}.$gxy2omsext($reference.file.ext)'
#end if
## Capture the tool's stdout only when no optional output was selected.
## split(',') always returns at least one element, so a length test can never
## detect "nothing selected"; compare the rendered value instead (an unset
## multi-select is expected to render as "None"; empty string handled too).
#if str($OPTIONAL_OUTPUTS) in ("", "None")
  | tee '$stdout'
#end if

## Postprocessing
## Strip the extension from the transformation files so that they can be
## collected by name only.
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  ${' '.join(["&& mv -n 'trafo_out/%(bn)s/%(id)s.%(gext)s' 'trafo_out/%(bn)s/%(id)s'"%{"bn": i, "id": re.sub('[^\w\-_]', '_', f.element_identifier), "gext": $gxy2omsext("trafoxml")} for i, f in enumerate($in_cond.in) if f])}
#end if
#if "ctd_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <!-- Two JSON files handed to fill_ctd.py by the command template:
       args_json holds the tool inputs (data_style="paths"), hardcoded_json holds
       fixed settings (log file, thread count, no_progress). -->
  <configfiles>
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <!-- Input selection for -in. "no" accepts several datasets in one job
         (multiple="true"); "yes" accepts a single dataset per job
         (multiple="false"). The command template branches on in_select. -->
    <conditional name="in_cond">
      <param name="in_select" type="select" label="Run tool in batch mode for -in">
        <option value="no">No: process all datasets jointly</option>
        <option value="yes">Yes: process each dataset in an independent job</option>
      </param>
      <when value="no">
        <param argument="-in" type="data" format="featurexml,mzml" multiple="true" optional="false" label="Input files to align (all must have the same file type)" help=" select featurexml,mzml data sets(s)"/>
      </when>
      <when value="yes">
        <param argument="-in" type="data" format="featurexml,mzml" multiple="false" optional="false" label="Input files to align (all must have the same file type)" help=" select featurexml,mzml data sets(s)"/>
      </when>
    </conditional>
    <!-- Optional alignment reference: either an extra dataset (file) or the
         1-based position of one of the inputs (index; 0 lets the tool choose).
         When file is set, the command template symlinks it and passes it as
         -reference:file. -->
    <section name="reference" title="Options to define a reference file (use either 'file' or 'index', not both)" help="" expanded="false">
      <param name="file" argument="-reference:file" type="data" format="featurexml,mzml" optional="true" label="File to use as reference (same file format as input files required)" help=" select featurexml,mzml data sets(s)"/>
      <param name="index" argument="-reference:index" type="integer" optional="true" min="0" value="0" label="Use one of the input files as reference ('1' for the first file, etc.)" help="If '0', no explicit reference is set - the algorithm will select a reference"/>
    </section>
    <section name="algorithm" title="Algorithm parameters section" help="" expanded="false">
      <param name="max_num_peaks_considered" argument="-algorithm:max_num_peaks_considered" type="integer" optional="true" min="-1" value="1000" label="The maximal number of peaks/features to be considered per map" help="To use all, set to '-1'"/>
      <!-- Parameters forwarded as -algorithm:superimposer:*. The two dump_*
           text parameters are debug aids and go through the list_string_san
           macro (defined in macros.xml, not shown here). -->
      <section name="superimposer" title="" help="" expanded="false">
        <param name="mz_pair_max_distance" argument="-algorithm:superimposer:mz_pair_max_distance" type="float" optional="true" min="0.0" value="0.5" label="Maximum of m/z deviation of corresponding elements in different maps" help="This condition applies to the pairs considered in hashing"/>
        <param name="rt_pair_distance_fraction" argument="-algorithm:superimposer:rt_pair_distance_fraction" type="float" optional="true" min="0.0" max="1.0" value="0.1" label="Within each of the two maps, the pairs considered for pose clustering must be separated by at least this fraction of the total elution time interval" help="(i.e., max - min).  "/>
        <param name="num_used_points" argument="-algorithm:superimposer:num_used_points" type="integer" optional="true" min="-1" value="2000" label="Maximum number of elements considered in each map (selected by intensity)" help="Use this to reduce the running time and to disregard weak signals during alignment.  For using all points, set this to -1"/>
        <param name="scaling_bucket_size" argument="-algorithm:superimposer:scaling_bucket_size" type="float" optional="true" min="0.0" value="0.005" label="The scaling of the retention time interval is being hashed into buckets of this size during pose clustering" help="A good choice for this would be a bit smaller than the error you would expect from repeated runs"/>
        <param name="shift_bucket_size" argument="-algorithm:superimposer:shift_bucket_size" type="float" optional="true" min="0.0" value="3.0" label="The shift at the lower (respectively, higher) end of the retention time interval is being hashed into buckets of this size during pose clustering" help="A good choice for this would be about the time between consecutive MS scans"/>
        <param name="max_shift" argument="-algorithm:superimposer:max_shift" type="float" optional="true" min="0.0" value="1000.0" label="Maximal shift which is considered during histogramming (in seconds)" help="This applies for both directions"/>
        <param name="max_scaling" argument="-algorithm:superimposer:max_scaling" type="float" optional="true" min="1.0" value="2.0" label="Maximal scaling which is considered during histogramming" help="The minimal scaling is the reciprocal of this"/>
        <param name="dump_buckets" argument="-algorithm:superimposer:dump_buckets" type="text" optional="true" value="" label="[DEBUG] If non-empty, base filename where hash table buckets will be dumped to" help="A serial number for each invocation will be appended automatically">
          <expand macro="list_string_san" name="dump_buckets"/>
        </param>
        <param name="dump_pairs" argument="-algorithm:superimposer:dump_pairs" type="text" optional="true" value="" label="[DEBUG] If non-empty, base filename where the individual hashed pairs will be dumped to (large!)" help="A serial number for each invocation will be appended automatically">
          <expand macro="list_string_san" name="dump_pairs"/>
        </param>
      </section>
      <!-- Parameters forwarded as -algorithm:pairfinder:*. Each distance
           component (RT, m/z, intensity) exposes an exponent and a weight;
           RT and m/z additionally have a hard max_difference cut-off. -->
      <section name="pairfinder" title="" help="" expanded="false">
        <param name="second_nearest_gap" argument="-algorithm:pairfinder:second_nearest_gap" type="float" optional="true" min="1.0" value="2.0" label="Only link features whose distance to the second nearest neighbors (for both sides) is larger by 'second_nearest_gap' than the distance between the matched pair itself" help=""/>
        <param name="use_identifications" argument="-algorithm:pairfinder:use_identifications" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Never link features that are annotated with different peptides (features without ID's always match; only the best hit per peptide identification is considered)" help=""/>
        <param name="ignore_charge" argument="-algorithm:pairfinder:ignore_charge" type="boolean" truevalue="true" falsevalue="false" checked="false" label="false [default]: pairing requires equal charge state (or at least one unknown charge '0'); true: Pairing irrespective of charge state" help=""/>
        <param name="ignore_adduct" argument="-algorithm:pairfinder:ignore_adduct" type="boolean" truevalue="true" falsevalue="false" checked="true" label="true [default]: pairing irrespective of adducts; false: pairing requires equal adducts (or at least one without adduct annotation)" help=""/>
        <section name="distance_RT" title="Distance component based on RT differences" help="" expanded="false">
          <param name="max_difference" argument="-algorithm:pairfinder:distance_RT:max_difference" type="float" optional="true" min="0.0" value="100.0" label="Never pair features with a larger RT distance (in seconds)" help=""/>
          <param name="exponent" argument="-algorithm:pairfinder:distance_RT:exponent" type="float" optional="true" min="0.0" value="1.0" label="Normalized RT differences ([0-1], relative to 'max_difference') are raised to this power (using 1 or 2 will be fast, everything else is REALLY slow)" help=""/>
          <param name="weight" argument="-algorithm:pairfinder:distance_RT:weight" type="float" optional="true" min="0.0" value="1.0" label="Final RT distances are weighted by this factor" help=""/>
        </section>
        <section name="distance_MZ" title="Distance component based on m/z differences" help="" expanded="false">
          <param name="max_difference" argument="-algorithm:pairfinder:distance_MZ:max_difference" type="float" optional="true" min="0.0" value="0.3" label="Never pair features with larger m/z distance (unit defined by 'unit')" help=""/>
          <param name="unit" argument="-algorithm:pairfinder:distance_MZ:unit" type="select" optional="true" label="Unit of the 'max_difference' parameter" help="">
            <option value="Da" selected="true">Da</option>
            <option value="ppm">ppm</option>
            <expand macro="list_string_san" name="unit"/>
          </param>
          <param name="exponent" argument="-algorithm:pairfinder:distance_MZ:exponent" type="float" optional="true" min="0.0" value="2.0" label="Normalized ([0-1], relative to 'max_difference') m/z differences are raised to this power (using 1 or 2 will be fast, everything else is REALLY slow)" help=""/>
          <param name="weight" argument="-algorithm:pairfinder:distance_MZ:weight" type="float" optional="true" min="0.0" value="1.0" label="Final m/z distances are weighted by this factor" help=""/>
        </section>
        <section name="distance_intensity" title="Distance component based on differences in relative intensity (usually relative to highest peak in the whole data set)" help="" expanded="false">
          <param name="exponent" argument="-algorithm:pairfinder:distance_intensity:exponent" type="float" optional="true" min="0.0" value="1.0" label="Differences in relative intensity ([0-1]) are raised to this power (using 1 or 2 will be fast, everything else is REALLY slow)" help=""/>
          <param name="weight" argument="-algorithm:pairfinder:distance_intensity:weight" type="float" optional="true" min="0.0" value="0.0" label="Final intensity distances are weighted by this factor" help=""/>
          <param name="log_transform" argument="-algorithm:pairfinder:distance_intensity:log_transform" type="select" optional="true" label="Log-transform intensities" help="If disabled, d = |int_f2 - int_f1| / int_max. If enabled, d = |log(int_f2 + 1) - log(int_f1 + 1)| / log(int_max + 1))">
            <option value="enabled">enabled</option>
            <option value="disabled" selected="true">disabled</option>
            <expand macro="list_string_san" name="log_transform"/>
          </param>
        </section>
      </section>
    </section>
    <!-- Advanced options, wrapped by adv_opts_macro (defined in macros.xml,
         not shown here). The hidden test parameter is set to true by every
         test in this file. -->
    <expand macro="adv_opts_macro">
      <param argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
      <param argument="-test" type="hidden" optional="true" value="False" label="Enables the test mode (needed for internal use only)" help="">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- Multi-select that decides which outputs are produced. The option
         values (*_FLAG) are tested in the command template and in the output
         filters. -->
    <param name="OPTIONAL_OUTPUTS" type="select" optional="true" multiple="true" label="Optional outputs">
      <option value="out_FLAG">out (Output files (same file type as 'in'))</option>
      <option value="trafo_out_FLAG">trafo_out (Transformation output files)</option>
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <!-- Outputs are gated by the OPTIONAL_OUTPUTS selection through filters. -->
  <outputs>
    <!-- Aligned maps: discovered recursively below out/, name and datatype
         taken from the file name. -->
    <collection type="list" name="out" label="${tool.name} on ${on_string}: out">
      <discover_datasets directory="out" recurse="true" pattern="__name_and_ext__"/>
      <filter>OPTIONAL_OUTPUTS is not None and "out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </collection>
    <!-- Transformations: discovered recursively below trafo_out/ by name only
         (the command template renames the files to drop their extension);
         datatype fixed to trafoxml. -->
    <collection type="list" name="trafo_out" label="${tool.name} on ${on_string}: trafo_out">
      <discover_datasets directory="trafo_out" recurse="true" format="trafoxml" pattern="__name__"/>
      <filter>OPTIONAL_OUTPUTS is not None and "trafo_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </collection>
    <!-- Present only when no optional output was selected. -->
    <data name="stdout" format="txt" label="${tool.name} on ${on_string}: stdout">
      <filter>OPTIONAL_OUTPUTS is None</filter>
    </data>
    <!-- The CTD (ini) file the tool was run with. -->
    <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests><!-- TOPP_MapAlignerPoseClustering_1: three featureXML inputs, reference index 0; requests out, trafo_out and ctd -->
    <test expect_num_outputs="3">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_1_input1.featureXML,MapAlignerPoseClustering_1_input2.featureXML,MapAlignerPoseClustering_1_input3.featureXML"/>
      </conditional>
      <output_collection name="out" count="3"/>
      <output_collection name="trafo_out" count="3"/>
      <section name="reference">
        <param name="index" value="0"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="true"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="30.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG,trafo_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
    </test>
    <!-- TOPP_MapAlignerPoseClustering_2: three mzML inputs, reference index 0; requests out and ctd -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_2_input1.mzML,MapAlignerPoseClustering_2_input2.mzML,MapAlignerPoseClustering_2_input3.mzML"/>
      </conditional>
      <output_collection name="out" count="3"/>
      <section name="reference">
        <param name="index" value="0"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="false"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="100.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
    </test>
    <!-- TOPP_MapAlignerPoseClustering_3: two featureXML inputs aligned to an external reference file; requests out and ctd -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_1_input2.featureXML,MapAlignerPoseClustering_1_input3.featureXML"/>
      </conditional>
      <output_collection name="out" count="2"/>
      <section name="reference">
        <param name="file" value="MapAlignerPoseClustering_1_input1.featureXML"/>
        <param name="index" value="0"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="true"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="30.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
    </test>
    <!-- TOPP_MapAlignerPoseClustering_4: two featureXML inputs, reference index 2 (the second input); requests trafo_out and ctd -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_1_input1.featureXML,MapAlignerPoseClustering_1_input2.featureXML"/>
      </conditional>
      <output_collection name="trafo_out" count="2"/>
      <section name="reference">
        <param name="index" value="2"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="true"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="30.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,trafo_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[Corrects retention time distortions between maps using a pose clustering approach.


For more information, visit http://www.openms.de/doxygen/release/2.8.0/html/TOPP_MapAlignerPoseClustering.html]]></help>
  <expand macro="references"/>
</tool>