Mercurial > repos > galaxyp > openms_mapalignerposeclustering
view MapAlignerPoseClustering.xml @ 11:e527a8a430dd draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
author | galaxyp |
---|---|
date | Fri, 14 Jun 2024 21:40:41 +0000 |
parents | 246847b3a188 |
children |
line wrap: on
line source
<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [Map Alignment]-->
<!--
  Galaxy wrapper for the OpenMS executable MapAlignerPoseClustering.
  The command template stages the inputs into working directories, lets the
  executable write a default .ctd parameter file, fills that file from the
  Galaxy parameters via fill_ctd.py, runs the executable and finally renames
  or moves the results so that Galaxy can collect them.
-->
<tool id="MapAlignerPoseClustering" name="MapAlignerPoseClustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <description>Corrects retention time distortions between maps using a pose clustering approach</description>
  <macros>
    <token name="@EXECUTABLE@">MapAlignerPoseClustering</token>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
## Every input is copied to '<dir>/<index>/<sanitized element identifier>.<extension>'.
mkdir in_cond.in &&
#if $in_cond.in_select == "no"
mkdir ${' '.join(["'in_cond.in/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
${' '.join(["cp '%s' 'in_cond.in/%s/%s.%s' && " % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
cp '$in_cond.in' 'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)' &&
#end if
## NOTE(review): the output blocks below iterate over in_cond.in even in batch mode,
## where that parameter is declared without multiple="true" - confirm this behaves as intended.
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir out &&
  mkdir ${' '.join(["'out/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir trafo_out &&
  mkdir ${' '.join(["'trafo_out/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} &&
#end if
#if $reference.file:
  mkdir reference.file &&
  cp '$reference.file' 'reference.file/${re.sub("[^\w\-_]", "_", $reference.file.element_identifier)}.$gxy2omsext($reference.file.ext)' &&
#end if

## Main program call

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in
#if $in_cond.in_select == "no"
${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)'
#end if
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -out
  ${' '.join(["'out/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), f.ext) for i, f in enumerate($in_cond.in) if f])}
#end if
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -trafo_out
  ${' '.join(["'trafo_out/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext("trafoxml")) for i, f in enumerate($in_cond.in) if f])}
#end if
#if $reference.file:
  -reference:file
  'reference.file/${re.sub("[^\w\-_]", "_", $reference.file.element_identifier)}.$gxy2omsext($reference.file.ext)'
#end if
## Capture stdout only when no optional output is selected. The rendered value is
## compared directly because splitting a string never yields an empty list, so a
## length check against 0 would never be true.
#if str($OPTIONAL_OUTPUTS) in ("", "None")
  | tee '$stdout'
#end if

## Postprocessing
## Strip the extension from the transformation files so they match the __name__ discovery pattern.
#if "trafo_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  ${' '.join(["&& mv -n 'trafo_out/%(bn)s/%(id)s.%(gext)s' 'trafo_out/%(bn)s/%(id)s'"%{"bn": i, "id": re.sub('[^\w\-_]', '_', f.element_identifier), "gext": $gxy2omsext("trafoxml")} for i, f in enumerate($in_cond.in) if f])}
#end if
#if "ctd_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <configfiles>
    <!-- args_json carries the Galaxy parameter values; hardcoded_json carries fixed settings passed to fill_ctd.py -->
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <!-- "no": all datasets are passed to one job; "yes": a single dataset input (one job per dataset) -->
    <conditional name="in_cond">
      <param name="in_select" type="select" label="Run tool in batch mode for -in">
        <option value="no">No: process all datasets jointly</option>
        <option value="yes">Yes: process each dataset in an independent job</option>
      </param>
      <when value="no">
        <param argument="-in" type="data" format="featurexml,mzml" multiple="true" label="Input files to align (all must have the same file type)" help="select featurexml,mzml data set(s)"/>
      </when>
      <when value="yes">
        <param argument="-in" type="data" format="featurexml,mzml" label="Input files to align (all must have the same file type)" help="select featurexml,mzml data set(s)"/>
      </when>
    </conditional>
    <section name="reference" title="Options to define a reference file (use either 'file' or 'index', not both)" help="" expanded="false">
      <param name="file" argument="-reference:file" type="data" format="featurexml,mzml" optional="true" label="File to use as reference (same file format as input files required)" help="select featurexml,mzml data set(s)"/>
      <param name="index" argument="-reference:index" type="integer" min="0" value="0" label="Use one of the input files as reference ('1' for the first file, etc.)" help="If '0', no explicit reference is set - the algorithm will select a reference"/>
    </section>
    <section name="algorithm" title="Algorithm parameters section" help="" expanded="false">
      <param name="max_num_peaks_considered" argument="-algorithm:max_num_peaks_considered" type="integer" min="-1" value="1000" label="The maximal number of peaks/features to be considered per map" help="To use all, set to '-1'"/>
      <section name="superimposer" title="" help="" expanded="false">
        <param name="mz_pair_max_distance" argument="-algorithm:superimposer:mz_pair_max_distance" type="float" min="0.0" value="0.5" label="Maximum of m/z deviation of corresponding elements in different maps" help="This condition applies to the pairs considered in hashing"/>
        <param name="rt_pair_distance_fraction" argument="-algorithm:superimposer:rt_pair_distance_fraction" type="float" min="0.0" max="1.0" value="0.1" label="Within each of the two maps, the pairs considered for pose clustering must be separated by at least this fraction of the total elution time interval" help="(i.e., max - min)."/>
        <param name="num_used_points" argument="-algorithm:superimposer:num_used_points" type="integer" min="-1" value="2000" label="Maximum number of elements considered in each map (selected by intensity)" help="Use this to reduce the running time and to disregard weak signals during alignment. For using all points, set this to -1"/>
        <param name="scaling_bucket_size" argument="-algorithm:superimposer:scaling_bucket_size" type="float" min="0.0" value="0.005" label="The scaling of the retention time interval is being hashed into buckets of this size during pose clustering" help="A good choice for this would be a bit smaller than the error you would expect from repeated runs"/>
        <param name="shift_bucket_size" argument="-algorithm:superimposer:shift_bucket_size" type="float" min="0.0" value="3.0" label="The shift at the lower (respectively, higher) end of the retention time interval is being hashed into buckets of this size during pose clustering" help="A good choice for this would be about the time between consecutive MS scans"/>
        <param name="max_shift" argument="-algorithm:superimposer:max_shift" type="float" min="0.0" value="1000.0" label="Maximal shift which is considered during histogramming (in seconds)" help="This applies for both directions"/>
        <param name="max_scaling" argument="-algorithm:superimposer:max_scaling" type="float" min="1.0" value="2.0" label="Maximal scaling which is considered during histogramming" help="The minimal scaling is the reciprocal of this"/>
        <param name="dump_buckets" argument="-algorithm:superimposer:dump_buckets" type="text" optional="true" value="" label="[DEBUG] If non-empty, base filename where hash table buckets will be dumped to" help="A serial number for each invocation will be appended automatically">
          <expand macro="list_string_san" name="dump_buckets"/>
        </param>
        <param name="dump_pairs" argument="-algorithm:superimposer:dump_pairs" type="text" optional="true" value="" label="[DEBUG] If non-empty, base filename where the individual hashed pairs will be dumped to (large!)" help="A serial number for each invocation will be appended automatically">
          <expand macro="list_string_san" name="dump_pairs"/>
        </param>
      </section>
      <section name="pairfinder" title="" help="" expanded="false">
        <param name="second_nearest_gap" argument="-algorithm:pairfinder:second_nearest_gap" type="float" min="1.0" value="2.0" label="Only link features whose distance to the second nearest neighbors (for both sides) is larger by 'second_nearest_gap' than the distance between the matched pair itself" help=""/>
        <param name="use_identifications" argument="-algorithm:pairfinder:use_identifications" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Never link features that are annotated with different peptides (features without ID's always match; only the best hit per peptide identification is considered)" help=""/>
        <param name="ignore_charge" argument="-algorithm:pairfinder:ignore_charge" type="boolean" truevalue="true" falsevalue="false" checked="false" label="false [default]: pairing requires equal charge state (or at least one unknown charge '0'); true: Pairing irrespective of charge state" help=""/>
        <!-- NOTE(review): label reworded to mirror ignore_charge; the generated text listed "true" for both behaviours. Confirm against the OpenMS parameter documentation. -->
        <param name="ignore_adduct" argument="-algorithm:pairfinder:ignore_adduct" type="boolean" truevalue="true" falsevalue="false" checked="true" label="false: pairing requires equal adducts (or at least one without adduct annotation); true [default]: Pairing irrespective of adducts" help=""/>
        <section name="distance_RT" title="Distance component based on RT differences" help="" expanded="false">
          <param name="max_difference" argument="-algorithm:pairfinder:distance_RT:max_difference" type="float" min="0.0" value="100.0" label="Never pair features with a larger RT distance (in seconds)" help=""/>
          <param name="exponent" argument="-algorithm:pairfinder:distance_RT:exponent" type="float" min="0.0" value="1.0" label="Normalized RT differences ([0-1], relative to 'max_difference') are raised to this power (using 1 or 2 will be fast, everything else is REALLY slow)" help=""/>
          <param name="weight" argument="-algorithm:pairfinder:distance_RT:weight" type="float" min="0.0" value="1.0" label="Final RT distances are weighted by this factor" help=""/>
        </section>
        <section name="distance_MZ" title="Distance component based on m/z differences" help="" expanded="false">
          <param name="max_difference" argument="-algorithm:pairfinder:distance_MZ:max_difference" type="float" min="0.0" value="0.3" label="Never pair features with larger m/z distance (unit defined by 'unit')" help=""/>
          <param name="unit" argument="-algorithm:pairfinder:distance_MZ:unit" type="select" label="Unit of the 'max_difference' parameter" help="">
            <option value="Da" selected="true">Da</option>
            <option value="ppm">ppm</option>
            <expand macro="list_string_san" name="unit"/>
          </param>
          <param name="exponent" argument="-algorithm:pairfinder:distance_MZ:exponent" type="float" min="0.0" value="2.0" label="Normalized ([0-1], relative to 'max_difference') m/z differences are raised to this power (using 1 or 2 will be fast, everything else is REALLY slow)" help=""/>
          <param name="weight" argument="-algorithm:pairfinder:distance_MZ:weight" type="float" min="0.0" value="1.0" label="Final m/z distances are weighted by this factor" help=""/>
        </section>
        <section name="distance_intensity" title="Distance component based on differences in relative intensity (usually relative to highest peak in the whole data set)" help="" expanded="false">
          <param name="exponent" argument="-algorithm:pairfinder:distance_intensity:exponent" type="float" min="0.0" value="1.0" label="Differences in relative intensity ([0-1]) are raised to this power (using 1 or 2 will be fast, everything else is REALLY slow)" help=""/>
          <param name="weight" argument="-algorithm:pairfinder:distance_intensity:weight" type="float" min="0.0" value="0.0" label="Final intensity distances are weighted by this factor" help=""/>
          <param name="log_transform" argument="-algorithm:pairfinder:distance_intensity:log_transform" type="select" label="Log-transform intensities" help="If disabled, d = |int_f2 - int_f1| / int_max. If enabled, d = |log(int_f2 + 1) - log(int_f1 + 1)| / log(int_max + 1))">
            <option value="enabled">enabled</option>
            <option value="disabled" selected="true">disabled</option>
            <expand macro="list_string_san" name="log_transform"/>
          </param>
        </section>
      </section>
    </section>
    <expand macro="adv_opts_macro">
      <param argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
      <param argument="-test" type="hidden" value="False" label="Enables the test mode (needed for internal use only)" help="" optional="true">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- The selected values are tested in the command template and in the output filters below -->
    <param name="OPTIONAL_OUTPUTS" type="select" optional="true" multiple="true" label="Optional outputs">
      <option value="out_FLAG">out (Output files (same file type as 'in'))</option>
      <option value="trafo_out_FLAG">trafo_out (Transformation output files)</option>
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <outputs>
    <collection type="list" name="out" label="${tool.name} on ${on_string}: out">
      <discover_datasets directory="out" recurse="true" pattern="__name_and_ext__"/>
      <filter>OPTIONAL_OUTPUTS is not None and "out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </collection>
    <collection type="list" name="trafo_out" label="${tool.name} on ${on_string}: trafo_out">
      <discover_datasets directory="trafo_out" recurse="true" format="trafoxml" pattern="__name__"/>
      <filter>OPTIONAL_OUTPUTS is not None and "trafo_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </collection>
    <!-- Only present when no optional output is selected; filled by the tee step of the command -->
    <data name="stdout" format="txt" label="${tool.name} on ${on_string}: stdout">
      <filter>OPTIONAL_OUTPUTS is None</filter>
    </data>
    <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests>
    <!-- TOPP_MapAlignerPoseClustering_1 -->
    <test expect_num_outputs="3">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_1_input1.featureXML,MapAlignerPoseClustering_1_input2.featureXML,MapAlignerPoseClustering_1_input3.featureXML"/>
      </conditional>
      <output_collection name="out" count="3"/>
      <output_collection name="trafo_out" count="3"/>
      <section name="reference">
        <param name="index" value="0"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="true"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="30.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG,trafo_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_MapAlignerPoseClustering_2 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_2_input1.mzML,MapAlignerPoseClustering_2_input2.mzML,MapAlignerPoseClustering_2_input3.mzML"/>
      </conditional>
      <output_collection name="out" count="3"/>
      <section name="reference">
        <param name="index" value="0"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="false"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="100.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_MapAlignerPoseClustering_3 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_1_input2.featureXML,MapAlignerPoseClustering_1_input3.featureXML"/>
      </conditional>
      <output_collection name="out" count="2"/>
      <section name="reference">
        <param name="file" value="MapAlignerPoseClustering_1_input1.featureXML"/>
        <param name="index" value="0"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="true"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="30.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_MapAlignerPoseClustering_4 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MapAlignerPoseClustering_1_input1.featureXML,MapAlignerPoseClustering_1_input2.featureXML"/>
      </conditional>
      <output_collection name="trafo_out" count="2"/>
      <section name="reference">
        <param name="index" value="2"/>
      </section>
      <section name="algorithm">
        <param name="max_num_peaks_considered" value="400"/>
        <section name="superimposer">
          <param name="mz_pair_max_distance" value="0.5"/>
          <param name="rt_pair_distance_fraction" value="0.1"/>
          <param name="num_used_points" value="2000"/>
          <param name="scaling_bucket_size" value="0.005"/>
          <param name="shift_bucket_size" value="3.0"/>
          <param name="max_shift" value="1000.0"/>
          <param name="max_scaling" value="2.0"/>
          <param name="dump_buckets" value=""/>
          <param name="dump_pairs" value=""/>
        </section>
        <section name="pairfinder">
          <param name="second_nearest_gap" value="2.0"/>
          <param name="use_identifications" value="false"/>
          <param name="ignore_charge" value="true"/>
          <param name="ignore_adduct" value="true"/>
          <section name="distance_RT">
            <param name="max_difference" value="30.0"/>
            <param name="exponent" value="1.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_MZ">
            <param name="max_difference" value="0.3"/>
            <param name="unit" value="Da"/>
            <param name="exponent" value="2.0"/>
            <param name="weight" value="1.0"/>
          </section>
          <section name="distance_intensity">
            <param name="exponent" value="1.0"/>
            <param name="weight" value="0.0"/>
            <param name="log_transform" value="disabled"/>
          </section>
        </section>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,trafo_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
  </tests>
  <help><![CDATA[Corrects retention time distortions between maps using a pose clustering approach.


For more information, visit https://openms.de/doxygen/release/3.1.0/html/TOPP_MapAlignerPoseClustering.html]]></help>
  <expand macro="references"/>
</tool>