view PercolatorAdapter.xml @ 4:e7881a82b56d draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit ddf41e8bda1ba065f5cdec98e93dee8165ffc1b9"
author galaxyp
date Thu, 27 Aug 2020 19:52:08 -0400
parents 4ed4e0b7f57c
children 147aaac03456
line wrap: on
line source

<?xml version='1.0' encoding='UTF-8'?>
<!--This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [ID Processing]-->
<tool id="PercolatorAdapter" name="PercolatorAdapter" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@" profile="20.05">
  <description>Facilitate input to Percolator and reintegrate.</description>
  <macros>
    <token name="@EXECUTABLE@">PercolatorAdapter</token>
    <import>macros.xml</import>
    <import>macros_autotest.xml</import>
    <import>macros_test.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
#if $in:
  mkdir in &&
  ${ ' '.join(["ln -s '%s' 'in/%s.%s' &&" % (_, re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in if _]) }
#end if
#if $in_decoy:
  mkdir in_decoy &&
  ${ ' '.join(["ln -s '%s' 'in_decoy/%s.%s' &&" % (_, re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in_decoy if _]) }
#end if
#if $in_osw:
  mkdir in_osw &&
  ln -s '$in_osw' 'in_osw/${re.sub("[^\w\-_]", "_", $in_osw.element_identifier)}.$gxy2omsext($in_osw.ext)' &&
#end if
mkdir out &&
#if "out_pin_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir out_pin &&
#end if
#if "weights_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir weights &&
#end if
#if $adv_opts_cond.adv_opts_selector=='advanced':
  #if $adv_opts_cond.init_weights:
    mkdir adv_opts_cond.init_weights &&
    ln -s '$adv_opts_cond.init_weights' 'adv_opts_cond.init_weights/${re.sub("[^\w\-_]", "_", $adv_opts_cond.init_weights.element_identifier)}.$gxy2omsext($adv_opts_cond.init_weights.ext)' &&
  #end if
  #if $adv_opts_cond.fasta:
    mkdir adv_opts_cond.fasta &&
    ln -s '$adv_opts_cond.fasta' 'adv_opts_cond.fasta/${re.sub("[^\w\-_]", "_", $adv_opts_cond.fasta.element_identifier)}.$gxy2omsext($adv_opts_cond.fasta.ext)' &&
  #end if
#end if

## Main program call

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
#if $in:
  -in
  ${' '.join(["'in/%s.%s'"%(re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in if _])}
#end if
#if $in_decoy:
  -in_decoy
  ${' '.join(["'in_decoy/%s.%s'"%(re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in_decoy if _])}
#end if
#if $in_osw:
  -in_osw
  'in_osw/${re.sub("[^\w\-_]", "_", $in_osw.element_identifier)}.$gxy2omsext($in_osw.ext)'
#end if
-out
'out/output.${out_type}'
#if "out_pin_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -out_pin
  'out_pin/output.${gxy2omsext("tabular")}'
#end if
#if "weights_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -weights
  'weights/output.${gxy2omsext("txt")}'
#end if
#if $adv_opts_cond.adv_opts_selector=='advanced':
  #if $adv_opts_cond.init_weights:
    -init-weights
    'adv_opts_cond.init_weights/${re.sub("[^\w\-_]", "_", $adv_opts_cond.init_weights.element_identifier)}.$gxy2omsext($adv_opts_cond.init_weights.ext)'
  #end if
  #if $adv_opts_cond.fasta:
    -fasta
    'adv_opts_cond.fasta/${re.sub("[^\w\-_]", "_", $adv_opts_cond.fasta.element_identifier)}.$gxy2omsext($adv_opts_cond.fasta.ext)'
  #end if
#end if
#if len(str($OPTIONAL_OUTPUTS).split(',')) == 0
  | tee '$stdout'
#end if

## Postprocessing
&& mv 'out/output.${out_type}' '$out'
#if "out_pin_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv 'out_pin/output.${gxy2omsext("tabular")}' '$out_pin'
#end if
#if "weights_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv 'weights/output.${gxy2omsext("txt")}' '$weights'
#end if
#if "ctd_out_FLAG" in $OPTIONAL_OUTPUTS
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <configfiles>
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"percolator_executable": "percolator", "log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <param name="in" argument="-in" type="data" format="idxml,mzid" multiple="true" optional="true" label="Input file(s)" help=" select idxml,mzid data sets(s)"/>
    <param name="in_decoy" argument="-in_decoy" type="data" format="idxml,mzid" multiple="true" optional="true" label="Input decoy file(s) in case of separate searches" help=" select idxml,mzid data sets(s)"/>
    <param name="in_osw" argument="-in_osw" type="data" format="osw" optional="true" label="Input file in OSW format" help=" select osw data sets(s)"/>
    <param name="out_type" argument="-out_type" display="radio" type="select" optional="false" label="Output file type -- default: determined from file extension or content" help="">
      <option value="idXML">idxml</option>
      <option value="mzid">mzid</option>
      <option value="osw">osw</option>
      <expand macro="list_string_san"/>
    </param>
    <param name="enzyme" argument="-enzyme" type="select" optional="false" label="Type of enzyme: no_enzyme,elastase,pepsin,proteinasek,thermolysin,chymotrypsin,lys-n,lys-c,arg-c,asp-n,glu-c,trypsin,trypsinp" help="">
      <option value="no_enzyme">no_enzyme</option>
      <option value="elastase">elastase</option>
      <option value="pepsin">pepsin</option>
      <option value="proteinasek">proteinasek</option>
      <option value="thermolysin">thermolysin</option>
      <option value="chymotrypsin">chymotrypsin</option>
      <option value="lys-n">lys-n</option>
      <option value="lys-c">lys-c</option>
      <option value="arg-c">arg-c</option>
      <option value="asp-n">asp-n</option>
      <option value="glu-c">glu-c</option>
      <option value="trypsin" selected="true">trypsin</option>
      <option value="trypsinp">trypsinp</option>
      <expand macro="list_string_san"/>
    </param>
    <param name="peptide_level_fdrs" argument="-peptide-level-fdrs" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Calculate peptide-level FDRs instead of PSM-level FDRs" help=""/>
    <param name="protein_level_fdrs" argument="-protein-level-fdrs" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use the picked protein-level FDR to infer protein probabilities" help="Use the -fasta option and -decoy-pattern to set the Fasta file and decoy pattern"/>
    <param name="osw_level" argument="-osw_level" type="text" optional="true" value="ms2" label="OSW: Either &quot;ms1&quot;, &quot;ms2&quot; or &quot;transition&quot;; the data level selected for scoring" help="">
      <expand macro="list_string_san"/>
    </param>
    <param name="score_type" argument="-score_type" display="radio" type="select" optional="false" label="Type of the peptide main score" help="">
      <option value="q-value" selected="true">q-value</option>
      <option value="pep">pep</option>
      <option value="svm">svm</option>
      <expand macro="list_string_san"/>
    </param>
    <expand macro="adv_opts_macro">
      <param name="generic_feature_set" argument="-generic-feature-set" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use only generic" help="(i.e. not search engine specific) features. Generating search engine specific features for common search engines by PSMFeatureExtractor will typically boost the identification rate significantly"/>
      <param name="subset_max_train" argument="-subset-max-train" type="integer" optional="true" value="0" label="Only train an SVM on a subset of &lt;x&gt; PSMs, and use the resulting score vector to evaluate the other PSMs" help="Recommended when analyzing huge numbers (&gt;1 million) of PSMs. When set to 0, all PSMs are used for training as normal"/>
      <param name="cpos" argument="-cpos" type="float" optional="true" value="0.0" label="Cpos, penalty for mistakes made on positive examples" help="Set by cross validation if not specified"/>
      <param name="cneg" argument="-cneg" type="float" optional="true" value="0.0" label="Cneg, penalty for mistakes made on negative examples" help="Set by cross validation if not specified"/>
      <param name="testFDR" argument="-testFDR" type="float" optional="true" value="0.01" label="False discovery rate threshold for evaluating best cross validation result and the reported end result" help=""/>
      <param name="trainFDR" argument="-trainFDR" type="float" optional="true" value="0.01" label="False discovery rate threshold to define positive examples in training" help="Set to testFDR if 0"/>
      <param name="maxiter" argument="-maxiter" type="integer" optional="true" value="10" label="Maximal number of iterations" help=""/>
      <param name="quick_validation" argument="-quick-validation" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Quicker execution by reduced internal cross-validation" help=""/>
      <param name="init_weights" argument="-init-weights" type="data" format="txt" optional="true" label="Read initial weights to the given file" help=" select txt data sets(s)"/>
      <param name="default_direction" argument="-default-direction" type="text" optional="true" value="" label="The most informative feature given as the feature name, can be negated to indicate that a lower value is bette" help="">
        <expand macro="list_string_san"/>
      </param>
      <param name="verbose" argument="-verbose" type="integer" optional="true" value="2" label="Set verbosity of output: 0=no processing info, 5=all" help=""/>
      <param name="unitnorm" argument="-unitnorm" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use unit normalization [0-1] instead of standard deviation normalization" help=""/>
      <param name="test_each_iteration" argument="-test-each-iteration" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Measure performance on test set each iteration" help=""/>
      <param name="override" argument="-override" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Override error check and do not fall back on default score vector in case of suspect score vecto" help=""/>
      <param name="seed" argument="-seed" type="integer" optional="true" value="1" label="Setting seed of the random number generato" help=""/>
      <param name="doc" argument="-doc" type="integer" optional="true" value="0" label="Include description of correct features" help=""/>
      <param name="klammer" argument="-klammer" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Retention time features calculated as in Klammer et al" help="Only available if -doc is set"/>
      <param name="fasta" argument="-fasta" type="data" format="fasta" optional="true" label="Provide the fasta file as the argument to this flag, which will be used for protein grouping based on an in-silico digest (only valid if option -protein-level-fdrs is active)" help=" select fasta data sets(s)"/>
      <param name="decoy_pattern" argument="-decoy-pattern" type="text" optional="true" value="random" label="Define the text pattern to identify the decoy proteins and/or PSMs, set this up if the label that identifies the decoys in the database is not the default (Only valid if option -protein-level-fdrs is active)" help="">
        <expand macro="list_string_san"/>
      </param>
      <param name="post_processing_tdc" argument="-post-processing-tdc" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use target-decoy competition to assign q-values and PEPs" help=""/>
      <param name="train_best_positive" argument="-train-best-positive" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Enforce that, for each spectrum, at most one PSM is included in the positive set during each training iteration" help="If the user only provides one PSM per spectrum, this filter will have no effect"/>
      <param name="ipf_max_peakgroup_pep" argument="-ipf_max_peakgroup_pep" type="float" optional="true" value="0.7" label="OSW/IPF: Assess transitions only for candidate peak groups until maximum posterior error probability" help=""/>
      <param name="ipf_max_transition_isotope_overlap" argument="-ipf_max_transition_isotope_overlap" type="float" optional="true" value="0.5" label="OSW/IPF: Maximum isotope overlap to consider transitions in IPF" help=""/>
      <param name="ipf_min_transition_sn" argument="-ipf_min_transition_sn" type="float" optional="true" value="0.0" label="OSW/IPF: Minimum log signal-to-noise level to consider transitions in IPF" help="Set -1 to disable this filter"/>
      <param name="force" argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overwrite tool specific checks" help=""/>
      <param name="test" argument="-test" type="hidden" optional="true" value="False" label="Enables the test mode (needed for internal use only)" help="">
        <expand macro="list_string_san"/>
      </param>
    </expand>
    <param name="OPTIONAL_OUTPUTS" type="select" multiple="true" label="Optional outputs" optional="true">
      <option value="out_pin_FLAG">out_pin (Enables the test mode (needed for internal use only))</option>
      <option value="weights_FLAG">weights (Enables the test mode (needed for internal use only))</option>
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <outputs>
    <data name="out" label="${tool.name} on ${on_string}: out">
      <change_format>
        <when input="out_type" value="idXML" format="idxml"/>
        <when input="out_type" value="mzid" format="mzid"/>
        <when input="out_type" value="osw" format="osw"/>
      </change_format>
    </data>
    <data name="out_pin" label="${tool.name} on ${on_string}: out_pin" format="tabular">
      <filter>OPTIONAL_OUTPUTS is not None and "out_pin_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
    <data name="weights" label="${tool.name} on ${on_string}: weights" format="txt">
      <filter>OPTIONAL_OUTPUTS is not None and "weights_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
    <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests>
    <expand macro="autotest_PercolatorAdapter"/>
    <expand macro="manutest_PercolatorAdapter"/>
  </tests>
  <help><![CDATA[Facilitate input to Percolator and reintegrate.


For more information, visit http://www.openms.de/documentation/TOPP_PercolatorAdapter.html]]></help>
  <expand macro="references"/>
</tool>