Mercurial > repos > galaxyp > openms_percolatoradapter
diff PercolatorAdapter.xml @ 4:e7881a82b56d draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit ddf41e8bda1ba065f5cdec98e93dee8165ffc1b9"
author | galaxyp |
---|---|
date | Thu, 27 Aug 2020 19:52:08 -0400 |
parents | 4ed4e0b7f57c |
children | 147aaac03456 |
line wrap: on
line diff
--- a/PercolatorAdapter.xml Fri May 17 09:53:53 2019 -0400 +++ b/PercolatorAdapter.xml Thu Aug 27 19:52:08 2020 -0400 @@ -1,135 +1,119 @@ <?xml version='1.0' encoding='UTF-8'?> <!--This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.--> <!--Proposed Tool Section: [ID Processing]--> -<tool id="PercolatorAdapter" name="PercolatorAdapter" version="2.3.0"> +<tool id="PercolatorAdapter" name="PercolatorAdapter" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@" profile="20.05"> <description>Facilitate input to Percolator and reintegrate.</description> <macros> <token name="@EXECUTABLE@">PercolatorAdapter</token> <import>macros.xml</import> + <import>macros_autotest.xml</import> + <import>macros_test.xml</import> </macros> - <expand macro="references"/> + <expand macro="requirements"/> <expand macro="stdio"/> - <expand macro="requirements"/> - <command detect_errors="aggressive"><![CDATA[PercolatorAdapter + <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@ +@EXT_FOO@ +#import re --in - #for token in $param_in: - $token - #end for --in_decoy - #for token in $param_in_decoy: - $token - #end for -#if $param_out: - -out $param_out +## Preprocessing +#if $in: + mkdir in && + ${ ' '.join(["ln -s '%s' 'in/%s.%s' &&" % (_, re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in if _]) } +#end if +#if $in_decoy: + mkdir in_decoy && + ${ ' '.join(["ln -s '%s' 'in_decoy/%s.%s' &&" % (_, re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in_decoy if _]) } +#end if +#if $in_osw: + mkdir in_osw && + ln -s '$in_osw' 'in_osw/${re.sub("[^\w\-_]", "_", $in_osw.element_identifier)}.$gxy2omsext($in_osw.ext)' && #end if -#if $param_mzid_out: - -mzid_out $param_mzid_out +mkdir out && +#if "out_pin_FLAG" in str($OPTIONAL_OUTPUTS).split(',') + mkdir out_pin && +#end if +#if "weights_FLAG" in str($OPTIONAL_OUTPUTS).split(',') + mkdir weights && #end if -#if $param_enzyme: - -enzyme - #if " " in str($param_enzyme): - "$param_enzyme" - #else - $param_enzyme +#if $adv_opts_cond.adv_opts_selector=='advanced': + #if $adv_opts_cond.init_weights: + mkdir adv_opts_cond.init_weights && + ln -s '$adv_opts_cond.init_weights' 'adv_opts_cond.init_weights/${re.sub("[^\w\-_]", "_", $adv_opts_cond.init_weights.element_identifier)}.$gxy2omsext($adv_opts_cond.init_weights.ext)' && + #end if + #if $adv_opts_cond.fasta: + mkdir adv_opts_cond.fasta && + ln -s '$adv_opts_cond.fasta' 'adv_opts_cond.fasta/${re.sub("[^\w\-_]", "_", $adv_opts_cond.fasta.element_identifier)}.$gxy2omsext($adv_opts_cond.fasta.ext)' && #end if #end if -#if $param_percolator_executable: - -percolator_executable $param_percolator_executable -#end if -#if $param_peptide_level_fdrs: - -peptide-level-fdrs -#end if -#if $param_protein_level_fdrs: - -protein-level-fdrs -#end if -#if $param_weights: - -weights $param_weights -#end if -#if $adv_opts.adv_opts_selector=='advanced': - #if $adv_opts.param_generic_feature_set: - -generic-feature-set -#end if - #if $adv_opts.param_subset_max_train: - -subset-max-train $adv_opts.param_subset_max_train -#end if - #if $adv_opts.param_cpos: - -cpos $adv_opts.param_cpos + +## Main program call + +set -o pipefail && +@EXECUTABLE@ -write_ctd ./ && +python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' && +@EXECUTABLE@ -ini @EXECUTABLE@.ctd +#if $in: + -in + ${' '.join(["'in/%s.%s'"%(re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in if _])} #end if - #if $adv_opts.param_cneg: - -cneg $adv_opts.param_cneg -#end if - #if $adv_opts.param_testFDR: - -testFDR $adv_opts.param_testFDR +#if $in_decoy: + -in_decoy + ${' '.join(["'in_decoy/%s.%s'"%(re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in_decoy if _])} #end if - #if $adv_opts.param_trainFDR: - -trainFDR $adv_opts.param_trainFDR -#end if - #if $adv_opts.param_maxiter: - -maxiter $adv_opts.param_maxiter +#if $in_osw: + -in_osw + 'in_osw/${re.sub("[^\w\-_]", "_", $in_osw.element_identifier)}.$gxy2omsext($in_osw.ext)' #end if - #if $adv_opts.param_quick_validation: - -quick-validation -#end if - #if $adv_opts.param_init_weights: - -init-weights $adv_opts.param_init_weights +-out +'out/output.${out_type}' +#if "out_pin_FLAG" in str($OPTIONAL_OUTPUTS).split(',') + -out_pin + 'out_pin/output.${gxy2omsext("tabular")}' #end if - #if $adv_opts.param_default_direction: - -default-direction "$adv_opts.param_default_direction" -#end if - #if $adv_opts.param_verbose: - -verbose $adv_opts.param_verbose +#if "weights_FLAG" in str($OPTIONAL_OUTPUTS).split(',') + -weights + 'weights/output.${gxy2omsext("txt")}' #end if - #if $adv_opts.param_unitnorm: - -unitnorm -#end if - #if $adv_opts.param_test_each_iteration: - -test-each-iteration -#end if - #if $adv_opts.param_override: - -override -#end if - #if $adv_opts.param_seed: - -seed $adv_opts.param_seed +#if $adv_opts_cond.adv_opts_selector=='advanced': + #if $adv_opts_cond.init_weights: + -init-weights + 'adv_opts_cond.init_weights/${re.sub("[^\w\-_]", "_", $adv_opts_cond.init_weights.element_identifier)}.$gxy2omsext($adv_opts_cond.init_weights.ext)' + #end if + #if $adv_opts_cond.fasta: + -fasta + 'adv_opts_cond.fasta/${re.sub("[^\w\-_]", "_", $adv_opts_cond.fasta.element_identifier)}.$gxy2omsext($adv_opts_cond.fasta.ext)' + #end if #end if - #if $adv_opts.param_doc: - -doc $adv_opts.param_doc -#end if - #if $adv_opts.param_klammer: - -klammer +#if len(str($OPTIONAL_OUTPUTS).split(',')) == 0 + | tee '$stdout' #end if - #if $adv_opts.param_fasta: - -fasta $adv_opts.param_fasta -#end if - #if $adv_opts.param_decoy_pattern: - -decoy-pattern "$adv_opts.param_decoy_pattern" + +## Postprocessing +&& mv 'out/output.${out_type}' '$out' +#if "out_pin_FLAG" in str($OPTIONAL_OUTPUTS).split(',') + && mv 'out_pin/output.${gxy2omsext("tabular")}' '$out_pin' #end if - #if $adv_opts.param_post_processing_tdc: - -post-processing-tdc +#if "weights_FLAG" in str($OPTIONAL_OUTPUTS).split(',') + && mv 'weights/output.${gxy2omsext("txt")}' '$weights' #end if - #if $adv_opts.param_force: - -force -#end if -#end if -]]></command> +#if "ctd_out_FLAG" in $OPTIONAL_OUTPUTS + && mv '@EXECUTABLE@.ctd' '$ctd_out' +#end if]]></command> + <configfiles> + <inputs name="args_json" data_style="paths"/> + <configfile name="hardcoded_json"><![CDATA[{"percolator_executable": "percolator", "log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile> + </configfiles> <inputs> - <param name="param_in" type="data" format="mzid,idxml" multiple="true" optional="False" size="30" label="Input file(s)" help="(-in) "> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - <remove value="""/> - </valid> - </sanitizer> + <param name="in" argument="-in" type="data" format="idxml,mzid" multiple="true" optional="true" label="Input file(s)" help=" select idxml,mzid data sets(s)"/> + <param name="in_decoy" argument="-in_decoy" type="data" format="idxml,mzid" multiple="true" optional="true" label="Input decoy file(s) in case of separate searches" help=" select idxml,mzid data sets(s)"/> + <param name="in_osw" argument="-in_osw" type="data" format="osw" optional="true" label="Input file in OSW format" help=" select osw data sets(s)"/> + <param name="out_type" argument="-out_type" display="radio" type="select" optional="false" label="Output file type -- default: determined from file extension or content" help=""> + <option value="idXML">idxml</option> + <option value="mzid">mzid</option> + <option value="osw">osw</option> + <expand macro="list_string_san"/> </param> - <param name="param_in_decoy" type="data" format="mzid,idxml" multiple="true" optional="True" size="30" label="Input decoy file(s) in case of separate searches" help="(-in_decoy) "> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - <remove value="""/> - </valid> - </sanitizer> - </param> - <param name="param_enzyme" type="select" optional="False" value="trypsin" label="Type of enzyme: no_enzyme,elastase,pepsin,proteinasek,thermolysin,chymotrypsin,lys-n,lys-c,arg-c,asp-n,glu-c,trypsin" help="(-enzyme) "> + <param name="enzyme" argument="-enzyme" type="select" optional="false" label="Type of enzyme: no_enzyme,elastase,pepsin,proteinasek,thermolysin,chymotrypsin,lys-n,lys-c,arg-c,asp-n,glu-c,trypsin,trypsinp" help=""> <option value="no_enzyme">no_enzyme</option> <option value="elastase">elastase</option> <option value="pepsin">pepsin</option> @@ -142,55 +126,85 @@ <option value="asp-n">asp-n</option> <option value="glu-c">glu-c</option> <option value="trypsin" selected="true">trypsin</option> + <option value="trypsinp">trypsinp</option> + <expand macro="list_string_san"/> </param> - <param name="param_percolator_executable" type="data" format="txt" value="percolator" label="Percolator executable of the installation" help="(-percolator_executable) e.g. 'percolator.exe'"/> - <param name="param_peptide_level_fdrs" display="radio" type="boolean" truevalue="-peptide-level-fdrs" falsevalue="" checked="false" optional="True" label="Calculate peptide-level FDRs instead of PSM-level FDRs" help="(-peptide-level-fdrs) "/> - <param name="param_protein_level_fdrs" display="radio" type="boolean" truevalue="-protein-level-fdrs" falsevalue="" checked="false" optional="True" label="Use the picked protein-level FDR to infer protein probabilities" help="(-protein-level-fdrs) Use the -fasta option and -decoy-pattern to set the Fasta file and decoy pattern"/> - <expand macro="advanced_options"> - <param name="param_generic_feature_set" display="radio" type="boolean" truevalue="-generic-feature-set" falsevalue="" checked="false" optional="True" label="Use only generic (i.e" help="(-generic-feature-set) not search engine specific) features. Generating search engine specific features for common search engines by PSMFeatureExtractor will typically boost the identification rate significantly"/> - <param name="param_subset_max_train" type="integer" value="0" label="Only train an SVM on a subset of <x> PSMs, and use the resulting score vector to evaluate the other PSMs" help="(-subset-max-train) Recommended when analyzing huge numbers (>1 million) of PSMs. When set to 0, all PSMs are used for training as normal"/> - <param name="param_cpos" type="float" value="0.0" label="Cpos, penalty for mistakes made on positive examples" help="(-cpos) Set by cross validation if not specified"/> - <param name="param_cneg" type="float" value="0.0" label="Cneg, penalty for mistakes made on negative examples" help="(-cneg) Set by cross validation if not specified"/> - <param name="param_testFDR" type="float" value="0.01" label="False discovery rate threshold for evaluating best cross validation result and the reported end result" help="(-testFDR) "/> - <param name="param_trainFDR" type="float" value="0.01" label="False discovery rate threshold to define positive examples in training" help="(-trainFDR) Set to testFDR if 0"/> - <param name="param_maxiter" type="integer" value="10" label="Maximal number of iterations" help="(-maxiter) "/> - <param name="param_quick_validation" display="radio" type="boolean" truevalue="-quick-validation" falsevalue="" checked="false" optional="True" label="Quicker execution by reduced internal cross-validation" help="(-quick-validation) "/> - <param name="param_init_weights" type="data" format="txt" label="Read initial weights to the given file" help="(-init-weights) "/> - <param name="param_default_direction" type="text" size="30" label="The most informative feature given as the feature name, can be negated to indicate that a lower value is bette" help="(-default-direction) "> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - <remove value="""/> - </valid> - </sanitizer> + <param name="peptide_level_fdrs" argument="-peptide-level-fdrs" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Calculate peptide-level FDRs instead of PSM-level FDRs" help=""/> + <param name="protein_level_fdrs" argument="-protein-level-fdrs" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use the picked protein-level FDR to infer protein probabilities" help="Use the -fasta option and -decoy-pattern to set the Fasta file and decoy pattern"/> + <param name="osw_level" argument="-osw_level" type="text" optional="true" value="ms2" label="OSW: Either "ms1", "ms2" or "transition"; the data level selected for scoring" help=""> + <expand macro="list_string_san"/> + </param> + <param name="score_type" argument="-score_type" display="radio" type="select" optional="false" label="Type of the peptide main score" help=""> + <option value="q-value" selected="true">q-value</option> + <option value="pep">pep</option> + <option value="svm">svm</option> + <expand macro="list_string_san"/> + </param> + <expand macro="adv_opts_macro"> + <param name="generic_feature_set" argument="-generic-feature-set" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use only generic" help="(i.e. not search engine specific) features. Generating search engine specific features for common search engines by PSMFeatureExtractor will typically boost the identification rate significantly"/> + <param name="subset_max_train" argument="-subset-max-train" type="integer" optional="true" value="0" label="Only train an SVM on a subset of <x> PSMs, and use the resulting score vector to evaluate the other PSMs" help="Recommended when analyzing huge numbers (>1 million) of PSMs. When set to 0, all PSMs are used for training as normal"/> + <param name="cpos" argument="-cpos" type="float" optional="true" value="0.0" label="Cpos, penalty for mistakes made on positive examples" help="Set by cross validation if not specified"/> + <param name="cneg" argument="-cneg" type="float" optional="true" value="0.0" label="Cneg, penalty for mistakes made on negative examples" help="Set by cross validation if not specified"/> + <param name="testFDR" argument="-testFDR" type="float" optional="true" value="0.01" label="False discovery rate threshold for evaluating best cross validation result and the reported end result" help=""/> + <param name="trainFDR" argument="-trainFDR" type="float" optional="true" value="0.01" label="False discovery rate threshold to define positive examples in training" help="Set to testFDR if 0"/> + <param name="maxiter" argument="-maxiter" type="integer" optional="true" value="10" label="Maximal number of iterations" help=""/> + <param name="quick_validation" argument="-quick-validation" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Quicker execution by reduced internal cross-validation" help=""/> + <param name="init_weights" argument="-init-weights" type="data" format="txt" optional="true" label="Read initial weights to the given file" help=" select txt data sets(s)"/> + <param name="default_direction" argument="-default-direction" type="text" optional="true" value="" label="The most informative feature given as the feature name, can be negated to indicate that a lower value is bette" help=""> + <expand macro="list_string_san"/> </param> - <param name="param_verbose" type="integer" value="2" label="Set verbosity of output: 0=no processing info, 5=all" help="(-verbose) "/> - <param name="param_unitnorm" display="radio" type="boolean" truevalue="-unitnorm" falsevalue="" checked="false" optional="True" label="Use unit normalization [0-1] instead of standard deviation normalization" help="(-unitnorm) "/> - <param name="param_test_each_iteration" display="radio" type="boolean" truevalue="-test-each-iteration" falsevalue="" checked="false" optional="True" label="Measure performance on test set each iteration" help="(-test-each-iteration) "/> - <param name="param_override" display="radio" type="boolean" truevalue="-override" falsevalue="" checked="false" optional="True" label="Override error check and do not fall back on default score vector in case of suspect score vecto" help="(-override) "/> - <param name="param_seed" type="integer" value="1" label="Setting seed of the random number generato" help="(-seed) "/> - <param name="param_doc" type="integer" value="0" label="Include description of correct features" help="(-doc) "/> - <param name="param_klammer" display="radio" type="boolean" truevalue="-klammer" falsevalue="" checked="false" optional="True" label="Retention time features calculated as in Klammer et al" help="(-klammer) Only available if -doc is set"/> - <param name="param_fasta" type="data" format="fasta" optional="True" label="Provide the fasta file as the argument to this flag, which will be used for protein grouping based on an in-silico digest (only valid if option -protein-level-fdrs is active)" help="(-fasta) "/> - <param name="param_decoy_pattern" type="text" size="30" value="random" label="Define the text pattern to identify the decoy proteins and/or PSMs, set this up if the label that identifies the decoys in the database is not the default (Only valid if option -protein-level-fdrs is active)" help="(-decoy-pattern) "> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - <remove value="""/> - </valid> - </sanitizer> + <param name="verbose" argument="-verbose" type="integer" optional="true" value="2" label="Set verbosity of output: 0=no processing info, 5=all" help=""/> + <param name="unitnorm" argument="-unitnorm" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use unit normalization [0-1] instead of standard deviation normalization" help=""/> + <param name="test_each_iteration" argument="-test-each-iteration" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Measure performance on test set each iteration" help=""/> + <param name="override" argument="-override" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Override error check and do not fall back on default score vector in case of suspect score vecto" help=""/> + <param name="seed" argument="-seed" type="integer" optional="true" value="1" label="Setting seed of the random number generato" help=""/> + <param name="doc" argument="-doc" type="integer" optional="true" value="0" label="Include description of correct features" help=""/> + <param name="klammer" argument="-klammer" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Retention time features calculated as in Klammer et al" help="Only available if -doc is set"/> + <param name="fasta" argument="-fasta" type="data" format="fasta" optional="true" label="Provide the fasta file as the argument to this flag, which will be used for protein grouping based on an in-silico digest (only valid if option -protein-level-fdrs is active)" help=" select fasta data sets(s)"/> + <param name="decoy_pattern" argument="-decoy-pattern" type="text" optional="true" value="random" label="Define the text pattern to identify the decoy proteins and/or PSMs, set this up if the label that identifies the decoys in the database is not the default (Only valid if option -protein-level-fdrs is active)" help=""> + <expand macro="list_string_san"/> </param> - <param name="param_post_processing_tdc" display="radio" type="boolean" truevalue="-post-processing-tdc" falsevalue="" checked="false" optional="True" label="Use target-decoy competition to assign q-values and PEPs" help="(-post-processing-tdc) "/> - <param name="param_force" display="radio" type="boolean" truevalue="-force" falsevalue="" checked="false" optional="True" label="Overwrite tool specific checks" help="(-force) "/> + <param name="post_processing_tdc" argument="-post-processing-tdc" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use target-decoy competition to assign q-values and PEPs" help=""/> + <param name="train_best_positive" argument="-train-best-positive" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Enforce that, for each spectrum, at most one PSM is included in the positive set during each training iteration" help="If the user only provides one PSM per spectrum, this filter will have no effect"/> + <param name="ipf_max_peakgroup_pep" argument="-ipf_max_peakgroup_pep" type="float" optional="true" value="0.7" label="OSW/IPF: Assess transitions only for candidate peak groups until maximum posterior error probability" help=""/> + <param name="ipf_max_transition_isotope_overlap" argument="-ipf_max_transition_isotope_overlap" type="float" optional="true" value="0.5" label="OSW/IPF: Maximum isotope overlap to consider transitions in IPF" help=""/> + <param name="ipf_min_transition_sn" argument="-ipf_min_transition_sn" type="float" optional="true" value="0.0" label="OSW/IPF: Minimum log signal-to-noise level to consider transitions in IPF" help="Set -1 to disable this filter"/> + <param name="force" argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overwrite tool specific checks" help=""/> + <param name="test" argument="-test" type="hidden" optional="true" value="False" label="Enables the test mode (needed for internal use only)" help=""> + <expand macro="list_string_san"/> + </param> </expand> + <param name="OPTIONAL_OUTPUTS" type="select" multiple="true" label="Optional outputs" optional="true"> + <option value="out_pin_FLAG">out_pin (Enables the test mode (needed for internal use only))</option> + <option value="weights_FLAG">weights (Enables the test mode (needed for internal use only))</option> + <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option> + </param> </inputs> <outputs> - <data name="param_out" format="idxml"/> - <data name="param_mzid_out" format="mzid"/> - <data name="param_weights" format="data"/> + <data name="out" label="${tool.name} on ${on_string}: out"> + <change_format> + <when input="out_type" value="idXML" format="idxml"/> + <when input="out_type" value="mzid" format="mzid"/> + <when input="out_type" value="osw" format="osw"/> + </change_format> + </data> + <data name="out_pin" label="${tool.name} on ${on_string}: out_pin" format="tabular"> + <filter>OPTIONAL_OUTPUTS is not None and "out_pin_FLAG" in OPTIONAL_OUTPUTS</filter> + </data> + <data name="weights" label="${tool.name} on ${on_string}: weights" format="txt"> + <filter>OPTIONAL_OUTPUTS is not None and "weights_FLAG" in OPTIONAL_OUTPUTS</filter> + </data> + <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd"> + <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter> + </data> </outputs> - <help>Facilitate input to Percolator and reintegrate. + <tests> + <expand macro="autotest_PercolatorAdapter"/> + <expand macro="manutest_PercolatorAdapter"/> + </tests> + <help><![CDATA[Facilitate input to Percolator and reintegrate. -For more information, visit https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/release/2.3.0/html/TOPP_PercolatorAdapter.html</help> +For more information, visit http://www.openms.de/documentation/TOPP_PercolatorAdapter.html]]></help> + <expand macro="references"/> </tool>