view DecoyDatabase.xml @ 16:06cca574e337 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
author galaxyp
date Fri, 14 Jun 2024 21:33:28 +0000
parents 25529df60a81
children
line wrap: on
line source

<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [File Filtering / Extraction / Merging]-->
<tool id="DecoyDatabase" name="DecoyDatabase" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <!-- Short summary of what the tool does. -->
  <description>Create decoy sequence database from forward sequence database</description>
  <!-- @EXECUTABLE@ is defined here and is substituted into the command template and the
       test stdout assertions of this file. The other @...@ tokens used in this file
       (@TOOL_VERSION@, @VERSION_SUFFIX@, @QUOTE_FOO@, @EXT_FOO@) and the macros expanded
       below are not defined in this file; presumably they come from macros.xml
       (TODO confirm). -->
  <macros>
    <token name="@EXECUTABLE@">DecoyDatabase</token>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <!-- Command template (Cheetah directives mixed with shell). It runs three stages:
       1. Preprocessing: creates in_cond.in/ and copies the input(s) into it. In joint mode
          (in_select == "no") every non-empty dataset gets its own numbered subdirectory
          in_cond.in/INDEX/; in batch mode the single dataset is copied directly into
          in_cond.in/. File names are the element_identifier with every character that is
          not a word character or '-' replaced by '_', followed by the extension returned
          by gxy2omsext(ext). gxy2omsext is not defined in this file; presumably it is
          provided through @EXT_FOO@ (TODO confirm).
       2. Main call: writes a default CTD with -write_ctd, lets fill_ctd.py fill it from
          args_json and hardcoded_json, then runs the executable with -ini, -in and -out.
       3. Postprocessing: moves the result to $out and, when ctd_out_FLAG is selected in
          OPTIONAL_OUTPUTS, moves the CTD file to $ctd_out. -->
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
mkdir in_cond.in &&
#if $in_cond.in_select == "no"
mkdir ${' '.join(["'in_cond.in/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} && 
${' '.join(["cp '%s' 'in_cond.in/%s/%s.%s' && " % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
cp '$in_cond.in' 'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)' &&
#end if
mkdir out &&

## Main program call

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in
#if $in_cond.in_select == "no"
${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)'
#end if
-out
'out/output.${gxy2omsext("fasta")}'

## Postprocessing
&& mv 'out/output.${gxy2omsext("fasta")}' '$out'
#if "ctd_out_FLAG" in $OPTIONAL_OUTPUTS
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <!-- Two JSON files that the command passes to fill_ctd.py:
       args_json      : the tool inputs, declared with data_style="paths".
       hardcoded_json : fixed settings, namely the log file name, a thread count written as
                        ${GALAXY_SLOTS:-1} (GALAXY_SLOTS with a default of 1) and
                        no_progress set to true. How fill_ctd.py resolves the thread
                        expression is not visible in this file. -->
  <configfiles>
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <!-- Chooses how -in is supplied. "no" accepts several datasets in a single job
         (multiple="true"); "yes" accepts one dataset per job. The command template
         branches on in_select to stage the files accordingly.
         The help text previously ran two sentences together and ended in
         "data sets(s)"; it is now split and spelled correctly. -->
    <conditional name="in_cond">
      <param name="in_select" type="select" label="Run tool in batch mode for -in">
        <option value="no">No: process all datasets jointly</option>
        <option value="yes">Yes: process each dataset in an independent job</option>
      </param>
      <when value="no">
        <param argument="-in" type="data" format="fasta" multiple="true" label="Input FASTA file(s), each containing a database" help="It is recommended to include a contaminant database as well. Select FASTA dataset(s)"/>
      </when>
      <when value="yes">
        <param argument="-in" type="data" format="fasta" label="Input FASTA file(s), each containing a database" help="It is recommended to include a contaminant database as well. Select FASTA dataset(s)"/>
      </when>
    </conditional>
    <!-- Decoy naming: the marker string and where it is attached to the accession. -->
    <param argument="-decoy_string"
           type="text"
           value="DECOY_"
           label="String that is combined with the accession of the protein identifier to indicate a decoy protein"
           help="">
      <expand macro="list_string_san" name="decoy_string"/>
    </param>
    <param argument="-decoy_string_position"
           type="select"
           label="Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?"
           help="">
      <option value="prefix" selected="true">prefix</option>
      <option value="suffix">suffix</option>
      <expand macro="list_string_san" name="decoy_string_position"/>
    </param>
    <!-- Unchecked by default, i.e. a combined database is written unless this is set. -->
    <param argument="-only_decoy"
           type="boolean"
           truevalue="true"
           falsevalue="false"
           checked="false"
           label="Write only decoy proteins to the output database instead of a combined database"
           help=""/>
    <!-- Sequence type; "protein" is preselected. -->
    <param argument="-type"
           type="select"
           label="Type of sequence"
           help="RNA sequences may contain modification codes, which will be handled correctly if this is set to 'RNA'">
      <option value="protein" selected="true">protein</option>
      <option value="RNA">RNA</option>
      <expand macro="list_string_san" name="type"/>
    </param>
    <!-- Decoy generation method; "reverse" is preselected. -->
    <param argument="-method"
           type="select"
           label="Method by which decoy sequences are generated from target sequences"
           help="Note that all sequences are shuffled using the same random seed, ensuring that identical sequences produce the same shuffled decoy sequences. Shuffled sequences that produce highly similar output sequences are shuffled again (see shuffle_sequence_identity_threshold)">
      <option value="reverse" selected="true">reverse</option>
      <option value="shuffle">shuffle</option>
      <expand macro="list_string_san" name="method"/>
    </param>
    <!-- Digestion enzyme choices; Trypsin is the preselected default. According to the
         help text the setting only applies when 'type' is 'protein'. -->
    <param argument="-enzyme" type="select" label="Enzyme used for the digestion of the sample" help="Only applicable if parameter 'type' is 'protein'">
      <option value="glutamyl endopeptidase">glutamyl endopeptidase</option>
      <option value="Alpha-lytic protease">Alpha-lytic protease</option>
      <option value="no cleavage">no cleavage</option>
      <option value="unspecific cleavage">unspecific cleavage</option>
      <option value="Trypsin" selected="true">Trypsin</option>
      <option value="Arg-C">Arg-C</option>
      <option value="Arg-C/P">Arg-C/P</option>
      <option value="Asp-N">Asp-N</option>
      <option value="Asp-N/B">Asp-N/B</option>
      <option value="Glu-C+P">Glu-C+P</option>
      <option value="PepsinA + P">PepsinA + P</option>
      <option value="cyanogen-bromide">cyanogen-bromide</option>
      <option value="Clostripain/P">Clostripain/P</option>
      <option value="elastase-trypsin-chymotrypsin">elastase-trypsin-chymotrypsin</option>
      <option value="Asp-N_ambic">Asp-N_ambic</option>
      <option value="Chymotrypsin">Chymotrypsin</option>
      <option value="Chymotrypsin/P">Chymotrypsin/P</option>
      <option value="CNBr">CNBr</option>
      <option value="Formic_acid">Formic_acid</option>
      <option value="Lys-C">Lys-C</option>
      <option value="Lys-N">Lys-N</option>
      <option value="Lys-C/P">Lys-C/P</option>
      <option value="PepsinA">PepsinA</option>
      <option value="TrypChymo">TrypChymo</option>
      <option value="Trypsin/P">Trypsin/P</option>
      <option value="V8-DE">V8-DE</option>
      <option value="V8-E">V8-E</option>
      <option value="leukocyte elastase">leukocyte elastase</option>
      <option value="proline endopeptidase">proline endopeptidase</option>
      <option value="2-iodobenzoate">2-iodobenzoate</option>
      <option value="iodosobenzoate">iodosobenzoate</option>
      <option value="staphylococcal protease/D">staphylococcal protease/D</option>
      <option value="proline-endopeptidase/HKR">proline-endopeptidase/HKR</option>
      <expand macro="list_string_san" name="enzyme"/>
    </param>
    <!-- Parameters of the "Decoy" subsection; each maps to a -Decoy:NAME argument. -->
    <section name="Decoy" title="Decoy parameters section" help="" expanded="false">
      <!-- Optional and empty by default. -->
      <param name="non_shuffle_pattern"
             argument="-Decoy:non_shuffle_pattern"
             type="text"
             optional="true"
             value=""
             label="Residues to not shuffle (keep at a constant position when shuffling)"
             help="Separate by comma, e.g. use 'K,P,R' here">
        <expand macro="list_string_san" name="non_shuffle_pattern"/>
      </param>
      <!-- Both terminus switches are checked by default. -->
      <param name="keepPeptideNTerm"
             argument="-Decoy:keepPeptideNTerm"
             type="boolean"
             truevalue="true"
             falsevalue="false"
             checked="true"
             label="Whether to keep peptide N terminus constant when shuffling / reversing"
             help=""/>
      <param name="keepPeptideCTerm"
             argument="-Decoy:keepPeptideCTerm"
             type="boolean"
             truevalue="true"
             falsevalue="false"
             checked="true"
             label="Whether to keep peptide C terminus constant when shuffling / reversing"
             help=""/>
    </section>
    <!-- Advanced options, passed as content to the adv_opts_macro macro. The tests in this
         file address these parameters through a section named "adv_opts". -->
    <expand macro="adv_opts_macro">
      <param argument="-shuffle_max_attempts"
             type="integer"
             value="30"
             label="shuffle: maximum attempts to lower the amino acid sequence identity between target and decoy for the shuffle algorithm"
             help=""/>
      <param argument="-shuffle_sequence_identity_threshold"
             type="float"
             value="0.5"
             label="shuffle: target-decoy amino acid sequence identity threshold for the shuffle algorithm"
             help="If the sequence identity is above this threshold, shuffling is repeated. In case of repeated failure, individual amino acids are 'mutated' to produce a different amino acid sequence"/>
      <!-- Text rather than integer: the label allows the literal 'time' as a value. -->
      <param argument="-seed"
             type="text"
             value="1"
             label="Random number seed (use 'time' for system time)"
             help="">
        <expand macro="list_string_san" name="seed"/>
      </param>
      <param argument="-force"
             type="boolean"
             truevalue="true"
             falsevalue="false"
             checked="false"
             label="Overrides tool-specific checks"
             help=""/>
      <!-- Hidden parameter; the tests in this file set it to "true". -->
      <param argument="-test"
             type="hidden"
             value="False"
             label="Enables the test mode (needed for internal use only)"
             help=""
             optional="true">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- Selecting ctd_out_FLAG enables the ctd_out output (via its filter) and the final
         mv of the CTD file in the command template. -->
    <param name="OPTIONAL_OUTPUTS"
           type="select"
           optional="true"
           multiple="true"
           label="Optional outputs">
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <outputs>
    <!-- Primary result: the FASTA database written by the tool. -->
    <data name="out"
          format="fasta"
          label="${tool.name} on ${on_string}: out"/>
    <!-- The CTD configuration that was used; only present when ctd_out_FLAG is selected. -->
    <data name="ctd_out"
          format="xml"
          label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests>
    <!-- TOPP_DecoyDatabase_1: reversed decoys of DecoyDatabase_1.fasta with the DECOY_
         prefix, Trypsin, seed 1, writing decoys only (only_decoy = true).
         The FASTA output is compared by size (sim_size, delta_frac 0.7) rather than by
         content; the CTD output must be valid XML and stdout must match the timing line
         of the executable. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="shuffle_max_attempts" value="30"/>
        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
        <param name="seed" value="1"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="DecoyDatabase_1.fasta"/>
      </conditional>
      <output name="out" value="DecoyDatabase_1_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
      <param name="decoy_string" value="DECOY_"/>
      <param name="decoy_string_position" value="prefix"/>
      <param name="only_decoy" value="true"/>
      <param name="type" value="protein"/>
      <param name="method" value="reverse"/>
      <param name="enzyme" value="Trypsin"/>
      <section name="Decoy">
        <param name="non_shuffle_pattern" value=""/>
        <param name="keepPeptideNTerm" value="true"/>
        <param name="keepPeptideCTerm" value="true"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_DecoyDatabase_2: shuffled decoys of DecoyDatabase_1.fasta with the DECOY_SEQ_
         prefix, Trypsin, seed 42, combined output (only_decoy = false) and
         non_shuffle_pattern "KRP".
         NOTE(review): the pattern is not comma-separated although the parameter help asks
         for comma separation; presumably this mirrors the upstream OpenMS test, confirm.
         Output checks: FASTA compared by size (sim_size, delta_frac 0.7), CTD must be
         valid XML, stdout must match the timing line of the executable. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="shuffle_max_attempts" value="30"/>
        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
        <param name="seed" value="42"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="DecoyDatabase_1.fasta"/>
      </conditional>
      <output name="out" value="DecoyDatabase_2_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
      <param name="decoy_string" value="DECOY_SEQ_"/>
      <param name="decoy_string_position" value="prefix"/>
      <param name="only_decoy" value="false"/>
      <param name="type" value="protein"/>
      <param name="method" value="shuffle"/>
      <param name="enzyme" value="Trypsin"/>
      <section name="Decoy">
        <param name="non_shuffle_pattern" value="KRP"/>
        <param name="keepPeptideNTerm" value="true"/>
        <param name="keepPeptideCTerm" value="true"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_DecoyDatabase_3: same input and settings as TOPP_DecoyDatabase_2 (shuffle,
         DECOY_SEQ_ prefix, seed 42, combined output) except that the enzyme is
         Chymotrypsin and non_shuffle_pattern is "KR".
         Output checks: FASTA compared by size (sim_size, delta_frac 0.7), CTD must be
         valid XML, stdout must match the timing line of the executable. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="shuffle_max_attempts" value="30"/>
        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
        <param name="seed" value="42"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="DecoyDatabase_1.fasta"/>
      </conditional>
      <output name="out" value="DecoyDatabase_3_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
      <param name="decoy_string" value="DECOY_SEQ_"/>
      <param name="decoy_string_position" value="prefix"/>
      <param name="only_decoy" value="false"/>
      <param name="type" value="protein"/>
      <param name="method" value="shuffle"/>
      <param name="enzyme" value="Chymotrypsin"/>
      <section name="Decoy">
        <param name="non_shuffle_pattern" value="KR"/>
        <param name="keepPeptideNTerm" value="true"/>
        <param name="keepPeptideCTerm" value="true"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_DecoyDatabase_4: RNA input (DecoyDatabase_4.fasta, type = RNA), reversed
         decoys with the DECOY_SEQ_ prefix, seed 42, combined output (only_decoy = false).
         The enzyme is left at Trypsin; the enzyme help text says it only applies to
         protein input.
         Output checks: FASTA compared by size (sim_size, delta_frac 0.7), CTD must be
         valid XML, stdout must match the timing line of the executable. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="shuffle_max_attempts" value="30"/>
        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
        <param name="seed" value="42"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="DecoyDatabase_4.fasta"/>
      </conditional>
      <output name="out" value="DecoyDatabase_4_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
      <param name="decoy_string" value="DECOY_SEQ_"/>
      <param name="decoy_string_position" value="prefix"/>
      <param name="only_decoy" value="false"/>
      <param name="type" value="RNA"/>
      <param name="method" value="reverse"/>
      <param name="enzyme" value="Trypsin"/>
      <section name="Decoy">
        <param name="non_shuffle_pattern" value=""/>
        <param name="keepPeptideNTerm" value="true"/>
        <param name="keepPeptideCTerm" value="true"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
  </tests>
  <help><![CDATA[Create decoy sequence database from forward sequence database.


For more information, visit https://openms.de/doxygen/release/3.1.0/html/TOPP_DecoyDatabase.html]]></help>
  <expand macro="references"/>
</tool>