<!-- Mercurial > repos > galaxyp > openms_databasesuitability -->

<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [Quality Control]-->
<tool id="DatabaseSuitability" name="DatabaseSuitability" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <description>Computes a suitability score for a database which was used for a peptide identification search. Also reports the quality of LC-MS spectra</description>
  <!-- Defines the EXECUTABLE token used by the command template and the test stdout assertions below, and imports the shared macros from macros.xml. -->
  <macros>
    <token name="@EXECUTABLE@">DatabaseSuitability</token>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
## Every input dataset is copied into a directory named after its parameter.
## The file name is the element identifier with each character outside
## word characters, hyphen and underscore replaced by an underscore, followed
## by the extension returned by gxy2omsext (defined outside this file,
## presumably by the tokens expanded above - to be confirmed in macros.xml).
mkdir in_id &&
cp '$in_id' 'in_id/${re.sub("[^\w\-_]", "_", $in_id.element_identifier)}.$gxy2omsext($in_id.ext)' &&
mkdir in_spec &&
cp '$in_spec' 'in_spec/${re.sub("[^\w\-_]", "_", $in_spec.element_identifier)}.$gxy2omsext($in_spec.ext)' &&
mkdir in_novo &&
cp '$in_novo' 'in_novo/${re.sub("[^\w\-_]", "_", $in_novo.element_identifier)}.$gxy2omsext($in_novo.ext)' &&
mkdir database &&
cp '$database' 'database/${re.sub("[^\w\-_]", "_", $database.element_identifier)}.$gxy2omsext($database.ext)' &&
mkdir novo_database &&
cp '$novo_database' 'novo_database/${re.sub("[^\w\-_]", "_", $novo_database.element_identifier)}.$gxy2omsext($novo_database.ext)' &&
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir out &&
#end if

## Main program call
## The executable first writes its default CTD, fill_ctd.py then fills it from
## the two JSON config files, and the tool finally runs with that CTD as its
## ini file plus the staged input paths given explicitly.

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in_id
'in_id/${re.sub("[^\w\-_]", "_", $in_id.element_identifier)}.$gxy2omsext($in_id.ext)'
-in_spec
'in_spec/${re.sub("[^\w\-_]", "_", $in_spec.element_identifier)}.$gxy2omsext($in_spec.ext)'
-in_novo
'in_novo/${re.sub("[^\w\-_]", "_", $in_novo.element_identifier)}.$gxy2omsext($in_novo.ext)'
-database
'database/${re.sub("[^\w\-_]", "_", $database.element_identifier)}.$gxy2omsext($database.ext)'
-novo_database
'novo_database/${re.sub("[^\w\-_]", "_", $novo_database.element_identifier)}.$gxy2omsext($novo_database.ext)'
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -out
  'out/output.${gxy2omsext("tabular")}'
#end if
## With no optional output selected the only dataset is the stdout capture.
## Splitting a string on a separator always yields at least one element, so a
## length-equals-zero test can never fire; the rendered selection is compared
## directly instead ("None" is what an empty optional select renders as).
#if str($OPTIONAL_OUTPUTS) in ("None", "")
  | tee '$stdout'
#end if

## Postprocessing
## Move the produced files onto the Galaxy output datasets that were requested.
#if "out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv 'out/output.${gxy2omsext("tabular")}' '$out'
#end if
#if "ctd_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <!-- Two JSON files that the command template hands to fill_ctd.py:
       args_json holds the tool inputs (data_style="paths"), hardcoded_json holds fixed values for log, threads and no_progress. -->
  <configfiles>
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <!-- Required input datasets; each one is staged and passed explicitly by the command template. -->
    <param argument="-in_id" type="data" format="idxml" label="Input idXML file from a peptide identification search with a combined database" help="PeptideIndexer is needed, FDR is forbidden. Select idxml data set(s)"/>
    <param argument="-in_spec" type="data" format="mzml" label="Input MzML file used for the peptide identification" help="Select mzml data set(s)"/>
    <param argument="-in_novo" type="data" format="idxml" label="Input idXML file containing de novo peptides (unfiltered)" help="Select idxml data set(s)"/>
    <param argument="-database" type="data" format="fasta" label="Input FASTA file of the database in question" help="Select fasta data set(s)"/>
    <param argument="-novo_database" type="data" format="fasta" label="Input deNovo sequences derived from MzML given in 'in_spec' concatenated to one FASTA entry" help="Select fasta data set(s)"/>
    <!-- Settings of the suitability calculation (the algorithm parameter subsection). -->
    <section name="algorithm" title="Parameter section for the suitability calculation algorithm" help="" expanded="false">
      <param name="no_rerank" argument="-algorithm:no_rerank" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use this flag if you want to disable re-ranking" help="Cases, where a de novo peptide scores just higher than the database peptide, are overlooked and counted as a de novo hit. This might underestimate the database quality"/>
      <param name="reranking_cutoff_percentile" argument="-algorithm:reranking_cutoff_percentile" type="float" min="0.0" max="1.0" value="0.01" label="Swap a top-scoring deNovo hit with a lower scoring DB hit if their xcorr score difference is in the given percentile of all score differences between the first two decoy hits of a PSM" help="The lower the value the lower the decoy cut-off will be. Therefore it will be harder for a lower scoring DB hit to be re-ranked to the top"/>
      <param name="FDR" argument="-algorithm:FDR" type="float" min="0.0" max="1.0" value="0.01" label="Filter peptide hits based on this q-value" help="(e.g., 0.05 = 5 % FDR)"/>
      <param name="number_of_subsampled_runs" argument="-algorithm:number_of_subsampled_runs" type="integer" min="0" value="1" label="Controls how many runs should be done for calculating corrected suitability" help="(0 : number of runs will be estimated automatically) ATTENTION: For each run a separate ID-search is performed. This can result in some serious run time"/>
      <param name="keep_search_files" argument="-algorithm:keep_search_files" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Set this flag if you wish to keep the files used by and produced by the internal ID search" help=""/>
      <param name="disable_correction" argument="-algorithm:disable_correction" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Set this flag to disable the calculation of the corrected suitability" help=""/>
      <param name="force" argument="-algorithm:force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Set this flag to enforce re-ranking when no cross correlation score is present" help="For re-ranking the default score found at each peptide hit is used. Use with care!"/>
    </section>
    <!-- Advanced options, wrapped by the adv_opts_macro macro (defined outside this file). -->
    <expand macro="adv_opts_macro">
      <param argument="-novo_threshold" type="float" min="0.0" value="60.0" label="Minimum score a de novo sequence has to have to be defined as 'correct'" help="The default of 60 is proven to be a good estimate for sequences generated by Novor"/>
      <param argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
      <param argument="-test" type="hidden" value="False" label="Enables the test mode (needed for internal use only)" help="" optional="true">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- Selects which optional output datasets are produced; the command template and the output filters both read this value. -->
    <param name="OPTIONAL_OUTPUTS" type="select" optional="true" multiple="true" label="Optional outputs">
      <option value="out_FLAG">out (Optional tsv output containing database suitability information as well as spectral quality)</option>
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <outputs>
    <!-- Tabular report, present only when "out_FLAG" is among the selected optional outputs. -->
    <data name="out" format="tabular" label="${tool.name} on ${on_string}: out">
      <filter>OPTIONAL_OUTPUTS is not None and "out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
    <!-- Text dataset for the tool's stdout, present only when no optional output is selected. -->
    <data name="stdout" format="txt" label="${tool.name} on ${on_string}: stdout">
      <filter>OPTIONAL_OUTPUTS is None</filter>
    </data>
    <!-- The CTD (ini) file used for the run, present only when "ctd_out_FLAG" is selected. -->
    <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests>
    <!-- TOPP_DatabaseSuitability: default thresholds (FDR 0.01, cutoff percentile 0.01) with disable_correction set to true -->
    <test expect_num_outputs="2">
      <!-- input datasets -->
      <param name="in_id" value="DatabaseSuitability_in_id.idXML"/>
      <param name="in_spec" value="DatabaseSuitability_in_spec.mzML"/>
      <param name="in_novo" value="DatabaseSuitability_in_novo.idXML"/>
      <param name="database" value="DatabaseSuitability_database.fasta"/>
      <param name="novo_database" value="DatabaseSuitability_novo_database.FASTA"/>
      <!-- algorithm settings -->
      <section name="algorithm">
        <param name="no_rerank" value="false"/>
        <param name="reranking_cutoff_percentile" value="0.01"/>
        <param name="FDR" value="0.01"/>
        <param name="number_of_subsampled_runs" value="1"/>
        <param name="keep_search_files" value="false"/>
        <param name="disable_correction" value="true"/>
        <param name="force" value="false"/>
      </section>
      <!-- advanced options -->
      <section name="adv_opts">
        <param name="novo_threshold" value="60.0"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <!-- both optional outputs are requested, hence two expected outputs -->
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <!-- expectations -->
      <output name="out" value="DatabaseSuitability_out.tsv" compare="sim_size" delta_frac="0.7" ftype="tabular"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_DatabaseSuitability_1: default thresholds (FDR 0.01, cutoff percentile 0.01) with disable_correction set to false -->
    <test expect_num_outputs="2">
      <!-- input datasets -->
      <param name="in_id" value="DatabaseSuitability_in_id.idXML"/>
      <param name="in_spec" value="DatabaseSuitability_in_spec.mzML"/>
      <param name="in_novo" value="DatabaseSuitability_in_novo.idXML"/>
      <param name="database" value="DatabaseSuitability_database.fasta"/>
      <param name="novo_database" value="DatabaseSuitability_novo_database.FASTA"/>
      <!-- algorithm settings -->
      <section name="algorithm">
        <param name="no_rerank" value="false"/>
        <param name="reranking_cutoff_percentile" value="0.01"/>
        <param name="FDR" value="0.01"/>
        <param name="number_of_subsampled_runs" value="1"/>
        <param name="keep_search_files" value="false"/>
        <param name="disable_correction" value="false"/>
        <param name="force" value="false"/>
      </section>
      <!-- advanced options -->
      <section name="adv_opts">
        <param name="novo_threshold" value="60.0"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <!-- both optional outputs are requested, hence two expected outputs -->
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <!-- expectations -->
      <output name="out" value="DatabaseSuitability_out_1.tsv" compare="sim_size" delta_frac="0.7" ftype="tabular"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_DatabaseSuitability_2: FDR set to 0.05, cutoff percentile 0.01, disable_correction set to false -->
    <test expect_num_outputs="2">
      <!-- input datasets -->
      <param name="in_id" value="DatabaseSuitability_in_id.idXML"/>
      <param name="in_spec" value="DatabaseSuitability_in_spec.mzML"/>
      <param name="in_novo" value="DatabaseSuitability_in_novo.idXML"/>
      <param name="database" value="DatabaseSuitability_database.fasta"/>
      <param name="novo_database" value="DatabaseSuitability_novo_database.FASTA"/>
      <!-- algorithm settings -->
      <section name="algorithm">
        <param name="no_rerank" value="false"/>
        <param name="reranking_cutoff_percentile" value="0.01"/>
        <param name="FDR" value="0.05"/>
        <param name="number_of_subsampled_runs" value="1"/>
        <param name="keep_search_files" value="false"/>
        <param name="disable_correction" value="false"/>
        <param name="force" value="false"/>
      </section>
      <!-- advanced options -->
      <section name="adv_opts">
        <param name="novo_threshold" value="60.0"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <!-- both optional outputs are requested, hence two expected outputs -->
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <!-- expectations -->
      <output name="out" value="DatabaseSuitability_out_2.tsv" compare="sim_size" delta_frac="0.7" ftype="tabular"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_DatabaseSuitability_3: FDR and cutoff percentile both set to 0.5, disable_correction set to false -->
    <test expect_num_outputs="2">
      <!-- input datasets -->
      <param name="in_id" value="DatabaseSuitability_in_id.idXML"/>
      <param name="in_spec" value="DatabaseSuitability_in_spec.mzML"/>
      <param name="in_novo" value="DatabaseSuitability_in_novo.idXML"/>
      <param name="database" value="DatabaseSuitability_database.fasta"/>
      <param name="novo_database" value="DatabaseSuitability_novo_database.FASTA"/>
      <!-- algorithm settings -->
      <section name="algorithm">
        <param name="no_rerank" value="false"/>
        <param name="reranking_cutoff_percentile" value="0.5"/>
        <param name="FDR" value="0.5"/>
        <param name="number_of_subsampled_runs" value="1"/>
        <param name="keep_search_files" value="false"/>
        <param name="disable_correction" value="false"/>
        <param name="force" value="false"/>
      </section>
      <!-- advanced options -->
      <section name="adv_opts">
        <param name="novo_threshold" value="60.0"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <!-- both optional outputs are requested, hence two expected outputs -->
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG,out_FLAG"/>
      <!-- expectations -->
      <output name="out" value="DatabaseSuitability_out_3.tsv" compare="sim_size" delta_frac="0.7" ftype="tabular"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
  </tests>
  <help><![CDATA[Computes a suitability score for a database which was used for a peptide identification search. Also reports the quality of LC-MS spectra.


For more information, visit https://openms.de/doxygen/release/3.1.0/html/TOPP_DatabaseSuitability.html]]></help>
  <expand macro="references"/>
</tool>
<!--
author	galaxyp
date	Fri, 14 Jun 2024 21:46:31 +0000
parents	4890f50ae1a2
children
-->