view ProteinInference.xml @ 14:e278a1abeebe draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
author galaxyp
date Fri, 14 Jun 2024 21:29:31 +0000
parents 98adda3af74a
children
line wrap: on
line source

<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [Identification Processing]-->
<tool id="ProteinInference" name="ProteinInference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <!-- One-line summary of the tool. -->
  <description>Protein inference based on an aggregation of the scores of the identified peptides</description>
  <!-- The EXECUTABLE token holds the name of the wrapped binary; it is substituted
       into the command template and into the stdout assertions of the tests.
       The macros expanded in this file (requirements, stdio, references,
       adv_opts_macro, list_string_san) and the other tokens it uses are not
       defined here and presumably come from the imported macros.xml. -->
  <macros>
    <token name="@EXECUTABLE@">ProteinInference</token>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <!-- Command template (Cheetah producing a shell command chain joined by &&).
       Steps, in order:
       1. Preprocessing: create the staging directory in_cond.in.
          * in_select == "no" (joint mode): create one numbered subdirectory per
            supplied dataset and copy each dataset into its subdirectory.
          * otherwise (batch mode): copy the single dataset directly into in_cond.in.
          In both cases the file name is the element identifier with every
          character that is not a word character, hyphen or underscore replaced
          by an underscore, followed by the extension returned by gxy2omsext for
          the dataset's Galaxy extension (gxy2omsext is not defined in this
          file; presumably supplied by the EXT_FOO token from macros.xml).
          The directory out is created for the result.
       2. Main call: the executable writes its default CTD file into the working
          directory, fill_ctd.py is run with that CTD plus the two JSON config
          files (presumably to fill in the user's parameter values; the script
          is not visible here), and the executable is then run with the CTD as
          its ini file, the staged inputs after -in and out/output.<out_type>
          after -out.
       3. Postprocessing: move the result to the Galaxy output dataset and, if
          ctd_out_FLAG was selected in OPTIONAL_OUTPUTS, move the CTD file to
          the ctd_out dataset.
       Job failure is determined from the exit code (detect_errors="exit_code"). -->
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
mkdir in_cond.in &&
#if $in_cond.in_select == "no"
mkdir ${' '.join(["'in_cond.in/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} && 
${' '.join(["cp '%s' 'in_cond.in/%s/%s.%s' && " % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
cp '$in_cond.in' 'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)' &&
#end if
mkdir out &&

## Main program call

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in
#if $in_cond.in_select == "no"
${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
#else
'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)'
#end if
-out
'out/output.${out_type}'

## Postprocessing
&& mv 'out/output.${out_type}' '$out'
#if "ctd_out_FLAG" in $OPTIONAL_OUTPUTS
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <!-- Two JSON files that the command passes to fill_ctd.py:
       * args_json: generated by Galaxy from the tool inputs, with datasets
         represented as paths (data_style="paths").
       * hardcoded_json: fixed settings: the log file name, the thread count and
         no_progress=true. The backslash keeps the GALAXY_SLOTS expression
         (default 1) literal in the written file; presumably it is resolved
         later by fill_ctd.py or the shell (not visible here; TODO confirm). -->
  <configfiles>
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <!-- Tool form. Parameter names, types, defaults and option values mirror the
       command-line arguments of the wrapped executable (see each "argument"
       attribute); only user-facing text differs from the generated wrapper:
       the two sections carry real titles and the -in help text is cleaned up. -->
  <inputs>
    <!-- in_select switches between one job over all selected datasets ("no",
         multiple="true") and one dataset per job ("yes"); the command template
         stages the input(s) differently for the two cases. -->
    <conditional name="in_cond">
      <param name="in_select" type="select" label="Run tool in batch mode for -in">
        <option value="no">No: process all datasets jointly</option>
        <option value="yes">Yes: process each dataset in an independent job</option>
      </param>
      <when value="no">
        <param argument="-in" type="data" format="consensusxml,idxml" multiple="true" label="input file(s)" help="select consensusxml,idxml data set(s)"/>
      </when>
      <when value="yes">
        <param argument="-in" type="data" format="consensusxml,idxml" label="input file(s)" help="select consensusxml,idxml data set(s)"/>
      </when>
    </conditional>
    <!-- out_type also selects the datatype of the "out" dataset (see change_format
         in the outputs) and the extension of the intermediate output file. -->
    <param argument="-out_type" type="select" label="output file type" help="">
      <option value="consensusXML">consensusxml</option>
      <option value="idXML">idxml</option>
      <validator type="expression" message="A value needs to be selected">value != "select a value"</validator>
      <expand macro="list_string_san" name="out_type"/>
    </param>
    <param argument="-merge_runs" type="select" label="If your idXML contains multiple runs, merge them beforehand" help="Otherwise performs inference separately per run">
      <option value="no">no</option>
      <option value="all" selected="true">all</option>
      <expand macro="list_string_san" name="merge_runs"/>
    </param>
    <param argument="-protein_fdr" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Additionally calculate the target-decoy FDR on protein-level after inference" help=""/>
    <!-- Parameters of the "Merging:" group. -->
    <section name="Merging" title="Merging" help="" expanded="false">
      <param name="annotate_origin" argument="-Merging:annotate_origin" type="boolean" truevalue="true" falsevalue="false" checked="true" label="If true, adds a map_index MetaValue to the PeptideIDs to annotate the IDRun they came from" help=""/>
      <param name="allow_disagreeing_settings" argument="-Merging:allow_disagreeing_settings" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Force merging of disagreeing runs" help="Use at your own risk"/>
    </section>
    <!-- Parameters of the "Algorithm:" group. -->
    <section name="Algorithm" title="Algorithm" help="" expanded="false">
      <param name="min_peptides_per_protein" argument="-Algorithm:min_peptides_per_protein" type="integer" min="0" value="1" label="Minimal number of peptides needed for a protein identification" help="If set to zero, unmatched proteins get a score of -Infinity. If bigger than zero, proteins with less peptides are filtered and evidences removed from the PSMs. PSMs that do not reference any proteins anymore are removed but the spectrum info is kept"/>
      <param name="score_aggregation_method" argument="-Algorithm:score_aggregation_method" type="select" label="How to aggregate scores of peptides matching to the same protein?" help="">
        <option value="best" selected="true">best</option>
        <option value="product">product</option>
        <option value="sum">sum</option>
        <option value="maximum">maximum</option>
        <expand macro="list_string_san" name="score_aggregation_method"/>
      </param>
      <param name="treat_charge_variants_separately" argument="-Algorithm:treat_charge_variants_separately" type="boolean" truevalue="true" falsevalue="false" checked="true" label="If this is true, different charge variants of the same peptide sequence count as individual evidences" help=""/>
      <param name="treat_modification_variants_separately" argument="-Algorithm:treat_modification_variants_separately" type="boolean" truevalue="true" falsevalue="false" checked="true" label="If this is true, different modification variants of the same peptide sequence count as individual evidences" help=""/>
      <param name="use_shared_peptides" argument="-Algorithm:use_shared_peptides" type="boolean" truevalue="true" falsevalue="false" checked="true" label="If this is true, shared peptides are used as evidences" help="Note: shared_peptides are not deleted and potentially resolved in postprocessing as well"/>
      <param name="skip_count_annotation" argument="-Algorithm:skip_count_annotation" type="boolean" truevalue="true" falsevalue="false" checked="false" label="If this is set, peptide counts won't be annotated at the proteins" help=""/>
      <param name="annotate_indistinguishable_groups" argument="-Algorithm:annotate_indistinguishable_groups" type="boolean" truevalue="true" falsevalue="false" checked="true" label="If this is true, calculates and annotates indistinguishable protein groups" help=""/>
      <param name="greedy_group_resolution" argument="-Algorithm:greedy_group_resolution" type="boolean" truevalue="true" falsevalue="false" checked="false" label="If this is true, shared peptides will be associated to best proteins only" help="(i.e. become potentially quantifiable razor peptides)"/>
    </section>
    <expand macro="adv_opts_macro">
      <param argument="-conservative_fdr" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Use (D+1)/(T) instead of (D+1)/(T+D) for reporting protein FDRs" help=""/>
      <param argument="-picked_fdr" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Use picked protein FDRs" help=""/>
      <param argument="-picked_decoy_string" type="text" optional="true" value="" label="If using picked protein FDRs, which decoy string was used" help="Leave blank for auto-detection">
        <expand macro="list_string_san" name="picked_decoy_string"/>
      </param>
      <param argument="-picked_decoy_prefix" type="select" label="If using picked protein FDRs, was the decoy string a prefix or suffix" help="Ignored during auto-detection">
        <option value="prefix" selected="true">prefix</option>
        <option value="suffix">suffix</option>
        <expand macro="list_string_san" name="picked_decoy_prefix"/>
      </param>
      <param argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
      <param argument="-test" type="hidden" value="False" label="Enables the test mode (needed for internal use only)" help="" optional="true">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- Selecting ctd_out_FLAG enables the ctd_out dataset (see its filter in the
         outputs) and the matching mv step at the end of the command. -->
    <param name="OPTIONAL_OUTPUTS" type="select" optional="true" multiple="true" label="Optional outputs">
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <!-- Datasets produced by a job; attributes are listed as name, format, label. -->
  <outputs>
    <!-- Main result. Declared as consensusxml and switched to idxml when the
         out_type input is set to idXML. -->
    <data name="out" format="consensusxml" label="${tool.name} on ${on_string}: out">
      <change_format>
        <when input="out_type" value="idXML" format="idxml"/>
      </change_format>
    </data>
    <!-- CTD (ini) file used for the run; only created when ctd_out_FLAG is
         among the selected OPTIONAL_OUTPUTS. -->
    <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <tests>
    <!-- TOPP_ProteinInference_1
         idXML in, idXML out. Differs from the tool defaults in
         Merging/annotate_origin=false and Algorithm/use_shared_peptides=false;
         the hidden "test" parameter is set to true.
         in_select is not set, so presumably the first option ("no") applies.
         Two outputs are expected: "out", compared with the reference file by
         size only (sim_size, delta_frac 0.7), and "ctd_out", which only has to
         be valid XML. Stdout must contain the timing line of the executable.
         NOTE(review): the optional "Peak Memory Usage: 32 MB" group in the
         expression is followed by a "." wildcard, so the pattern also matches
         when a different memory value (or none) is printed. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="conservative_fdr" value="true"/>
        <param name="picked_fdr" value="true"/>
        <param name="picked_decoy_string" value=""/>
        <param name="picked_decoy_prefix" value="prefix"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="ProteinInference_1_input.idXML"/>
      </conditional>
      <output name="out" value="ProteinInference_1_output.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="out_type" value="idXML"/>
      <param name="merge_runs" value="all"/>
      <param name="protein_fdr" value="false"/>
      <section name="Merging">
        <param name="annotate_origin" value="false"/>
        <param name="allow_disagreeing_settings" value="false"/>
      </section>
      <section name="Algorithm">
        <param name="min_peptides_per_protein" value="1"/>
        <param name="score_aggregation_method" value="best"/>
        <param name="treat_charge_variants_separately" value="true"/>
        <param name="treat_modification_variants_separately" value="true"/>
        <param name="use_shared_peptides" value="false"/>
        <param name="skip_count_annotation" value="false"/>
        <param name="annotate_indistinguishable_groups" value="true"/>
        <param name="greedy_group_resolution" value="false"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_ProteinInference_2
         consensusXML in (Epifany_2_input.consensusXML), consensusXML out.
         Differs from the tool defaults only in Algorithm/use_shared_peptides=false;
         the hidden "test" parameter is set to true.
         Two outputs are expected: "out", compared with the reference file by
         size only (sim_size, delta_frac 0.7), and "ctd_out", which only has to
         be valid XML. Stdout must contain the timing line of the executable. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="conservative_fdr" value="true"/>
        <param name="picked_fdr" value="true"/>
        <param name="picked_decoy_string" value=""/>
        <param name="picked_decoy_prefix" value="prefix"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="Epifany_2_input.consensusXML"/>
      </conditional>
      <output name="out" value="ProteinInference_2_output.consensusXML" compare="sim_size" delta_frac="0.7" ftype="consensusxml"/>
      <param name="out_type" value="consensusXML"/>
      <param name="merge_runs" value="all"/>
      <param name="protein_fdr" value="false"/>
      <section name="Merging">
        <param name="annotate_origin" value="true"/>
        <param name="allow_disagreeing_settings" value="false"/>
      </section>
      <section name="Algorithm">
        <param name="min_peptides_per_protein" value="1"/>
        <param name="score_aggregation_method" value="best"/>
        <param name="treat_charge_variants_separately" value="true"/>
        <param name="treat_modification_variants_separately" value="true"/>
        <param name="use_shared_peptides" value="false"/>
        <param name="skip_count_annotation" value="false"/>
        <param name="annotate_indistinguishable_groups" value="true"/>
        <param name="greedy_group_resolution" value="false"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_ProteinInference_3
         consensusXML in (MSstatsConverter_3_in.consensusXML), consensusXML out.
         Exercises the protein-level FDR path: protein_fdr=true with an explicit
         decoy string "_rev" used as a suffix; shared peptides are used.
         The hidden "test" parameter is set to true.
         Two outputs are expected: "out", compared with the reference file by
         size only (sim_size, delta_frac 0.7), and "ctd_out", which only has to
         be valid XML. Stdout must contain the timing line of the executable. -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="conservative_fdr" value="true"/>
        <param name="picked_fdr" value="true"/>
        <param name="picked_decoy_string" value="_rev"/>
        <param name="picked_decoy_prefix" value="suffix"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <conditional name="in_cond">
        <param name="in" value="MSstatsConverter_3_in.consensusXML"/>
      </conditional>
      <output name="out" value="ProteinInference_3_output.consensusXML" compare="sim_size" delta_frac="0.7" ftype="consensusxml"/>
      <param name="out_type" value="consensusXML"/>
      <param name="merge_runs" value="all"/>
      <param name="protein_fdr" value="true"/>
      <section name="Merging">
        <param name="annotate_origin" value="true"/>
        <param name="allow_disagreeing_settings" value="false"/>
      </section>
      <section name="Algorithm">
        <param name="min_peptides_per_protein" value="1"/>
        <param name="score_aggregation_method" value="best"/>
        <param name="treat_charge_variants_separately" value="true"/>
        <param name="treat_modification_variants_separately" value="true"/>
        <param name="use_shared_peptides" value="true"/>
        <param name="skip_count_annotation" value="false"/>
        <param name="annotate_indistinguishable_groups" value="true"/>
        <param name="greedy_group_resolution" value="false"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
  </tests>
  <help><![CDATA[Protein inference based on an aggregation of the scores of the identified peptides.


For more information, visit https://openms.de/doxygen/release/3.1.0/html/TOPP_ProteinInference.html]]></help>
  <expand macro="references"/>
</tool>