diff DecoyDatabase.xml @ 15:25529df60a81 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 3d1e5f37fd16524a415f707772eeb7ead848c5e3
author galaxyp
date Thu, 01 Dec 2022 19:06:38 +0000
parents 71a6c870aa48
children 06cca574e337
line wrap: on
line diff
--- a/DecoyDatabase.xml	Fri Nov 06 19:56:44 2020 +0000
+++ b/DecoyDatabase.xml	Thu Dec 01 19:06:38 2022 +0000
@@ -1,13 +1,11 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <!--This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
 <!--Proposed Tool Section: [Utilities]-->
-<tool id="DecoyDatabase" name="DecoyDatabase" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@" profile="20.05">
+<tool id="DecoyDatabase" name="DecoyDatabase" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
   <description>Create decoy sequence database from forward sequence database.</description>
   <macros>
     <token name="@EXECUTABLE@">DecoyDatabase</token>
     <import>macros.xml</import>
-    <import>macros_autotest.xml</import>
-    <import>macros_test.xml</import>
   </macros>
   <expand macro="requirements"/>
   <expand macro="stdio"/>
@@ -16,8 +14,13 @@
 #import re
 
 ## Preprocessing
-mkdir in &&
-${ ' '.join(["ln -s '%s' 'in/%s.%s' &&" % (_, re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in if _]) }
+mkdir in_cond.in &&
+#if $in_cond.in_select == "no"
+mkdir ${' '.join(["'in_cond.in/%s'" % (i) for i, f in enumerate($in_cond.in) if f])} && 
+${' '.join(["ln -s '%s' 'in_cond.in/%s/%s.%s' && " % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
+#else
+ln -s '$in_cond.in' 'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)' &&
+#end if
 mkdir out &&
 
 ## Main program call
@@ -27,7 +30,11 @@
 python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
 @EXECUTABLE@ -ini @EXECUTABLE@.ctd
 -in
-${' '.join(["'in/%s.%s'"%(re.sub('[^\w\-_]', '_', _.element_identifier), $gxy2omsext(_.ext)) for _ in $in if _])}
+#if $in_cond.in_select == "no"
+${' '.join(["'in_cond.in/%s/%s.%s'"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($in_cond.in) if f])}
+#else
+'in_cond.in/${re.sub("[^\w\-_]", "_", $in_cond.in.element_identifier)}.$gxy2omsext($in_cond.in.ext)'
+#end if
 -out
 'out/output.${gxy2omsext("fasta")}'
 
@@ -41,36 +48,53 @@
     <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
   </configfiles>
   <inputs>
-    <param name="in" argument="-in" type="data" format="fasta" multiple="true" optional="false" label="Input FASTA file(s), each containing a database" help="It is recommended to include a contaminant database as well select fasta data sets(s)"/>
-    <param name="decoy_string" argument="-decoy_string" type="text" optional="true" value="DECOY_" label="String that is combined with the accession of the protein identifier to indicate a decoy protein" help="">
-      <expand macro="list_string_san"/>
+    <conditional name="in_cond">
+      <param name="in_select" type="select" label="Run tool in batch mode for -in">
+        <option value="no">No: process all datasets jointly</option>
+        <option value="yes">Yes: process each dataset in an independent job</option>
+      </param>
+      <when value="no">
+        <param argument="-in" type="data" format="fasta" multiple="true" optional="false" label="Input FASTA file(s), each containing a database" help="It is recommended to include a contaminant database as well select fasta data sets(s)"/>
+      </when>
+      <when value="yes">
+        <param argument="-in" type="data" format="fasta" multiple="false" optional="false" label="Input FASTA file(s), each containing a database" help="It is recommended to include a contaminant database as well select fasta data sets(s)"/>
+      </when>
+    </conditional>
+    <param argument="-decoy_string" type="text" optional="true" value="DECOY_" label="String that is combined with the accession of the protein identifier to indicate a decoy protein" help="">
+      <expand macro="list_string_san" name="decoy_string"/>
     </param>
-    <param name="decoy_string_position" argument="-decoy_string_position" display="radio" type="select" optional="false" label="Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?" help="">
+    <param argument="-decoy_string_position" type="select" optional="true" label="Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?" help="">
       <option value="prefix" selected="true">prefix</option>
       <option value="suffix">suffix</option>
-      <expand macro="list_string_san"/>
+      <expand macro="list_string_san" name="decoy_string_position"/>
     </param>
-    <param name="only_decoy" argument="-only_decoy" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write only decoy proteins to the output database instead of a combined database" help=""/>
-    <param name="type" argument="-type" display="radio" type="select" optional="false" label="Type of sequence" help="RNA sequences may contain modification codes, which will be handled correctly if this is set to 'RNA'">
+    <param argument="-only_decoy" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write only decoy proteins to the output database instead of a combined database" help=""/>
+    <param argument="-type" type="select" optional="true" label="Type of sequence" help="RNA sequences may contain modification codes, which will be handled correctly if this is set to 'RNA'">
       <option value="protein" selected="true">protein</option>
       <option value="RNA">RNA</option>
-      <expand macro="list_string_san"/>
+      <expand macro="list_string_san" name="type"/>
     </param>
-    <param name="method" argument="-method" display="radio" type="select" optional="false" label="Method by which decoy sequences are generated from target sequences" help="Note that all sequences are shuffled using the same random seed, ensuring that identical sequences produce the same shuffled decoy sequences. Shuffled sequences that produce highly similar output sequences are shuffled again (see shuffle_sequence_identity_threshold)">
+    <param argument="-method" type="select" optional="true" label="Method by which decoy sequences are generated from target sequences" help="Note that all sequences are shuffled using the same random seed, ensuring that identical sequences produce the same shuffled decoy sequences. Shuffled sequences that produce highly similar output sequences are shuffled again (see shuffle_sequence_identity_threshold)">
       <option value="reverse" selected="true">reverse</option>
       <option value="shuffle">shuffle</option>
-      <expand macro="list_string_san"/>
+      <expand macro="list_string_san" name="method"/>
     </param>
-    <param name="enzyme" argument="-enzyme" type="select" optional="false" label="Enzyme used for the digestion of the sample" help="Only applicable if parameter 'type' is 'protein'">
-      <option value="unspecific cleavage">unspecific cleavage</option>
-      <option value="glutamyl endopeptidase">glutamyl endopeptidase</option>
-      <option value="Alpha-lytic protease">Alpha-lytic protease</option>
-      <option value="2-iodobenzoate">2-iodobenzoate</option>
-      <option value="iodosobenzoate">iodosobenzoate</option>
-      <option value="staphylococcal protease/D">staphylococcal protease/D</option>
+    <param argument="-enzyme" type="select" optional="true" label="Enzyme used for the digestion of the sample" help="Only applicable if parameter 'type' is 'protein'">
+      <option value="Asp-N/B">Asp-N/B</option>
+      <option value="Asp-N_ambic">Asp-N_ambic</option>
       <option value="Chymotrypsin">Chymotrypsin</option>
       <option value="Chymotrypsin/P">Chymotrypsin/P</option>
       <option value="CNBr">CNBr</option>
+      <option value="2-iodobenzoate">2-iodobenzoate</option>
+      <option value="iodosobenzoate">iodosobenzoate</option>
+      <option value="Arg-C">Arg-C</option>
+      <option value="Arg-C/P">Arg-C/P</option>
+      <option value="Asp-N">Asp-N</option>
+      <option value="cyanogen-bromide">cyanogen-bromide</option>
+      <option value="Clostripain/P">Clostripain/P</option>
+      <option value="elastase-trypsin-chymotrypsin">elastase-trypsin-chymotrypsin</option>
+      <option value="no cleavage">no cleavage</option>
+      <option value="unspecific cleavage">unspecific cleavage</option>
       <option value="Formic_acid">Formic_acid</option>
       <option value="Lys-C">Lys-C</option>
       <option value="Lys-N">Lys-N</option>
@@ -78,41 +102,35 @@
       <option value="PepsinA">PepsinA</option>
       <option value="TrypChymo">TrypChymo</option>
       <option value="Trypsin/P">Trypsin/P</option>
-      <option value="Arg-C">Arg-C</option>
-      <option value="Arg-C/P">Arg-C/P</option>
-      <option value="Asp-N">Asp-N</option>
-      <option value="Asp-N/B">Asp-N/B</option>
-      <option value="Asp-N_ambic">Asp-N_ambic</option>
-      <option value="proline-endopeptidase/HKR">proline-endopeptidase/HKR</option>
-      <option value="Glu-C+P">Glu-C+P</option>
-      <option value="PepsinA + P">PepsinA + P</option>
-      <option value="cyanogen-bromide">cyanogen-bromide</option>
-      <option value="Clostripain/P">Clostripain/P</option>
-      <option value="elastase-trypsin-chymotrypsin">elastase-trypsin-chymotrypsin</option>
-      <option value="no cleavage">no cleavage</option>
       <option value="V8-DE">V8-DE</option>
       <option value="V8-E">V8-E</option>
       <option value="leukocyte elastase">leukocyte elastase</option>
       <option value="proline endopeptidase">proline endopeptidase</option>
+      <option value="glutamyl endopeptidase">glutamyl endopeptidase</option>
+      <option value="Alpha-lytic protease">Alpha-lytic protease</option>
+      <option value="staphylococcal protease/D">staphylococcal protease/D</option>
+      <option value="proline-endopeptidase/HKR">proline-endopeptidase/HKR</option>
+      <option value="Glu-C+P">Glu-C+P</option>
+      <option value="PepsinA + P">PepsinA + P</option>
       <option value="Trypsin" selected="true">Trypsin</option>
-      <expand macro="list_string_san"/>
+      <expand macro="list_string_san" name="enzyme"/>
     </param>
     <section name="Decoy" title="Decoy parameters section" help="" expanded="false">
       <param name="non_shuffle_pattern" argument="-Decoy:non_shuffle_pattern" type="text" optional="true" value="" label="Residues to not shuffle (keep at a constant position when shuffling)" help="Separate by comma, e.g. use 'K,P,R' here">
-        <expand macro="list_string_san"/>
+        <expand macro="list_string_san" name="non_shuffle_pattern"/>
       </param>
       <param name="keepPeptideNTerm" argument="-Decoy:keepPeptideNTerm" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Whether to keep peptide N terminus constant when shuffling / reversing" help=""/>
       <param name="keepPeptideCTerm" argument="-Decoy:keepPeptideCTerm" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Whether to keep peptide C terminus constant when shuffling / reversing" help=""/>
     </section>
     <expand macro="adv_opts_macro">
-      <param name="shuffle_max_attempts" argument="-shuffle_max_attempts" type="integer" optional="true" value="30" label="shuffle: maximum attempts to lower the amino acid sequence identity between target and decoy for the shuffle algorithm" help=""/>
-      <param name="shuffle_sequence_identity_threshold" argument="-shuffle_sequence_identity_threshold" type="float" optional="true" value="0.5" label="shuffle: target-decoy amino acid sequence identity threshold for the shuffle algorithm" help="If the sequence identity is above this threshold, shuffling is repeated. In case of repeated failure, individual amino acids are 'mutated' to produce a different amino acid sequence"/>
-      <param name="seed" argument="-seed" type="text" optional="true" value="1" label="Random number seed (use 'time' for system time)" help="">
-        <expand macro="list_string_san"/>
+      <param argument="-shuffle_max_attempts" type="integer" optional="true" value="30" label="shuffle: maximum attempts to lower the amino acid sequence identity between target and decoy for the shuffle algorithm" help=""/>
+      <param argument="-shuffle_sequence_identity_threshold" type="float" optional="true" value="0.5" label="shuffle: target-decoy amino acid sequence identity threshold for the shuffle algorithm" help="If the sequence identity is above this threshold, shuffling is repeated. In case of repeated failure, individual amino acids are 'mutated' to produce a different amino acid sequence"/>
+      <param argument="-seed" type="text" optional="true" value="1" label="Random number seed (use 'time' for system time)" help="">
+        <expand macro="list_string_san" name="seed"/>
       </param>
-      <param name="force" argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
-      <param name="test" argument="-test" type="hidden" optional="true" value="False" label="Enables the test mode (needed for internal use only)" help="">
-        <expand macro="list_string_san"/>
+      <param argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
+      <param argument="-test" type="hidden" optional="true" value="False" label="Enables the test mode (needed for internal use only)" help="">
+        <expand macro="list_string_san" name="test"/>
       </param>
     </expand>
     <param name="OPTIONAL_OUTPUTS" type="select" optional="true" multiple="true" label="Optional outputs">
@@ -125,13 +143,134 @@
       <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
     </data>
   </outputs>
-  <tests>
-    <expand macro="autotest_DecoyDatabase"/>
-    <expand macro="manutest_DecoyDatabase"/>
+  <tests><!-- UTILS_DecoyDatabase_1 -->
+    <test expect_num_outputs="2">
+      <section name="adv_opts">
+        <param name="shuffle_max_attempts" value="30"/>
+        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
+        <param name="seed" value="1"/>
+        <param name="force" value="false"/>
+        <param name="test" value="true"/>
+      </section>
+      <conditional name="in_cond">
+        <param name="in" value="DecoyDatabase_1.fasta"/>
+      </conditional>
+      <output name="out" file="DecoyDatabase_1_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
+      <param name="decoy_string" value="DECOY_"/>
+      <param name="decoy_string_position" value="prefix"/>
+      <param name="only_decoy" value="true"/>
+      <param name="type" value="protein"/>
+      <param name="method" value="reverse"/>
+      <param name="enzyme" value="Trypsin"/>
+      <section name="Decoy">
+        <param name="non_shuffle_pattern" value=""/>
+        <param name="keepPeptideNTerm" value="true"/>
+        <param name="keepPeptideCTerm" value="true"/>
+      </section>
+      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
+      <output name="ctd_out" ftype="xml">
+        <assert_contents>
+          <is_valid_xml/>
+        </assert_contents>
+      </output>
+    </test>
+    <!-- UTILS_DecoyDatabase_2 -->
+    <test expect_num_outputs="2">
+      <section name="adv_opts">
+        <param name="shuffle_max_attempts" value="30"/>
+        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
+        <param name="seed" value="42"/>
+        <param name="force" value="false"/>
+        <param name="test" value="true"/>
+      </section>
+      <conditional name="in_cond">
+        <param name="in" value="DecoyDatabase_1.fasta"/>
+      </conditional>
+      <output name="out" file="DecoyDatabase_2_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
+      <param name="decoy_string" value="blabla"/>
+      <param name="decoy_string_position" value="prefix"/>
+      <param name="only_decoy" value="false"/>
+      <param name="type" value="protein"/>
+      <param name="method" value="shuffle"/>
+      <param name="enzyme" value="Trypsin"/>
+      <section name="Decoy">
+        <param name="non_shuffle_pattern" value="KRP"/>
+        <param name="keepPeptideNTerm" value="true"/>
+        <param name="keepPeptideCTerm" value="true"/>
+      </section>
+      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
+      <output name="ctd_out" ftype="xml">
+        <assert_contents>
+          <is_valid_xml/>
+        </assert_contents>
+      </output>
+    </test>
+    <!-- UTILS_DecoyDatabase_3 -->
+    <test expect_num_outputs="2">
+      <section name="adv_opts">
+        <param name="shuffle_max_attempts" value="30"/>
+        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
+        <param name="seed" value="42"/>
+        <param name="force" value="false"/>
+        <param name="test" value="true"/>
+      </section>
+      <conditional name="in_cond">
+        <param name="in" value="DecoyDatabase_1.fasta"/>
+      </conditional>
+      <output name="out" file="DecoyDatabase_3_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
+      <param name="decoy_string" value="blabla"/>
+      <param name="decoy_string_position" value="prefix"/>
+      <param name="only_decoy" value="false"/>
+      <param name="type" value="protein"/>
+      <param name="method" value="shuffle"/>
+      <param name="enzyme" value="Chymotrypsin"/>
+      <section name="Decoy">
+        <param name="non_shuffle_pattern" value="KR"/>
+        <param name="keepPeptideNTerm" value="true"/>
+        <param name="keepPeptideCTerm" value="true"/>
+      </section>
+      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
+      <output name="ctd_out" ftype="xml">
+        <assert_contents>
+          <is_valid_xml/>
+        </assert_contents>
+      </output>
+    </test>
+    <!-- UTILS_DecoyDatabase_4 -->
+    <test expect_num_outputs="2">
+      <section name="adv_opts">
+        <param name="shuffle_max_attempts" value="30"/>
+        <param name="shuffle_sequence_identity_threshold" value="0.5"/>
+        <param name="seed" value="42"/>
+        <param name="force" value="false"/>
+        <param name="test" value="true"/>
+      </section>
+      <conditional name="in_cond">
+        <param name="in" value="DecoyDatabase_4.fasta"/>
+      </conditional>
+      <output name="out" file="DecoyDatabase_4_out.fasta" compare="sim_size" delta_frac="0.7" ftype="fasta"/>
+      <param name="decoy_string" value="blabla"/>
+      <param name="decoy_string_position" value="prefix"/>
+      <param name="only_decoy" value="false"/>
+      <param name="type" value="RNA"/>
+      <param name="method" value="reverse"/>
+      <param name="enzyme" value="Trypsin"/>
+      <section name="Decoy">
+        <param name="non_shuffle_pattern" value=""/>
+        <param name="keepPeptideNTerm" value="true"/>
+        <param name="keepPeptideCTerm" value="true"/>
+      </section>
+      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
+      <output name="ctd_out" ftype="xml">
+        <assert_contents>
+          <is_valid_xml/>
+        </assert_contents>
+      </output>
+    </test>
   </tests>
   <help><![CDATA[Create decoy sequence database from forward sequence database.
 
 
-For more information, visit http://www.openms.de/doxygen/release/2.6.0/html/UTILS_DecoyDatabase.html]]></help>
+For more information, visit http://www.openms.de/doxygen/release/2.8.0/html/UTILS_DecoyDatabase.html]]></help>
   <expand macro="references"/>
 </tool>