changeset 0:1e5400393797 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteomiqon_peptidedb commit c8f0a6706869a8f2988a096c61d7de7de573434f"
author galaxyp
date Sun, 04 Jul 2021 21:26:08 +0000
parents
children
files proteomiqon_peptidedb.xml test-data/result_1.json test-data/result_2.json test-data/result_3.json test-data/sample.fasta
diffstat 5 files changed, 303 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/proteomiqon_peptidedb.xml	Sun Jul 04 21:26:08 2021 +0000
@@ -0,0 +1,210 @@
+<tool id="proteomiqon_peptidedb" name="ProteomIQon PeptideDB" version="@VERSION@" profile="20.05">
+    <description>
+        creates a peptide database in the SQLite format.
+    </description>
+    <macros>
+        <token name="@VERSION@">0.0.7</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@VERSION@">proteomiqon-peptidedb</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #if $outputParamfile:
+            cat '$paramfile' >> '$out_paramfile' &&
+        #end if
+        proteomiqon-peptidedb -i '$fasta' -p '$paramfile' -o ./ &&
+        cp './galaxy.db' '$out_db'
+            ]]>
+    </command>
+    <configfiles>
+        <configfile name="paramfile">
+        <![CDATA[
+        {
+            "Name": "galaxy",
+            "ParseProteinIDRegexPattern": "${ParseProteinIDRegexPattern}",
+            "Protease": { "Case": "${Protease}" },
+            "MinMissedCleavages": ${MinMissedCleavages},
+            "MaxMissedCleavages": ${MaxMissedCleavages},
+            "MaxMass": ${MaxMass},
+            "MinPepLength": ${MinPepLength},
+            "MaxPepLength": ${MaxPepLength},
+            "IsotopicMod": [
+            #set isotopicModList = $list($IsotopicMod)
+            #for $mod in $range($len($isotopicModList))
+                #if $mod != $len($isotopicModList) -1
+                {"Case":"${isotopicModList[$mod]}"},
+                #else
+                {"Case":"${isotopicModList[$mod]}"}
+                #end if
+            #end for
+            ],
+            "MassMode": {
+                "Case": "${MassMode}"
+            },
+            "FixedMods": [
+            #set fixedModList = $list($FixedMods)
+            #for $mod in $range($len($fixedModList))
+                #if $mod != $len($fixedModList) -1
+                {"Case":"${fixedModList[$mod]}"},
+                #else
+                {"Case":"${fixedModList[$mod]}"}
+                #end if
+            #end for
+            ],
+            "VariableMods": [
+            #set variableModList = $list($VariableMods)
+            #for $mod in $range($len($variableModList))
+                #if $mod != $len($variableModList) -1
+                {"Case":"${variableModList[$mod]}"},
+                #else
+                {"Case":"${variableModList[$mod]}"}
+                #end if
+            #end for
+            ],
+            "VarModThreshold": ${VarModThreshold}
+            }
+            ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="fasta" type="data" format="fasta" label="Fasta file" help="Please specify your protein sequences in the .fasta format. If you want to search for contaminants you have to include the sequences in your file. You do not have to supply decoy sequences, those are generated for you."/>
+        <param name="ParseProteinIDRegexPattern" type="text" value="id" label="Regex pattern to parse protein ID from .fasta header" help="Fasta headers do often contain additional information in addition to your protein identifier, by specifying a regex pattern the tool can extract the protein IDs. If you fasta headers are already cleaned you can leave this field empty.">
+        <sanitizer sanitize="false" />
+        </param>
+        <param name="Protease" type="select" label="Protease" help="Select a protease the tool can use when performing in silico digestion.">
+            <option selected="true" value="Trypsin">Trypsin</option>
+            <option value="Trypsin_P">Trypsin_P</option>
+            <option value="LysC">LysC</option>
+            <option value="LysC_P">LysC_P</option>
+            <option value="Chymotrypsin">Chymotrypsin</option>
+            <option value="PepsinA">PepsinA</option>
+        </param>
+        <param name="MinMissedCleavages" type="integer" value="0" label="Min missed cleavages" help="Select the minimum amount of miss cleavages in a peptide sequence."/>
+        <param name="MaxMissedCleavages" type="integer" value="2" label="Max missed cleavages" help="Select the maximum amount of miss cleavages in a peptide sequence."/>
+        <param name="MaxMass" type="float" value="15000.0" label="Max mass" help="Select the maximum mass of peptides included in the data base."/>
+        <param name="MinPepLength" type="integer" value="4" label="Min peptide length" help="Select the minimum amino acid  length of peptides included in the data base."/>
+        <param name="MaxPepLength" type="integer" value="65" label="Max peptide length" help="Select the minimum amino acid  length of peptides included in the data base."/>
+        <param name="IsotopicMod" type="select" label="Isotopic mod" help="Select isotopic amino acid modifications. These modifications change the natural abundance of isotopes of single elements. Each peptide sequence is duplicated and included in the data base at natural isotopic abundances and using the modified isotopic abundances." multiple="true">
+            <option value="N15" selected="true">N15</option>
+            <option value="C13">C13</option>
+            <option value="O17">O17</option>
+            <option value="O18">O18</option>
+            <option value="D">D</option>
+        </param>
+        <param name="MassMode" type="select" label="Mass mode" help="Specify how the peptide masses should be calculated.">
+            <option value="Monoisotopic" selected="true">Monoisotopic</option>
+            <option value="Average">Average</option>
+        </param>
+        <param name="FixedMods" type="select" label="Fixed mods" help="Select fixed amino acid modifications. These Modifications are applied whenever possible." multiple="true">
+            <option value="Acetylation'ProtNTerm'">Acetylation'ProtNTerm'</option>
+            <option value="Carbamidomethyl'Cys'">Carbamidomethyl'Cys'</option>
+            <option value="Oxidation'Met'">Oxidation'Met'</option>
+            <option value="Phosphorylation'Ser'Thr'Tyr'">Phosphorylation'Ser'Thr'Tyr'</option>
+            <option value="Pyro_Glu'GluNterm'">Pyro_Glu'GluNterm'</option>
+            <option value="Pyro_Glu'GlnNterm'">Pyro_Glu'GlnNterm'</option>
+        </param>
+        <param name="VariableMods" type="select" label="Variable mods" help="Select variable amino acid modifications. Whenever a this modification can be applied the peptide sequence is duplicated and included in the data base in a modified and unmodiefied version." multiple="true">
+            <option value="Acetylation'ProtNTerm'" selected="true">Acetylation'ProtNTerm'</option>
+            <option value="Carbamidomethyl'Cys'">Carbamidomethyl'Cys'</option>
+            <option value="Oxidation'Met'" selected="true">Oxidation'Met'</option>
+            <option value="Phosphorylation'Ser'Thr'Tyr'">Phosphorylation'Ser'Thr'Tyr'</option>
+            <option value="Pyro_Glu'GluNterm'">Pyro_Glu'GluNterm'</option>
+            <option value="Pyro_Glu'GlnNterm'">Pyro_Glu'GlnNterm'</option>
+        </param>
+        <param name="VarModThreshold" type="integer" value="4" label="Variable mod threshold" help="Select the maximum amount of variable amino acid modifications in one sequence. This parameter is needed to circumvent a combinatoric explosion."/>
+        <param name="outputParamfile" type="boolean" value="false" label="Output parameter file"/>
+    </inputs>
+    <outputs>
+       <data format="sqlite" name="out_db" />
+       <data format="json" name="out_paramfile">
+            <filter>outputParamfile</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="fasta" value="sample.fasta"/>
+            <param name="ParseProteinIDRegexPattern" value="id"/>
+            <param name="Protease" value="Trypsin"/>
+            <param name="MinMissedCleavages" value="0"/>
+            <param name="MaxMissedCleavages" value="2"/>
+            <param name="MaxMass" value="15000.0"/>
+            <param name="MinPepLength" value="4"/>
+            <param name="MaxPepLength" value="65"/>
+            <param name="IsotopicMod" value="N15"/>
+            <param name="MassMode" value="Monoisotopic"/>
+            <param name="FixedMods" value="Acetylation'ProtNTerm'"/>
+            <param name="VariableMods" value="Acetylation'ProtNTerm'"/>
+            <param name="VarModThreshold" value="4"/>
+            <param name="outputParamfile" value="false"/>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="fasta" value="sample.fasta"/>
+            <param name="ParseProteinIDRegexPattern" value="id"/>
+            <param name="Protease" value="Trypsin"/>
+            <param name="MinMissedCleavages" value="0"/>
+            <param name="MaxMissedCleavages" value="2"/>
+            <param name="MaxMass" value="15000.0"/>
+            <param name="MinPepLength" value="4"/>
+            <param name="MaxPepLength" value="65"/>
+            <param name="IsotopicMod" value="N15"/>
+            <param name="MassMode" value="Monoisotopic"/>
+            <param name="FixedMods" value="Acetylation'ProtNTerm'"/>
+            <param name="VariableMods" value="Acetylation'ProtNTerm'"/>
+            <param name="VarModThreshold" value="4"/>
+            <param name="outputParamfile" value="true"/>
+            <output name="out_paramfile" file="result_1.json"/>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="fasta" value="sample.fasta"/>
+            <param name="ParseProteinIDRegexPattern" value="id"/>
+            <param name="Protease" value="Trypsin"/>
+            <param name="MinMissedCleavages" value="0"/>
+            <param name="MaxMissedCleavages" value="2"/>
+            <param name="MaxMass" value="15000.0"/>
+            <param name="MinPepLength" value="4"/>
+            <param name="MaxPepLength" value="65"/>
+            <param name="IsotopicMod" value="N15"/>
+            <param name="MassMode" value="Monoisotopic"/>
+            <param name="FixedMods" value="Acetylation'ProtNTerm',Carbamidomethyl'Cys',Oxidation'Met',Phosphorylation'Ser'Thr'Tyr',Pyro_Glu'GluNterm',Pyro_Glu'GlnNterm'"/>
+            <param name="VariableMods" value="Acetylation'ProtNTerm',Carbamidomethyl'Cys',Oxidation'Met',Phosphorylation'Ser'Thr'Tyr',Pyro_Glu'GluNterm',Pyro_Glu'GlnNterm'"/>
+            <param name="VarModThreshold" value="4"/>
+            <param name="outputParamfile" value="true"/>
+            <output name="out_paramfile" file="result_2.json"/>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="fasta" value="sample.fasta"/>
+            <param name="ParseProteinIDRegexPattern" value="id"/>
+            <param name="Protease" value="Trypsin"/>
+            <param name="MinMissedCleavages" value="0"/>
+            <param name="MaxMissedCleavages" value="2"/>
+            <param name="MaxMass" value="15000.0"/>
+            <param name="MinPepLength" value="4"/>
+            <param name="MaxPepLength" value="65"/>
+            <param name="IsotopicMod" value=""/>
+            <param name="MassMode" value="Average"/>
+            <param name="FixedMods" value=""/>
+            <param name="VariableMods" value=""/>
+            <param name="VarModThreshold" value="4"/>
+            <param name="outputParamfile" value="true"/>
+            <output name="out_paramfile" file="result_3.json"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+Introduction
+------------
+MS-based shotgun proteomics estimates protein abundances using a proxy: peptides. 
+An established method to identify acquired MS/MS spectra is the comparison of each spectrum with peptides in a reference database. 
+
+What It Does
+------------
+The PeptideDB tool helps to create peptide databases by in silico digestion given proteome information in the FASTA format and a set of parameters 
+that allow the user to mimic conditions of their specific experiment. 
+The created database stores peptide protein relationships in a SQLite database which can then be supplied to other ProteomIQon tools.
+
+Further Reading
+---------------
+Additional information about the tool can be found in the `documentation <https://csbiology.github.io/ProteomIQon/tools/PeptideDB.html>`_.
+    ]]>
+    </help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result_1.json	Sun Jul 04 21:26:08 2021 +0000
@@ -0,0 +1,27 @@
+
+        
+        {
+            "Name": "galaxy",
+            "ParseProteinIDRegexPattern": "id",
+            "Protease": { "Case": "Trypsin" },
+            "MinMissedCleavages": 0,
+            "MaxMissedCleavages": 2,
+            "MaxMass": 15000.0,
+            "MinPepLength": 4,
+            "MaxPepLength": 65,
+            "IsotopicMod": [
+                {"Case":"N15"}
+            ],
+            "MassMode": {
+                "Case": "Monoisotopic"
+            },
+            "FixedMods": [
+                {"Case":"Acetylation'ProtNTerm'"}
+            ],
+            "VariableMods": [
+                {"Case":"Acetylation'ProtNTerm'"}
+            ],
+            "VarModThreshold": 4
+            }
+            
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result_2.json	Sun Jul 04 21:26:08 2021 +0000
@@ -0,0 +1,37 @@
+
+        
+        {
+            "Name": "galaxy",
+            "ParseProteinIDRegexPattern": "id",
+            "Protease": { "Case": "Trypsin" },
+            "MinMissedCleavages": 0,
+            "MaxMissedCleavages": 2,
+            "MaxMass": 15000.0,
+            "MinPepLength": 4,
+            "MaxPepLength": 65,
+            "IsotopicMod": [
+                {"Case":"N15"}
+            ],
+            "MassMode": {
+                "Case": "Monoisotopic"
+            },
+            "FixedMods": [
+                {"Case":"Acetylation'ProtNTerm'"},
+                {"Case":"Carbamidomethyl'Cys'"},
+                {"Case":"Oxidation'Met'"},
+                {"Case":"Phosphorylation'Ser'Thr'Tyr'"},
+                {"Case":"Pyro_Glu'GluNterm'"},
+                {"Case":"Pyro_Glu'GlnNterm'"}
+            ],
+            "VariableMods": [
+                {"Case":"Acetylation'ProtNTerm'"},
+                {"Case":"Carbamidomethyl'Cys'"},
+                {"Case":"Oxidation'Met'"},
+                {"Case":"Phosphorylation'Ser'Thr'Tyr'"},
+                {"Case":"Pyro_Glu'GluNterm'"},
+                {"Case":"Pyro_Glu'GlnNterm'"}
+            ],
+            "VarModThreshold": 4
+            }
+            
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result_3.json	Sun Jul 04 21:26:08 2021 +0000
@@ -0,0 +1,24 @@
+
+        
+        {
+            "Name": "galaxy",
+            "ParseProteinIDRegexPattern": "id",
+            "Protease": { "Case": "Trypsin" },
+            "MinMissedCleavages": 0,
+            "MaxMissedCleavages": 2,
+            "MaxMass": 15000.0,
+            "MinPepLength": 4,
+            "MaxPepLength": 65,
+            "IsotopicMod": [
+            ],
+            "MassMode": {
+                "Case": "Average"
+            },
+            "FixedMods": [
+            ],
+            "VariableMods": [
+            ],
+            "VarModThreshold": 4
+            }
+            
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.fasta	Sun Jul 04 21:26:08 2021 +0000
@@ -0,0 +1,5 @@
+>sp|A0A023PZB3|FMP49_YEAST Protein FMP49, mitochondrial OS=Saccharomyces cerevisiae (strain ATCC 204508 / S288c) OX=559292 GN=FMP49 PE=4 SV=1
+MYYFSRVAARTFCCCIFFCLATAYSRPDRNPRKIEKKDKKFFGASKNTNPANAMGNLFKA
+PTIEYVVEEVTRTHQPEQYDIPTDMSPLMTIAASESADKFTDKFFVDQSSIMKEKTSSKG
+NARTLL
+>sp|P04050|RPB1_YEAST DNA-directed RNA polymerase II subunit RPB1 OS=Saccharomyces cerevisiae (strain ATCC 204508 / S288c) OX=559292 GN=RPO21 PE=1 SV=2