diff diffacto.xml @ 0:3cc7ce0822a1 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/diffacto commit 507bb20a2c246bb0a1a0c7dae1555a851730e4a6"
author galaxyp
date Mon, 21 Jun 2021 12:50:54 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/diffacto.xml	Mon Jun 21 12:50:54 2021 +0000
@@ -0,0 +1,267 @@
+<tool id="diffacto" name="Diffacto" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>Comparative Protein Abundance from Covariation of Peptide Abundances</description>
+    <macros>
+        <!-- Version of the diffacto package; reused by the tool version attribute and by <requirements>. -->
+        <token name="@TOOL_VERSION@">1.0.6</token>
+        <!-- Wrapper revision appended to the tool version after "+galaxy". -->
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <!-- The package version is taken from the @TOOL_VERSION@ token defined in <macros>. -->
+        <requirement type="package" version="@TOOL_VERSION@">diffacto</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## The selected dataset is copied to input.csv with every tab replaced
+        ## by a comma, and that working copy is what diffacto receives via -i.
+        tr '\t' ',' < '$input' > input.csv &&
+        diffacto
+        -i input.csv
+        ## Optional datasets are only passed when one was selected.
+        #if $db
+            -db '$db'
+        #end if
+        #if $samples
+            -samples '$samples'
+        #end if
+        ## Only pass -reference when the user actually typed something; an
+        ## empty field must leave the argument out so the tool default applies.
+        #if str($reference).strip() != ''
+            -reference '$reference'
+        #end if
+        #if $normalize
+            -normalize $normalize
+        #end if
+        ## Test the rendered text rather than truthiness: 0.0 is a permitted
+        ## value (min 0.0) but is falsy, so a plain truth test would drop it.
+        #if str($farms_mu) != ''
+            -farms_mu $farms_mu
+        #end if
+        #if str($farms_alpha) != ''
+            -farms_alpha $farms_alpha
+        #end if
+        -min_samples $min_samples
+        -impute_threshold $impute_threshold
+        -cutoff_weight $cutoff_weight
+        ## Boolean params whose truevalue is the complete flag text (empty when unchecked).
+        $use_unique
+        ## The scale select is translated into an explicit -log2 True/False.
+        #if $scale == 'log2'
+            -log2 True
+        #else
+            -log2 False
+        #end if
+        $fast
+        -out '$output'
+        ## Optional result files, requested by the mcfdr and loadings checkboxes.
+        #if $mcfdr
+            -mc_out '$mc_out'
+        #end if
+        #if $loadings
+            -loadings_out '$loadings_out'
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Peptide abundance table; the command section converts tabs to commas and passes the copy to diffacto via -i. -->
+        <param name="input" argument="-i" type="data" format="tabular,csv" label="Peptides abundances">
+            <help><![CDATA[
+                Peptides abundances in tabular or csv format.
+                <ul>
+                <li>The first row is column headers and should contain the sample name for each sample column. </li>
+                <li>The first column should contain unique peptide sequences. </li>
+                <li><i>Optionally, the second column may be ProteinID assignments, else the <b>Protein database</b> input is required.</i></li>
+                <li>Each remaining column is a sample column with numeric abundance values.</li>
+                <li>Missing values should be empty instead of zeros.</li>
+                </ul>
+            ]]></help>
+        </param>
+        <!-- Optional datasets: each is only added to the command line when a dataset is selected. -->
+        <param argument="-db" type="data" format="fasta" label="Protein database" optional="true"
+               help="Required if the Peptide abundances input does not have Protein IDs in the second column"/>
+        <param argument="-samples" type="data" format="tabular" label="Sample Groups" optional="true">
+            <help><![CDATA[
+               <i>Optional: By default, each Sample column in Peptide abundances is treated as a singleton group.</i>
+               <br>
+               Groups the samples from the Peptides abundance input for comparison.
+               Each sample column from Peptides abundance input should be on a line with 2 columns:
+               <ol>
+               <li>Sample name for header line of the Peptides abundance input.</li>
+               <li>Group Name assignment for the sample</li>
+               </ol>
+            ]]></help>
+        </param>
+        <!-- Free-text list of reference groups, separated by semicolons; empty by default. -->
+        <param argument="-reference" type="text" value="" label="Reference sample groups" optional="true">
+            <help><![CDATA[
+            <i>Optional: By default, Diffacto uses the average of all samples/groups as the reference.</i>
+            <br>
+            Names of sample groups <i>(separated by semicolon)</i> treated as the comparison reference.
+            <ul>
+            <li>If a Sample Groups input was used, the reference names should be Group names from column 2.</li>
+            <li>Otherwise, the reference names should be Sample names from the Peptides abundance column header line.</li>
+            </ul>
+            ]]></help>
+        </param>
+        <!-- The command section maps this choice to "-log2 True" or "-log2 False". -->
+        <param name="scale" argument="-log2" type="select" label="Peptides abundance scale">
+            <option value="linear">linear</option>
+            <option value="log2">log2</option>
+        </param>
+        <param argument="-normalize" type="select" label="Sample-wise normalization" optional="true">
+            <option value="average">average</option>
+            <option value="median">median</option>
+            <option value="GMM">GMM</option>
+        </param>
+        <!-- Optional hyperparameters, each restricted to the range 0.0 to 1.0. -->
+        <param argument="-farms_mu" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter mu"
+            help="Hyperparameter mu (default: 0.1)"/>
+        <param argument="-farms_alpha" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter alpha"
+            help="Hyperparameter weight of prior probability (default: 0.1)"/>
+        <!-- Required numeric settings; the command section always passes these three. -->
+        <param argument="-min_samples" type="integer" value="1" min="1" label="Minimum samples for peptide"
+            help="Minimum number of samples peptides needed to be quantified in"/>
+        <param argument="-impute_threshold" type="float" value="0.99" min="0.1" max="1.0" label="Minimum fraction of missing values in the group"
+            help="Impute missing values if missing fraction is larger than the threshold."/>
+        <param argument="-cutoff_weight" type="float" value="0.5" min="0." max="1.0" label="Peptide cutoff weight"
+            help="Peptides weighted lower than the cutoff will be excluded."/>
+        <!-- For these two flags the truevalue is the full command-line fragment inserted by the command section. -->
+        <param argument="-use_unique" type="boolean" truevalue="-use_unique True" falsevalue="" checked="false" label="Use unique peptides only"/>
+        <param argument="-fast" type="boolean" truevalue="-fast True" falsevalue="" checked="false" label="Allow early termination in EM calculation when noise is sufficiently small."/>
+        <!-- These two switches select the optional outputs; they are read by the command conditionals and the output filters. -->
+        <param name="mcfdr" argument="-mc_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Perform Monte Carlo FDR simulation"/>
+        <param name="loadings" argument="-loadings_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Output Protein Peptide loadings file"/>
+    </inputs>
+    <outputs>
+        <!-- Main result, written by the command via -out; it has no filter, so it is always declared. -->
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}: Protein Abundance">
+            <actions>
+                <!-- Metadata defaults: comment_lines is set to 1 and column_names to the listed names. -->
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Protein,N.Pept,Q.Pept,S/N,P(PECA)" />
+            </actions>
+        </data>
+        <!-- Declared only when the mcfdr checkbox is set; written via -mc_out. -->
+        <data name="mc_out" format="tabular" label="${tool.name} on ${on_string}: MC FDR">
+            <filter>mcfdr == True</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Protein,P(MC),MCFDR" />
+            </actions>
+        </data>
+        <!-- Declared only when the loadings checkbox is set; written via -loadings_out. -->
+        <data name="loadings_out" format="tabular" label="${tool.name} on ${on_string}: Protein Peptide loading">
+            <filter>loadings == True</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Protein,Peptide,Loading" />
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <!-- CSV peptide table with a FASTA protein database and sample groups; checks the output mentions P19097. -->
+        <test>
+            <param name="input" ftype="csv" value="HBY20Mix.peptides.csv"/>
+            <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
+            <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
+            <output name="output">
+               <assert_contents>
+                    <has_text text="P19097" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Same scenario as above, but the peptide table is supplied as a tabular (.tsv) dataset. -->
+        <test>
+            <param name="input" ftype="tabular" value="HBY20Mix.peptides.tsv"/>
+            <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
+            <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
+            <output name="output">
+               <assert_contents>
+                    <has_text text="P19097" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- No protein database is given here; min_samples is raised to 2 and the output must mention FAS2. -->
+        <test>
+            <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
+            <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
+            <param name="min_samples" value="2"/>
+            <output name="output">
+               <assert_contents>
+                    <has_text text="FAS2" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Enables use_unique and mcfdr with min_samples 4; both the main output and mc_out must mention FAS2. -->
+        <test>
+            <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
+            <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
+            <param name="min_samples" value="4"/>
+            <param name="use_unique" value="True"/>
+            <param name="mcfdr" value="True"/>
+            <output name="output">
+               <assert_contents>
+                    <has_text text="FAS2" />
+                </assert_contents>
+            </output>
+            <output name="mc_out">
+               <assert_contents>
+                    <has_text text="FAS2" />
+                </assert_contents>
+            </output>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+**Diffacto**
+
+Diffacto_ quantifies comparative protein abundance from the covariation of peptide abundances.
+
+Diffacto_ applies factor analysis to extract the covariation of peptides' abundances. The method enables a weighted geometrical average summarization and automatic elimination of incoherent peptides, which may result from suboptimal digestion or being partially modified, and are not representative of the protein concentration.
+
+**Inputs**
+
+  - **Peptides abundances** *in tabular or csv format*
+
+    - The first row is column headers and should contain the sample name for each sample column.
+    - The first column should contain unique peptide sequences.
+    - *Optionally, the second column may be Protein ID assignments, else the* **Protein database** *input is required.*
+    - Each remaining column is a sample column with numeric abundance values.
+    - Missing values should be empty instead of zeros.
+    - Example: 
+
+      ============ ========== ========= ========= ========= =========
+      sequences    Protein    Sample1-A Sample1_B Sample2_A Sample2_B
+      ============ ========== ========= ========= ========= =========
+      AAATAAMTK    EF3A       127.35209 142.58217 135.89206 162.54500
+      AAATTGEWDK   PDC1       100.35922 114.68676 922.60617 833.97955
+      LPVLLADACCSR HSP72;PDC1 120.21570 194.99594 977.48321 219.23281
+      AAEEAGVTDVK  FAS2       442.67501 457.52266 448.52837 424.15980
+      ============ ========== ========= ========= ========= =========
+
+
+  - **Protein database** *(optional)*
+
+    - The Protein database in fasta format that has protein sequences containing the peptides.
+    - Required if the **Peptides abundances** input does not have a second column containing Protein ID assignments.
+
+
+  - **Sample Groups** *(optional)*
+
+    - First column has the sample name
+    - Second column has the group name
+    - Example:
+
+      ========= ==
+      Sample1-A S1
+      Sample1_B S1
+      Sample2_A S2
+      Sample2_B S2
+      ========= ==
+
+
+**Outputs**
+
+  - **Protein Abundance**
+
+        ======= ====== ====== =================== =================== ================== ==================
+        Protein N.Pept Q.Pept S/N                 P(PECA)             S1                 S2
+        ======= ====== ====== =================== =================== ================== ==================
+        EF3A    2      2      -2.874362404756714  0.2608189432601452  463172795.59269696 489796576.81520355
+        FAS2    6      4      -0.5901265476375578 0.8395809777778386  52093246.23323742  53280470.3811749
+        PDC1    3      2      6.634988423694361   0.25491030879514676 203769831.79809052 174641994.14231393
+        ======= ====== ====== =================== =================== ================== ==================
+
+  - **FDR Estimate from Monte Carlo Simulation** *(optional)*
+
+        =======  =================== ===================
+        Protein  P(MC)               MCFDR            
+        =======  =================== ===================
+        EF3A     0.1419053964023984  0.5287482885321804
+        FAS2     0.9867109634551495  0.9132662960822688
+        PDC1     0.3338088445078459  0.5287482885321804
+        =======  =================== ===================
+
+  - **Protein Peptide Loadings** *(optional)*
+
+        =======  ===========  ===================
+        Protein  Peptide      Loading
+        =======  ===========  ===================
+        EF3A     AAATAAMTK    0.5287482885321804
+        FAS2     AAEEAGVTDVK  0.9132662960822688
+        PDC1     AAATTGEWDK   0.5287482885321804
+        =======  ===========  ===================
+
+.. _Diffacto: https://github.com/statisticalbiotechnology/diffacto
+
+    ]]></help>
+    <citations>
+        <!-- Reference for the tool, given as a DOI. -->
+        <citation type="doi">10.1074/mcp.O117.067728</citation>
+    </citations>
+</tool>