diffacto: diffacto.xml comparison

comparison diffacto.xml @ 0:3cc7ce0822a1 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/diffacto commit 507bb20a2c246bb0a1a0c7dae1555a851730e4a6"

author	galaxyp
date	Mon, 21 Jun 2021 12:50:54 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:3cc7ce0822a1
+<tool id="diffacto" name="Diffacto" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+<description>Comparative Protein Abundance from Covariation of Peptide Abundances</description>
+<macros>
+<!-- @TOOL_VERSION@ is substituted into the tool version attribute and the diffacto package requirement. -->
+<token name="@TOOL_VERSION@">1.0.6</token>
+<!-- @VERSION_SUFFIX@ is the wrapper revision appended after "+galaxy" in the tool version attribute. -->
+<token name="@VERSION_SUFFIX@">0</token>
+</macros>
+<requirements>
+<!-- The package version is tied to @TOOL_VERSION@ so the wrapper and the installed diffacto stay in step. -->
+<requirement type="package" version="@TOOL_VERSION@">diffacto</requirement>
+</requirements>
+<command detect_errors="exit_code"><![CDATA[
+## Build the diffacto command line.
+## Tabs are converted to commas so that tabular and csv inputs are both
+## handed to diffacto as the job-local file input.csv.
+tr '\t' ',' < '$input' > input.csv &&
+diffacto
+-i input.csv
+#if $db
+-db '$db'
+#end if
+#if $samples
+-samples '$samples'
+#end if
+## The reference is optional: only pass it when the user typed something,
+## so a blank field does not replace diffacto's own default with ''.
+#if str($reference).strip()
+-reference '$reference'
+#end if
+#if $normalize
+-normalize $normalize
+#end if
+## Compare against the empty value instead of testing truthiness, so that an
+## explicit 0.0 (allowed by the parameter's minimum) is still passed through.
+#if str($farms_mu) not in ('', 'None')
+-farms_mu $farms_mu
+#end if
+#if str($farms_alpha) not in ('', 'None')
+-farms_alpha $farms_alpha
+#end if
+-min_samples $min_samples
+-impute_threshold $impute_threshold
+-cutoff_weight $cutoff_weight
+## use_unique and fast expand to their complete flag or to nothing.
+$use_unique
+#if $scale == 'log2'
+-log2 True
+#else
+-log2 False
+#end if
+$fast
+-out '$output'
+## The optional reports are requested only when their checkbox is ticked;
+## the matching outputs are filtered on the same parameters.
+#if $mcfdr
+-mc_out '$mc_out'
+#end if
+#if $loadings
+-loadings_out '$loadings_out'
+#end if
+]]></command>
+<inputs>
+<!-- Peptide abundance table; the command block converts it to input.csv. -->
+<param name="input" argument="-i" type="data" format="tabular,csv" label="Peptides abundances">
+<help><![CDATA[
+Peptides abundances in tabular or csv format.
+<ul>
+<li>The first row is column headers and should contain the sample name for each sample column. </li>
+<li>The first column should contain unique peptide sequences. </li>
+<li><i>Optionally, the second column may be ProteinID assignments, else the <b>Protein database</b> input is required.</i></li>
+<li>Each remaining column is a sample column with numeric abundance values.</li>
+<li>Missing values should be empty instead of zeros.</li>
+</ul>
+]]></help>
+</param>
+<param argument="-db" type="data" format="fasta" label="Protein database" optional="true"
+help="Required if the Peptide abundances input does not have Protein IDs in the second column"/>
+<param argument="-samples" type="data" format="tabular" label="Sample Groups" optional="true">
+<help><![CDATA[
+<i>Optional: By default, each Sample column in Peptide abundances is treated as a singleton group.</i>
+<br>
+Groups the samples from the Peptides abundance input for comparison.
+Each sample column from Peptides abundance input should be on a line with 2 columns:
+<ol>
+<li>Sample name for header line of the Peptides abundance input.</li>
+<li>Group Name assignment for the sample</li>
+</ol>
+]]></help>
+</param>
+<param argument="-reference" type="text" value="" label="Reference sample groups" optional="true">
+<help><![CDATA[
+<i>Optional: By default, Diffacto uses the average of all samples/groups as the reference.</i>
+<br>
+Names of sample groups <i>(separated by semicolon)</i> treated as the comparison reference.
+<ul>
+<li>If a Sample Groups input was used, the reference names should be Group names from column 2.</li>
+<li>Otherwise, the reference names should be Sample names from the Peptides abundance column header line.</li>
+</ul>
+]]></help>
+<!-- Galaxy's default text sanitizer rewrites ';' to 'X', which would corrupt a
+semicolon-separated list of reference groups. Keep the default character set and
+additionally allow ';'. The value is single-quoted in the command, so the shell
+never interprets the semicolon. -->
+<sanitizer>
+<valid initial="default">
+<add value=";"/>
+</valid>
+</sanitizer>
+</param>
+<!-- The selected scale is translated to "-log2 True" or "-log2 False" in the command block. -->
+<param name="scale" argument="-log2" type="select" label="Peptides abundance scale">
+<option value="linear">linear</option>
+<option value="log2">log2</option>
+</param>
+<param argument="-normalize" type="select" label="Sample-wise normalization" optional="true">
+<option value="average">average</option>
+<option value="median">median</option>
+<option value="GMM">GMM</option>
+</param>
+<param argument="-farms_mu" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter mu"
+help="Hyperparameter mu (default: 0.1)"/>
+<param argument="-farms_alpha" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter alpha"
+help="Hyperparameter weight of prior probability (default: 0.1)"/>
+<param argument="-min_samples" type="integer" value="1" min="1" label="Minimum samples for peptide"
+help="Minimum number of samples peptides needed to be quantified in"/>
+<param argument="-impute_threshold" type="float" value="0.99" min="0.1" max="1.0" label="Minimum fraction of missing values in the group"
+help="Impute missing values if missing fraction is larger than the threshold."/>
+<param argument="-cutoff_weight" type="float" value="0.5" min="0." max="1.0" label="Peptide cutoff weight"
+help="Peptides weighted lower than the cutoff will be excluded."/>
+<!-- These two booleans carry their complete command-line flag as the true value. -->
+<param argument="-use_unique" type="boolean" truevalue="-use_unique True" falsevalue="" checked="false" label="Use unique peptides only"/>
+<param argument="-fast" type="boolean" truevalue="-fast True" falsevalue="" checked="false" label="Allow early termination in EM calculation when noise is sufficiently small."/>
+<!-- These two booleans switch the optional mc_out and loadings_out outputs on. -->
+<param name="mcfdr" argument="-mc_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Perform Monte Carlo FDR simulation"/>
+<param name="loadings" argument="-loadings_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Output Protein Peptide loadings file"/>
+</inputs>
+<outputs>
+<!-- Protein abundance table: the only output without a filter, written via -out. -->
+<data format="tabular" name="output" label="${tool.name} on ${on_string}: Protein Abundance">
+<actions>
+<action type="metadata" name="comment_lines" default="1"/>
+<action type="metadata" name="column_names" default="Protein,N.Pept,Q.Pept,S/N,P(PECA)"/>
+</actions>
+</data>
+<!-- Monte Carlo FDR table: created only when the mcfdr checkbox is ticked. -->
+<data format="tabular" name="mc_out" label="${tool.name} on ${on_string}: MC FDR">
+<filter>mcfdr == True</filter>
+<actions>
+<action type="metadata" name="comment_lines" default="1"/>
+<action type="metadata" name="column_names" default="Protein,P(MC),MCFDR"/>
+</actions>
+</data>
+<!-- Protein/peptide loadings table: created only when the loadings checkbox is ticked. -->
+<data format="tabular" name="loadings_out" label="${tool.name} on ${on_string}: Protein Peptide loading">
+<filter>loadings == True</filter>
+<actions>
+<action type="metadata" name="comment_lines" default="1"/>
+<action type="metadata" name="column_names" default="Protein,Peptide,Loading"/>
+</actions>
+</data>
+</outputs>
+<tests>
+<!-- Two outputs are filtered, so every test states how many datasets it expects;
+a broken filter then fails the test instead of passing unnoticed. -->
+<!-- csv input, with protein database and sample groups: main output only. -->
+<test expect_num_outputs="1">
+<param name="input" ftype="csv" value="HBY20Mix.peptides.csv"/>
+<param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
+<param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
+<output name="output">
+<assert_contents>
+<has_text text="P19097" />
+</assert_contents>
+</output>
+</test>
+<!-- Same run with a tabular input, exercising the tab-to-comma conversion. -->
+<test expect_num_outputs="1">
+<param name="input" ftype="tabular" value="HBY20Mix.peptides.tsv"/>
+<param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
+<param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
+<output name="output">
+<assert_contents>
+<has_text text="P19097" />
+</assert_contents>
+</output>
+</test>
+<!-- No protein database is supplied; min_samples is raised to 2. -->
+<test expect_num_outputs="1">
+<param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
+<param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
+<param name="min_samples" value="2"/>
+<output name="output">
+<assert_contents>
+<has_text text="FAS2" />
+</assert_contents>
+</output>
+</test>
+<!-- Unique peptides only, with the Monte Carlo FDR report: two outputs. -->
+<test expect_num_outputs="2">
+<param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
+<param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
+<param name="min_samples" value="4"/>
+<param name="use_unique" value="True"/>
+<param name="mcfdr" value="True"/>
+<output name="output">
+<assert_contents>
+<has_text text="FAS2" />
+</assert_contents>
+</output>
+<output name="mc_out">
+<assert_contents>
+<has_text text="FAS2" />
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+**Diffacto**
+Diffacto_ quantifies comparative protein abundance from the covariation of peptide abundances.
+Diffacto_ applies factor analysis to extract the covariation of peptides' abundances. The method enables a weighted geometrical average summarization and automatic elimination of incoherent peptides, which may result from suboptimal digestion or being partially modified, and are not representative of the protein concentration.
+**Inputs**
+- **Peptides abundances** *in tabular or csv format*
+- The first row is column headers and should contain the sample name for each sample column.
+- The first column should contain unique peptide sequences.
+- *Optionally, the second column may be Protein ID assignments, else the* **Protein database** *input is required.*
+- Each remaining column is a sample column with numeric abundance values.
+- Missing values should be empty instead of zeros.
+- Example:
+============ ========== ========= ========= ========= =========
+sequences    Protein    Sample1-A Sample1_B Sample2_A Sample2_B
+============ ========== ========= ========= ========= =========
+AAATAAMTK    EF3A       127.35209 142.58217 135.89206 162.54500
+AAATTGEWDK   PDC1       100.35922 114.68676 922.60617 833.97955
+LPVLLADACCSR HSP72;PDC1 120.21570 194.99594 977.48321 219.23281
+AAEEAGVTDVK  FAS2       442.67501 457.52266 448.52837 424.15980
+============ ========== ========= ========= ========= =========
+- **Protein database** *(optional)*
+- The Protein database in fasta format that has protein sequences containing the peptides.
+- Required if the **Peptides abundances** input does not have a second column containing Protein ID assignments
+- **Sample Groups** *(optional)*
+- First column has the sample name
+- Second column has the group name
+- Example:
+========= ==
+Sample1-A S1
+Sample1_B S1
+Sample2_A S2
+Sample2_B S2
+========= ==
+**Outputs**
+- **Protein Abundance**
+======= ====== ====== =================== =================== ================== ==================
+Protein N.Pept Q.Pept S/N                 P(PECA)             S1                 S2
+======= ====== ====== =================== =================== ================== ==================
+EF3A    2      2      -2.874362404756714  0.2608189432601452  463172795.59269696 489796576.81520355
+FAS2    6      4      -0.5901265476375578 0.8395809777778386  52093246.23323742  53280470.3811749
+PDC1    3      2      6.634988423694361   0.25491030879514676 203769831.79809052 174641994.14231393
+======= ====== ====== =================== =================== ================== ==================
+- **FDR Estimate from Monte Carlo Simulation** *(optional)*
+=======  =================== ===================
+Protein  P(MC)               MCFDR
+=======  =================== ===================
+EF3A     0.1419053964023984  0.5287482885321804
+FAS2     0.9867109634551495  0.9132662960822688
+PDC1     0.3338088445078459  0.5287482885321804
+=======  =================== ===================
+- **Protein Peptide Loadings** *(optional)*
+=======  ===========  ===================
+EF3A     AAATAAMTK    0.5287482885321804
+FAS2     AAEEAGVTDVK  0.9132662960822688
+PDC1     AAATTGEWDK   0.5287482885321804
+=======  ===========  ===================
+.. _Diffacto: https://github.com/statisticalbiotechnology/diffacto
+]]></help>
+<citations>
+<!-- DOI of the publication cited for this tool. -->
+<citation type="doi">10.1074/mcp.O117.067728</citation>
+</citations>
+</tool>

Mercurial > repos > galaxyp > diffacto

comparison diffacto.xml @ 0:3cc7ce0822a1 draft default tip