comparison diffacto.xml @ 0:3cc7ce0822a1 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/diffacto commit 507bb20a2c246bb0a1a0c7dae1555a851730e4a6"
author galaxyp
date Mon, 21 Jun 2021 12:50:54 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3cc7ce0822a1
1 <tool id="diffacto" name="Diffacto" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
2 <description>Comparative Protein Abundance from Covariation of Peptide Abundances</description>
3 <macros>
4 <token name="@TOOL_VERSION@">1.0.6</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
7 <requirements>
8 <requirement type="package" version="@TOOL_VERSION@">diffacto</requirement>
9 </requirements>
10 <command detect_errors="exit_code"><![CDATA[
## Command template: rewrite tabs in the input as commas, then run diffacto on the resulting input.csv.
11 tr '\t' ',' < '$input' > input.csv &&
12 diffacto
13 -i input.csv
14 #if $db
15 -db '$db'
16 #end if
17 #if $samples
18 -samples '$samples'
19 #end if
20 -reference '$reference'
21 #if $normalize
22 -normalize $normalize
23 #end if
## The optional float hyperparameters are compared as strings rather than by truthiness:
## 0.0 is inside their declared range but is falsy, so a plain truth test would silently
## drop an explicit zero. An empty (unset) field renders as '' and is still skipped.
24 #if str($farms_mu) != ''
25 -farms_mu $farms_mu
26 #end if
27 #if str($farms_alpha) != ''
28 -farms_alpha $farms_alpha
29 #end if
30 -min_samples $min_samples
31 -impute_threshold $impute_threshold
32 -cutoff_weight $cutoff_weight
33 $use_unique
34 #if $scale == 'log2'
35 -log2 True
36 #else
37 -log2 False
38 #end if
39 $fast
40 -out '$output'
## The optional outputs are requested only when the matching boolean input is checked.
41 #if $mcfdr
42 -mc_out '$mc_out'
43 #end if
44 #if $loadings
45 -loadings_out '$loadings_out'
46 #end if
47 ]]></command>
48 <inputs>
49 <param name="input" argument="-i" type="data" format="tabular,csv" label="Peptides abundances">
50 <help><![CDATA[
51 Peptides abundances in tabular or csv format.
52 <ul>
53 <li>The first row is column headers and should contain the sample name for each sample column. </li>
54 <li>The first column should contain unique peptide sequences. </li>
55 <li><i>Optionally, the second column may be ProteinID assignments, else the <b>Protein database</b> input is required.</i></li>
56 <li>Each remaining column is a sample column with numeric abundance values.</li>
57 <li>Missing values should be empty instead of zeros.</li>
58 </ul>
59 ]]></help>
60 </param>
61 <param argument="-db" type="data" format="fasta" label="Protein database" optional="true"
62 help="Required if the Peptide abundances input does not have Protein IDs in the second column"/>
63 <param argument="-samples" type="data" format="tabular" label="Sample Groups" optional="true">
64 <help><![CDATA[
65 <i>Optional: By default, each Sample column in Peptide abundances is treated as a singleton group.</i>
66 <br>
67 Groups the samples from the Peptides abundance input for comparison.
68 Each sample column from Peptides abundance input should be on a line with 2 columns:
69 <ol>
70 <li>Sample name for header line of the Peptides abundance input.</li>
71 <li>Group Name assignment for the sample</li>
72 </ol>
73 ]]></help>
74 </param>
75 <param argument="-reference" type="text" value="" label="Reference sample groups" optional="true">
76 <help><![CDATA[
77 <i>Optional: By default, Diffacto uses the average of all samples/groups as the reference.</i>
78 <br>
79 Names of sample groups <i>(separated by semicolon)</i> treated as the comparison reference.
80 <ul>
81 <li>If a Sample Groups input was used, the reference names should be Group names from column 2.</li>
82 <li>Otherwise, the reference names should be Sample names from the Peptides abundance column header line.</li>
83 </ul>
84 ]]></help>
85 </param>
86 <param name="scale" argument="-log2" type="select" label="Peptides abundance scale">
87 <option value="linear">linear</option>
88 <option value="log2">log2</option>
89 </param>
90 <param argument="-normalize" type="select" label="Sample-wise normalization" optional="true">
91 <option value="average">average</option>
92 <option value="median">median</option>
93 <option value="GMM">GMM</option>
94 </param>
95 <param argument="-farms_mu" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter mu"
96 help="Hyperparameter mu (default: 0.1)"/>
97 <param argument="-farms_alpha" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter alpha"
98 help="Hyperparameter weight of prior probability (default: 0.1)"/>
99 <param argument="-min_samples" type="integer" value="1" min="1" label="Minimum samples for peptide"
100 help="Minimum number of samples peptides needed to be quantified in"/>
101 <param argument="-impute_threshold" type="float" value="0.99" min="0.1" max="1.0" label="Minimum fraction of missing values in the group"
102 help="Impute missing values if missing fraction is larger than the threshold."/>
103 <param argument="-cutoff_weight" type="float" value="0.5" min="0." max="1.0" label="Peptide cutoff weight"
104 help="Peptides weighted lower than the cutoff will be excluded."/>
105 <param argument="-use_unique" type="boolean" truevalue="-use_unique True" falsevalue="" checked="false" label="Use unique peptides only"/>
106 <param argument="-fast" type="boolean" truevalue="-fast True" falsevalue="" checked="false" label="Allow early termination in EM calculation when noise is sufficiently small."/>
107 <param name="mcfdr" argument="-mc_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Perform Monte Carlo FDR simulation"/>
108 <param name="loadings" argument="-loadings_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Output Protein Peptide loadings file"/>
109 </inputs>
110 <outputs>
111 <data name="output" format="tabular" label="${tool.name} on ${on_string}: Protein Abundance">
112 <actions>
113 <action name="comment_lines" type="metadata" default="1" />
114 <action name="column_names" type="metadata" default="Protein,N.Pept,Q.Pept,S/N,P(PECA)" />
115 </actions>
116 </data>
117 <data name="mc_out" format="tabular" label="${tool.name} on ${on_string}: MC FDR">
118 <filter>mcfdr == True</filter>
119 <actions>
120 <action name="comment_lines" type="metadata" default="1" />
121 <action name="column_names" type="metadata" default="Protein,P(MC),MCFDR" />
122 </actions>
123 </data>
124 <data name="loadings_out" format="tabular" label="${tool.name} on ${on_string}: Protein Peptide loading">
125 <filter>loadings == True</filter>
126 <actions>
127 <action name="comment_lines" type="metadata" default="1" />
128 <action name="column_names" type="metadata" default="Protein,Peptide,Loading" />
129 </actions>
130 </data>
131 </outputs>
132 <tests>
133 <test>
134 <param name="input" ftype="csv" value="HBY20Mix.peptides.csv"/>
135 <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
136 <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
137 <output name="output">
138 <assert_contents>
139 <has_text text="P19097" />
140 </assert_contents>
141 </output>
142 </test>
143 <test>
144 <param name="input" ftype="tabular" value="HBY20Mix.peptides.tsv"/>
145 <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
146 <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
147 <output name="output">
148 <assert_contents>
149 <has_text text="P19097" />
150 </assert_contents>
151 </output>
152 </test>
153
154 <test>
155 <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
156 <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
157 <param name="min_samples" value="2"/>
158 <output name="output">
159 <assert_contents>
160 <has_text text="FAS2" />
161 </assert_contents>
162 </output>
163 </test>
164 <test>
165 <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
166 <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
167 <param name="min_samples" value="4"/>
168 <param name="use_unique" value="True"/>
169 <param name="mcfdr" value="True"/>
170 <output name="output">
171 <assert_contents>
172 <has_text text="FAS2" />
173 </assert_contents>
174 </output>
175 <output name="mc_out">
176 <assert_contents>
177 <has_text text="FAS2" />
178 </assert_contents>
179 </output>
180 </test>
181
182 </tests>
183 <help><![CDATA[
184 **Diffacto**
185
186 Diffacto_ quantifies comparative protein abundance from the covariation of peptide abundances.
187
188 Diffacto_ applies factor analysis to extract the covariation of peptides' abundances. The method enables a weighted geometrical average summarization and automatic elimination of incoherent peptides, which may result from suboptimal digestion or being partially modified, and are not representative of the protein concentration.
189
190 **Inputs**
191
192 - **Peptides abundances** *in tabular or csv format*
193
194 - The first row is column headers and should contain the sample name for each sample column.
195 - The first column should contain unique peptide sequences.
196 - *Optionally, the second column may be Protein ID assignments, else the* **Protein database** *input is required.*
197 - Each remaining column is a sample column with numeric abundance values.
198 - Missing values should be empty instead of zeros.
199 - Example:
200
201 ============ ========== ========= ========= ========= =========
202 sequences Protein Sample1-A Sample1_B Sample2_A Sample2_B
203 ============ ========== ========= ========= ========= =========
204 AAATAAMTK EF3A 127.35209 142.58217 135.89206 162.54500
205 AAATTGEWDK PDC1 100.35922 114.68676 922.60617 833.97955
206 LPVLLADACCSR HSP72;PDC1 120.21570 194.99594 977.48321 219.23281
207 AAEEAGVTDVK FAS2 442.67501 457.52266 448.52837 424.15980
208 ============ ========== ========= ========= ========= =========
209
210
211 - **Protein database** *(optional)*
212
213 - The Protein database in fasta format that has protein sequences containing the peptides.
214 - Required if the **Peptides abundances** input does not have a second column containing Protein ID assignments
215
216
217 - **Sample Groups** *(optional)*
218
219 - First column has the sample name
220 - Second column has the group name
221 - Example:
222
223 ========= ==
224 Sample1-A S1
225 Sample1_B S1
226 Sample2_A S2
227 Sample2_B S2
228 ========= ==
229
230
231 **Outputs**
232
233 - **Protein Abundance**
234
235 ======= ====== ====== =================== =================== ================== ==================
236 Protein N.Pept Q.Pept S/N P(PECA) S1 S2
237 ======= ====== ====== =================== =================== ================== ==================
238 EF3A 2 2 -2.874362404756714 0.2608189432601452 463172795.59269696 489796576.81520355
239 FAS2 6 4 -0.5901265476375578 0.8395809777778386 52093246.23323742 53280470.3811749
240 PDC1 3 2 6.634988423694361 0.25491030879514676 203769831.79809052 174641994.14231393
241 ======= ====== ====== =================== =================== ================== ==================
242
243 - **FDR Estimate from Monte Carlo Simulation** *(optional)*
244
245 ======= =================== ===================
246 Protein P(MC) MCFDR
247 ======= =================== ===================
248 EF3A 0.1419053964023984 0.5287482885321804
249 FAS2 0.9867109634551495 0.9132662960822688
250 PDC1 0.3338088445078459 0.5287482885321804
251 ======= =================== ===================
252
253 - **Protein Peptide Loadings** *(optional)*
254
255 ======= =========== ===================
256 EF3A AAATAAMTK 0.5287482885321804
257 FAS2 AAEEAGVTDVK 0.9132662960822688
258 PDC1 AAATTGEWDK 0.5287482885321804
259 ======= =========== ===================
260
261 .. _Diffacto: https://github.com/statisticalbiotechnology/diffacto
262
263 ]]></help>
264 <citations>
265 <citation type="doi">10.1074/mcp.O117.067728</citation>
266 </citations>
267 </tool>