comparison mqppep_anova.xml @ 0:dbff53e6f75f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author galaxyp
date Mon, 11 Jul 2022 19:22:25 +0000
parents
children 08678c931f5d
comparison
equal deleted inserted replaced
-1:000000000000 0:dbff53e6f75f
1 <tool
2 id="mqppep_anova"
3 name="MaxQuant Phosphopeptide ANOVA"
4 version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
5 profile="21.05"
6 >
7 <description>Runs ANOVA and KSEA for phosphopeptides.</description>
8 <macros>
9 <import>macros.xml</import>
10 </macros>
11 <edam_topics>
12 <edam_topic>topic_0121</edam_topic><!-- proteomics -->
13 <edam_topic>topic_3520</edam_topic><!-- proteomics experiment-->
14 </edam_topics>
15 <edam_operations>
16 <edam_operation>operation_0276</edam_operation><!-- Analyse a network of protein interactions. -->
17 <edam_operation>operation_0531</edam_operation><!-- Heat map generation -->
18 <edam_operation>operation_2938</edam_operation><!-- Dendrogram generation -->
19 <edam_operation>operation_3557</edam_operation><!-- Imputation -->
20 <edam_operation>operation_3435</edam_operation><!-- Standardisation and normalisation -->
21 <edam_operation>operation_3501</edam_operation><!-- Enrichment analysis -->
22 <edam_operation>operation_3658</edam_operation><!-- Statistical inference -->
23 </edam_operations>
24 <expand macro="requirements"/>
25 <!--
26 knitr and install_tinytex both need access to a writeable directory, but most
27 directories in a biocontainer are read-only. This command only copies the scripts into the
28 current directory; NOTE(review): no pseudo-home under /tmp is built here - presumably mqppep_anova.R does that; confirm.
29 -->
30 <command detect_errors="exit_code"><![CDATA[
31 cp '$__tool_directory__/mqppep_anova_script.Rmd' . &&
32 cp '$__tool_directory__/mqppep_anova.R' . &&
33 Rscript mqppep_anova.R
34 --inputFile '$input_file'
35 --alphaFile '$alpha_file'
36 --preproc_sqlite '$preproc_sqlite'
37 --firstDataColumn '$intensity_column_regex_f'
38 --imputationMethod '$imputation.imputation_method'
39 #if $imputation.imputation_method == "random"
40 --meanPercentile '$imputation.meanPercentile'
41 --sdPercentile '$imputation.sdPercentile'
42 #end if
43 --regexSampleNames '$sample_names_regex_f'
44 --regexSampleGrouping '$sample_grouping_regex_f'
45 --imputedDataFile '$imputed_data_file'
46 --imputedQNLTDataFile '$imp_qn_lt_file'
47 --ksea_sqlite '$ksea_sqlite'
48 --ksea_cutoff_threshold '$ksea_cutoff_threshold'
49 --ksea_cutoff_statistic 'FDR'
50 --reportFile '$report_file'
51 --anova_ksea_metadata '$anova_ksea_metadata'
52 ]]></command>
53 <configfiles>
54 <configfile name="sample_names_regex_f">
55 $sample_names_regex
56 </configfile>
57 <configfile name="sample_grouping_regex_f">
58 $sample_grouping_regex
59 </configfile>
60 <configfile name="intensity_column_regex_f">
61 $intensity_column_regex
62 </configfile>
63 </configfiles>
64 <inputs>
65 <param name="input_file" type="data" format="tabular" label="Filtered Phosphopeptide Intensities"
66 help="Phosphopeptide intensities filtered for minimal quality. First column label 'Phosphopeptide'; sample-intensities must begin in column 10 and must have column labels to match argument [sample_names_regex]"
67 />
68 <param name="alpha_file" type="data" format="tabular" label="ANOVA alpha cutoff level"
69 help="ANOVA alpha cutoff values for significance testing: tabular data having one column and no header"
70 />
71 <param name="preproc_sqlite" type="data" format="sqlite" label="preproc_sqlite dataset from mqppep_preproc"
72 help="'preproc_sqlite' dataset produced by 'MaxQuant Phosphopeptide Preprocessing' tool"
73 />
74 <param name="intensity_column_regex" type="text" value="^Intensity[^_]"
75 label="Intensity-column pattern"
76 help="Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)"
77 />
78 <!-- imputation_method <- c("group-median","median","mean","random")[1] -->
79 <conditional name="imputation">
80 <param name="imputation_method" type="select" label="Imputation method"
81 help="Impute missing values by (1) using median for each sample-group; (2) using median across all samples; (3) using mean across all samples; or (4) using randomly generated values having same std. dev. as across all samples (with mean specified by [meanPercentile])"
82 >
83 <option value="random" selected="true">random</option>
84 <option value="group-median">group-median</option>
85 <option value="median">median</option>
86 <option value="mean">mean</option>
87 </param>
88 <when value="group-median" />
89 <when value="median" />
90 <when value="mean" />
91 <when value="random">
92 <param name="meanPercentile" type="integer" value="1" min="1" max="99"
93 label="Mean percentile for random values"
94 help="Percentile center of random values; range [1,99]"
95 />
96 <param name="sdPercentile" type="float" value="1.0"
97 label="Percentile std. dev. for random values"
98 help="Standard deviation adjustment-factor for random values; real number. (1.0 means SD equal to the SD for the entire data set.)"
99 />
100 </when>
101 </conditional>
102 <param name="sample_names_regex" type="text" value="\.\d+[A-Z]$"
103 help="Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)"
104 label="Sample-extraction pattern">
105 <sanitizer>
106 <valid initial="string.printable">
107 <remove value="&apos;"/>
108 </valid>
109 </sanitizer>
110 </param>
111 <param name="sample_grouping_regex" type="text" value="\d+"
112 help="Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)"
113 label="Group-extraction pattern">
114 <sanitizer>
115 <valid initial="string.printable">
116 <remove value="&apos;"/>
117 </valid>
118 </sanitizer>
119 </param>
120 <param name="ksea_cutoff_threshold" type="float" value="0.05"
121 label="KSEA threshold level"
122 help="Maximum FDR to be used to score a kinase enrichment as significant"
123 />
124 </inputs>
125 <outputs>
126 <data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_intensities" ></data>
127 <data name="imp_qn_lt_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data>
128 <data name="anova_ksea_metadata" format="tabular" label="${input_file.name}.${imputation.imputation_method}-anova_ksea_metadata" ></data>
129 <!--
130 <data name="report_file" format="html" label="${input_file.name}.${imputation.imputation_method}-imputed_report (download/unzip to view)" ></data>
131 -->
132 <data name="report_file" format="pdf" label="${input_file.name}.${imputation.imputation_method}-imputed_report" ></data>
133 <data name="ksea_sqlite" format="sqlite" label="${input_file.name}.${imputation.imputation_method}-imputed_ksea_sqlite">
134 </data>
135 </outputs>
136 <tests>
137 <test>
138 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
139 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
140 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
141 <param name="intensity_column_regex" value="^Intensity[^_]"/>
142 <param name="imputation_method" value="median"/>
143 <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
144 <param name="sample_grouping_regex" value="\d+"/>
145 <output name="imputed_data_file">
146 <assert_contents>
147 <has_text text="Phosphopeptide" />
148 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
149 <!-- missing missing observd missing observd observd -->
150 <has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.8765300.8765300.8765300.2355900.14706000" />
151
152 </assert_contents>
153 </output>
154 <output name="imp_qn_lt_file">
155 <assert_contents>
156 <has_text text="Phosphopeptide" />
157 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
158 <!-- missing missing observed missing observed observed -->
159 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6.962256.*6.908828.*6.814580.*6.865411.*6.908828.*7.088909" />
160
161 <has_text text="pSQKQEEENPAEETGEEK" />
162 </assert_contents>
163 </output>
164 </test>
165 <test>
166 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
167 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
168 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
169 <param name="intensity_column_regex" value="^Intensity[^_]"/>
170 <param name="imputation_method" value="mean"/>
171 <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
172 <param name="sample_grouping_regex" value="\d+"/>
173 <output name="imputed_data_file">
174 <assert_contents>
175 <has_text text="Phosphopeptide" />
176 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
177 <!-- missing missing observd missing observd observd -->
178 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6721601.6721601.8765300.6721601.2355900.14706000" />
179
180 </assert_contents>
181 </output>
182 <output name="imp_qn_lt_file">
183 <assert_contents>
184 <has_text text="Phosphopeptide" />
185 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
186 <!-- missing missing observed missing observed observed -->
187 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6.839850.*6.797424.*6.797424.*6.797424.*6.896609.*7.092451" />
188 </assert_contents>
189 </output>
190 </test>
191 <test>
192 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
193 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
194 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
195 <param name="intensity_column_regex" value="^Intensity[^_]"/>
196 <param name="imputation_method" value="group-median"/>
197 <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
198 <param name="sample_grouping_regex" value="\d+"/>
199 <output name="imputed_data_file">
200 <assert_contents>
201 <has_text text="Phosphopeptide" />
202 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
203 <!-- missing missing observd missing observd observd -->
204 <has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.8765300.8765300.5886074.2355900.14706000" />
205
206 </assert_contents>
207 </output>
208 <output name="imp_qn_lt_file">
209 <assert_contents>
210 <has_text text="Phosphopeptide" />
211 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
212 <!-- missing missing observed missing observed observed -->
213 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6.946112.*6.888985.*6.792137.*6.792137.*6.888985.*7.089555" />
214 </assert_contents>
215 </output>
216 </test>
217 <test>
218 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/>
219 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/>
220 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/>
221 <param name="intensity_column_regex" value="^Intensity[^_]"/>
222 <param name="imputation_method" value="random"/>
223 <param name="meanPercentile" value="1" />
224 <param name="sdPercentile" value="1.0" />
225 <param name="sample_names_regex" value="\.\d+[A-Z]$"/>
226 <param name="sample_grouping_regex" value="\d+"/>
227 <output name="imputed_data_file">
228 <assert_contents>
229 <has_text text="Phosphopeptide" />
230 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
231 <!-- observd observd observd -->
232 <has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.*2355900.*4706000" />
233
234 </assert_contents>
235 </output>
236 <output name="imp_qn_lt_file">
237 <assert_contents>
238 <has_text text="Phosphopeptide" />
239 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" />
240 <has_text text="5.409549" /> <!-- log-transformed value for pTYVDPFTpYEDPNQAVR .1B -->
241 <has_text text="6.464714" /> <!-- log-transformed value for pSQKQEEENPAEETGEEK .2A -->
242 </assert_contents>
243 </output>
244 </test>
245 </tests>
246 <help><![CDATA[
247 ====================================================
248 Phosphoproteomic Enrichment Pipeline ANOVA and KSEA
249 ====================================================
250
251 **Input files**
252
253 ``Filtered Phosphopeptide Intensities``
254 Phosphopeptides annotated with SwissProt and phosphosite metadata (in tabular format).
255 This is the output from the "Phosphoproteomic Enrichment Pipeline Merge and Filter"
256 (``mqppep_mrgflt``) tool.
257
258 ``ANOVA alpha cutoff level``
259 List of alpha cutoff values for significance testing; text file having one column and no header. For example:
260
261 ::
262
263 0.2
264 0.1
265 0.05
266
267 **Input parameters**
268
269 ``Intensity-column pattern``
270 First column of ``input_file`` having intensity values (integer or PERL-compatible regular expression matching column label). Default: ``^Intensity[^_]``
271
272 ``Imputation method``
273 Impute missing values by:
274
275 1. ``group-median`` - use median for each sample-group;
276 2. ``mean`` - use mean across all samples;
277 3. ``median`` - use median across all samples; or
278 4. ``random`` - use randomly generated values where:
279
280 - ``Mean percentile for random values`` specifies the percentile among non-missing values to be used as mean of random values, and
281 - ``Percentile std. dev. for random values`` specifies the factor to be multiplied by the standard deviation among the non-missing values (across all samples) to determine the standard deviation of random values.
282
283 ``Sample-extraction pattern``
284 PERL-compatible regular expression extracting the sample-name from the name of a column of intensities (from ``input_file``) for one sample.
285
286 - For example, ``"\.\d+[A-Z]$"`` applied to ``Intensity.splunge.10A`` would produce ``.10A``
287 - Note that *this is case sensitive* by default.
288
289 ``Group-extraction pattern``
290 PERL-compatible regular expression extracting the sample-grouping from the sample-name that was extracted with ``sample_names_regex`` from a column of intensities (from ``input_file``).
291
292 - For example, ``"\d+$"`` applied to ``.10A`` would produce ``10``
293 - Note that *this is case sensitive* by default.
294
295 ``KSEA threshold level``
296 Specifies maximum FDR at which a kinase will be considered to be enriched; the default choice of 0.05 is arbitrary.
297
298 **Outputs**
299
300 ``imputed_intensities (input_file.imputation_method-imputed_intensities)``
301 Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, in tabular format.
302
303 ``imputed_QN_LT_intensities (input_file.imputation_method-imputed_QN_LT_intensities)``
304 Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format.
305
306 ``report_file (input_file.imputation_method-imputed_report)``
307 Summary report for normalization, imputation, and **ANOVA**, in PDF format.
308
309 ``anova_ksea_metadata (input_file.imputation_method-anova_ksea_metadata)``
310 Phosphopeptide metadata including ANOVA significance and KSEA enrichments.
311
312 ``ksea_sqlite (input_file.imputation_method-imputed_ksea_sqlite)``
313 SQLite database for ad-hoc report creation.
314
315 **Algorithm**
316
317 The KSEA algorithm used here is as in the KSEAapp package as reported in [Wiredja 2017].
318 The code is adapted from "Danica D. Wiredja (2017). KSEAapp: Kinase-Substrate Enrichment Analysis. R package version 0.99.0." to work with output from the "MaxQuant Phosphopeptide Preprocessing" Galaxy tool.
319
320 **Authors**
321
322 ``Larry C. Cheng``
323 (`ORCiD 0000-0002-6922-6433 <https://orcid.org/0000-0002-6922-6433>`_) wrote the original script.
324
325 ``Arthur C. Eschenlauer``
326 (`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy.
327
328 ===================================
329 PERL-compatible regular expressions
330 ===================================
331
332 Note that the PERL-compatible regular expressions accepted by this tool are documented at http://rdrr.io/r/base/regex.html
333
334 ]]></help>
335 <citations>
336 <!-- Cheng_2018 "Phosphopeptide Enrichment ..." PMID: 30124664 -->
337 <citation type="doi">10.3791/57996</citation>
338 <!-- Wiredja_2017 "The KSEA App ..." PMID: 28655153 -->
339 <citation type="doi">10.1093/bioinformatics/btx415</citation>
340 </citations>
341 </tool>