Mercurial > repos > galaxyp > mqppep_anova
comparison mqppep_anova.xml @ 0:dbff53e6f75f draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author | galaxyp |
---|---|
date | Mon, 11 Jul 2022 19:22:25 +0000 |
parents | |
children | 08678c931f5d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dbff53e6f75f |
---|---|
1 <tool | |
2 id="mqppep_anova" | |
3 name="MaxQuant Phosphopeptide ANOVA" | |
4 version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" | |
5 profile="21.05" | |
6 > | |
7 <description>Runs ANOVA and KSEA for phosphopeptides.</description> | |
8 <macros> | |
9 <import>macros.xml</import> | |
10 </macros> | |
11 <edam_topics> | |
12 <edam_topic>topic_0121</edam_topic><!-- proteomics --> | |
13 <edam_topic>topic_3520</edam_topic><!-- proteomics experiment--> | |
14 </edam_topics> | |
15 <edam_operations> | |
16 <edam_operation>operation_0276</edam_operation><!-- Analyse a network of protein interactions. --> | |
17 <edam_operation>operation_0531</edam_operation><!-- Heat map generation --> | |
18 <edam_operation>operation_2938</edam_operation><!-- Dendrogram generation --> | |
19 <edam_operation>operation_3557</edam_operation><!-- Imputation --> | |
20 <edam_operation>operation_3435</edam_operation><!-- Standardisation and normalisation --> | |
21 <edam_operation>operation_3501</edam_operation><!-- Enrichment analysis --> | |
22 <edam_operation>operation_3658</edam_operation><!-- Statistical inference --> | |
23 </edam_operations> | |
24 <expand macro="requirements"/> | |
25 <!-- | |
26 knitr and install_tinytex both need access to a writeable directory, but most | |
27 directories in a biocontainer are read-only, so the R script and its Rmd template are copied into the working directory before Rscript runs. | |
28 NOTE(review): this command does not itself build a pseudo-home under /tmp; confirm whether mqppep_anova.R does so. | |
29 --> | |
30 <command detect_errors="exit_code"><![CDATA[ | |
31 cp '$__tool_directory__/mqppep_anova_script.Rmd' . && | |
32 cp '$__tool_directory__/mqppep_anova.R' . && | |
33 Rscript mqppep_anova.R | |
34 --inputFile '$input_file' | |
35 --alphaFile '$alpha_file' | |
36 --preproc_sqlite '$preproc_sqlite' | |
37 --firstDataColumn '$intensity_column_regex_f' | |
38 --imputationMethod '$imputation.imputation_method' | |
39 #if $imputation.imputation_method == "random" | |
40 --meanPercentile '$imputation.meanPercentile' | |
41 --sdPercentile '$imputation.sdPercentile' | |
42 #end if | |
43 --regexSampleNames '$sample_names_regex_f' | |
44 --regexSampleGrouping '$sample_grouping_regex_f' | |
45 --imputedDataFile '$imputed_data_file' | |
46 --imputedQNLTDataFile '$imp_qn_lt_file' | |
47 --ksea_sqlite '$ksea_sqlite' | |
48 --ksea_cutoff_threshold '$ksea_cutoff_threshold' | |
49 --ksea_cutoff_statistic 'FDR' | |
50 --reportFile '$report_file' | |
51 --anova_ksea_metadata '$anova_ksea_metadata' | |
52 ]]></command> | |
53 <configfiles> | |
54 <configfile name="sample_names_regex_f"> | |
55 $sample_names_regex | |
56 </configfile> | |
57 <configfile name="sample_grouping_regex_f"> | |
58 $sample_grouping_regex | |
59 </configfile> | |
60 <configfile name="intensity_column_regex_f"> | |
61 $intensity_column_regex | |
62 </configfile> | |
63 </configfiles> | |
64 <inputs> | |
65 <param name="input_file" type="data" format="tabular" label="Filtered Phosphopeptide Intensities" | |
66 help="Phosphopeptide intensities filtered for minimal quality. First column label 'Phosphopeptide'; sample-intensities must begin in column 10 and must have column labels to match argument [sample_names_regex]" | |
67 /> | |
68 <param name="alpha_file" type="data" format="tabular" label="ANOVA alpha cutoff level" | |
69 help="ANOVA alpha cutoff values for significance testing: tabular data having one column and no header" | |
70 /> | |
71 <param name="preproc_sqlite" type="data" format="sqlite" label="preproc_sqlite dataset from mqppep_preproc" | |
72 help="'preproc_sqlite' dataset produced by 'MaxQuant Phosphopeptide Preprocessing' tool" | |
73 /> | |
74 <param name="intensity_column_regex" type="text" value="^Intensity[^_]" | |
75 label="Intensity-column pattern" | |
76 help="Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)" | |
77 /> | |
78 <!-- imputation_method <- c("group-median","median","mean","random")[1] --> | |
79 <conditional name="imputation"> | |
80 <param name="imputation_method" type="select" label="Imputation method" | |
81 help="Impute missing values by (1) using median for each sample-group; (2) using median across all samples; (3) using mean across all samples; or (4) using randomly generated values having same std. dev. as across all samples (with mean specified by [meanPercentile])" | |
82 > | |
83 <option value="random" selected="true">random</option> | |
84 <option value="group-median">group-median</option> | |
85 <option value="median">median</option> | |
86 <option value="mean">mean</option> | |
87 </param> | |
88 <when value="group-median" /> | |
89 <when value="median" /> | |
90 <when value="mean" /> | |
91 <when value="random"> | |
92 <param name="meanPercentile" type="integer" value="1" min="1" max="99" | |
93 label="Mean percentile for random values" | |
94 help="Percentile center of random values; range [1,99]" | |
95 /> | |
96 <param name="sdPercentile" type="float" value="1.0" | |
97 label="Percentile std. dev. for random values" | |
98 help="Standard deviation adjustment-factor for random values; real number. (1.0 means SD equal to the SD for the entire data set.)" | |
99 /> | |
100 </when> | |
101 </conditional> | |
102 <param name="sample_names_regex" type="text" value="\.\d+[A-Z]$" | |
103 help="Pattern extracting sample-names from names of columns that have peptide intensity data (PERL-compatible regular expression)" | |
104 label="Sample-extraction pattern"> | |
105 <sanitizer> | |
106 <valid initial="string.printable"> | |
107 <remove value="'"/> | |
108 </valid> | |
109 </sanitizer> | |
110 </param> | |
111 <param name="sample_grouping_regex" type="text" value="\d+" | |
112 help="Pattern extracting sample-group from the sample-names that are extracted by 'Sample-extraction pattern' (PERL-compatible regular expression)" | |
113 label="Group-extraction pattern"> | |
114 <sanitizer> | |
115 <valid initial="string.printable"> | |
116 <remove value="'"/> | |
117 </valid> | |
118 </sanitizer> | |
119 </param> | |
120 <param name="ksea_cutoff_threshold" type="float" value="0.05" | |
121 label="KSEA threshold level" | |
122 help="Maximum FDR to be used to score a kinase enrichment as significant" | |
123 /> | |
124 </inputs> | |
125 <outputs> | |
126 <data name="imputed_data_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_intensities" ></data> | |
127 <data name="imp_qn_lt_file" format="tabular" label="${input_file.name}.${imputation.imputation_method}-imputed_QN_LT_intensities" ></data> | |
128 <data name="anova_ksea_metadata" format="tabular" label="${input_file.name}.${imputation.imputation_method}-anova_ksea_metadata" ></data> | |
129 <!-- | |
130 <data name="report_file" format="html" label="${input_file.name}.${imputation.imputation_method}-imputed_report (download/unzip to view)" ></data> | |
131 --> | |
132 <data name="report_file" format="pdf" label="${input_file.name}.${imputation.imputation_method}-imputed_report" ></data> | |
133 <data name="ksea_sqlite" format="sqlite" label="${input_file.name}.${imputation.imputation_method}-imputed_ksea_sqlite"> | |
134 </data> | |
135 </outputs> | |
136 <tests> | |
137 <test> | |
138 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/> | |
139 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/> | |
140 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> | |
141 <param name="intensity_column_regex" value="^Intensity[^_]"/> | |
142 <param name="imputation_method" value="median"/> | |
143 <param name="sample_names_regex" value="\.\d+[A-Z]$"/> | |
144 <param name="sample_grouping_regex" value="\d+"/> | |
145 <output name="imputed_data_file"> | |
146 <assert_contents> | |
147 <has_text text="Phosphopeptide" /> | |
148 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
149 <!-- missing missing observd missing observd observd --> | |
150 <has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.8765300.8765300.8765300.2355900.14706000" /> | |
151 | |
152 </assert_contents> | |
153 </output> | |
154 <output name="imp_qn_lt_file"> | |
155 <assert_contents> | |
156 <has_text text="Phosphopeptide" /> | |
157 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
158 <!-- missing missing observed missing observed observed --> | |
159 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6.962256.*6.908828.*6.814580.*6.865411.*6.908828.*7.088909" /> | |
160 | |
161 <has_text text="pSQKQEEENPAEETGEEK" /> | |
162 </assert_contents> | |
163 </output> | |
164 </test> | |
165 <test> | |
166 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/> | |
167 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/> | |
168 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> | |
169 <param name="intensity_column_regex" value="^Intensity[^_]"/> | |
170 <param name="imputation_method" value="mean"/> | |
171 <param name="sample_names_regex" value="\.\d+[A-Z]$"/> | |
172 <param name="sample_grouping_regex" value="\d+"/> | |
173 <output name="imputed_data_file"> | |
174 <assert_contents> | |
175 <has_text text="Phosphopeptide" /> | |
176 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
177 <!-- missing missing observd missing observd observd --> | |
178 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6721601.6721601.8765300.6721601.2355900.14706000" /> | |
179 | |
180 </assert_contents> | |
181 </output> | |
182 <output name="imp_qn_lt_file"> | |
183 <assert_contents> | |
184 <has_text text="Phosphopeptide" /> | |
185 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
186 <!-- missing missing observed missing observed observed --> | |
187 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6.839850.*6.797424.*6.797424.*6.797424.*6.896609.*7.092451" /> | |
188 </assert_contents> | |
189 </output> | |
190 </test> | |
191 <test> | |
192 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/> | |
193 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/> | |
194 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> | |
195 <param name="intensity_column_regex" value="^Intensity[^_]"/> | |
196 <param name="imputation_method" value="group-median"/> | |
197 <param name="sample_names_regex" value="\.\d+[A-Z]$"/> | |
198 <param name="sample_grouping_regex" value="\d+"/> | |
199 <output name="imputed_data_file"> | |
200 <assert_contents> | |
201 <has_text text="Phosphopeptide" /> | |
202 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
203 <!-- missing missing observd missing observd observd --> | |
204 <has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.8765300.8765300.5886074.2355900.14706000" /> | |
205 | |
206 </assert_contents> | |
207 </output> | |
208 <output name="imp_qn_lt_file"> | |
209 <assert_contents> | |
210 <has_text text="Phosphopeptide" /> | |
211 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
212 <!-- missing missing observed missing observed observed --> | |
213 <has_text_matching expression="pSQKQEEENPAEETGEEK.*6.946112.*6.888985.*6.792137.*6.792137.*6.888985.*7.089555" /> | |
214 </assert_contents> | |
215 </output> | |
216 </test> | |
217 <test> | |
218 <param name="input_file" ftype="tabular" value="test_input_for_anova.tabular"/> | |
219 <param name="preproc_sqlite" ftype="sqlite" value="test_input_for_anova.sqlite"/> | |
220 <param name="alpha_file" ftype="tabular" value="alpha_levels.tabular"/> | |
221 <param name="intensity_column_regex" value="^Intensity[^_]"/> | |
222 <param name="imputation_method" value="random"/> | |
223 <param name="meanPercentile" value="1" /> | |
224 <param name="sdPercentile" value="1.0" /> | |
225 <param name="sample_names_regex" value="\.\d+[A-Z]$"/> | |
226 <param name="sample_grouping_regex" value="\d+"/> | |
227 <output name="imputed_data_file"> | |
228 <assert_contents> | |
229 <has_text text="Phosphopeptide" /> | |
230 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
231 <!-- observd observd observd --> | |
232 <has_text_matching expression="pSQKQEEENPAEETGEEK.*8765300.*2355900.*4706000" /> | |
233 | |
234 </assert_contents> | |
235 </output> | |
236 <output name="imp_qn_lt_file"> | |
237 <assert_contents> | |
238 <has_text text="Phosphopeptide" /> | |
239 <has_text text="AAAITDMADLEELSRLpSPLPPGpSPGSAAR" /> | |
240 <has_text text="5.409549" /> <!-- log-transformed value for pTYVDPFTpYEDPNQAVR .1B --> | |
241 <has_text text="6.464714" /> <!-- log-transformed value for pSQKQEEENPAEETGEEK .2A --> | |
242 </assert_contents> | |
243 </output> | |
244 </test> | |
245 </tests> | |
246 <help><![CDATA[ | |
247 ==================================================== | |
248 Phosphoproteomic Enrichment Pipeline ANOVA and KSEA | |
249 ==================================================== | |
250 | |
251 **Input files** | |
252 | |
253 ``Filtered Phosphopeptide Intensities`` | |
254 Phosphopeptides annotated with SwissProt and phosphosite metadata (in tabular format). | |
255 This is the output from the "Phosphoproteomic Enrichment Pipeline Merge and Filter" | |
256 (``mqppep_mrgflt``) tool. | |
257 | |
258 ``ANOVA alpha cutoff level`` | |
259 List of alpha cutoff values for significance testing; text file having one column and no header. For example: | |
260 | |
261 :: | |
262 | |
263 0.2 | |
264 0.1 | |
265 0.05 | |
266 | |
267 **Input parameters** | |
268 | |
269 ``Intensity-column pattern`` | |
270 First column of ``input_file`` having intensity values (integer or PERL-compatible regular expression matching column label). Default: ``^Intensity[^_]`` | |
271 | |
272 ``Imputation method`` | |
273 Impute missing values by: | |
274 | |
275 1. ``group-median`` - use median for each sample-group; | |
276 2. ``mean`` - use mean across all samples; | |
277 3. ``median`` - use median across all samples; or | |
278 4. ``random`` - use randomly generated values where: | |
279 | |
280 - ``Mean percentile for random values`` specifies the percentile among non-missing values to be used as mean of random values, and | |
281 - ``Percentile std. dev. for random values`` specifies the factor to be multiplied by the standard deviation among the non-missing values (across all samples) to determine the standard deviation of random values. | |
282 | |
283 ``Sample-extraction pattern`` | |
284 PERL-compatible regular expression extracting the sample-name from the name of a column of intensities (from ``input_file``) for one sample. | |
285 | |
286 - For example, ``"\.\d+[A-Z]$"`` applied to ``Intensity.splunge.10A`` would produce ``.10A`` | |
287 - Note that *this is case sensitive* by default. | |
288 | |
289 ``Group-extraction pattern`` | |
290 PERL-compatible regular expression extracting the sample-grouping from the sample-name that was extracted with ``sample_names_regex`` from a column of intensities (from ``input_file``). | |
291 | |
292 - For example, ``"\d+"`` applied to ``.10A`` would produce ``10`` | |
293 - Note that *this is case sensitive* by default. | |
294 | |
295 ``KSEA threshold level`` | |
296 Specifies maximum FDR at which a kinase will be considered to be enriched; the default choice of 0.05 is arbitrary. | |
297 | |
298 **Outputs** | |
299 | |
300 ``imputed_intensities (input_file.imputation_method-imputed_intensities)`` | |
301 Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, in tabular format. | |
302 | |
303 ``imputed_QN_LT_intensities (input_file.imputation_method-imputed_QN_LT_intensities)`` | |
304 Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format. | |
305 | |
306 ``report_file (input_file.imputation_method-imputed_report)`` | |
307 Summary report for normalization, imputation, and **ANOVA**, in PDF format. | |
308 | |
309 ``anova_ksea_metadata (input_file.imputation_method-anova_ksea_metadata)`` | |
310 Phosphopeptide metadata including ANOVA significance and KSEA enrichments. | |
311 | |
312 ``ksea_sqlite (input_file.imputation_method-imputed_ksea_sqlite)`` | |
313 SQLite database for ad-hoc report creation. | |
314 | |
315 **Algorithm** | |
316 | |
317 The KSEA algorithm used here is as in the KSEAapp package as reported in [Wiredja 2017]. | |
318 The code is adapted from "Danica D. Wiredja (2017). KSEAapp: Kinase-Substrate Enrichment Analysis. R package version 0.99.0." to work with output from the "MaxQuant Phosphopeptide Preprocessing" Galaxy tool. | |
319 | |
320 **Authors** | |
321 | |
322 ``Larry C. Cheng`` | |
323 (`ORCiD 0000-0002-6922-6433 <https://orcid.org/0000-0002-6922-6433>`_) wrote the original script. | |
324 | |
325 ``Arthur C. Eschenlauer`` | |
326 (`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy. | |
327 | |
328 =================================== | |
329 PERL-compatible regular expressions | |
330 =================================== | |
331 | |
332 Note that the PERL-compatible regular expressions accepted by this tool are documented at http://rdrr.io/r/base/regex.html | |
333 | |
334 ]]></help> | |
335 <citations> | |
336 <!-- Cheng_2018 "Phosphopeptide Enrichment ..." PMID: 30124664 --> | |
337 <citation type="doi">10.3791/57996</citation> | |
338 <!-- Wiredja_2017 "The KSEA App ..." PMID: 28655153 --> | |
339 <citation type="doi">10.1093/bioinformatics/btx415</citation> | |
340 </citations> | |
341 </tool> |