comparison mqppep_preproc.xml @ 0:8dfd5d2b5903 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author galaxyp
date Mon, 11 Jul 2022 19:22:54 +0000
parents
children b76c75521d91
comparison
equal deleted inserted replaced
-1:000000000000 0:8dfd5d2b5903
1 <tool
2 id="mqppep_preproc"
3 name="MaxQuant Phosphopeptide Preprocessing"
4 version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
5 profile="21.05"
6 >
7 <description>
8 Prep phosphoproteomic MaxQuant output for statistical analysis.
9 </description>
10 <macros><!-- NOTE(review): macros.xml presumably supplies the @TOOL_VERSION@ and @VERSION_SUFFIX@ tokens and the "requirements" macro expanded below; confirm against macros.xml -->
11 <import>macros.xml</import>
12 </macros>
13 <edam_topics>
14 <edam_topic>topic_0121</edam_topic><!-- Proteomics -->
15 <edam_topic>topic_3520</edam_topic><!-- Proteomics experiment -->
16 </edam_topics>
17 <edam_operations>
18 <edam_operation>operation_0338</edam_operation><!-- Sequence database search -->
19 <edam_operation>operation_0361</edam_operation><!-- Sequence annotation -->
20 <edam_operation>operation_3434</edam_operation><!-- Conversion -->
21 <edam_operation>operation_3436</edam_operation><!-- Aggregation -->
22 </edam_operations>
23 <expand macro="requirements"/>
24 <!-- Runs three steps chained with the shell AND operator, so a failing step stops the job: (1) R localization filter, (2) Perl upstream-kinase mapping, (3) Python merge-and-filter. Every dataset path is single-quoted so that an unusual character in a path cannot split an argument; integer, float and select values are passed bare. --><command detect_errors="exit_code"><![CDATA[
25 echo '--- localization-filter step:'
26 && (
27 Rscript '$__tool_directory__/MaxQuantProcessingScript.R'
28 -i '$phosphoSites'
29 #if $pst_py_selector == "y"
30 --enriched Y
31 #else
32 --enriched ST
33 #end if
34 --phosphoCol '$phosphocol_script'
35 --startCol '$startcol_script'
36 --intervalCol $intervalCol
37 --localProbCutoff $localProbCutoff
38 --collapse_func $collapse_func
39 -o '$phosphoPepIntensities'
40 --locProbCutoffGraph '$locProbCutoffGraph'
41 --enrichGraph '$enrichGraph'
42 --locProbCutoffGraph_svg '$locProbCutoffGraph_svg'
43 --enrichGraph_svg '$enrichGraph_svg'
44 --filtered_data '$filteredData_tabular'
45 --quant_data '$quantData_tabular'
46 ) &&
47 echo '... end localization-filter.'
48 && (
49 echo '--- kinase-mapping step:'
50 ) && (
51 perl '$__tool_directory__/PhosphoPeptide_Upstream_Kinase_Mapping.pl'
52 -i '$phosphoPepIntensities'
53 -f '$protein_fasta'
54 -n '$networkin'
55 -m '$p_sty_motifs'
56 -p '$psp_kinase_substrate'
57 -r '$psp_regulatory_sites'
58 #if $pst_py_selector == "y"
59 -P y
60 #else
61 -P sty
62 #end if
63 -F $merge_function
64 -o '$mapped_phophopeptides'
65 -O '$melted_phophopeptide_map'
66 -D '$mqppep_output_sqlite'
67 -s '$species'
68 ) &&
69 echo '... end kinase-mapping.'
70 &&
71 echo '--- merge-and-filter step:'
72 && (
73 python '$__tool_directory__/mqppep_mrgfltr.py'
74 --phosphopeptides='$mapped_phophopeptides'
75 --ppep_mapping_db='$mqppep_output_sqlite'
76 --species='$species'
77 --mrgfltr_tab='$preproc_tab'
78 --mrgfltr_csv='$preproc_csv'
79 --mrgfltr_sqlite='$preproc_sqlite'
80 )
81 && echo '... end merge-and-filter.'
82 ]]></command>
83 <configfiles><!-- The two user-supplied regular expressions are written to config files; the command section passes the paths of these files (not the patterns themselves) to the R script as its phosphoCol and startCol options. Whitespace inside each configfile element is part of the file that is written. -->
84 <configfile name="phosphocol_script">$phosphoCol
85 </configfile>
86 <configfile name="startcol_script">$startCol
87 </configfile>
88 </configfiles>
89 <inputs><!-- Parameters consumed by the command section. phosphoSites, phosphoCol, startCol, intervalCol, collapse_func and localProbCutoff feed the R localization filter; merge_function, the FASTA file and the four tabular databases feed the Perl kinase mapping; pst_py_selector and species are used by more than one step. -->
90 <param name="phosphoSites" type="data" format="tabular"
91 label="Phospho (STY)Sites.txt"
92 help="Tabular 'Phospho (STY)Sites.txt' produced by MaxQuant"
93 />
94 <param name="phosphoCol" type="text"
95 label="pattern for column 'Number of Phospho (STY)'"
96 help="PERL-compatible regular expression matching header of column having number of 'Phospho (STY)'"
97 value="^Number of Phospho [(]STY[)]$">
98 <sanitizer><!-- accept any printable character except the apostrophe -->
99 <valid initial="string.printable">
100 <remove value="&apos;"/>
101 </valid>
102 </sanitizer>
103 </param>
104 <param name="startCol" type="text"
105 label="pattern for first column of intensity values"
106 help="PERL-compatible regular expression matching column header having first sample intensity"
107 value="^Intensity[^_]">
108 <sanitizer><!-- accept any printable character except the apostrophe -->
109 <valid initial="string.printable">
110 <remove value="&apos;"/>
111 </valid>
112 </sanitizer>
113 </param>
114 <param name="intervalCol" type="integer" value="1" min="1"
115 label="Interval between the intensity column of samples"
116 help="E.g., 1 if subsequent column is next sample; 2 if next sample is two columns away, etc."/>
117 <param name="pst_py_selector" type="select"
118 label="Phosphopeptide enrichment type"
119 help="Were samples enriched for pS and pT, or were they enriched for pY instead?"
120 >
121 <option value="st" selected="true">pST</option>
122 <option value="y">pY</option>
123 </param>
124 <param name="collapse_func" type="select"
125 label="Intensity merge function"
126 help="When a peptide is multiply phosphorylated, how should intensities be merged? [default: sum]"
127 >
128 <option value="sum" selected="true">sum</option>
129 <option value="mean">average</option>
130 </param><!-- collapse_func above is handed to the R localization-filter script; merge_function below is handed to the Perl kinase-mapping script -->
131 <param name="localProbCutoff" type="float" value="0.75" min="0" max="1.0"
132 label="Localization Probability Cutoff"
133 help="See help below for an explanation."
134 />
135 <param name="merge_function" type="select" label="intensity merge-function"
136 help="Specifies how intensities for identical phosphosites should be merged">
137 <option value="sum" selected="true">sum</option>
138 <option value="average">average</option>
139 </param>
140 <param name="protein_fasta" type="data" format="fasta" label="UniProtKB/SwissProt FASTA database"
141 help="Sequence database; supply the same FASTA file as you supplied to MaxQuant"
142 />
143 <param name="networkin" type="data" format="tabular" label="NetworKIN file"
144 help="NetworKIN file; see help section below"/>
145 <param name="p_sty_motifs" type="data" format="tabular" label="pSTY_Motifs file"
146 help="pS/pT/pY phosphorylation site motifs; see help section below"/>
147 <param name="psp_kinase_substrate" type="data" format="tabular" label="PSP_Kinase_Substrate_Dataset"
148 help="'Kinase-substrate dataset'; see help section below"/>
149 <param name="psp_regulatory_sites" type="data" format="tabular" label="PSP_Regulatory_sites"
150 help="'Regulatory sites'; see help section below"/>
151 <param name="species"
152 type="text"
153 value="human"
154 label="filter to limit PhosphoSitePlus records to indicated species"
155 help="(field may be empty) [default: human]. If you supply this parameter, use the species identifier seen as a suffix in UniProtKB"
156 />
157 </inputs>
158 <outputs>
159 <!-- step 1 (localization filter): intensity table, two graphs in PDF and SVG form, and the two intermediate tables -->
160 <data label="${phosphoSites.name}.ppep_intensities" format="tabular" name="phosphoPepIntensities"/>
161 <data label="${phosphoSites.name}.enrichment.pdf" format="pdf" name="enrichGraph"/>
162 <data label="${phosphoSites.name}.locProbCutoff.pdf" format="pdf" name="locProbCutoffGraph"/>
163 <data label="${phosphoSites.name}.enrichment.svg" format="svg" name="enrichGraph_svg"/>
164 <data label="${phosphoSites.name}.locProbCutoff.svg" format="svg" name="locProbCutoffGraph_svg"/>
165 <data label="${phosphoSites.name}.filteredData" format="tabular" name="filteredData_tabular"/>
166 <data label="${phosphoSites.name}.quantData" format="tabular" name="quantData_tabular"/>
167 <!-- step 2 (upstream kinase mapping): mapped table, melted table, and SQLite database -->
168 <data label="${phosphoSites.name}.ppep_intensities.ppep_map" format="tabular" name="mapped_phophopeptides"/>
169 <data label="${phosphoSites.name}.ppep_intensities.melted" format="tabular" name="melted_phophopeptide_map"/>
170 <data label="${phosphoSites.name}.ppep_intensities.ppep_mapping_sqlite" format="sqlite" name="mqppep_output_sqlite"/>
171 <!-- step 3 (merge and filter): the same result as tabular, CSV, and SQLite -->
172 <data label="${phosphoSites.name}.ppep_intensities.ppep_map.preproc_tab" format="tabular" name="preproc_tab"/>
173 <data label="${phosphoSites.name}.ppep_intensities.ppep_map.preproc_csv" format="csv" name="preproc_csv"/>
174 <data label="${phosphoSites.name}.ppep_intensities.ppep_map.preproc_sqlite" format="sqlite" name="preproc_sqlite"/>
175 </outputs>
176 <tests>
177 <test><!-- pS/pT enrichment case (pst_py_selector is "st"): checks the intensity table from the localization filter, the merged/filtered table, and the melted kinase map -->
178 <param name="phosphoSites" ftype="tabular" value="test_input_for_preproc.tabular" />
179 <param name="protein_fasta" ftype="fasta" value="test_swissprot.fasta" />
180 <param name="networkin" ftype="tabular" value="test_networkin.tabular" />
181 <param name="p_sty_motifs" ftype="tabular" value="pSTY_motifs.tabular" />
182 <param name="psp_kinase_substrate" ftype="tabular" value="test_kinase_substrate.tabular" />
183 <param name="psp_regulatory_sites" ftype="tabular" value="test_regulatory_sites.tabular" />
184 <param name="pst_py_selector" value="st"/>
185 <param name="merge_function" value="sum"/>
186
187 <param name="phosphoCol" value="^Number of Phospho [(][STY][STY]*[)]$"/>
188 <param name="startCol" value="^Intensity[^_]"/>
189 <param name="intervalCol" value="1"/>
190 <param name="collapse_func" value="sum"/>
191 <param name="localProbCutoff" value="0.75"/>
192 <param name="species" value="human"/>
193
194 <output name="phosphoPepIntensities">
195 <assert_contents>
196 <has_text text="Phosphopeptide" />
197 <has_line_matching expression="AAAITDMADLEELSRLpSPLPPGpSPGSAAR.5416400.7101800.385280000.208060000.41426000.352400000" /><!-- each "." matches any single character; presumably it stands in for the column separator -->
198 <has_line_matching expression="pSQKQEEENPAEETGEEK.0.0.8765300.0.2355900.14706000" />
199 </assert_contents>
200 </output>
201
202 <output name="preproc_tab">
203 <assert_contents>
204 <has_text text="SSRP1_HUMAN FACT complex subunit SSRP1" />
205 <has_text text="AEBP2_HUMAN Isoform 2 of Zinc finger protein AEBP2" />
206 <has_text text="molecular association, regulation" />
207 <has_text text="cell cycle regulation" />
208 <has_text text="PPP2CA(INDUCES)" />
209 <has_text text="SNCA(DISRUPTS)" />
210 <has_text text="CDK7" />
211 <has_text text="CK1alpha" />
212 <has_text text="CK2alpha" />
213 <has_text text="DNAPK" />
214 <has_text text="HIPK2" />
215 <has_text text="IKKalpha" />
216 <has_text text="PKCalpha" />
217 <has_text text="PKCbeta" />
218 <has_text text="PKC" /><!-- substring check; already implied by the PKCalpha and PKCbeta assertions above -->
219 <has_text text="CK2a2" />
220 <has_text text="CK2alpha" /><!-- repeats the CK2alpha assertion made earlier in this list -->
221 <has_text text="Csnk2a1" />
222 </assert_contents>
223 </output>
224
225 <output name="melted_phophopeptide_map">
226 <assert_contents>
227 <has_text text="CDK7" />
228 <has_text text="CK1alpha" />
229 <has_text text="CK2alpha" />
230 <has_text text="DNAPK" />
231 <has_text text="HIPK2" />
232 <has_text text="IKKalpha" />
233 <has_text text="PKCalpha" />
234 <has_text text="PKCbeta" />
235 <has_text text="PKC" /><!-- substring check; already implied by the PKCalpha and PKCbeta assertions above -->
236 <has_text text="CK2a2" />
237 <has_text text="CK2alpha" /><!-- repeats the CK2alpha assertion made earlier in this list -->
238 <has_text text="Csnk2a1" />
239 </assert_contents>
240 </output>
241 </test>
242 <test><!-- pY enrichment case (pst_py_selector is "y"): same input files and other parameters as the preceding test; checks the same three outputs -->
243 <param name="phosphoSites" ftype="tabular" value="test_input_for_preproc.tabular" />
244 <param name="protein_fasta" ftype="fasta" value="test_swissprot.fasta" />
245 <param name="networkin" ftype="tabular" value="test_networkin.tabular" />
246 <param name="p_sty_motifs" ftype="tabular" value="pSTY_motifs.tabular" />
247 <param name="psp_kinase_substrate" ftype="tabular" value="test_kinase_substrate.tabular" />
248 <param name="psp_regulatory_sites" ftype="tabular" value="test_regulatory_sites.tabular" />
249 <param name="pst_py_selector" value="y"/>
250 <param name="merge_function" value="sum"/>
251
252 <param name="phosphoCol" value="^Number of Phospho [(][STY][STY]*[)]$"/>
253 <param name="startCol" value="^Intensity[^_]"/>
254 <param name="intervalCol" value="1"/>
255 <param name="collapse_func" value="sum"/>
256 <param name="localProbCutoff" value="0.75"/>
257 <param name="species" value="human"/>
258
259 <output name="phosphoPepIntensities">
260 <assert_contents>
261 <has_text text="Phosphopeptide" />
262 <has_text text="pTYVDPFTpYEDPNQAVR" /><!-- a peptide carrying a pY residue is expected in the pY-enriched output -->
263 </assert_contents>
264 </output>
265
266 <output name="preproc_tab">
267 <assert_contents>
268 <has_text text="pTYVDPFTpYEDPNQAVR" />
269 <has_text text="EEKHLNQGVRpTYVDPFTYEDP" />
270 <has_text text="GVRTYVDPFTpYEDPNQAVREF" />
271 <has_text text="HLNQGVRtYVDPFTY" />
272 <has_text text="TYVDPFTyEDPNQAV" />
273 <has_text text="EPHA4" />
274 <has_text text="pT595, pY602" />
275 <has_text text="pT544, pY551" />
276 <has_text text="P54764;" />
277 <has_text text="P54764-2" />
278 </assert_contents>
279 </output>
280
281 <output name="melted_phophopeptide_map">
282 <assert_contents>
283 <has_text text="EphA6" />
284 <has_text text="EPHA4" />
285 <has_text text="EphA4" />
286 </assert_contents>
287 </output>
288 </test>
289 </tests>
290 <help><![CDATA[
291 =========================================================
292 Phosphoproteomic Enrichment Pipeline Preprocessing Steps
293 =========================================================
294
295 **Overview**
296
297 Prior to statistical analysis, it is necessary to perform
298 three steps to transform the MaxQuant output
299 for phosphoproteome-enriched samples.
300
301 **Workflow position**
302
303 ``upstream tool``
304 The input data file for this tool is the ``Phospho (STY)Sites.txt`` file that is produced:
305
306 - by the Galaxy "MaxQuant" (``maxquant``) tool
307 - or by the Galaxy "Maxquant (using mqpar.xml)" (``maxquant_mqpar``) tool
308 - or by the desktop version of MaxQuant.
309
310 ``downstream tool``
311 The "MaxQuant Phosphopeptide ANOVA" tool (``mqppep_anova``) consumes the ``merged/filtered`` output file ``preproc_tab`` that this tool produces.
312
313 ======================================================================
314 Phosphoproteomic Enrichment Pipeline Localization-Probability Cut-Off
315 ======================================================================
316
317 This step applies a "localization-probability cut-off" to each phosphopeptide.
318 Higher values may reduce the number of peptides in the output.
319 The default value of 0.75 reflects the text of [Cheng 2018]:
320
321 "For phosphopeptide identification, a localization probability cutoff is applied. This filter is performed to select for phosphopeptides with a high confidence (i.e., greater than 0.75) in phosphoresidue identification [Hogrebe 2018; Olsen 2006]. In other words, the summed probability of all other residues that could potentially contain the phospho-group is less than 0.25. This cutoff could be raised to increase the stringency of the phosphopeptide selection. In regard to the number of identifications, the expected number of pY peptides is in the hundreds, while the expected number of pST peptides is in the high thousands. These values reflect previously observed phosphoproteome distribution where about 2%, 12%, and 86% of the phosphosites are pY, pT, and pS, respectively [Olsen 2006]."
322
323 This tool wraps an R script, written by Larry Cheng, that performs the following (in order):
324
325 1. Removes contaminant and reverse sequence rows
326 2. Filters rows based on localization probability
327 3. Extracts the quantitative data
328 4. Inserts a "p" before the phosphorylated residue(s) in each peptide sequence
329 5. Merges (aggregating by "sum" or "average") multiply-phosphorylated peptides
330 6. Filters output phosphopeptides based on enrichment
331 7. Produces an output file (in tabular format) that contains the phosphopeptide (first column) and its (possibly merged) mass spectral intensity for each sample.
332
333 Note that the "ProTeomiX Quality Control Report"
334 [Bielow 2016] (available at `https://github.com/cbielow/PTXQC/
335 <https://github.com/cbielow/PTXQC/>`_) is run by the Galaxy wrappers for MaxQuant,
336 so it is omitted here even though it was included in Larry Cheng's original script.
337
338
339 **Input dataset**
340
341 ``phosphoSites``
342 This is the ``MaxQuant Phospho (STY)Sites.txt`` file produced by MaxQuant.
343 If you use the desktop version of MaxQuant, you will find this file in the ``txt`` folder.
344
345 **Output datasets**
346
347 ``ppep_intensities``
348 Data table (in tabular format) presenting, for each sample, the mass-spectral intensity of each phosphopeptide having localization probability greater than the cutoff.
349 ``enrichment.pdf``
350 Graph (in PDF format) presenting non-zero proportions of pS, pT, and pY among the phosphosites; note that a phosphopeptide may have multiple phosphosites.
351 ``locProbCutoff.pdf``
352 Graph (in PDF format) contrasting proportion of phosphopeptides above the localization probability cutoff with the proportion below.
353 ``enrichment.svg``
354 Enrichment graph (in downloadable "scalable vector graphics" format) for incorporation into documents.
355 ``locProbCutoff.svg``
356 Localization probability cutoff graph (in downloadable "scalable vector graphics" format) for incorporation into documents.
357 ``filteredData``
358 Data table (in tabular format) comprising rows of the ``phosphoSites`` input file that are not flagged as contaminants or reversed sequences.
359 ``quantData``
360 Data table (in tabular format) comprising rows of the ``filteredData`` file whose localization probability exceeds the **Localization Probability Cutoff** parameter.
361
362 **Authors**
363
364 ``Nicholas A. Graham``
365 (`ORCiD 0000-0002-6811-1941 <https://orcid.org/0000-0002-6811-1941>`_) initiated the original script.
366
367 ``Larry C. Cheng``
368 (`ORCiD 0000-0002-6922-6433 <https://orcid.org/0000-0002-6922-6433>`_) updated the original script.
369
370 ``Arthur C. Eschenlauer``
371 (`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy.
372
373 ``James E. Johnson``
374 (University of Minnesota Supercomputing Institute) adapted the script to run in Galaxy.
375
376
377 =============================================================
378 Phosphoproteomic Enrichment Pipeline Upstream Kinase Mapping
379 =============================================================
380
381 This step searches phosphopeptides against several databases for known or predicted sites.
382
383 **Input databases**
384
385 ``networkin``
386 This table is the result of filtering the NetworKIN database [Linding 2007; Horn 2014] for cutoff score > 2.0. The ENSEMBL data used to generate the file were from Ensembl, `ensembl.org <https://web.archive.org/web/20220308011159/http://useast.ensembl.org/index.html>`_ [Howe 2021].
387
388 *To generate this file:*
389
390 **(1)** Download the "precomputed data for all available kinase predictors against ENSEMBL"
391 (Available at the NetworKIN predictions link on the downloads page at https://web.archive.org/web/20200208000403/http://networkin.info/download/networkin_human_predictions_3.1.tsv.xz; N.B.: "Commercial users are requested to contact the authors before using the data on the networkin.info website");
392
393 **(2)** Decompress the .tsv.xz file with "unxz" (from XZ Utils `https://tukaani.org/xz/ <https://tukaani.org/xz/>`_);
394
395 **(3)** Filter out the rows having "networkin_score" less than 2.0.
396
397 The result should be a tab-separated file with the following columns:
398
399 1. ``#substrate``
400 2. ``position``
401 3. ``id``
402 4. ``networkin_score``
403 5. ``tree``
404 6. ``netphorest_group``
405 7. ``netphorest_score``
406 8. ``string_identifier``
407 9. ``string_score``
408 10. ``substrate_name``
409 11. ``sequence``
410 12. ``string_path``
411
412
413 ``p_sty_motifs``
414 This database merges motif patterns from [Amanchy 2007] and Phosida [Gnad 2011].
415
416 The Amanchy data are adapted from `http://hprd.org/serine_motifs <http://hprd.org/serine_motifs>`_ and `http://hprd.org/tyrosine_motifs <http://hprd.org/tyrosine_motifs>`_ (both links cite the reference where each motif was published), and the patterns are translated into Perl regular expression format (`https://perldoc.perl.org/perlre <https://perldoc.perl.org/perlre>`_).
417
418 The Phosida data are adapted (translated to Perl-formatted regular expressions) from `http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx <http://pegasus.biochem.mpg.de/phosida/help/motifs.aspx>`_ (this link cites the reference where each motif was published).
419
420 This file has three tab-separated columns (and no header):
421
422 1. column 1 is an (ignored) identifier
423 2. column 2 is a Perl regular expression
424 3. column 3 is a descriptor.
425
426 For two examples:
427
428 ``2<TAB>R.R..(pS|pT)<TAB>Akt kinase substrate motif (HPRD)``
429
430 ``10<TAB>R..(pS|pT)V<TAB>CAMK2_Phosida``
431
432 ``psp_kinase_substrate``
433 'Kinase-substrate dataset: experimentally determined substrates, sequences, cognate kinases, and metadata curated from the literature' [Hornbeck 2011]. This tabular-formatted file may be downloaded for non-commercial purposes as 'Kinase_Substrate_Dataset.gz' from `https://www.phosphosite.org/staticDownloads.action <https://www.phosphosite.org/staticDownloads.action>`_.
434
435 Data extracted from PhosphoSitePlus(R), created by Cell Signaling Technology Inc. PhosphoSitePlus is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (`https://creativecommons.org/licenses/by-nc-sa/3.0/ <https://creativecommons.org/licenses/by-nc-sa/3.0/>`_). Attribution must be given in written, oral and digital presentations to PhosphoSitePlus, www.phosphosite.org. Written documents should additionally cite:
436
437 Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40, D261-D270.; www.phosphosite.org.
438
439 ``psp_regulatory_sites``
440 'Regulatory sites: information curated from the literature about modification sites shown to regulate molecular functions, biological processes, and molecular interactions including protein-protein interactions' [Hornbeck 2011]. This tabular-formatted file may be downloaded for non-commercial purposes as 'Regulatory_sites.gz' from `https://www.phosphosite.org/staticDownloads.action <https://www.phosphosite.org/staticDownloads.action>`_.
441
442 Terms of use and citation are as for the ``psp_kinase_substrate`` file.
443
444 **Output datasets**
445
446 ``ppep_map``
447 Data table (in tabular format, consumed by the merge/filter step) presenting, for each phosphopeptide, the kinase mappings, the mass-spectral intensities for each sample, and the metadata from UniProtKB/SwissProt, phospho-sites, phospho-motifs, and regulatory sites. Data in the columns marked "``Domain``", "``ON_...``", or "``..._PhosphoSite``" are available subject to the following terms:
448
449 "PhosphoSitePlus\ |reg| (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License(`https://creativecommons.org/licenses/by-nc-sa/3.0/ <https://creativecommons.org/licenses/by-nc-sa/3.0/>`_). When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words 'PhosphoSitePlus(R), www.phosphosite.org' must be included at appropriate places in the text or webpage, and (b) citation of [Hornbeck 2011 (`PMID: 25514926 <https://pubmed.ncbi.nlm.nih.gov/25514926>`_)] must be included in the bibliography."
450
451
452 ``melted``
453 Data table (in tabular format) presenting, for each phosphopeptide, the gene and one of the phospho-motifs or kinase-substrate sites.
454
455 ``ppep_mapping_sqlite``
456 SQLite database (consumed by the merge/filter step).
457
458 **Authors**
459
460 ``Nicholas A. Graham``
461 (`ORCiD 0000-0002-6811-1941 <https://orcid.org/0000-0002-6811-1941>`_) wrote the original script.
462
463 ``Arthur C. Eschenlauer``
464 (`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy.
465
466
467 ======================================================
468 Phosphoproteomic Enrichment Pipeline Merge and Filter
469 ======================================================
470
471 This step merges mapped metadata into metadata for phosphopeptides, filtering by species.
472
473 **Input parameters**
474
475 ``species``
476 Limit PhosphoSitePlus records to indicated species. Default: **human**
477
478 **Output datasets**
479
480 ``preproc_tab``
481 Phosphopeptides annotated with SwissProt and phosphosite metadata, in tabular format. This file is designed to be consumed by the downstream ANOVA tool. Some data in the columns marked "PSP" are available subject to the following terms:
482
483 "PhosphoSitePlus\ |reg| (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License(`https://creativecommons.org/licenses/by-nc-sa/3.0/ <https://creativecommons.org/licenses/by-nc-sa/3.0/>`_). When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words 'PhosphoSitePlus(R), www.phosphosite.org' must be included at appropriate places in the text or webpage, and (b) citation of [Hornbeck 2011 (`PMID: 25514926 <https://pubmed.ncbi.nlm.nih.gov/25514926>`_)] must be included in the bibliography."
484
485 ``preproc_csv``
486 Phosphopeptides annotated with SwissProt and phosphosite metadata, in CSV format.
487
488 ``preproc_sqlite``
489 ``ppep_mapping_sqlite`` updated with annotations, in SQLite format.
490
491 **Authors**
492
493 ``Nicholas A. Graham``
494 (`ORCiD 0000-0002-6811-1941 <https://orcid.org/0000-0002-6811-1941>`_) initiated the original script.
495
496 ``Larry C. Cheng``
497 (`ORCiD 0000-0002-6922-6433 <https://orcid.org/0000-0002-6922-6433>`_) updated the original script.
498
499 ``Arthur C. Eschenlauer``
500 (`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy.
501
502 .. |reg| unicode:: U+000AE .. REGISTERED SIGN
503 ]]></help>
504 <citations>
505 <!-- upstream kinase mapping -->
506 <!-- Amanchy 2007 "A curated compendium of phosphorylation motifs." PMID: 17344875 -->
507 <citation type="doi">10.1038/nbt0307-285</citation>
508 <!-- Aken 2016 "The Ensembl gene annotation system." PMID: 33137190; NOTE(review): this PMID may belong to a different Ensembl paper (the help text cites Howe 2021) - confirm which reference is intended -->
509 <citation type="doi">10.1093/database/baw093</citation>
510 <!-- localization filter -->
511 <!-- Bielow 2016 "Proteomics Quality Control: Quality Control Software for MaxQuant Results" PMID: 26653327 -->
512 <citation type="doi">10.1021/acs.jproteome.5b00780</citation>
513 <!-- all three steps -->
514 <!-- Cheng 2018 "Phosphopeptide Enrichment ..." PMID: 30124664 -->
515 <citation type="doi">10.3791/57996</citation>
516 <!-- localization filter and upstream kinase mapping (applies to Cox 2014 and Cox 2008) -->
517 <!-- Cox 2014 "Accurate proteome-wide label-free quantification ..." PMID: 24942700 -->
518 <citation type="doi">10.1074/mcp.M113.031591</citation>
519 <!-- Cox 2008 "MaxQuant enables high peptide identification rates ..." PMID: 19029910 -->
520 <!-- (the DOI for Cox 2008 follows; the next entry, Gnad 2011, is for upstream kinase mapping) -->
521 <citation type="doi">10.1038/nbt.1511</citation>
522 <!-- Gnad 2011 "PHOSIDA 2011: the posttranslational modification database." PMID: 21081558 -->
523 <citation type="doi">10.1093/nar/gkq1159</citation>
524 <!-- localization filter -->
525 <!-- Hogrebe 2018 "Benchmarking common quantification strategies for large-scale phosphoproteomics" PMID: 29535314 -->
526 <citation type="doi">10.1038/s41467-018-03309-6</citation>
527 <!-- upstream kinase mapping -->
528 <!-- Horn 2014 "KinomeXplorer: an integrated platform for kinome biology studies." PMID: 24874572 -->
529 <citation type="doi">10.1038/nmeth.2968</citation>
530 <!-- upstream kinase mapping and merge and filter -->
531 <!-- Hornbeck 2012 "PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse." PMID: 22135298 -->
532 <citation type="doi">10.1093/nar/gkr1122</citation>
533 <!-- upstream kinase mapping -->
534 <!-- Linding 2007 "Systematic discovery of in vivo phosphorylation networks." PMID: 17570479 -->
535 <citation type="doi">10.1016/j.cell.2007.05.052</citation>
536 <!-- localization filter -->
537 <!-- Olsen 2006 "Global, in vivo, and site-specific phosphorylation dynamics in signaling networks" PMID: 17081983 -->
538 <citation type="doi">10.1016/j.cell.2006.09.026</citation>
539 </citations>
540 </tool>