# HG changeset patch # User galaxyp # Date 1626616388 0 # Node ID b8590dea5e2dc7ba13caf4525da14abc2ff250d7 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteomiqon_psmstatistics commit 7e7a2e71b21a1c92ae0c79cc4e25a7c2a5f0c4f1" diff -r 000000000000 -r b8590dea5e2d proteomiqon_psmstatistics.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteomiqon_psmstatistics.xml Sun Jul 18 13:53:08 2021 +0000 @@ -0,0 +1,175 @@ + + + utilizes semi supervised machine learning techniques to integrate search engine scores as well as the mentioned quality scores into one single consensus score. + + + 0.0.6 + + + proteomiqon-psmstatistics + + > '$out_paramfile' && + #end if + ln -s '$psm' '${basename}.psm' && + ln -s '$out_qpsm' '${basename}.qpsm' && + proteomiqon-psmstatistics -i './${basename}.psm' -d '$peptideDB' -p '$paramfile' -o ./ + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + outputParamfile + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_ and `peptide spectrum matches `_. + +An established method to identify acquired MS/MS spectra is the comparison of each spectrum with peptides in a reference database. + +To measure the similarity of in silico generated spectra and measured MS/MS scans we use our own implementations of three established search engine scores: SEQUEST, Andromeda and XTandem. +Additionally, we also record quality control parameters such as the mass difference between the precursor ion and the theoretically calculated mass or the uniqueness of each score in comparison to 'competing' peptides within the search space. +The PSMStatistics tool utilizes semi supervised machine learning techniques to integrate search engine scores as well as the mentioned quality scores into one single consensus score. + +.. 
image:: SemiSupervisedScoring.png + :width: 768pt + :height: 345pt + +Since the search space is extended by so called decoys - reversed counterparts of peptides within the search space - we can estimate the distribution of 'true negatives' and calculate local (PEP values) and global (Q values) false discovery rates at each consensus score. +The reported peptides at user defined local and global FDR cutoffs can then be used as inputs for any downstream analysis be it ProteinInference or PSMBasedQuantification. + +Further Reading +--------------- +Additional information about the tool can be found in the `documentation `_. + ]]> + + diff -r 000000000000 -r b8590dea5e2d static/images/SemiSupervisedScoring.png Binary file static/images/SemiSupervisedScoring.png has changed diff -r 000000000000 -r b8590dea5e2d test-data/result_1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_1.json Sun Jul 18 13:53:08 2021 +0000 @@ -0,0 +1,24 @@ + + + { + "Threshold": + { + "Case":"Estimate", + "Fields": + [ + { + "QValueThreshold" : 0.01, + "PepValueThreshold" : 0.05, + "MaxIterations" : 15, + "MinimumIncreaseBetweenIterations" : 0.005, + "PepValueFittingMethod" : { + "Case":"LinearSpline" + } + } + ] + }, + "ParseProteinIDRegexPattern":"id", + "KeepTemporaryFiles":false + } + + \ No newline at end of file diff -r 000000000000 -r b8590dea5e2d test-data/result_2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_2.json Sun Jul 18 13:53:08 2021 +0000 @@ -0,0 +1,24 @@ + + + { + "Threshold": + { + "Case":"Estimate", + "Fields": + [ + { + "QValueThreshold" : 0.01, + "PepValueThreshold" : 0.05, + "MaxIterations" : 15, + "MinimumIncreaseBetweenIterations" : 0.005, + "PepValueFittingMethod" : { + "Case":"LogisticRegressionLogit" + } + } + ] + }, + "ParseProteinIDRegexPattern":"id", + "KeepTemporaryFiles":false + } + + \ No newline at end of file diff -r 000000000000 -r b8590dea5e2d test-data/result_3.json --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/test-data/result_3.json Sun Jul 18 13:53:08 2021 +0000 @@ -0,0 +1,19 @@ + + + { + "Threshold": + { + "Case":"Fixed", + "Fields": + [ + { + "SequestLike":5.0, + "Andromeda":40.0 + } + ] + }, + "ParseProteinIDRegexPattern":"id", + "KeepTemporaryFiles":false + } + + \ No newline at end of file diff -r 000000000000 -r b8590dea5e2d test-data/sample.db Binary file test-data/sample.db has changed diff -r 000000000000 -r b8590dea5e2d test-data/sample.psm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample.psm Sun Jul 18 13:53:08 2021 +0000 @@ -0,0 +1,7 @@ +PSMId GlobalMod PepSequenceID ModSequenceID Label ScanNr ScanTime Charge PrecursorMZ TheoMass AbsDeltaMass PeptideLength MissCleavages SequestScore SequestNormDeltaBestToRest SequestNormDeltaNext AndroScore AndroNormDeltaBestToRest AndroNormDeltaNext XtandemScore XtandemNormDeltaBestToRest XtandemNormDeltaNext StringSequence +sample=1-period=1-cycle=2033-experiment=4_0_2_0 1 4 8 -1 0 32.9949 2 383.2268582 764.4508194 0.01165603632 7 -1 2.780764976 0.6779896705 0.0 26.3150871 0.7657267969 0.0 20.14279733 0.5594493789 0.0 ILVGDIK +sample=1-period=1-cycle=2033-experiment=4_0_2_0 1 4 8 1 0 32.9949 2 383.2268582 764.4508194 0.01165603632 7 -1 8.635639049 0.0 0.6779896705 112.3264921 0.0 0.7657267969 45.72186797 0.0 0.5594493789 ILVGDIK +sample=1-period=1-cycle=2043-experiment=5_2_2_0 0 22 87 -1 2 33.36088333 2 399.242789 796.4806883 0.009663316474 7 -1 1.412423849 0.8192994029 0.0 0.0 1.0 0.0 7.323219744 0.7506817907 0.0 ALEVIPR +sample=1-period=1-cycle=2043-experiment=5_2_2_0 0 22 87 1 2 33.36088333 2 399.242789 796.4806883 0.009663316474 7 -1 7.816376209 0.0 0.8192994029 74.99645487 0.0 1.0 29.37298389 0.0 0.7506817907 ALEVIPR +sample=1-period=1-cycle=2038-experiment=7_1_2_0 1 22 88 -1 1 33.18005 2 404.2288055 806.4510372 0.007979227388 7 -1 1.589227453 0.7970537368 0.0 11.5896755 0.8973792939 0.0 18.85459139 0.6070363597 0.0 ALEVIPR +sample=1-period=1-cycle=2038-experiment=7_1_2_0 1 22 88 1 1 
33.18005 2 404.2288055 806.4510372 0.007979227388 7 -1 7.830779576 0.0 0.7970537368 112.937008 0.0 0.8973792939 47.98049859 0.0 0.6070363597 ALEVIPR