# HG changeset patch # User galaxyp # Date 1627306466 0 # Node ID 8e4fb95a319a090699a069981a1a024d2734f861 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteomiqon_proteininference commit 1b4c15d5c84c890663475a22cc6ff71bbc9aa90c" diff -r 000000000000 -r 8e4fb95a319a proteomiqon_proteininference.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/proteomiqon_proteininference.xml Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,269 @@ + + + uses identified peptides to infere proteins explaining their presence in the sample. + + + 0.0.7 + + + proteomiqon-proteininference + + > '$out_paramfile' && + #end if + proteomiqon-proteininference -i './' -d '$peptideDB' -p '$paramfile' -o './out' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + outputParamfile + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_ and `peptide spectrum matches `_ which `passed fdr thresholds `_. + +MS-based shotgun proteomics estimates protein abundances using a proxy: peptides. The process of 'Protein Inference' is concerned with the mapping of identified peptides to the proteins they putatively originated from. This process is not as straightforward as one might think at a first glance on the subject, since the peptide-to-protein mapping is not necessarily a one-to-one relationship but in many cases a one-to-many relationship. This is due to the fact that many proteins share peptides with an identical sequence, e.g. two proteins originating from two different splice variants of the same gene. 
+ +One way to cope with this problem is to introduce the concept of protein groups, which allow us to report the aggregation of all peptides which map to all isoforms of a gene independently from the peptides mapping uniquely to a single isoform. +While this approach has its merits, it leaves room for fine-tuning when implemented. +Let's say we have two proteins pA and pB which were both discovered by one peptide uniquely mapping to each of them and additionally by a third peptide, which maps to both of them: How do we report our findings? +We could report both proteins separately and as a protein group, we could only report the protein group, or we could report both proteins but not the protein group. +A problem of comparable complexity arises when we decide which peptides to use when calculating the abundances for the protein group pA;pB. +Do we use the peptides only once, or do we also use the peptides mapping uniquely to protein pA and pB? +Fortunately, the tool ProteinInference gives you the possibility to choose any of the described scenarios by tuning the parameters described below. +The following scheme gives an overview of how parameter settings influence inferred protein groups: + +.. image:: $PATH_TO_IMAGES/ProteinInference.png + :width: 1048pt + :height: 358pt + +Moreover, we report each protein group with a so-called 'Peptide evidence class'. This metric gives an indication of how pure the peptide composition of a protein group is and lets us differentiate between protein groups that consist of isoforms of a splice variant or contain a rather arbitrary mix of proteins. +In order to determine these inter-protein relationships the user can optionally supply a gff3 file. + +Further Reading +--------------- +Additional information about the tool can be found in the `documentation `_. 
+ ]]> + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a static/images/ProteinInference.png Binary file static/images/ProteinInference.png has changed diff -r 000000000000 -r 8e4fb95a319a test-data/result_1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_1.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,17 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Maximal" + }, + "Peptide": { + "Case": "Maximal" + }, + "GroupFiles": true, + "GetQValue": { + "Case": "Storey", + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/result_2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_2.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,17 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Minimal" + }, + "Peptide": { + "Case": "MaximalInverse" + }, + "GroupFiles": true, + "GetQValue": { + "Case": "NoQValue", + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/result_3.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_3.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,22 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Minimal" + }, + "Peptide": { + "Case": "MaximalInverse" + }, + "GroupFiles": true, + "GetQValue": { + "Case": "LogisticRegression", + "Fields": [ + { + "Case": "Conservative" + } + ] + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/result_4.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_4.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,22 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Minimal" + }, + "Peptide": { + "Case": "MaximalInverse" + }, + "GroupFiles": true, + "GetQValue": { + "Case": "LogisticRegression", + "Fields": [ + { + "Case": "MAYU" + } + ] + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/result_5.json --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_5.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,22 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Minimal" + }, + "Peptide": { + "Case": "MaximalInverse" + }, + "GroupFiles": true, + "GetQValue": { + "Case": "LogisticRegression", + "Fields": [ + { + "Case": "DecoyTargetRatio" + } + ] + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/result_6.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_6.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,17 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Maximal" + }, + "Peptide": { + "Case": "Maximal" + }, + "GroupFiles": true, + "GetQValue": { + "Case": "NoQValue", + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/result_7.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_7.json Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,17 @@ + + + { + "ProteinIdentifierRegex": "id", + "Protein": { + "Case": "Maximal" + }, + "Peptide": { + "Case": "Maximal" + }, + "GroupFiles": false, + "GetQValue": { + "Case": "NoQValue", + } + } + + \ No newline at end of file diff -r 000000000000 -r 8e4fb95a319a test-data/sample.db Binary file test-data/sample.db has changed diff -r 000000000000 -r 8e4fb95a319a test-data/sample_1.qpsm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_1.qpsm Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,4 @@ +PSMId GlobalMod PepSequenceID ModSequenceID Label ScanNr ScanTime Charge PrecursorMZ TheoMass AbsDeltaMass PeptideLength MissCleavages SequestScore SequestNormDeltaBestToRest SequestNormDeltaNext AndroScore AndroNormDeltaBestToRest AndroNormDeltaNext XtandemScore XtandemNormDeltaBestToRest XtandemNormDeltaNext ModelScore QValue PEPValue StringSequence ProteinNames +sample=1 period=1 cycle=2033 experiment=4 1 4 8 1 0 32.9949 2 383.2268582 764.4508194 0.01165603632 7 0 8.635639049 0 
0.6779896705 112.3264921 0 0.7657267969 45.72186797 0 0.5594493789 NaN NaN NaN ILVGDIK Cre02.g143307.t1.1 +sample=1 period=1 cycle=2043 experiment=5 0 22 87 1 2 33.36088333 2 399.242789 796.4806883 0.009663316474 7 0 7.816376209 0 0.8192994029 74.99645487 0 1 29.37298389 0 0.7506817907 NaN NaN NaN ALEVIPR Cre01.g026550.t1.1 +sample=1 period=1 cycle=2038 experiment=7 1 22 88 1 1 33.18005 2 404.2288055 806.4510372 0.007979227388 7 0 7.830779576 0 0.7970537368 112.937008 0 0.8973792939 47.98049859 0 0.6070363597 NaN NaN NaN ALEVIPR Cre01.g026550.t1.1 diff -r 000000000000 -r 8e4fb95a319a test-data/sample_2.qpsm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_2.qpsm Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,4 @@ +PSMId GlobalMod PepSequenceID ModSequenceID Label ScanNr ScanTime Charge PrecursorMZ TheoMass AbsDeltaMass PeptideLength MissCleavages SequestScore SequestNormDeltaBestToRest SequestNormDeltaNext AndroScore AndroNormDeltaBestToRest AndroNormDeltaNext XtandemScore XtandemNormDeltaBestToRest XtandemNormDeltaNext ModelScore QValue PEPValue StringSequence ProteinNames +sample=1 period=1 cycle=2033 experiment=4 1 4 8 1 0 32.9949 2 383.2268582 764.4508194 0.01165603632 7 0 8.635639049 0 0.6779896705 112.3264921 0 0.7657267969 45.72186797 0 0.5594493789 NaN NaN NaN ILVGDIK Cre02.g143307.t1.1 +sample=1 period=1 cycle=2043 experiment=5 0 22 87 1 2 33.36088333 2 399.242789 796.4806883 0.009663316474 7 0 7.816376209 0 0.8192994029 74.99645487 0 1 29.37298389 0 0.7506817907 NaN NaN NaN ALEVIPR Cre01.g026550.t1.1 +sample=1 period=1 cycle=2038 experiment=7 1 22 88 1 1 33.18005 2 404.2288055 806.4510372 0.007979227388 7 0 7.830779576 0 0.7970537368 112.937008 0 0.8973792939 47.98049859 0 0.6070363597 NaN NaN NaN ALEVIPR Cre01.g026550.t1.1 diff -r 000000000000 -r 8e4fb95a319a test-data/sample_3.qpsm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_3.qpsm Mon Jul 26 13:34:26 2021 +0000 @@ -0,0 +1,4 @@ +PSMId GlobalMod PepSequenceID 
ModSequenceID Label ScanNr ScanTime Charge PrecursorMZ TheoMass AbsDeltaMass PeptideLength MissCleavages SequestScore SequestNormDeltaBestToRest SequestNormDeltaNext AndroScore AndroNormDeltaBestToRest AndroNormDeltaNext XtandemScore XtandemNormDeltaBestToRest XtandemNormDeltaNext ModelScore QValue PEPValue StringSequence ProteinNames +sample=1 period=1 cycle=2033 experiment=4 1 4 8 1 0 32.9949 2 383.2268582 764.4508194 0.01165603632 7 0 8.635639049 0 0.6779896705 112.3264921 0 0.7657267969 45.72186797 0 0.5594493789 NaN NaN NaN ILVGDIK Cre02.g143307.t1.1 +sample=1 period=1 cycle=2043 experiment=5 0 22 87 1 2 33.36088333 2 399.242789 796.4806883 0.009663316474 7 0 7.816376209 0 0.8192994029 74.99645487 0 1 29.37298389 0 0.7506817907 NaN NaN NaN ALEVIPR Cre01.g026550.t1.1 +sample=1 period=1 cycle=2038 experiment=7 1 22 88 1 1 33.18005 2 404.2288055 806.4510372 0.007979227388 7 0 7.830779576 0 0.7970537368 112.937008 0 0.8973792939 47.98049859 0 0.6070363597 NaN NaN NaN ALEVIPR Cre01.g026550.t1.1