changeset 1:460edeedeb7d draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pmd_fdr commit d87c44ec8fd5bc03d7bb06f65a1a6ec247daee36"
author galaxyp
date Thu, 10 Oct 2019 17:29:37 -0400
parents 5cc0c32d05a2
children
files PMD_FDR_package_for_Galaxy.R pmd_fdr.xml
diffstat 2 files changed, 143 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/PMD_FDR_package_for_Galaxy.R	Mon Oct 07 11:59:37 2019 -0400
+++ b/PMD_FDR_package_for_Galaxy.R	Thu Oct 10 17:29:37 2019 -0400
@@ -2945,10 +2945,9 @@
     local_add_argument("--output_g_fdr"        , default = ""                  , help="full name and path to the g-FDR output file ")
     local_add_argument("--output_densities"    , default = ""                  , help="full name and path to the densities output file ")
     #local_add_argument("--score_field_name"    , default = ""                  , help="name of score field (in R format)")
-    local_add_argument("--input_file_type"     , default = "PMD_FDR_input_file", help="type of input file (currently supports: PSM_Report)")
+    local_add_argument("--input_file_type"     , default = "PMD_FDR_input_file", help="type of input file (currently supports: PMD_FDR_file_type, PSM_Report, MaxQuant_Evidence)")
   }
-)
-###############################################################################
+)###############################################################################
 #            Class: Data_Object_Parser
 ###############################################################################
 Data_Object_Parser <- setRefClass("Data_Object_Parser", 
--- a/pmd_fdr.xml	Mon Oct 07 11:59:37 2019 -0400
+++ b/pmd_fdr.xml	Thu Oct 10 17:29:37 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="pmd_fdr" name="PMD FDR" version="0.1.0">
+<tool id="pmd_fdr" name="PMD FDR" version="1.4.0">
     <description>recalculate FDR fom precursor mass discrepancy</description>
     <requirements>
         <requirement type="package" version="3.5.1">r-base</requirement>
@@ -24,6 +24,7 @@
         <param argument="--input_file_type" type="select" label="Input file type">
             <option value="PSM_Report" selected="true">PeptideShaker PSM_Report</option>
             <option value="PMD_FDR_input_file">PMD_FDR_input_file</option>
+            <option value="MaxQuant_Evidence">MaxQuant_Evidence</option>
         </param>
     </inputs>
     <outputs>
@@ -40,31 +41,151 @@
         </test>
     </tests>
     <help><![CDATA[
-Computes individual and global False Discovery Rate (FDR) using Precursor Mass Discrepancy (PMD) from a Peptide Spectrum Match (PSM) report.  
+=======
+PMD FDR
+=======
+PMD FDR calculates individual and global False Discovery Rate (FDR) using Precursor Mass Discrepancy (PMD) from a Peptide Spectrum Match (PSM) report.  ee the `PMD-FDR-for-Galaxy-P home page <https://github.com/slhubler/PMD-FDR-for-Galaxy-P/blob/master/README.md#pmd-fdr-for-galaxy-p>`_ for details.
+
+----------
+**Inputs**
+----------
+
+  **Primary Input file** is a PSM report, 3 formats are currently accepted:
+
+    * PeptideShaker PSM report (--input_file_type PSM_Report)
+      This format is the output of PeptideShaker. We expect it to be a tab-delimited file with the first row being the column labels. 
+      The following fields are required for a file to be correctly processed:
 
-The PSM report must be from PeptideShaker (--input_file_type PSM_Report), 
-or otherwise (--input_file_type PMD_FDR_input_file) have columns with header names:
+        - Confidence [%]
+        - Precursor m/z Error [ppm]
+        - Spectrum File
+        - Protein(s)
+        - Spectrum Title
+        - Sequence
+        - Decoy
+
+    * Maxquant evidence.txt (--input_file_type MaxQuant_Evidence)
+      The following fields are required for correct processing (others can exist):
+
+        - PEP
+        - Mass error [ppm]
+        - Proteins
+        - Retention time
+        - Sequence
+        - Reverse
 
-  - PMD_FDR_input_score
-  - PMD_FDR_pmd
-  - PMD_FDR_spectrum_file
-  - PMD_FDR_proteins
-  - PMD_FDR_spectrum_title
-  - PMD_FDR_sequence
-  - PMD_FDR_decoy
+    * Custom generated (--input_file_type PMD_FDR_input_file)
+      having columns with header names:
+
+        - PMD_FDR_input_score
+        - PMD_FDR_pmd
+        - PMD_FDR_spectrum_file
+        - PMD_FDR_proteins
+        - PMD_FDR_spectrum_title
+        - PMD_FDR_sequence
+        - PMD_FDR_decoy
+
+  **Optional PSM 1% file**
+
+    At present, this only supports PSM_Report format. This is simply the PSM_Report from PeptideShaker using a 1% FDR. This file is matched against the Primary Input file, using the spectrum file and title to match records. If they agree, the record (in the original input) is marked by setting is_one_percent_FDR to TRUE
+
+
+-----------
+**Outputs**
+-----------
+
+  Score ranges are used in all three files. An example score range: 060_099_ge_lt 
+
+    The structure of score range field is *aaa_bbb_cc_dd* where:
+
+      * *aaa* is the lower bound of the score range
+      * *bbb* is the upper bound of the score range
+      * *cc* and *dd* describe the lower bound and upper bound comparison operator:
+
+        - eq - "equal to"
+        - ge - "greater than or equal to"
+        - gt - "greater than"
+        - le - "less than or equal to"
+        - lt - "less than"
+
 
 
-PMD_FDR_package_for_Galaxy.R
+  **output_densities**
+    File contains a normalized version of the density function applied to up to 13 subsets of the data. All but "x" refers to the subsetting variable. As such, each column, except x, should sum to 1:
+
+     - x *center of a range of normalized PMD interval*
+     - t *(estimated) relative abundance of True Hits*
+     - f *(estimated) relative abundance of False Hits*
+     - aaa_bbb_cc_dd *relative abundance of score range; see above for definition of score*
+     - decoy *relative abundance of decoys (superset of f)*
+
+    Example:
+
+    ::
+
+       x       t       f       000_060_ge_lt   060_099_ge_lt   099_100_ge_lt   100_100_eq_eq   decoy
+       -16.9133785470534       4.16622917525113e-18    8.93652665157669e-05    0.000309818912433651    0       3.74804907374386e-18    4.16622917525113e-18    8.93652665157669e-05
+       -16.8519045481908       4.23773371666637e-18    9.74050812738847e-05    0.000317480777369488    0       5.16725222326666e-18    4.23773371666637e-18    9.74050812738847e-05
+       ...
 
-usage: PMD_FDR_package_for_Galaxy.R [--] [--help] [--opts OPTS] [--psm_report PSM_REPORT] [--psm_report_1_percent PSM_REPORT_1_PERCENT] [--output_i_fdr OUTPUT_I_FDR] [--output_g_fdr OUTPUT_G_FDR] [--output_densities OUTPUT_DENSITIES] [--score_field_name SCORE_FIELD_NAME] [--input_file_type INPUT_FILE_TYPE]
+  **output_g_fdr**
+
+    File contains the groupwise FDR (gFDR) for each score range:
+
+      - *group_of_interest* name of score group
+      - *alpha* gFDR
+
+      *Alpha hould be a number between 0 and 1 However, it is generated after excluding the training data and based on the resulting (random) peak height of each distribution. This means that the gFDR can be greater than 1 in practice.*
+
+    Example::
+
+        group_of_interest       alpha
+        000_060_ge_lt   0.953202547009365
+        060_099_ge_lt   0.477777669534645
+        099_100_ge_lt   0.381269718674806
+        100_100_eq_eq   0
+        decoy   1
+
 
-arguments:
-  -p, --psm_report PSM_REPORT            full name and path to the PSM report
-  --psm_report_1_percent PSM_REPORT_1_PERCENT            full name and path to the PSM report for 1% FDR [default: ]
-  -i, --input_file_type INPUT_FILE_TYPE            type of input file (currently supports: PSM_Report) [default: PMD_FDR_input_file]
-  -o, --output_i_fdr OUTPUT_I_FDR            full name and path to the i-FDR output file  [default: ]
-  --output_g_fdr OUTPUT_G_FDR            full name and path to the g-FDR output file  [default: ]
-  --output_densities OUTPUT_DENSITIES            full name and path to the densities output file  [default: ]
+  **output_i_fdr**
+
+    This file contains the individual FDR (iFDR) for each PSM, with supporting evidence:
+
+      - PMD_FDR_spectrum_title *Unique identifier concatenating PMD_FDR_spectrum_file and PMD_FDR_spectrum_index*
+      - value *Identical to PMD_FDR_pmd. Implemented this way to allow future alterations that would use input variables other than PMD*
+      - PMD_FDR_decoy *Input variable - 1 for decoy, 0 for other*
+      - median_of_group_index *median PMD for good-training records with the same group_index as the current record*
+      - value_norm *normalized value (value minus median_of_group_index)*
+      - used_to_find_middle *logical variable reflecting the following statement: was this record used to identify the median_of_group_index? (These records MUST be excluded in any summary statistics.)*
+      - PMD_FDR_input_score *The score used to separate data*
+      - PMD_FDR_pmd *Precursor Mass Discrepancy*
+      - PMD_FDR_peptide_length *Peptide length of identified peptide*
+      - PMD_FDR_spectrum_file *Name of file containing spectrum*
+      - PMD_FDR_spectrum_index *Spectrum number within file*
+      - PMD_FDR_proteins *Protein name*
+      - group_input_score *Grouping by score*
+      - group_pmd *Grouping by PMD (approx 20 groups)*
+      - group_peptide_length *Grouping by peptide length*
+      - group_training_class *Grouping by Training class, (see notes)*
+      - group_proteins *Grouping by Protein groups (see notes)*
+      - group_spectrum_file *Same as PMD_FDR_spectrum_file*
+      - group_spectrum_index *Contiguous groups of spectra (see notes)*
+      - group_proteins *Grouping by species (however, see notes)*
+      - group_decoy_input_score *decoy version of group_input_score*
+      - group_decoy_pmd *decoy version of group_pmd*
+      - group_decoy_peptide_length *decoy version of group_peptide_length*
+      - group_decoy_spectrum_file *decoy version of group_spectrum_file*
+      - group_decoy_spectrum_index *decoy version of group_spectrum_index*
+      - group_decoy_proteins *decoy version of group_proteins*
+      - is_in_1percent *PSM in 1% FDR file (if it exists)*
+      - value_of_interest *Defunct column (used during processing)*
+      - group_of_interest *Defunct column (used during processing)*
+      - interpolated_groupwise_FDR *estimated gFDR, interpolated from gFDR derived from group_decoy_input_score*
+      - t *density of t at PMD of record*
+      - f *density of f at PMD of record*
+      - alpha *same as interpolated_groupwise_FDR*
+      - i_fdr *iFDR (alphaf / (alphaf + (1-alpha)*t))*
+
 
     ]]></help>
 </tool>