Repository 'tpp_prophets'
hg clone https://toolshed.g2.bx.psu.edu/repos/iracooke/tpp_prophets

Changeset 14:d90c8bc10a9c (2015-03-26)
Previous changeset 13:b793fe628648 (2014-07-01) Next changeset 15:0746a2ae9e04 (2015-10-20)
Commit message:
Docker support and update for protk 1.4
modified:
interprophet.xml
peptide_prophet.xml
pepxml_to_table.xml
protein_prophet.xml
protxml_to_table.xml
added:
README.rst
repository_dependencies.xml
test-data/mr176-BSA100fmole_BA3_01_8168.d_tandem.pep.xml
test-data/mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph_protproph.prot.xml
removed:
README.md
b
diff -r b793fe628648 -r d90c8bc10a9c README.md
--- a/README.md Tue Jul 01 11:16:41 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,8 +0,0 @@
-## What is it?
-Galaxy tool definition files and wrapper scripts for Peptide and Protein inference tools in the [Trans Proteomic Pipeline](http://tools.proteomecenter.org/wiki/index.php?title=Software:TPP) (Peptide Prophet, iProphet and Protein Prophet). 
-
-## Installation
-Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/
-
-All the tools depend on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). 
-
b
diff -r b793fe628648 -r d90c8bc10a9c README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Mar 26 19:55:19 2015 -0400
b
@@ -0,0 +1,37 @@
+TPP Prophets
+============
+
+Galaxy wrappers for Peptide Prophet, iProphet and Protein Prophet
+
+Requirements
+------------
+
+This package uses protk_ and the trans_proteomic_pipeline_ which need to be present in order for the tool to work.
+
+.. _protk: https://github.com/iracooke/protk
+.. _trans_proteomic_pipeline: http://tools.proteomecenter.org/wiki/index.php?title=Software:TPP
+
+There are two ways you can satisfy these dependencies (choose one):
+
+1. **Manual Install:** Details on how to install protk_ and the trans_proteomic_pipeline_ manually are available here_.
+
+2. **Use Docker:** These tools are designed to run inside a docker_ container. If your galaxy supports `running tools within a docker container`__ you don't need to worry about dependencies. Simply install and things should just work.  The docker container itself is versioned and new versions of this tool will automatically download an update to the container if needed.
+
+.. _docker: https://www.docker.com/
+.. _here: https://github.com/iracooke/protk/#galaxy-integration
+.. _container: https://wiki.galaxyproject.org/Admin/Tools/Docker
+__ container_
+
+
+Further Info
+------------
+
+The source code for this tool and other protk galaxy tools is on github_.  Please visit the github page to contribute to the project or to `report an issue`__.
+
+.. _github: https://github.com/iracooke/protk-galaxytools
+.. _issue: https://github.com/iracooke/protk-galaxytools/issues
+__ issue_
+
+Please visit the TPP__ website for details of the various Prophet tools.
+
+__ trans_proteomic_pipeline_
b
diff -r b793fe628648 -r d90c8bc10a9c interprophet.xml
--- a/interprophet.xml Tue Jul 01 11:16:41 2014 -0400
+++ b/interprophet.xml Thu Mar 26 19:55:19 2015 -0400
[
@@ -1,8 +1,9 @@
-<tool id="proteomics_search_interprophet_1" name="InterProphet" version="1.0.1">
+<tool id="proteomics_search_interprophet_1" name="InterProphet" version="1.1.0">
 
  <requirements>
-     <requirement type="package" version="1.3">protk</requirement>
-     <requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement>
+         <container type="docker">iracooke/protk-1.4.1</container>
+     <requirement type="package" version="1.4">protk</requirement>
+     <requirement type="package" version="4.8.0">trans_proteomic_pipeline</requirement>
    </requirements>
 
 
@@ -22,35 +23,34 @@
 
  $use_nsm 
 
- --minprob $minprob
+ --p-thresh $p_thresh
+
+ --threads $threads
 
- ## Inputs.
- ${first_input}
- #for $input_file in $input_files:
- ${input_file.additional_input}
- #end for  
+#for $pepxml_file in $pepxml_files:
+ ${pepxml_file}
+#end for
 
   </command>
 
+
   <inputs>
 
- <param name="first_input" type="data" format="peptideprophet_pepxml" label="Peptide Prophet Results" help="These files will typically be outputs from search tools that have subsequently been run through peptide prophet"/> 
-
- <repeat name="input_files" title="Additional PepXML Input Files">
- <param format="peptideprophet_pepxml" name="additional_input" type="data" label="PepXML produced by Peptide Prophet" help=""/>
- </repeat>
+ <param name="pepxml_files" multiple="true" type="data" format="peptideprophet_pepxml" label="Peptide Prophet Results" help="These files will typically be outputs from search tools that have subsequently been run through peptide prophet"/>
+
+ <param name="use_nss" checked="true" type="boolean" label="Include NSS in Model" help="Include NSS (Number of Sibling Searches) in Statistical Model" truevalue="" falsevalue="--no-nss"/>
+ <param name="use_nrs" checked="true" type="boolean" label="Include NRS in Model" help="Include NRS (Number of Replicate Spectra) in Statistical Model" truevalue="" falsevalue="--no-nrs"/>
+ <param name="use_nse" checked="true" type="boolean" label="Include NSE in Model" help="Include NSE (Number of Sibling Experiments) in Statistical Model" truevalue="" falsevalue="--no-nse"/>
+ <param name="use_nsi" checked="true" type="boolean" label="Include NSI in Model" help="Include NSI (Number of Sibling Ions) in Statistical Model" truevalue="" falsevalue="--no-nsi"/>
+ <param name="use_nsm" checked="true" type="boolean" label="Include NSM in Model" help="Include NSM (Number of Sibling Modifications) in Statistical Model" truevalue="" falsevalue="--no-nsm"/>
 
- <param name="use_nss" checked="true" type="boolean" label="Include NSS in Model" help="Include NSS (Number of Sibling Searches) in Statistical Model" truevalue="blank" falsevalue="--no-nss"/>
- <param name="use_nrs" checked="true" type="boolean" label="Include NRS in Model" help="Include NRS (Number of Replicate Spectra) in Statistical Model" truevalue="blank" falsevalue="--no-nrs"/>
- <param name="use_nse" checked="true" type="boolean" label="Include NSE in Model" help="Include NSE (Number of Sibling Experiments) in Statistical Model" truevalue="blank" falsevalue="--no-nse"/>
- <param name="use_nsi" checked="true" type="boolean" label="Include NSI in Model" help="Include NSI (Number of Sibling Ions) in Statistical Model" truevalue="blank" falsevalue="--no-nsi"/>
- <param name="use_nsm" checked="true" type="boolean" label="Include NSM in Model" help="Include NSM (Number of Sibling Modifications) in Statistical Model" truevalue="blank" falsevalue="--no-nsm"/>
-
- <param name="minprob" type="text" label="Minimum threshod probability for reporting results"/>
+    <param name="p_thresh" help="Peptides scoring less than this value are discarded" type="float" value="0.05" min="0" max="1" label="Probability Threshold"/>
+
+ <param name="threads" type="integer" value="1" min="0" label="Threads" help="Number of threads to use"/>
 
   </inputs>
   <outputs>
-    <data format="interprophet_pepxml" name="output" metadata_source="first_input" label="interprophet.${first_input.display_name}" from_work_dir="interprophet_output.pep.xml"/>
+    <data format="interprophet_pepxml" name="output" label="interprophet.${pepxml_files[0].display_name}" from_work_dir="interprophet_output.pep.xml"/>
   </outputs>
 
  <help>
b
diff -r b793fe628648 -r d90c8bc10a9c peptide_prophet.xml
--- a/peptide_prophet.xml Tue Jul 01 11:16:41 2014 -0400
+++ b/peptide_prophet.xml Thu Mar 26 19:55:19 2015 -0400
b
@@ -1,7 +1,8 @@
-<tool id="proteomics_search_peptide_prophet_1" name="Peptide Prophet" version="1.0.1">
+<tool id="proteomics_search_peptide_prophet_1" name="Peptide Prophet" version="1.1.0">
     <requirements>
-        <requirement type="package" version="1.3">protk</requirement>
-        <requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement>
+        <container type="docker">iracooke/protk-1.4.1</container>
+        <requirement type="package" version="1.4">protk</requirement>
+        <requirement type="package" version="4.8.0">trans_proteomic_pipeline</requirement>
    </requirements>
    
  <description>Calculate Peptide Prophet statistics on search results</description>
@@ -23,6 +24,16 @@
         $force_fit 
         $allow_alt_instruments 
         $maldi
+        $usedecoys
+        --decoy-prefix $decoy_prefix_string
+
+#if $experiment_label
+        --experiment-label $experiment_label
+#end if
+
+        --p-thresh $p_thresh
+
+        --threads $threads
 
  </command>
 
@@ -44,18 +55,23 @@
  <param name="force_fit" type="boolean" label="Force fitting" help="Bypasses automatic mixture model checks and forces fitting of a mixture model" truevalue="--force-fit" falsevalue=""/>
  <param name="allow_alt_instruments" type="boolean" label="Allow multiple instrument types" help="Warning instead of exit with error if instrument types between runs is different" truevalue="--allow-alt-instruments" falsevalue=""/>
  <param name="maldi" type="boolean" label="Maldi data" truevalue="-l" falsevalue=""/>
-
+    <param name="usedecoys" type="boolean" label="Use decoys to pin down the negative distribution" truevalue="" falsevalue="--no-decoy"/>
+    <param name="decoy_prefix_string" help="Prefix string for decoy ids" type="text" value="decoy_" label="Decoy Prefix String" size="20"/>
+    <param name="experiment_label" help="Used to commonly label all spectra from one experiment" type="text" value="" label="Experiment Label" size="20"/>
+    <param name="p_thresh" help="Peptides scoring less than this value are discarded" type="float" value="0.05" min="0" max="1" label="Probability Threshold"/>
+    <param name="threads" type="integer" value="1" min="0" label="Threads" help="Number of threads to use"/>
 
   </inputs>
   <outputs>
     <data format="peptideprophet_pepxml" name="output" metadata_source="input_file" label="peptide_prophet.${input_file.display_name}.pep.xml" from_work_dir="peptide_prophet_output.pep.xml"/>
   </outputs>
 
+
 <help>
 
 **What it does**
 
-Given raw search engine scores as inputs this tool estimates the accuracy of peptide assignments.  From a practical perspective it estimates the probability that each peptide assignment is correct (providing probabilities as outputs), given raw scores (possibly on some arbitrary scale) as inputs. 
+Given raw search engine scores as inputs this tool estimates the accuracy of peptide assignments.
 
 ----
 
b
diff -r b793fe628648 -r d90c8bc10a9c pepxml_to_table.xml
--- a/pepxml_to_table.xml Tue Jul 01 11:16:41 2014 -0400
+++ b/pepxml_to_table.xml Thu Mar 26 19:55:19 2015 -0400
b
@@ -1,7 +1,8 @@
-<tool id="pepxml_to_table_1" name="PepXML to Table" version="1.0.1">
+<tool id="pepxml_to_table_1" name="PepXML to Table" version="1.1.0">
 
  <requirements>
-     <requirement type="package" version="1.3">protk</requirement>
+            <container type="docker">iracooke/protk-1.4.1</container>
+     <requirement type="package" version="1.4">protk</requirement>
    </requirements>
 
 
@@ -10,18 +11,33 @@
 
 
 <!-- Note .. the input file is assumed to be the first argument -->
-<command>pepxml_to_table.rb $input_file -o $output</command>
+<command>pepxml_to_table.rb $input_file -o $output $invert_probs</command>
 
 
 <inputs>
 
  <param name="input_file" type="data" format="pepxml,raw_pepxml,peptideprophet_pepxml,interprophet_pepxml"  multiple="false" label="Input File" help="A pepXML file"/>
+    <param name="invert_probs" type="boolean" label="Print inverted probabilities (ie 1-p instead of p)" truevalue="--invert-probabilities" falsevalue=""/>
 
 </inputs>
 <outputs>
  <data format="csv" name="output" metadata_source="input_file" label="${input_file.display_name}.csv" />
 </outputs>
 
+
+ <tests>
+   <!-- Just test that the tool runs and produces vaguely correct output -->
+   <test>
+       <param name="input_file" value="mr176-BSA100fmole_BA3_01_8168.d_tandem.pep.xml" format="raw_pepxml"/>
+       <output name="output" format="csv">
+           <assert_contents>
+               <has_text text="ANTNNYAPKSSR" />
+           </assert_contents>
+       </output>
+   </test>
+ </tests>
+
+
 <help>
  Convert a pepXML file to Tab delimited text
 </help>
b
diff -r b793fe628648 -r d90c8bc10a9c protein_prophet.xml
--- a/protein_prophet.xml Tue Jul 01 11:16:41 2014 -0400
+++ b/protein_prophet.xml Thu Mar 26 19:55:19 2015 -0400
b
@@ -1,7 +1,8 @@
-<tool id="proteomics_search_protein_prophet_1" name="Protein Prophet" version="1.0.1">
+<tool id="proteomics_search_protein_prophet_1" name="Protein Prophet" version="1.1.0">
  <requirements>
-     <requirement type="package" version="1.3">protk</requirement>
-     <requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement>
+            <container type="docker">iracooke/protk-1.4.1</container>
+     <requirement type="package" version="1.4">protk</requirement>
+     <requirement type="package" version="4.8.0">trans_proteomic_pipeline</requirement>
    </requirements>
 
   <description>Calculate Protein Prophet statistics on search results</description>
@@ -35,7 +36,6 @@
 
     <param name="input_file" type="data" format="peptideprophet_pepxml,interprophet_pepxml" multiple="false" label="Peptide Prophet Results" help="These files will typically be outputs from peptide prophet or interprophet"/>
 
-
  <param name="iproph" selected="true" type="boolean" label="Inputs are from iProphet" truevalue="--iprophet-input" falsevalue=""/>
  <param name="nooccam" type="boolean" label="Don't apply Occam's razor" help="When selected no attempt will be made to derive the simplest protein list explaining observed peptides" truevalue="--no-occam" falsevalue=""/>
  <param name="groupwts" type="boolean" label="Use group weights" help="Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold" truevalue="--group-wts" falsevalue=""/>
b
diff -r b793fe628648 -r d90c8bc10a9c protxml_to_table.xml
--- a/protxml_to_table.xml Tue Jul 01 11:16:41 2014 -0400
+++ b/protxml_to_table.xml Thu Mar 26 19:55:19 2015 -0400
b
@@ -1,12 +1,14 @@
-<tool id="protxml_to_table_1" name="ProtXML to Table" version="1.0.1">
+<tool id="protxml_to_table_1" name="ProtXML to Table" version="1.1.0">
  <requirements>
-     <requirement type="package" version="1.3">protk</requirement>
+            <container type="docker">iracooke/protk-1.4.1</container>
+     <requirement type="package" version="1.4">protk</requirement>
    </requirements>
 
  <description>Converts a ProtXML file to a table</description>
 
  <command>
  protxml_to_table.rb 
+    $invert_probs
 
  $input_file 
  -o $output 
@@ -15,7 +17,7 @@
  <inputs>
 
  <param format="protxml" name="input_file" type="data" label="ProtXML File to Convert"/>
-
+     <param name="invert_probs" type="boolean" label="Print inverted probabilities (ie 1-p instead of p)" truevalue="--invert-probabilities" falsevalue=""/>
  </inputs>
 
 
@@ -23,6 +25,18 @@
  <data format="tabular" name="output" />
  </outputs>
 
+ <tests>
+   <!-- Just test that the tool runs and produces vaguely correct output -->
+   <test>
+       <param name="input_file" value="mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph_protproph.prot.xml" format="protxml"/>
+       <output name="output" format="tabular">
+           <assert_contents>
+               <has_text text="AVQKYLTAHEQSK" />
+           </assert_contents>
+       </output>
+   </test>
+ </tests>
+
 
   <help>
 
b
diff -r b793fe628648 -r d90c8bc10a9c repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Thu Mar 26 19:55:19 2015 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="Proteomics datatypes">
+    <repository changeset_revision="ac51d9dbfb4d" name="proteomics_datatypes" owner="iracooke" toolshed="https://toolshed.g2.bx.psu.edu" />
+ </repositories>
b
diff -r b793fe628648 -r d90c8bc10a9c test-data/mr176-BSA100fmole_BA3_01_8168.d_tandem.pep.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mr176-BSA100fmole_BA3_01_8168.d_tandem.pep.xml Thu Mar 26 19:55:19 2015 -0400
[
b'@@ -0,0 +1,1651 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+<?xml-stylesheet type="text/xsl" href="pepXML_std.xsl"?>\n+<msms_pipeline_analysis date="2014-06-22T13:57:05" summary_xml="/Users/icooke/Desktop/iptest/mr176-BSA100fmole_BA3_01_8168.d_tandem.pep.xml" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v117.xsd">\n+   <msms_run_summary base_name="mr176-BSA100fmole_BA3_01_8168.d_tandem" search_engine="X! Tandem" raw_data_type="raw" raw_data=".?">\n+      <sample_enzyme name="trypsin">\n+<specificity cut="KR" no_cut="P" sense="C"/>\n+</sample_enzyme>\n+<search_summary base_name="mr176-BSA100fmole_BA3_01_8168.d_tandem" search_engine="X! Tandem" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">\n+         <search_database local_path="/Users/icooke/Desktop/iptest/AASequences.fasta" type="AA"/>\n+         <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="1" />\n+         <aminoacid_modification aminoacid="E" massdiff="-18.0106" mass="111.0320" variable="Y" symbol="^" /><!--X! Tandem n-terminal AA variable modification-->\n+         <aminoacid_modification aminoacid="M" massdiff="15.9949" mass="147.0354" variable="Y" />\n+         <aminoacid_modification aminoacid="Q" massdiff="-17.0265" mass="111.0321" variable="Y" symbol="^" /><!--X! Tandem n-terminal AA variable modification-->\n+         <terminal_modification terminus="n" massdiff="42.0106" mass="43.0184" protein_terminus="N" variable="Y" symbol="^" /><!--X! 
Tandem n-terminal AA variable modification-->\n+         \n+         <!-- Input parameters -->\n+         <parameter name="list path, default parameters" value="/Users/icooke/.rvm/gems/ruby-2.0.0-p353/gems/protk-1.2.6.pre6/lib/protk/data/tandem_isb_native_defaults.xml"/>\n+         <parameter name="list path, taxonomy information" value="/Users/icooke/Desktop/iptest/mr176-BSA100fmole_BA3_01_8168.d_tandem.taxonomy.xml"/>\n+         <parameter name="output, histogram column width" value="30"/>\n+         <parameter name="output, histograms" value="no"/>\n+         <parameter name="output, log path" value=""/>\n+         <parameter name="output, maximum valid expectation value" value="0.1"/>\n+         <parameter name="output, message" value="1234567890"/>\n+         <parameter name="output, one sequence copy" value="no"/>\n+         <parameter name="output, parameters" value="yes"/>\n+         <parameter name="output, path" value="/Users/icooke/Desktop/iptest/mr176-BSA100fmole_BA3_01_8168.d_tandem"/>\n+         <parameter name="output, path hashing" value="no"/>\n+         <parameter name="output, performance" value="yes"/>\n+         <parameter name="output, proteins" value="yes"/>\n+         <parameter name="output, results" value="all"/>\n+         <parameter name="output, sequence path" value=""/>\n+         <parameter name="output, sequences" value="no"/>\n+         <parameter name="output, sort results by" value="spectrum"/>\n+         <parameter name="output, spectra" value="no"/>\n+         <parameter name="output, xsl path" value="tandem-style.xsl"/>\n+         <parameter name="protein, C-terminal residue modification mass" value=""/>\n+         <parameter name="protein, N-terminal residue modification mass" value=""/>\n+         <parameter name="protein, cleavage C-terminal mass change" value=""/>\n+         <parameter name="protein, cleavage N-terminal mass change" value=""/>\n+         <parameter name="protein, cleavage semi" value="yes"/>\n+         
<parameter name="protein, cleavage site" value="[RK]|{P}"/>\n+         <parameter name="protein, homolog management" value="no"/>\n+         <parameter name="protein, modified residue mass file" value=""/>\n+         <parameter name="protein, quick acetyl" value="yes"/>\n+         <parameter name="protein, taxon" value="AASequences.fasta"/>\n+         <parameter name="refine" value="no"/>\n+  '..b'         <search_score name="expect" value="1.9"/>\n+         </search_hit>\n+      </search_result>\n+      </spectrum_query>\n+      <spectrum_query spectrum="mr176-BSA100fmole_BA3_01_8168.d.mgf.00096.00096.1" start_scan="96" end_scan="96" precursor_neutral_mass="996.6218" assumed_charge="1" index="95">\n+      <search_result>\n+         <search_hit hit_rank="1" peptide="VLMFCELP" peptide_prev_aa="R" peptide_next_aa="P" protein="tr|O70238|O70238_MOUSE" protein_descr="Homeobox protein PSX OS=Mus musculus GN=Rhox6 PE=2 SV=2" num_tot_proteins="1" num_matched_ions="4" tot_num_ions="14" calc_neutral_pep_mass="948.4447" massdiff="48.177" num_tol_term="1" num_missed_cleavages="0" is_rejected="0">\n+            <modification_info>\n+               <mod_aminoacid_mass position="3" mass="147.0354" />\n+            </modification_info>\n+            <search_score name="hyperscore" value="13.2"/>\n+            <search_score name="nextscore" value="11.0"/>\n+            <search_score name="bscore" value="8.7"/>\n+            <search_score name="yscore" value="10.2"/>\n+            <search_score name="cscore" value="0"/>\n+            <search_score name="zscore" value="0"/>\n+            <search_score name="ascore" value="0"/>\n+            <search_score name="xscore" value="0"/>\n+            <search_score name="expect" value="3.9"/>\n+         </search_hit>\n+      </search_result>\n+      </spectrum_query>\n+      <spectrum_query spectrum="mr176-BSA100fmole_BA3_01_8168.d.mgf.00099.00099.1" start_scan="99" end_scan="99" precursor_neutral_mass="997.0408" assumed_charge="1" 
index="96">\n+      <search_result>\n+         <search_hit hit_rank="1" peptide="VLMFCELP" peptide_prev_aa="R" peptide_next_aa="P" protein="tr|O70238|O70238_MOUSE" protein_descr="Homeobox protein PSX OS=Mus musculus GN=Rhox6 PE=2 SV=2" num_tot_proteins="1" num_matched_ions="4" tot_num_ions="14" calc_neutral_pep_mass="948.4447" massdiff="48.596" num_tol_term="1" num_missed_cleavages="0" is_rejected="0">\n+            <modification_info>\n+               <mod_aminoacid_mass position="3" mass="147.0354" />\n+            </modification_info>\n+            <search_score name="hyperscore" value="13.0"/>\n+            <search_score name="nextscore" value="11.6"/>\n+            <search_score name="bscore" value="8.6"/>\n+            <search_score name="yscore" value="10"/>\n+            <search_score name="cscore" value="0"/>\n+            <search_score name="zscore" value="0"/>\n+            <search_score name="ascore" value="0"/>\n+            <search_score name="xscore" value="0"/>\n+            <search_score name="expect" value="4.7"/>\n+         </search_hit>\n+      </search_result>\n+      </spectrum_query>\n+      <spectrum_query spectrum="mr176-BSA100fmole_BA3_01_8168.d.mgf.00100.00100.1" start_scan="100" end_scan="100" precursor_neutral_mass="996.6215" assumed_charge="1" index="97">\n+      <search_result>\n+         <search_hit hit_rank="1" peptide="VLMFCELP" peptide_prev_aa="R" peptide_next_aa="P" protein="tr|O70238|O70238_MOUSE" protein_descr="Homeobox protein PSX OS=Mus musculus GN=Rhox6 PE=2 SV=2" num_tot_proteins="1" num_matched_ions="4" tot_num_ions="14" calc_neutral_pep_mass="948.4447" massdiff="48.177" num_tol_term="1" num_missed_cleavages="0" is_rejected="0">\n+            <modification_info>\n+               <mod_aminoacid_mass position="3" mass="147.0354" />\n+            </modification_info>\n+            <search_score name="hyperscore" value="13.1"/>\n+            <search_score name="nextscore" value="11.1"/>\n+            <search_score 
name="bscore" value="8.6"/>\n+            <search_score name="yscore" value="10"/>\n+            <search_score name="cscore" value="0"/>\n+            <search_score name="zscore" value="0"/>\n+            <search_score name="ascore" value="0"/>\n+            <search_score name="xscore" value="0"/>\n+            <search_score name="expect" value="4"/>\n+         </search_hit>\n+      </search_result>\n+      </spectrum_query>\n+   </msms_run_summary>\n+</msms_pipeline_analysis>\n'
b
diff -r b793fe628648 -r d90c8bc10a9c test-data/mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph_protproph.prot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph_protproph.prot.xml Thu Mar 26 19:55:19 2015 -0400
[
b'@@ -0,0 +1,262 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+<protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v6.xsd" summary_xml="/Users/icooke/Desktop/iptest/mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph_protproph.prot.xml">\n+<protein_summary_header reference_database="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" residue_substitution_list="I -> L" source_files="/Users/icooke/Desktop/iptest/mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph.pep.xml" source_files_alt="/Users/icooke/Desktop/iptest/mr176-BSA100fmole_BA3_01_8168.d_tandem_pproph.pep.xml" min_peptide_probability="0.20" min_peptide_weight="0.50" num_predicted_correct_prots="14.1" num_input_1_spectra="0" num_input_2_spectra="34" num_input_3_spectra="0" num_input_4_spectra="0" num_input_5_spectra="0" initial_min_peptide_prob="0.05" total_no_spectrum_ids="21.1" sample_enzyme="trypsin">\n+<program_details analysis="proteinprophet" time="2014-06-24T07:06:03" version=" Insilicos_LabKey_C++ (TPP v4.7 POLAR VORTEX rev 0, Build 201405141111 (linux))">\n+<proteinprophet_details  occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" fpkm_flag="N" initial_peptide_wt_iters="1" nsp_distribution_iters="2" final_peptide_wt_iters="0" run_options="NOPLOT">\n+      <nsp_information neighboring_bin_smoothing="Y">\n+         <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_excl="0.10" pos_freq="0.293" neg_freq="0.305" pos_to_neg_ratio="0.96"/>\n+         <nsp_distribution bin_no="1" nsp_lower_bound_incl="0.10" nsp_upper_bound_excl="0.25" pos_freq="0.108" neg_freq="0.108" pos_to_neg_ratio="0.99" alt_pos_to_neg_ratio="0.81"/>\n+         <nsp_distribution bin_no="2" nsp_lower_bound_incl="0.25" nsp_upper_bound_excl="0.50" pos_freq="0.081" neg_freq="0.070" pos_to_neg_ratio="1.16" alt_pos_to_neg_ratio="0.81"/>\n+         <nsp_distribution 
bin_no="3" nsp_lower_bound_incl="0.50" nsp_upper_bound_excl="1.00" pos_freq="0.138" neg_freq="0.118" pos_to_neg_ratio="1.17"/>\n+         <nsp_distribution bin_no="4" nsp_lower_bound_incl="1.00" nsp_upper_bound_excl="2.00" pos_freq="0.184" neg_freq="0.176" pos_to_neg_ratio="1.05" alt_pos_to_neg_ratio="1.17"/>\n+         <nsp_distribution bin_no="5" nsp_lower_bound_incl="2.00" nsp_upper_bound_excl="5.00" pos_freq="0.086" neg_freq="0.107" pos_to_neg_ratio="0.81" alt_pos_to_neg_ratio="1.17"/>\n+         <nsp_distribution bin_no="6" nsp_lower_bound_incl="5.00" nsp_upper_bound_excl="15.00" pos_freq="0.055" neg_freq="0.061" pos_to_neg_ratio="0.90" alt_pos_to_neg_ratio="1.17"/>\n+         <nsp_distribution bin_no="7" nsp_lower_bound_incl="15.00" nsp_upper_bound_excl="inf" pos_freq="0.054" neg_freq="0.054" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.17"/>\n+      </nsp_information>\n+      <fpkm_information neighboring_bin_smoothing="Y">\n+         <fpkm_distribution bin_no="0" fpkm_lower_bound_excl="0.00" fpkm_upper_bound_incl="2.00" pos_freq="0.100" neg_freq="0.100" pos_to_neg_ratio="1.00"/>\n+         <fpkm_distribution bin_no="1" fpkm_lower_bound_excl="2.00" fpkm_upper_bound_incl="4.00" pos_freq="0.100" neg_freq="0.100" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.00"/>\n+         <fpkm_distribution bin_no="2" fpkm_lower_bound_excl="4.00" fpkm_upper_bound_incl="6.00" pos_freq="0.100" neg_freq="0.100" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.00"/>\n+         <fpkm_distribution bin_no="3" fpkm_lower_bound_excl="6.00" fpkm_upper_bound_incl="8.00" pos_freq="0.100" neg_freq="0.100" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.00"/>\n+         <fpkm_distribution bin_no="4" fpkm_lower_bound_excl="8.00" fpkm_upper_bound_incl="10.00" pos_freq="0.100" neg_freq="0.100" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.00"/>\n+         <fpkm_distribution bin_no="5" fpkm_lower_bound_excl="10.00" fpkm_upper_bound_incl="12.50" pos_freq="0.100" neg_freq="0.100" 
pos_to_neg_ratio="1.00" alt_pos_'..b'cription="Homeobox protein ceh-30 OS=Caenorhabditis elegans GN=ceh-30 PE=2 SV=2"/>\n+         <peptide peptide_sequence="RQATSGMDLLS" charge="2" initial_probability="0.1644" nsp_adjusted_probability="0.1591" fpkm_adjusted_probability="0.1591" weight="1.00" group_weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="1" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" max_fpkm="0.00" fpkm_bin="0" n_instances="1" exp_tot_instances="0.16" is_contributing_evidence="N" calc_neutral_pep_mass="1175.5607">\n+<modification_info modified_peptide="RQATSGM[147]DLLS">\n+<mod_aminoacid_mass position="7" mass="147.035400"/>\n+</modification_info>\n+         </peptide>\n+      </protein>\n+</protein_group>\n+<protein_group group_number="19" probability="0.0000">\n+      <protein protein_name="tr|O77024|O77024_EPHMU" n_indistinguishable_proteins="1" probability="0.0000" unique_stripped_peptides="ETEMEMK" group_sibling_id="a" total_number_peptides="0" confidence="0.0790">\n+         <parameter name="prot_length" value="166"/>\n+         <annotation protein_description="EmH-3 (Fragment) OS=Ephydatia muelleri GN=EmH-3 PE=3 SV=1"/>\n+         <peptide peptide_sequence="ETEMEMK" charge="2" initial_probability="0.0906" nsp_adjusted_probability="0.0874" fpkm_adjusted_probability="0.0874" weight="1.00" group_weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="1" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" max_fpkm="0.00" fpkm_bin="0" n_instances="1" exp_tot_instances="0.09" is_contributing_evidence="N" calc_neutral_pep_mass="892.3307">\n+<modification_info modified_peptide="E[111]TEM[147]EM[147]K">\n+<mod_aminoacid_mass position="1" mass="111.032000"/>\n+<mod_aminoacid_mass position="4" mass="147.035400"/>\n+<mod_aminoacid_mass position="6" mass="147.035400"/>\n+</modification_info>\n+         </peptide>\n+      </protein>\n+</protein_group>\n+<protein_group group_number="20" probability="0.0000">\n+      <protein 
protein_name="tr|Q23824|Q23824_HYDVD" n_indistinguishable_proteins="1" probability="0.0000" unique_stripped_peptides="SKEAEIEESVR" group_sibling_id="a" total_number_peptides="0" confidence="0.0908">\n+         <parameter name="prot_length" value="128"/>\n+         <annotation protein_description="Msh protein (Fragment) OS=Hydra viridissima GN=msh PE=2 SV=1"/>\n+         <peptide peptide_sequence="SKEAEIEESVR" charge="2" initial_probability="0.1145" nsp_adjusted_probability="0.1105" fpkm_adjusted_probability="0.1105" weight="1.00" group_weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" max_fpkm="0.00" fpkm_bin="0" n_instances="1" exp_tot_instances="0.11" is_contributing_evidence="N" calc_neutral_pep_mass="1257.6197">\n+         </peptide>\n+      </protein>\n+</protein_group>\n+<protein_group group_number="21" probability="0.0000">\n+      <protein protein_name="tr|Q9YH59|Q9YH59_CHICK" n_indistinguishable_proteins="1" probability="0.0000" unique_stripped_peptides="FMAPSSGMNMGGMGG" group_sibling_id="a" total_number_peptides="0" confidence="0.0611">\n+         <parameter name="prot_length" value="344"/>\n+         <annotation protein_description="Homeodomain protein NKx2.1 OS=Gallus gallus GN=NKx2.1 PE=2 SV=1"/>\n+         <peptide peptide_sequence="FMAPSSGMNMGGMGG" charge="2" initial_probability="0.0864" nsp_adjusted_probability="0.0833" fpkm_adjusted_probability="0.0833" weight="1.00" group_weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="1" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" max_fpkm="0.00" fpkm_bin="0" n_instances="1" exp_tot_instances="0.09" is_contributing_evidence="N" calc_neutral_pep_mass="1460.5197">\n+<modification_info modified_peptide="FM[147]APSSGMNM[147]GGM[147]GG">\n+<mod_aminoacid_mass position="2" mass="147.035400"/>\n+<mod_aminoacid_mass position="10" mass="147.035400"/>\n+<mod_aminoacid_mass position="13" 
mass="147.035400"/>\n+</modification_info>\n+         </peptide>\n+      </protein>\n+</protein_group>\n+</protein_summary>\n'