Repository 'idpqonvert'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/idpqonvert

Changeset 3:d1f7b2395bc2 (2016-12-15)
Previous changeset 2:e39c5664b04a (2014-09-30) Next changeset 4:055aaa665682 (2017-08-23)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/idpqonvert commit 0be0fd3f899ce64aa53e44117931fe1a7d8c52ab
modified:
README.md
added:
idpQonvertEmbedder.xml
idpqonvert.xml
macros.xml
test-data/201208-378803-cm.idpDB
test-data/201208-378803-embeddedGenes.idpDB
test-data/201208-378803-embeddedGenesAndQuantitation.idpDB
test-data/201208-378803-embeddedGenesAndScanTimes.idpDB
test-data/201208-378803-embeddedGenesAndSpectra.idpDB
test-data/201208-378803-embeddedGenesAndSpectra2.idpDB
test-data/201208-378803-msgf.idpDB
test-data/201208-378803-myrimatch.idpDB
test-data/input/201208-378803-cm.pep.xml
test-data/input/201208-378803-msgf.mzid
test-data/input/201208-378803-myrimatch.pepXML
test-data/input/201208-378803.idpDB
test-data/input/201208-378803.mzML
test-data/input/cow.protein.PRG2012-subset.fasta
removed:
test-data/.gitkeep
tool-data/.gitkeep
tools/idpqonvert.xml
tools/repository_dependencies.xml
tools/tool_dependencies.xml
b
diff -r e39c5664b04a -r d1f7b2395bc2 README.md
--- a/README.md Tue Sep 30 15:31:54 2014 -0400
+++ b/README.md Thu Dec 15 17:20:57 2016 -0500
[
@@ -1,7 +1,7 @@
 GalaxyP - idpQonvert
 ====================
 
-* Home: <https://bitbucket.org/galaxyp/idpqonvert>
+* Home: <https://github.com/galaxyproteomics/tools-galaxyp/>
 * Galaxy Tool Shed: <http://toolshed.g2.bx.psu.edu/view/galaxyp/idpqonvert>
 * Tool ID: `idpqonvert`
 
@@ -13,15 +13,15 @@
 
 See:
 
-* <http://fenchurch.mc.vanderbilt.edu/bumbershoot/idpicker/>
+* <http://fenchurch.mc.vanderbilt.edu/>
 
 
 GalaxyP Community
 -----------------
 
-Current governing community policies for [GalaxyP](https://bitbucket.org/galaxyp/) and other information can be found at:
+Current governing community policies for [GalaxyP](https://github.com/galaxyproteomics/) and other information can be found at:
 
-<https://bitbucket.org/galaxyp/galaxyp>
+<https://github.com/galaxyproteomics>
 
 
 License
@@ -39,7 +39,7 @@
 Contributing
 ------------
 
-Contributions to this repository are reviewed through pull requests. If you would like your work acknowledged, please also add yourself to the Authors section. If your pull request is accepted, you will also be acknowledged in <https://bitbucket.org/galaxyp/galaxyp/CONTRIBUTORS.md> unless you opt-out.
+Contributions to this repository are reviewed through pull requests. If you would like your work acknowledged, please also add yourself to the Authors section. If your pull request is accepted, you will also be acknowledged in <https://github.com/galaxyproteomics/tools-galaxyp/>
 
 
 Authors
@@ -47,5 +47,8 @@
 
 Authors and contributors:
 
+* Matt Chambers <matt.chambers42@gmail.com>
+  Vanderbilt University Medical Center
+
 * John Chilton <jmchilton@gmail.com>
-* Minnesota Supercomputing Institute, Univeristy of Minnesota
+  Minnesota Supercomputing Institute, University of Minnesota
b
diff -r e39c5664b04a -r d1f7b2395bc2 idpQonvertEmbedder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/idpQonvertEmbedder.xml Thu Dec 15 17:20:57 2016 -0500
[
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<tool id="idpqonvertEmbedder" name="idpEmbedder" version="@VERSION@.0">
+    <description>Embed human/mouse gene metadata into IDPicker files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <stdio> <!-- Fail the job on a non-zero exit code or on any output line starting with "Error:" -->
+        <exit_code range="1:" level="fatal" description="Job Failed" />
+        <regex match="(^|\n)Error:.*(\n|$)" source="both" level="fatal" description="idpQonvert reported an error" /> <!-- also matches an Error: on the first line, or on a last line without a trailing newline -->
+    </stdio>
+    <command>
+<![CDATA[
+        ## Embed into a private copy of the input; the copy is collected as the tool output via the name "output".
+        cp '$input' output &&
+        ln -s output output.idpDB &&
+        ln -s "\$(dirname "\$(which idpQonvert)")/gene2protein.db3" gene2protein.db3 &&
+        ## Link every raw file under its dataset name (presumably how idpQonvert finds spectrum sources; TODO confirm).
+        #if $use_raw_data_condition.use_raw_data
+            #for $i in $use_raw_data_condition.input_raw
+                ln -s '${i.file_name}' '${i.display_name}' &&
+            #end for
+        #end if
+
+        idpQonvert
+            -EmbedOnly true
+            -EmbedGeneMetadata $EmbedGeneMetadata
+            #if $use_raw_data_condition.use_raw_data
+                -EmbedSpectrumScanTimes $use_raw_data_condition.EmbedSpectrumScanTimes
+                -EmbedSpectrumSources $use_raw_data_condition.EmbedSpectrumSources
+                -QuantitationMethod $use_raw_data_condition.QuantitationMethod
+            #end if
+            output.idpDB
+]]>
+    </command>
+    <inputs>
+        <param name="input" type="data" format="idpdb" label="Input idpDB file" />
+        <param argument="-EmbedGeneMetadata" type="boolean" value="true" checked="true" label="Embed Gene Metadata?" help="Allows gene-centric analysis in IDPicker. The gene metadata embedding only works for human/mouse proteins from a RefSeq database. For best results, run idpQonvert with RefSeq no matter what database was used for the search: idpQonvert always remap the peptides anyway. Non-human/mouse proteins and any unmappable human/mouse proteins will get gene ids like &quot;Unmapped_&lt;protein accession&gt;&quot;. This option, run by itself, is very fast." /> <!-- checked sets the default state of a boolean param; value alone is not honoured by older Galaxy releases -->
+        <conditional name="use_raw_data_condition">
+            <param name="use_raw_data" type="boolean" label="Do you have the raw data (spectra)?" help="More embed options are available if the raw spectra are available." />
+            <when value="false"></when>
+            <when value="true">
+                <param name="input_raw" type="data" format="mzml,mzxml,mgf,ms2,mz5" label="Input raw MS files" multiple="true" />
+                <param argument="-EmbedSpectrumSources" type="boolean" value="false" label="Embed Spectrum Sources?" help="Allows visualizing peptide-spectrum-matches without downloading the raw data. Embedding spectra will greatly increase the size of the database, even though only spectra that passed the import FDR filter will be included. This option can take a LONG time to run." />
+                <param argument="-EmbedSpectrumScanTimes" type="boolean" value="false" label="Embed Spectrum Scan Times?" help="If the pepXML/mzIdentML file did not contain scan time (retention time) information, this will look up that information in the raw data. This option, run by itself, will take some time (it has to open every raw file)." />
+                <param argument="-QuantitationMethod" type="select" label="Quantitation Method" help="Enables quantitation methods other than spectral counting. For isobaric isotope labelling quantitation (iTRAQ/TMT) or intensity-based label-free quantitation (XIC), select the appropriate QuantitationMethod here. You have to keep iTRAQ/TMT and label-free data separate since you can only specify a single QuantitationMethod for the entire assembly. Like embedding spectrum sources, this option can take a LONG time to run, although not quite as long.">
+                    <option value="None" selected="true">None</option>
+                    <option value="LabelFree">Label-free (XIC)</option>
+                    <option value="ITRAQ4plex">iTRAQ 4-plex</option>
+                    <option value="ITRAQ8plex">iTRAQ 8-plex</option>
+                    <option value="TMT2plex">TMT 2-plex</option>
+                    <option value="TMT6plex">TMT 6-plex</option>
+                    <option value="TMT10plex">TMT 10-plex</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="idpdb" name="output" from_work_dir="output" />
+    </outputs>
+    <tests> <!-- Every expected output is compared with sim_size (delta 500000), not by content -->
+        <test> <!-- Gene embedding switched off: the expected file is the input idpDB itself -->
+            <param name="input" value="input/201208-378803.idpDB" />
+            <param name="EmbedGeneMetadata" value="false" />
+            <output name="output" file="input/201208-378803.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test> <!-- Gene metadata only, no raw data supplied -->
+            <param name="input" value="input/201208-378803.idpDB" />
+            <param name="EmbedGeneMetadata" value="true" />
+            <output name="output" file="201208-378803-embeddedGenes.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test> <!-- Raw mzML supplied: gene metadata plus spectrum scan times -->
+            <param name="input" value="input/201208-378803.idpDB" />
+            <param name="EmbedGeneMetadata" value="true" />
+            <param name="use_raw_data" value="true" /> 
+            <param name="input_raw" value="input/201208-378803.mzML" />
+            <param name="EmbedSpectrumSources" value="false" />
+            <param name="EmbedSpectrumScanTimes" value="true" />
+            <param name="QuantitationMethod" value="None" />
+            <output name="output" file="201208-378803-embeddedGenesAndScanTimes.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test> <!-- Raw mzML supplied: gene metadata plus embedded spectrum sources -->
+            <param name="input" value="input/201208-378803.idpDB" />
+            <param name="EmbedGeneMetadata" value="true" />
+            <param name="use_raw_data" value="true" /> 
+            <param name="input_raw" value="input/201208-378803.mzML" />
+            <param name="EmbedSpectrumSources" value="true" />
+            <param name="EmbedSpectrumScanTimes" value="false" />
+            <param name="QuantitationMethod" value="None" />
+            <output name="output" file="201208-378803-embeddedGenesAndSpectra.idpDB" compare="sim_size" delta="500000" />
+        </test>
+        <test> <!-- Raw mzML supplied: gene metadata, scan times and iTRAQ 4-plex quantitation -->
+            <param name="input" value="input/201208-378803.idpDB" />
+            <param name="EmbedGeneMetadata" value="true" />
+            <param name="use_raw_data" value="true" /> 
+            <param name="input_raw" value="input/201208-378803.mzML" />
+            <param name="EmbedSpectrumSources" value="false" />
+            <param name="EmbedSpectrumScanTimes" value="true" />
+            <param name="QuantitationMethod" value="ITRAQ4plex" />
+            <output name="output" file="201208-378803-embeddedGenesAndQuantitation.idpDB" compare="sim_size" delta="500000" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Embeds optional data and metadata into the IDPicker 3 idpDB files. This should be run after all idpDB files have been merged together (e.g. after idpAssemble).
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1021/pr900360j</citation>
+        <citation type="bibtex">@misc{toolsGalaxyP, author = {Chilton, J, Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
+                                      year = {2015}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
+    </citations>
+</tool>
b
diff -r e39c5664b04a -r d1f7b2395bc2 idpqonvert.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/idpqonvert.xml Thu Dec 15 17:20:57 2016 -0500
[
b'@@ -0,0 +1,207 @@\n+<?xml version="1.0"?>\n+<tool id="idpqonvert" name="idpQonvert" version="@VERSION@.0">\n+    <description>Prepare identification results for IDPicker</description>\n+    <macros>\n+        <import>macros.xml</import>\n+        <xml name="normalizationOptions">\n+            <option value="off" selected="true">None</option>\n+            <option value="quantile">Quantile</option>\n+            <option value="linear">Linear</option>\n+        </xml>\n+    </macros>\n+    <expand macro="requirements" />\n+    <stdio>\n+        <exit_code range="1:" level="fatal" description="Job Failed" />\n+        <regex match="^Error:.*$" source="both" level="fatal" />\n+    </stdio>\n+    <command>\n+<![CDATA[\n+        #set $db_name = $ProteinDatabase.display_name.replace(".fasta", "") + ".fasta"\n+        #set $input_name = $input.display_name\n+        #set $output_name = $input_name.split(".")[0] + ".idpDB"\n+        ln -s \'$input\' \'${input_name}\' &&\n+        ln -s \'$ProteinDatabase\' \'${db_name}\' &&\n+\n+        idpQonvert\n+            -DecoyPrefix \'${DecoyPrefix}\'\n+            -WriteQonversionDetails true\n+            -ProteinDatabase \'${db_name}\'\n+            -MaxFDR $MaxFDR\n+            -EmbedSpectrumScanTimes false\n+            -EmbedSpectrumSources false\n+            #set $score_info = []\n+            #for $engine in ["myrimatch:mvh", "xcorr", "sequest:xcorr", "sequest:deltacn", "mascot:score", "x!tandem:expect", "x!tandem:hyperscore", "ms-gf:specevalue", "comet:xcorr", "comet:deltacn", "comet:expect"]\n+                #set $name = $engine.replace(":", "_").replace("!","")\n+                #set $weight = $getVar("scoring." + $name + "." + $name + "_weight.value")\n+                #set $type = $getVar("scoring." + $name + "." 
+ $name + "_type.value")\n+                #set $score_info = $score_info + [str($weight) + " " + $type + " " + $engine]\n+                #continue\n+            #end for\n+            #for $i, $s in enumerate($scoring.extra_score)\n+                #set $name = $s.extra_score_name\n+                #set $type = $s.extra_score_normalization\n+                #set $weight = $s.extra_score_weight\n+                #set $score_info = $score_info + [str($weight) + " " + $type + " " + $name]\n+            #end for\n+            #set $score_str = "; ".join($score_info)\n+            -ScoreInfo \'$score_str\'\n+            -MaxImportFDR $advanced_options.MaxImportFDR\n+            -MaxResultRank $advanced_options.MaxResultRank\n+            -RerankMatches $advanced_options.RerankMatches\n+            -Gamma $advanced_options.Gamma\n+            -Nu $advanced_options.Nu\n+            -PolynomialDegree $advanced_options.PolynomialDegree\n+            -TruePositiveThreshold $advanced_options.TruePositiveThreshold\n+            -MaxTrainingRank $advanced_options.MaxTrainingRank\n+            -SVMType $advanced_options.SVMType\n+            -Kernel $advanced_options.Kernel\n+            -ChargeStateHandling $advanced_options.ChargeStateHandling\n+            -TerminalSpecificityHandling $advanced_options.TerminalSpecificityHandling\n+            -MissedCleavagesHandling $advanced_options.MissedCleavagesHandling\n+            -QonverterMethod $advanced_options.QonverterMethod\n+            -MinPartitionSize $advanced_options.MinPartitionSize\n+            \'${input_name}\' &&\n+        mv \'$output_name\' output\n+]]>\n+    </command>\n+    <inputs>\n+        <param name="input" type="data" format="mzid,raw_pepxml" label="Input mzIdentML/pepXML file(s)" />\n+        <param argument="-ProteinDatabase" format="fasta" type="data" label="Protein Database" help="The FASTA protein database to map peptides against. 
Only peptides from target proteins will be mapped; decoy peptides will be stored as-is in the idpDB." />\n+        <param argument="-DecoyPrefix" type="text" label="Decoy Prefix" help="The prefix to look for on protein accessions to determine whether it is a target or decoy." />\n+        <param argument="-MaxFDR" type="float" label="Max FDR" min="0.0000001" value'..b'     <param name="ms-gf_specevalue_type" label="MS-GF+ (specevalue) Normalization" type="select"><expand macro="normalizationOptions" /></param>\n+            </section>\n+            <section name="comet_xcorr" title="Comet (xcorr) Score/Weight" expanded="False">\n+                <param name="comet_xcorr_weight" label="Comet (xcorr) Weight" type="float" min="-1" max="1" value="1" />\n+                <param name="comet_xcorr_type" label="Comet (xcorr) Normalization" type="select"><expand macro="normalizationOptions" /></param>\n+            </section>\n+            <section name="comet_deltacn" title="Comet (deltacn) Score/Weight" expanded="False">\n+                <param name="comet_deltacn_weight" label="Comet (deltacn) Weight" type="float" min="-1" max="1" value="1" />\n+                <param name="comet_deltacn_type" label="Comet (deltacn) Normalization" type="select"><expand macro="normalizationOptions" /></param>\n+            </section>\n+            <section name="comet_expect" title="Comet (expect) Score/Weight" expanded="False">\n+                <param name="comet_expect_weight" label="Comet (expect) Weight" type="float" min="-1" max="1" value="-1" />\n+                <param name="comet_expect_type" label="Comet (expect) Normalization" type="select"><expand macro="normalizationOptions" /></param>\n+            </section>\n+            <repeat name="extra_score" title="Extra Search Engine Score/Weight">\n+                <param name="extra_score_name" label="Name" type="text" help="For scores in the PSI-MS controlled vocabulary, this field must match the name for that score; for other 
scores, the UserParam name or pepXML name will suffice." value="SearchEngineName" />\n+                <param name="extra_score_normalization" label="Normalization Method" type="select"><expand macro="normalizationOptions" /></param>\n+                <param name="extra_score_weight" label="Weight" type="float" help="Negative weights indicate that a lower score is better (e.g. expectation value, p-value)." min="-1" max="1" value="1" />\n+            </repeat>\n+        </section>\n+    </inputs>\n+    <outputs>\n+        <data format="idpdb" name="output" from_work_dir="output" />\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="input" value="input/201208-378803-myrimatch.pepXML" />\n+            <param name="ProteinDatabase" value="input/cow.protein.PRG2012-subset.fasta" />\n+            <param name="MaxFDR" value="0.05" />\n+            <output name="output" file="201208-378803-myrimatch.idpDB" compare="sim_size" delta="500000" />\n+        </test>\n+        <test>\n+            <param name="input" value="input/201208-378803-cm.pep.xml" />\n+            <param name="ProteinDatabase" value="input/cow.protein.PRG2012-subset.fasta" />\n+            <param name="MaxFDR" value="0.05" />\n+            <output name="output" file="201208-378803-cm.idpDB" compare="sim_size" delta="500000" />\n+        </test>\n+        <test>\n+            <param name="input" value="input/201208-378803-msgf.mzid" />\n+            <param name="ProteinDatabase" value="input/cow.protein.PRG2012-subset.fasta" />\n+            <param name="MaxFDR" value="0.05" />\n+            <output name="output" file="201208-378803-msgf.idpDB" compare="sim_size" delta="500000" />\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+**What it does**\n+\n+Creates IDPicker 3 idpDB files from pepXML and mzIdentML files. 
The input files should be unfiltered and must come from a target/decoy database search.\n+]]>\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1021/pr900360j</citation>\n+        <citation type="bibtex">@misc{toolsGalaxyP, author = {Chilton, J, Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},\n+                                      year = {2015}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->\n+    </citations>\n+</tool>\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Dec 15 17:20:57 2016 -0500
b
@@ -0,0 +1,9 @@
+<macros>
+    <token name="@VERSION@">3.0.10246</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3_0_10246">bumbershoot</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+</macros>
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-cm.idpDB
b
Binary file test-data/201208-378803-cm.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-embeddedGenes.idpDB
b
Binary file test-data/201208-378803-embeddedGenes.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-embeddedGenesAndQuantitation.idpDB
b
Binary file test-data/201208-378803-embeddedGenesAndQuantitation.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-embeddedGenesAndScanTimes.idpDB
b
Binary file test-data/201208-378803-embeddedGenesAndScanTimes.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-embeddedGenesAndSpectra.idpDB
b
Binary file test-data/201208-378803-embeddedGenesAndSpectra.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-embeddedGenesAndSpectra2.idpDB
b
Binary file test-data/201208-378803-embeddedGenesAndSpectra2.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-msgf.idpDB
b
Binary file test-data/201208-378803-msgf.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/201208-378803-myrimatch.idpDB
b
Binary file test-data/201208-378803-myrimatch.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/input/201208-378803-cm.pep.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/201208-378803-cm.pep.xml Thu Dec 15 17:20:57 2016 -0500
[
b'@@ -0,0 +1,10303 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+ <msms_pipeline_analysis date="2014-09-19T11:13:55" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v117.xsd" summary_xml="C:\\idpicker\\branches\\IDPicker-3\\TestData\\201208-378803.pep.xml">\n+ <msms_run_summary base_name="C:\\idpicker\\branches\\IDPicker-3\\TestData\\201208-378803" msManufacturer="unknown" msModel="unknown" raw_data_type="raw" raw_data=".mzML">\n+ <sample_enzyme name="Trypsin/P">\n+  <specificity cut="KR" no_cut="-" sense="C"/>\n+ </sample_enzyme>\n+ <search_summary base_name="C:\\idpicker\\branches\\IDPicker-3\\TestData\\201208-378803" search_engine="Comet" search_engine_version="2014.02 rev. 0" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">\n+  <search_database local_path="cow.protein.PRG2012-subset.fasta" type="AA"/>\n+  <enzymatic_search_constraint enzyme="Trypsin/P" max_num_internal_cleavages="2" min_number_termini="1"/>\n+  <aminoacid_modification aminoacid="M" massdiff="15.994900" mass="147.035385" variable="Y" symbol="*"/>\n+  <aminoacid_modification aminoacid="Q" massdiff="-17.026000" mass="111.032578" variable="Y" symbol="#"/>\n+  <aminoacid_modification aminoacid="Q" massdiff="0.984016" mass="129.042594" variable="Y" symbol="@"/>\n+  <aminoacid_modification aminoacid="N" massdiff="0.984016" mass="115.026943" variable="Y" symbol="@"/>\n+  <aminoacid_modification aminoacid="C" massdiff="58.005480" mass="161.014665" variable="N"/>\n+  <parameter name="# comet_version " value="2014.02"/>\n+  <parameter name="activation_method" value="ALL"/>\n+  <parameter name="add_A_alanine" value="0.000000"/>\n+  <parameter name="add_B_user_amino_acid" value="0.000000"/>\n+  <parameter name="add_C_cysteine" value="58.005480"/>\n+  <parameter name="add_Cterm_peptide" value="0.000000"/>\n+  <parameter 
name="add_Cterm_protein" value="0.000000"/>\n+  <parameter name="add_D_aspartic_acid" value="0.000000"/>\n+  <parameter name="add_E_glutamic_acid" value="0.000000"/>\n+  <parameter name="add_F_phenylalanine" value="0.000000"/>\n+  <parameter name="add_G_glycine" value="0.000000"/>\n+  <parameter name="add_H_histidine" value="0.000000"/>\n+  <parameter name="add_I_isoleucine" value="0.000000"/>\n+  <parameter name="add_J_user_amino_acid" value="0.000000"/>\n+  <parameter name="add_K_lysine" value="0.000000"/>\n+  <parameter name="add_L_leucine" value="0.000000"/>\n+  <parameter name="add_M_methionine" value="0.000000"/>\n+  <parameter name="add_N_asparagine" value="0.000000"/>\n+  <parameter name="add_Nterm_peptide" value="0.000000"/>\n+  <parameter name="add_Nterm_protein" value="0.000000"/>\n+  <parameter name="add_O_ornithine" value="0.000000"/>\n+  <parameter name="add_P_proline" value="0.000000"/>\n+  <parameter name="add_Q_glutamine" value="0.000000"/>\n+  <parameter name="add_R_arginine" value="0.000000"/>\n+  <parameter name="add_S_serine" value="0.000000"/>\n+  <parameter name="add_T_threonine" value="0.000000"/>\n+  <parameter name="add_U_user_amino_acid" value="0.000000"/>\n+  <parameter name="add_V_valine" value="0.000000"/>\n+  <parameter name="add_W_tryptophan" value="0.000000"/>\n+  <parameter name="add_X_user_amino_acid" value="0.000000"/>\n+  <parameter name="add_Y_tyrosine" value="0.000000"/>\n+  <parameter name="add_Z_user_amino_acid" value="0.000000"/>\n+  <parameter name="allowed_missed_cleavage" value="2"/>\n+  <parameter name="clear_mz_range" value="0.000000 0.000000"/>\n+  <parameter name="clip_nterm_methionine" value="1"/>\n+  <parameter name="database_name" value="cow.protein.PRG2012-subset.fasta"/>\n+  <parameter name="decoy_prefix" value="XXX_"/>\n+  <parameter name="decoy_search" value="1"/>\n+  <parameter name="digest_mass_range" value="600.000000 5000.000000"/>\n+  <parameter name="fragment_bin_offset" value="0.020000"/>\n+  <parameter 
name="fragment_bin_tol" value="0.020000"/>\n+  <parameter name="isotope_error" value="1"/>\n'..b'="108" precursor_neutral_mass="1162.515574" assumed_charge="2" index="216" retention_time_sec="1070.9">\n+  <search_result>\n+   <search_hit hit_rank="1" peptide="ISDMNQNIGAT" peptide_prev_aa="K" peptide_next_aa="E" protein="XXX_gi|528944678|ref|XP_005204735.1|" num_tot_proteins="3" num_matched_ions="4" tot_num_ions="20" calc_neutral_pep_mass="1163.512853" massdiff="-0.997279" num_tol_term="1" num_missed_cleavages="0" num_matched_peptides="562">\n+    <modification_info modified_peptide="ISDMN[115]QNIGAT">\n+     <mod_aminoacid_mass position="5" mass="115.026943"/>\n+    </modification_info>\n+    <search_score name="xcorr" value="0.721"/>\n+    <search_score name="deltacn" value="0.290"/>\n+    <search_score name="deltacnstar" value="0.063"/>\n+    <search_score name="spscore" value="28.2"/>\n+    <search_score name="sprank" value="1"/>\n+    <search_score name="expect" value="1.00E+000"/>\n+   </search_hit>\n+   <search_hit hit_rank="2" peptide="ISDMNQNIGAT" peptide_prev_aa="K" peptide_next_aa="E" protein="XXX_gi|528944678|ref|XP_005204735.1|" num_tot_proteins="3" num_matched_ions="4" tot_num_ions="20" calc_neutral_pep_mass="1163.512853" massdiff="-0.997279" num_tol_term="1" num_missed_cleavages="0" num_matched_peptides="562">\n+    <modification_info modified_peptide="ISDMNQ[129]NIGAT">\n+     <mod_aminoacid_mass position="6" mass="129.042594"/>\n+    </modification_info>\n+    <search_score name="xcorr" value="0.675"/>\n+    <search_score name="deltacn" value="0.242"/>\n+    <search_score name="deltacnstar" value="0.000"/>\n+    <search_score name="spscore" value="28.2"/>\n+    <search_score name="sprank" value="1"/>\n+    <search_score name="expect" value="1.00E+000"/>\n+   </search_hit>\n+   <search_hit hit_rank="3" peptide="ISDMNQNIGAT" peptide_prev_aa="K" peptide_next_aa="E" protein="XXX_gi|528944678|ref|XP_005204735.1|" num_tot_proteins="3" num_matched_ions="4" 
tot_num_ions="20" calc_neutral_pep_mass="1163.512853" massdiff="-0.997279" num_tol_term="1" num_missed_cleavages="0" num_matched_peptides="562">\n+    <modification_info modified_peptide="ISDMNQN[115]IGAT">\n+     <mod_aminoacid_mass position="7" mass="115.026943"/>\n+    </modification_info>\n+    <search_score name="xcorr" value="0.675"/>\n+    <search_score name="deltacn" value="0.242"/>\n+    <search_score name="deltacnstar" value="0.000"/>\n+    <search_score name="spscore" value="28.2"/>\n+    <search_score name="sprank" value="1"/>\n+    <search_score name="expect" value="1.00E+000"/>\n+   </search_hit>\n+   <search_hit hit_rank="4" peptide="VIDEGLMDEK" peptide_prev_aa="E" peptide_next_aa="L" protein="gi|528944676|ref|XP_005204734.1|" num_tot_proteins="1" num_matched_ions="1" tot_num_ions="18" calc_neutral_pep_mass="1163.537991" massdiff="-1.022417" num_tol_term="1" num_missed_cleavages="0" num_matched_peptides="562">\n+    <modification_info modified_peptide="VIDEGLM[147]DEK">\n+     <mod_aminoacid_mass position="7" mass="147.035385"/>\n+    </modification_info>\n+    <search_score name="xcorr" value="0.512"/>\n+    <search_score name="deltacn" value="0.077"/>\n+    <search_score name="deltacnstar" value="0.000"/>\n+    <search_score name="spscore" value="3.7"/>\n+    <search_score name="sprank" value="3"/>\n+    <search_score name="expect" value="2.60E+000"/>\n+   </search_hit>\n+   <search_hit hit_rank="5" peptide="ISDMNQNIGAT" peptide_prev_aa="K" peptide_next_aa="E" protein="XXX_gi|528944678|ref|XP_005204735.1|" num_tot_proteins="3" num_matched_ions="3" tot_num_ions="20" calc_neutral_pep_mass="1162.528837" massdiff="-0.013263" num_tol_term="1" num_missed_cleavages="0" num_matched_peptides="562">\n+    <search_score name="xcorr" value="0.473"/>\n+    <search_score name="deltacn" value="1.000"/>\n+    <search_score name="deltacnstar" value="0.000"/>\n+    <search_score name="spscore" value="15.4"/>\n+    <search_score name="sprank" value="2"/>\n+    
<search_score name="expect" value="4.02E+000"/>\n+   </search_hit>\n+  </search_result>\n+ </spectrum_query>\n+ </msms_run_summary>\n+</msms_pipeline_analysis>\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/input/201208-378803-msgf.mzid
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/201208-378803-msgf.mzid Thu Dec 15 17:20:57 2016 -0500
[
b'@@ -0,0 +1,4631 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+<MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-05T13:19:45" >\n+<cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">\n+    <cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>\n+    <cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>\n+    <cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>\n+</cvList>\n+<AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">\n+    <AnalysisSoftware version="Beta (v10072)" name="MS-GF+" id="ID_software">\n+        <SoftwareName>\n+            <cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>\n+        </SoftwareName>\n+    </AnalysisSoftware>\n+</AnalysisSoftwareList>\n+<SequenceCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">\n+    <DBSequence accession="gi|30794280|ref|NP_851335.1|" searchDatabase_ref="SearchDB_1" length="607" id="DBSeq43937">\n+        <cvParam accession="MS:1001088" cvRef="PSI-MS" value="gi|30794280|ref|NP_851335.1| serum albumin precursor [Bos taurus]" name="protein description"/>\n+    </DBSequence>\n+    <DBSequence accession="gi|528993971|ref|XP_005219929.1|" searchDatabase_ref="SearchDB_1" length="768" id="DBSeq31093">\n+        <cvParam accession="MS:1001088" cvRef="PSI-MS" value="gi|528993971|ref|XP_005219929.1| PREDICTED: lactoperoxidase isoform X1 [Bos taurus]" name="protein description"/>\n+    </DBSequence>\n+    <DBSequence accession="gi|528993973|ref|XP_005219930.1|" searchDatabase_ref="SearchDB_1" length="712" id="DBSeq31862">\n+        <cvParam 
accession="MS:1001088" cvRef="PSI-MS" value="gi|528993973|ref|XP_005219930.1| PREDICTED: lactoperoxidase isoform X2 [Bos taurus]" name="protein description"/>\n+    </DBSequence>\n+    <DBSequence accession="gi|528993975|ref|XP_005219931.1|" searchDatabase_ref="SearchDB_1" length="685" id="DBSeq32575">\n+        <cvParam accession="MS:1001088" cvRef="PSI-MS" value="gi|528993975|ref|XP_005219931.1| PREDICTED: lactoperoxidase isoform X3 [Bos taurus]" name="protein description"/>\n+    </DBSequence>\n+    <DBSequence accession="gi|27806851|ref|NP_776358.1|" searchDatabase_ref="SearchDB_1" length="712" id="DBSeq47665">\n+        <cvParam accession="MS:1001088" cvRef="PSI-MS" value="gi|27806851|ref|NP_776358.1| lactoperoxidase precursor [Bos taurus]" name="protein description"/>\n+    </DBSequence>\n+    <DBSequence accession="gi|27806963|ref|NP_776953.1|" searchDatabase_ref="SearchDB_1" length="222" id="DBSeq46622">\n+        <cvParam accession="MS:1001088" cvRef="PSI-MS" value="gi|27806963|ref|NP_776953.1| alpha-S2-casein precursor [Bos taurus]" name="protein description"/>\n+    </DBSequence>\n+    <DBSequence accession="XXX_gi|528968104|ref|XP_005212567.1|" searchDatabase_ref="SearchDB_1" length="815" id="DBSeq83632"/>\n+    <DBSequence accession="XXX_gi|528968106|ref|XP_005212568.1|" searchDatabase_ref="SearchDB_1" length="811" id="DBSeq84448"/>\n+    <DBSequence accession="XXX_gi|528968108|ref|XP_005212569.1|" searchDatabase_ref="SearchDB_1" length="766" id="DBSeq85260"/>\n+    <DBSequence accession="XXX_gi|115497338|ref|NP_001069884.1|" searchDatabase_ref="SearchDB_1" length="626" id="DBSeq93486"/>\n+    <DBSequence accession="XXX_gi|528944676|ref|XP_005204734.1|" searchDatabase_ref="SearchDB_1" length="7610" id="DBSeq65480"/>\n+    <DBSequence accession="XXX_gi|528944678|ref|XP_005204735.1|" searchDatabase_ref="SearchDB_1" length="5498" id="DBSeq73091"/>\n+    <DBSequence accession="XXX_gi|219804516|ref|NP_001137332.1|" searchDatabase_ref="SearchDB_1" 
length="5422" id="DBSeq94489"/>\n+    <DBSequence accession="gi|528944676'..b'lue="40" name="MS-GF:DeNovoScore"/>\n+                    <cvParam accession="MS:1002052" cvRef="PSI-MS" value="0.0031106102" name="MS-GF:SpecEValue"/>\n+                    <cvParam accession="MS:1002053" cvRef="PSI-MS" value="123.513" name="MS-GF:EValue"/>\n+                    <cvParam accession="MS:1002054" cvRef="PSI-MS" value="1.0" name="MS-GF:QValue"/>\n+                    <cvParam accession="MS:1002055" cvRef="PSI-MS" value="1.0" name="MS-GF:PepQValue"/>\n+                    <userParam value="1" name="IsotopeError"/>\n+                    <userParam value="HCD" name="AssumedDissociationMethod"/>\n+                    <userParam value="0.02173913" name="ExplainedIonCurrentRatio"/>\n+                    <userParam value="0.0" name="NTermIonCurrentRatio"/>\n+                    <userParam value="0.02173913" name="CTermIonCurrentRatio"/>\n+                    <userParam value="138.0" name="MS2IonCurrent"/>\n+                    <userParam value="1" name="NumMatchedMainIons"/>\n+                    <userParam value="5.6731687" name="MeanErrorAll"/>\n+                    <userParam value="0.0" name="StdevErrorAll"/>\n+                    <userParam value="5.6731687" name="MeanErrorTop7"/>\n+                    <userParam value="0.0" name="StdevErrorTop7"/>\n+                    <userParam value="-5.6731687" name="MeanRelErrorAll"/>\n+                    <userParam value="0.0" name="StdevRelErrorAll"/>\n+                    <userParam value="-5.6731687" name="MeanRelErrorTop7"/>\n+                    <userParam value="0.0" name="StdevRelErrorTop7"/>\n+                </SpectrumIdentificationItem>\n+                <SpectrumIdentificationItem passThreshold="true" rank="3" peptide_ref="Pep123" calculatedMassToCharge="677.2850341796875" experimentalMassToCharge="677.2626342773438" chargeState="2" id="SII_48_3">\n+                    <PeptideEvidenceRef 
peptideEvidence_ref="PepEv_66843_123_1364"/>\n+                    <PeptideEvidenceRef peptideEvidence_ref="PepEv_74384_123_1294"/>\n+                    <PeptideEvidenceRef peptideEvidence_ref="PepEv_95782_123_1294"/>\n+                    <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-42" name="MS-GF:RawScore"/>\n+                    <cvParam accession="MS:1002050" cvRef="PSI-MS" value="40" name="MS-GF:DeNovoScore"/>\n+                    <cvParam accession="MS:1002052" cvRef="PSI-MS" value="0.0031106102" name="MS-GF:SpecEValue"/>\n+                    <cvParam accession="MS:1002053" cvRef="PSI-MS" value="123.513" name="MS-GF:EValue"/>\n+                    <cvParam accession="MS:1002054" cvRef="PSI-MS" value="1.0" name="MS-GF:QValue"/>\n+                    <cvParam accession="MS:1002055" cvRef="PSI-MS" value="1.0" name="MS-GF:PepQValue"/>\n+                    <userParam value="0" name="IsotopeError"/>\n+                    <userParam value="HCD" name="AssumedDissociationMethod"/>\n+                    <userParam value="0.02173913" name="ExplainedIonCurrentRatio"/>\n+                    <userParam value="0.0" name="NTermIonCurrentRatio"/>\n+                    <userParam value="0.02173913" name="CTermIonCurrentRatio"/>\n+                    <userParam value="138.0" name="MS2IonCurrent"/>\n+                    <userParam value="1" name="NumMatchedMainIons"/>\n+                    <userParam value="5.6731687" name="MeanErrorAll"/>\n+                    <userParam value="0.0" name="StdevErrorAll"/>\n+                    <userParam value="5.6731687" name="MeanErrorTop7"/>\n+                    <userParam value="0.0" name="StdevErrorTop7"/>\n+                    <userParam value="-5.6731687" name="MeanRelErrorAll"/>\n+                    <userParam value="0.0" name="StdevRelErrorAll"/>\n+                    <userParam value="-5.6731687" name="MeanRelErrorTop7"/>\n+                    <userParam value="0.0" name="StdevRelErrorTop7"/>\n+                
</SpectrumIdentificationItem>\n+            </SpectrumIdentificationResult>\n+        </SpectrumIdentificationList>\n+    </AnalysisData>\n+</DataCollection>\n+</MzIdentML>\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/input/201208-378803-myrimatch.pepXML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/201208-378803-myrimatch.pepXML Thu Dec 15 17:20:57 2016 -0500
[
b'@@ -0,0 +1,20531 @@\n+<?xml version="1.0" encoding="ISO-8859-1"?>\n+<msms_pipeline_analysis date="2015-09-03T15:49:50" summary_xml="201208-378803.pepXML" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v117.xsd">\n+  <analysis_summary analysis="MyriMatch" version="2.2.8634" time="2015-09-03T15:49:50"/>\n+  <msms_run_summary base_name="201208-378803" raw_data_type="" raw_data="">\n+    <sample_enzyme name="Trypsin/P" independent="false" fidelity="semispecific">\n+      <specificity sense="C" cut="KR" no_cut="" min_spacing="1"/>\n+    </sample_enzyme>\n+    <search_summary base_name="201208-378803" search_engine="MyriMatch" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" out_data_type="" out_data="">\n+      <search_database local_path="cow.protein.PRG2012-subset.fasta" database_name="SDB" type="AA"/>\n+      <enzymatic_search_constraint enzyme="Trypsin/P" max_num_internal_cleavages="2" min_number_termini="1"/>\n+      <aminoacid_modification aminoacid="M" massdiff="15.9949" mass="147.0353846062" variable="Y" description="Oxidation"/>\n+      <aminoacid_modification aminoacid="Q" massdiff="-17.026" mass="111.0325775114" peptide_terminus="n" variable="Y"/>\n+      <aminoacid_modification aminoacid="Q" massdiff="0.984016" mass="129.0425935114" variable="Y" description="Deamidated"/>\n+      <aminoacid_modification aminoacid="N" massdiff="0.984016" mass="115.0269434472" variable="Y" description="Deamidated"/>\n+      <aminoacid_modification aminoacid="C" massdiff="58.00548" mass="161.0146644778" variable="N" description="Carboxymethyl"/>\n+      <parameter name="Config: AvgPrecursorMzTolerance" value="1.5mz"/>\n+      <parameter name="Config: ClassSizeMultiplier" value="2"/>\n+      <parameter name="Config: CleavageRules" value="Trypsin/P"/>\n+      <parameter name="Config: ComputeXCorr" value="1"/>\n+      
<parameter name="Config: DecoyPrefix" value="XXX_"/>\n+      <parameter name="Config: DynamicMods" value="M * 15.9949 (Q * -17.026 [QN] * 0.984016"/>\n+      <parameter name="Config: EstimateSearchTimeOnly" value="0"/>\n+      <parameter name="Config: FragmentMzTolerance" value="0.5mz"/>\n+      <parameter name="Config: FragmentationAutoRule" value="1"/>\n+      <parameter name="Config: FragmentationRule" value="cid"/>\n+      <parameter name="Config: KeepUnadjustedPrecursorMz" value="0"/>\n+      <parameter name="Config: MaxDynamicMods" value="2"/>\n+      <parameter name="Config: MaxFragmentChargeState" value="0"/>\n+      <parameter name="Config: MaxMissedCleavages" value="2"/>\n+      <parameter name="Config: MaxPeakCount" value="300"/>\n+      <parameter name="Config: MaxPeptideLength" value="75"/>\n+      <parameter name="Config: MaxPeptideMass" value="10000"/>\n+      <parameter name="Config: MaxPeptideVariants" value="1000000"/>\n+      <parameter name="Config: MaxResultRank" value="3"/>\n+      <parameter name="Config: MinMatchedFragments" value="5"/>\n+      <parameter name="Config: MinPeptideLength" value="5"/>\n+      <parameter name="Config: MinPeptideMass" value="0"/>\n+      <parameter name="Config: MinResultScore" value="9.9999999999999995e-08"/>\n+      <parameter name="Config: MinTerminiCleavages" value="1"/>\n+      <parameter name="Config: MonoPrecursorMzTolerance" value="50ppm"/>\n+      <parameter name="Config: MonoisotopeAdjustmentSet" value="[-1,2] "/>\n+      <parameter name="Config: NumBatches" value="50"/>\n+      <parameter name="Config: NumChargeStates" value="5"/>\n+      <parameter name="Config: NumIntensityClasses" value="3"/>\n+      <parameter name="Config: NumMzFidelityClasses" value="3"/>\n+      <parameter name="Config: OutputFormat" value="pepXML"/>\n+      <parameter name="Config: OutputSuffix" value=""/>\n+      <parameter name="Config: PrecursorMzToleranceRule" value="mono"/>\n+      <parameter name="Config: 
PreferIntenseComplements" value="1"/>\n+      <parameter name="Config: Pr'..b'="180">\n+          <modification_info>\n+            <mod_aminoacid_mass position="12" mass="147.0353846062"/>\n+            <mod_aminoacid_mass position="14" mass="161.0146644778"/>\n+            <mod_aminoacid_mass position="24" mass="129.0425935114"/>\n+          </modification_info>\n+          <search_score name="mvh" value="21.662729207058"/>\n+          <search_score name="mzFidelity" value="10.946707360696"/>\n+          <search_score name="xcorr" value="2.2375717592793039"/>\n+        </search_hit>\n+        <search_hit hit_rank="2" peptide="EMLIAHSQPAEMSCGKGESEKLSQIE" peptide_prev_aa="K" peptide_next_aa="N" protein="gi|528944676|ref|XP_005204734.1|" num_tot_proteins="1" calc_neutral_pep_mass="2905.3143452141" massdiff="0.03407514139" num_tol_term="1" num_missed_cleavages="2" num_matched_ions="21" tot_num_ions="180">\n+          <modification_info>\n+            <mod_aminoacid_mass position="12" mass="147.0353846062"/>\n+            <mod_aminoacid_mass position="14" mass="161.0146644778"/>\n+          </modification_info>\n+          <search_score name="mvh" value="18.555164806005"/>\n+          <search_score name="mzFidelity" value="9.229647340988"/>\n+          <search_score name="xcorr" value="1.9522110076248838"/>\n+        </search_hit>\n+        <search_hit hit_rank="3" peptide="LLLNTSKRIMDDVETSSLHLDESFK" peptide_prev_aa="T" peptide_next_aa="L" protein="XXX_gi|115497338|ref|NP_001069884.1|" num_tot_proteins="4" calc_neutral_pep_mass="2906.4695232566" massdiff="0.18058826829" num_tol_term="1" num_missed_cleavages="2" num_matched_ions="20" tot_num_ions="171">\n+          <alternative_protein protein="XXX_gi|528968104|ref|XP_005212567.1|"/>\n+          <alternative_protein protein="XXX_gi|528968106|ref|XP_005212568.1|"/>\n+          <alternative_protein protein="XXX_gi|528968108|ref|XP_005212569.1|"/>\n+          <modification_info>\n+            <mod_aminoacid_mass 
position="10" mass="147.0353846062"/>\n+          </modification_info>\n+          <search_score name="mvh" value="17.95496665471"/>\n+          <search_score name="mzFidelity" value="6.815403335045"/>\n+          <search_score name="xcorr" value="2.564456180282789"/>\n+        </search_hit>\n+        <search_hit hit_rank="4" peptide="QIFVKTLTGKTITLEVEPSDTIENVK" peptide_prev_aa="M" peptide_next_aa="A" protein="gi|115496708|ref|NP_001069831.1|" num_tot_proteins="4" calc_neutral_pep_mass="2904.5583369414" massdiff="0.28673178429" num_tol_term="2" num_missed_cleavages="2" num_matched_ions="20" tot_num_ions="179">\n+          <alternative_protein protein="gi|27807503|ref|NP_777203.1|"/>\n+          <alternative_protein protein="gi|528968221|ref|XP_005212615.1|"/>\n+          <alternative_protein protein="gi|528995063|ref|XP_005220352.1|"/>\n+          <modification_info>\n+            <mod_aminoacid_mass position="1" mass="129.0425935114"/>\n+            <mod_aminoacid_mass position="24" mass="115.0269434472"/>\n+          </modification_info>\n+          <search_score name="mvh" value="16.196711830419"/>\n+          <search_score name="mzFidelity" value="8.384634792566"/>\n+          <search_score name="xcorr" value="1.9797285930138711"/>\n+        </search_hit>\n+        <search_hit hit_rank="5" peptide="ELNGSLQEMQGESSGVSTVWDLLADIK" peptide_prev_aa="K" peptide_next_aa="R" protein="XXX_gi|528944678|ref|XP_005204735.1|" num_tot_proteins="1" calc_neutral_pep_mass="2907.369549893" massdiff="0.07194998909" num_tol_term="2" num_missed_cleavages="0" num_matched_ions="18" tot_num_ions="188">\n+          <modification_info>\n+            <mod_aminoacid_mass position="7" mass="129.0425935114"/>\n+            <mod_aminoacid_mass position="10" mass="129.0425935114"/>\n+          </modification_info>\n+          <search_score name="mvh" value="13.019234366281"/>\n+          <search_score name="mzFidelity" value="6.610442990057"/>\n+          <search_score name="xcorr" 
value="1.709385507643441"/>\n+        </search_hit>\n+      </search_result>\n+    </spectrum_query>\n+  </msms_run_summary>\n+</msms_pipeline_analysis>\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/input/201208-378803.idpDB
b
Binary file test-data/input/201208-378803.idpDB has changed
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/input/201208-378803.mzML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/201208-378803.mzML Thu Dec 15 17:20:57 2016 -0500
b
b'@@ -0,0 +1,5375 @@\n+<?xml version="1.0" encoding="utf-8"?>\n+<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.2_idx.xsd">\n+  <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="201208-378803-ABRR-AUG-1" version="1.1.0">\n+    <cvList count="2">\n+      <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.65.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>\n+      <cv id="UO" fullName="Unit Ontology" version="12:10:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>\n+    </cvList>\n+    <fileDescription>\n+      <fileContent>\n+        <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>\n+        <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>\n+      </fileContent>\n+      <sourceFileList count="2">\n+        <sourceFile id="WIFF" name="201208-378803.wiff" location="file://.">\n+          <cvParam cvRef="MS" accession="MS:1000770" name="WIFF nativeID format" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000562" name="ABI WIFF format" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="794711d760f2db8a6a11fff2e277b47ce5576df3"/>\n+        </sourceFile>\n+        <sourceFile id="WIFFSCAN" name="201208-378803.wiff.scan" location="file://.">\n+          <cvParam cvRef="MS" accession="MS:1000770" name="WIFF nativeID format" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000562" name="ABI WIFF format" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="165a0af0b1763bbe371899814a9e1457151586b8"/>\n+    
    </sourceFile>\n+      </sourceFileList>\n+    </fileDescription>\n+    <softwareList count="2">\n+      <software id="Analyst" version="unknown">\n+        <cvParam cvRef="MS" accession="MS:1000551" name="Analyst" value=""/>\n+      </software>\n+      <software id="pwiz_Reader_ABI" version="3.0.6585">\n+        <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard software" value=""/>\n+      </software>\n+    </softwareList>\n+    <instrumentConfigurationList count="1">\n+      <instrumentConfiguration id="IC1">\n+        <cvParam cvRef="MS" accession="MS:1000495" name="Applied Biosystems instrument model" value=""/>\n+        <softwareRef ref="Analyst"/>\n+      </instrumentConfiguration>\n+    </instrumentConfigurationList>\n+    <dataProcessingList count="1">\n+      <dataProcessing id="pwiz_Reader_ABI_conversion">\n+        <processingMethod order="0" softwareRef="pwiz_Reader_ABI">\n+          <cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" value=""/>\n+        </processingMethod>\n+        <processingMethod order="1" softwareRef="pwiz_Reader_ABI">\n+          <cvParam cvRef="MS" accession="MS:1000035" name="peak picking" value=""/>\n+        </processingMethod>\n+      </dataProcessing>\n+    </dataProcessingList>\n+    <run id="_x0032_01208-378803-ABRR-AUG-1" defaultInstrumentConfigurationRef="IC1" startTimeStamp="2012-08-08T14:40:01Z" defaultSourceFileRef="WIFF">\n+      <spectrumList count="108" defaultDataProcessingRef="pwiz_Reader_ABI_conversion">\n+        <spectrum index="0" id="sample=1 period=1 cycle=1181 experiment=2" defaultArrayLength="77" dataProcessingRef="pwiz_Reader_ABI_conversion">\n+          <cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="2"/>\n+          <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000130" name="positive scan" value=""/>\n+          <cvParam cvRef="MS" accession="MS:1000127" name="centroid 
spectrum" value=""/>\n+          <cvParam cvRef="MS" accession="MS'..b'2">331255</offset>\n+      <offset idRef="sample=1 period=1 cycle=1215 experiment=3">336828</offset>\n+      <offset idRef="sample=1 period=1 cycle=1215 experiment=4">342736</offset>\n+      <offset idRef="sample=1 period=1 cycle=1216 experiment=2">347660</offset>\n+      <offset idRef="sample=1 period=1 cycle=1216 experiment=3">353081</offset>\n+      <offset idRef="sample=1 period=1 cycle=1216 experiment=4">358642</offset>\n+      <offset idRef="sample=1 period=1 cycle=1216 experiment=5">364615</offset>\n+      <offset idRef="sample=1 period=1 cycle=1217 experiment=2">371809</offset>\n+      <offset idRef="sample=1 period=1 cycle=1217 experiment=3">378218</offset>\n+      <offset idRef="sample=1 period=1 cycle=1217 experiment=4">383633</offset>\n+      <offset idRef="sample=1 period=1 cycle=1217 experiment=5">390049</offset>\n+      <offset idRef="sample=1 period=1 cycle=1218 experiment=2">395533</offset>\n+      <offset idRef="sample=1 period=1 cycle=1218 experiment=3">402094</offset>\n+      <offset idRef="sample=1 period=1 cycle=1218 experiment=4">407366</offset>\n+      <offset idRef="sample=1 period=1 cycle=1218 experiment=5">412645</offset>\n+      <offset idRef="sample=1 period=1 cycle=1219 experiment=2">417751</offset>\n+      <offset idRef="sample=1 period=1 cycle=1219 experiment=3">423165</offset>\n+      <offset idRef="sample=1 period=1 cycle=1219 experiment=4">428674</offset>\n+      <offset idRef="sample=1 period=1 cycle=1220 experiment=2">433753</offset>\n+      <offset idRef="sample=1 period=1 cycle=1221 experiment=2">438772</offset>\n+      <offset idRef="sample=1 period=1 cycle=1221 experiment=3">444113</offset>\n+      <offset idRef="sample=1 period=1 cycle=1222 experiment=2">449012</offset>\n+      <offset idRef="sample=1 period=1 cycle=1223 experiment=2">454745</offset>\n+      <offset idRef="sample=1 period=1 cycle=1223 experiment=3">460131</offset>\n+      <offset 
idRef="sample=1 period=1 cycle=1224 experiment=2">464728</offset>\n+      <offset idRef="sample=1 period=1 cycle=1225 experiment=2">470652</offset>\n+      <offset idRef="sample=1 period=1 cycle=1228 experiment=2">476088</offset>\n+      <offset idRef="sample=1 period=1 cycle=1228 experiment=3">481428</offset>\n+      <offset idRef="sample=1 period=1 cycle=1229 experiment=2">486412</offset>\n+      <offset idRef="sample=1 period=1 cycle=1229 experiment=3">491845</offset>\n+      <offset idRef="sample=1 period=1 cycle=1229 experiment=4">497057</offset>\n+      <offset idRef="sample=1 period=1 cycle=1229 experiment=5">501846</offset>\n+      <offset idRef="sample=1 period=1 cycle=1230 experiment=2">506593</offset>\n+      <offset idRef="sample=1 period=1 cycle=1230 experiment=3">511174</offset>\n+      <offset idRef="sample=1 period=1 cycle=1231 experiment=2">516081</offset>\n+      <offset idRef="sample=1 period=1 cycle=1234 experiment=2">521300</offset>\n+      <offset idRef="sample=1 period=1 cycle=1235 experiment=2">526428</offset>\n+      <offset idRef="sample=1 period=1 cycle=1236 experiment=2">532118</offset>\n+      <offset idRef="sample=1 period=1 cycle=1236 experiment=3">537438</offset>\n+      <offset idRef="sample=1 period=1 cycle=1238 experiment=2">542518</offset>\n+      <offset idRef="sample=1 period=1 cycle=1239 experiment=2">547274</offset>\n+      <offset idRef="sample=1 period=1 cycle=1281 experiment=2">551813</offset>\n+      <offset idRef="sample=1 period=1 cycle=1283 experiment=2">556796</offset>\n+      <offset idRef="sample=1 period=1 cycle=1428 experiment=2">562274</offset>\n+      <offset idRef="sample=1 period=1 cycle=1580 experiment=2">566840</offset>\n+      <offset idRef="sample=1 period=1 cycle=1583 experiment=2">571692</offset>\n+      <offset idRef="sample=1 period=1 cycle=1627 experiment=2">577069</offset>\n+    </index>\n+    <index name="chromatogram">\n+      <offset idRef="TIC">581634</offset>\n+    </index>\n+  </indexList>\n+  
<indexListOffset>634114</indexListOffset>\n+  <fileChecksum>7fac4bf3be88419c71e9806717db788f44a10a68</fileChecksum>\n+</indexedmzML>\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 test-data/input/cow.protein.PRG2012-subset.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/cow.protein.PRG2012-subset.fasta Thu Dec 15 17:20:57 2016 -0500
[
b'@@ -0,0 +1,144 @@\n+>gi|528903801|ref|XP_871686.4| PREDICTED: cationic trypsin isoformX1 [Bos taurus]\n+MKTFIFLALLGAAVAFPVDDDDKIVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSGIQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRVASISLPTSCASAGTQCLISGWGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGPVVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN\n+>gi|528908802|ref|XP_005199340.1| PREDICTED: zinc finger protein 169 isoform X1 [Bos taurus]\n+MRRVFSRKSTHQTKNMAPGLLTTRDEALMAFRDVAVAFTQKEWKLLSPAQRTLYRDVMLENYSHMVSLGIAFPKPKLIIQLEQGDEPWREESECLLDLCAAEGRKEFQPCLSCPVTFSSPQILHHYMLCGHALQIFPGSSAESHFLLDAPSCLNEKAKDGEREGSGTVFGRLQLSGTSRAFFSSSQGQPVDQGGSSSGRIDQGMISDEADAVLTETNISESGAVICENYRLGFSRKSSLFSLQKHHVCPECGRNFCQKSDLVKHQRTHSGEKPFSCRECGRGFGRRSSLTVHQRKHSGEKPYVCRECGRHFRYTSSLTNHKRIHSGERPFVCQQCGRGFRQKIALILHQRTHLEEKPFVCPECGRGFCQKASLLQHRSSHSGERPFVCLECGRGFRQQSLLLSHQVTHSGEKPYVCAECGHSFRQKVTLIRHQRTHTGEKPYLCSECGRGFSQKVSLMGHQRTHTGEKPYVCSECGRGFGQKVTLIRHQRTHTGEKPFLCPECGRTFGFKSLLTRHKRIHSGEEADVYRVCEQRLGLKIQLTSDQRTHSGEKPCVCDECGRGFGFKSALIRHQRTHSGEKPYVCRDCGRGFSQKSHLHRHRKTKSGHHLLPQELFS\n+>gi|528908804|ref|XP_005199341.1| PREDICTED: zinc finger protein 169 isoform X2 [Bos taurus]\n+MRRVFSRKSTHQTKNMAPGLLTTRDEALMAFRDVAVAFTQKEWKLLSPAQRTLYRDVMLENYSHMVSLGIAFPKPKLIIQLEQGDEPWREESECLLDLCAAEGRKEFQPCLSCPVTFSSPQILHHYMLCGHALQIFPGSSAESHFLLDAPSCLNEKAKDGEREGSGTVFGRLQLSGTSRAFFSSSQGQPVDQGGSSSGRIDQGMISDEADAVLTETNISESGAVICENYRLGFSRKSSLFSLQKHHVCPECGRNFCQKSDLVKHQRTHSGEKPFSCRECGRGFGRRSSLTVHQRKHSGEKPYVCRECGRHFRYTSSLTNHKRIHSGERPFVCQQCGRGFRQKIALILHQRTHLEEKPFVCPECGRGFCQKASLLQHRSSHSGERPFVCLECGRGFRQQSLLLSHQVTHSGEKPYVCAECGHSFRQKVTLIRHQRTHTGEKPYLCSECGRGFSQKVSLMGHQRTHTGEKPYVCSECGRGFGQKVTLIRHQRTHTGEKPFLCPECGRTFGFKSLLTRHKRIHSGEEADVYRVCEQRLGLKIQLTSDQRTHSGEKPCVCDECGRGFGFKSALIRHQRTHSGEKPYVCRDCGRGFSQKSHLHRHRKTKSGHHLLPQELFS\n+>gi|528908806|ref|XP_005199342.1| PREDICTED: zinc finger protein 169 isoform X3 [Bos 
taurus]\n+MRRVFSRKSTHQTKNMAPGLLTTRDEALMAFRDVAVAFTQKEWKLLSPAQRTLYRDVMLENYSHMVSLGIAFPKPKLIIQLEQGDEPWREESECLLDLCAAEGRKEFQPCLSCPVTFSSPQILHHYMLCGHALQIFPGSSAESHFLLDAPSCLNEKAKDGEREGSGTVFGRLQLSGTSRAFFSSSQGQPVDQGGSSSGRIDQGMISDEADAVLTETNISESGAVICENYRLGFSRKSSLFSLQKHHVCPECGRNFCQKSDLVKHQRTHSGEKPFSCRECGRGFGRRSSLTVHQRKHSGEKPYVCRECGRHFRYTSSLTNHKRIHSGERPFVCQQCGRGFRQKIALILHQRTHLEEKPFVCPECGRGFCQKASLLQHRSSHSGERPFVCLECGRGFRQQSLLLSHQVTHSGEKPYVCAECGHSFRQKVTLIRHQRTHTGEKPYLCSECGRGFSQKVSLMGHQRTHTGEKPYVCSECGRGFGQKVTLIRHQRTHTGEKPFLCPECGRTFGFKSLLTRHKRIHSGEEADVYRVCEQRLGLKIQLTSDQRTHSGEKPCVCDECGRGFGFKSALIRHQRTHSGEKPYVCRDCGRGFSQKSHLHRHRKTKSGHHLLPQELFS\n+>gi|528908808|ref|XP_005199343.1| PREDICTED: zinc finger protein 169 isoform X4 [Bos taurus]\n+MRRVFSRKSTHQTKNMAPGLLTTRDEALMAFRDVAVAFTQKEWKLLSPAQRTLYRDVMLENYSHMVSLGIAFPKPKLIIQLEQGDEPWREESECLLDLCAEGRKEFQPCLSCPVTFSSPQILHHYMLCGHALQIFPGSSAESHFLLDAPSCLNEKAKDGEREGSGTVFGRLQLSGTSRAFFSSSQGQPVDQGGSSSGRIDQGMISDEADAVLTETNISESGAVICENYRLGFSRKSSLFSLQKHHVCPECGRNFCQKSDLVKHQRTHSGEKPFSCRECGRGFGRRSSLTVHQRKHSGEKPYVCRECGRHFRYTSSLTNHKRIHSGERPFVCQQCGRGFRQKIALILHQRTHLEEKPFVCPECGRGFCQKASLLQHRSSHSGERPFVCLECGRGFRQQSLLLSHQVTHSGEKPYVCAECGHSFRQKVTLIRHQRTHTGEKPYLCSECGRGFSQKVSLMGHQRTHTGEKPYVCSECGRGFGQKVTLIRHQRTHTGEKPFLCPECGRTFGFKSLLTRHKRIHSGEEADVYRVCEQRLGLKIQLTSDQRTHSGEKPCVCDECGRGFGFKSALIRHQRTHSGEKPYVCRDCGRGFSQKSHLHRHRKTKSGHHLLPQELFS\n+>gi|528908810|ref|XP_005199344.1| PREDICTED: zinc finger protein 169 isoform X5 [Bos 
taurus]\n+MSVLDHALMAFRDVAVAFTQKEWKLLSPAQRTLYRDVMLENYSHMVSLGIAFPKPKLIIQLEQGDEPWREESECLLDLCAAEGRKEFQPCLSCPVTFSSPQILHHYMLCGHALQIFPGSSAESHFLLDAPSCLNEKAKDGEREGSGTVFGRLQLSGTSRAFFSSSQGQPVDQGGSSSGRIDQGMISDEADAVLTETNISESGAVICENYRLGFSRKSSLFSLQKHHVCPECGRNFCQKSDLVKHQRTHSGEKPFSCRECGRGFGRRSSLTVHQRKHSGEKPYVCRECGRHFRYTSSLTNHKRIHSGERPFVCQQCGRGFRQKIALILHQRTHLEEKPFVCPECGRGFCQKASLLQHRSSHSGERPFVCLECGRGFRQQSLLLSHQVTHSGEKPYVCAECGHSFRQKVTLIRHQRTHTGEKPYLCSECGRGFSQKVSLMGHQRTHTGEKPYVCSECGRGFGQKVTLIRHQRTHTGEKPFLCPECGRTFGFKSLLTRHKRIHSGEEADVYRVCEQRLGLKIQLTSDQRTHSGEKPCVCDECGRGFGFKSALIRHQRTHSGEKPYVCRDCGRGFSQKSHLHRHRKTKSGHHLLPQELFS\n+>gi|528908812|ref|XP_609847.5| PREDICTED: zinc finger protein 169 isoform X6 [Bos taurus]\n+MAFRDVAVAFTQKEWKLLS'..b'ily 12 member 1 isoform X3 [Bos taurus]\n+MSLNNSSNVFLDSTPSNTNRFQVNVINESHESSAAMNDNADPPHYEETSFGDEGQNRFRISFRPGNQECYDNFLQTGETAKTDASFHAYDSHTNTYYLQTFGHNTVDAVPKIEYYRNTGSVSGPKVNRPSLLDIHEQLAKNVSVAPGSADVVANGEGTPGDEQAENKGEDQAGAVKFGWVKGVLVRCMLNIWGVMLFIRLSWIVGEAGIGLGVIIIGLSVVVTTLTGISMSAICTNGVVRGGGAYYLISRSLGPEFGGSIGLIFAFANAVAVAMYVVGFAETVVDLLKETDSMMVDPTNDIRIIGSITVVILLGISVAGMEWEAKAQVILLIILLIAIANFFIGTVIPSNNEKRARGFFNYQASIFAENFGPSFTKGEGFFSVFAIFFPAATGILAGANISGDLEDPQDAIPKGTMLAIFITTVAYLGVAICVGACVVRDATGSVNDTIISGMNCNGSAACGLGYDFSRCRHEPCQYGLMNNFQVMSMVSGFGPLITAGIFSATLSSALASLVSAPKVFQALCKDNIYKALQFFAKGYGKNNEPLRGYFLTFVIAMAFILIAELNTIAPIISNFFLASYALINFSCFHASYAKSPGWRPAYGIYNMWVSLFGAVLCCAVMFVINWWAAVITYVIEFFLYIYVTYKKPDVNWGSSTQALSYMSALDNALELTTVEDHVKNFRPQCIVLTGGPMTRPALLDITHAFTKNSGLCICCEVFVGPRKLCVKEMNSGMAKKQAWLIKNKIKAFYAAVAADCFRDGVRSLLQASGLGRMKPNTLVIGYKKNWRKAPLTEIENYVGIIHDAFDFEIGVVIVRISQGFDISQVLQVREELEKLEQERLALEATIKDNESEEGNGGIRGLFKKAGKLNITKPTPKKDSSINTIQSMHVGEFNQKLVEASTQFKKKQGKGTIDVWWLFDDGGLILLIPYILTLRKKWKDCKLRIYVGGKINRIEEEKIAMASLLSKFRIKFADIHVIGDINVKPNKESWKVFEEMIEPYCLHESCKDLTTAEKLKRETPWKITDAELEAVKEKSYRQVRLNELLQEHSRAANLIVLSLPVARKGSISDWLYMAWLEILTKNLPPVLLVRGNHKNVLTFYS\n+>gi|297479727|ref|XP_002690982.1| PREDICTED: solute carrier family 12 member 1 isoform X1 [Bos 
taurus]\n+MSLNNSSNVFLDSTPSNTNRFQVNVINESHESSAAMNDNADPPHYEETSFGDEGQNRFRISFRPGNQECYDNFLQTGETAKTDASFHAYDSHTNTYYLQTFGHNTVDAVPKIEYYRNTGSVSGPKVNRPSLLDIHEQLAKNVSVAPGSADVVANGEGTPGDEQAENKGEDQAGAVKFGWVKGVLVRCMLNIWGVMLFIRLSWIVGEAGIGLGVLIILLSTMVTSITGLSTSAIATNGFVRGGGAYYLISRSLGPEFGGSIGLIFAFANAVAVAMYVVGFAETVVDLLKETDSMMVDPTNDIRIIGSITVVILLGISVAGMEWEAKAQVILLIILLIAIANFFIGTVIPSNNEKRARGFFNYQASIFAENFGPSFTKGEGFFSVFAIFFPAATGILAGANISGDLEDPQDAIPKGTMLAIFITTVAYLGVAICVGACVVRDATGSVNDTIISGMNCNGSAACGLGYDFSRCRHEPCQYGLMNNFQVMSMVSGFGPLITAGIFSATLSSALASLVSAPKVFQALCKDNIYKALQFFAKGYGKNNEPLRGYFLTFVIAMAFILIAELNTIAPIISNFFLASYALINFSCFHASYAKSPGWRPAYGIYNMWVSLFGAVLCCAVMFVINWWAAVITYVIEFFLYIYVTYKKPDVNWGSSTQALSYMSALDNALELTTVEDHVKNFRPQCIVLTGGPMTRPALLDITHAFTKNSGLCICCEVFVGPRKLCVKEMNSGMAKKQAWLIKNKIKAFYAAVAADCFRDGVRSLLQASGLGRMKPNTLVIGYKKNWRKAPLTEIENYVGIIHDAFDFEIGVVIVRISQGFDISQVLQVREELEKLEQERLALEATIKDNESEEGNGGIRGLFKKAGKLNITKPTPKKDSSINTIQSMHVGEFNQKLVEASTQFKKKQGKGTIDVWWLFDDGGLILLIPYILTLRKKWKDCKLRIYVGGKINRIEEEKIAMASLLSKFRIKFADIHVIGDINVKPNKESWKVFEEMIEPYCLHESCKDLTTAEKLKRETPWKITDAELEAVKEKSYRQVRLNELLQEHSRAANLIVLSLPVARKGSISDWLYMAWLEILTKNLPPVLLVRGNHKNVLTFYS\n+>gi|529012038|ref|XP_005226889.1| PREDICTED: pregnancy-associated glycoprotein 2 isoform X1 [Bos taurus]\n+MKWLVLLGLVALSECIVILPLKKMKTLRETLREKNLLNNFLEEQAYRLSKNDSKITIHPLRNYLDTAYVGNITIGTPPQEFRVVFDTGSANLWVPCITCTSPACYTHKTFNPQNSSSFREVGSPITIFYGSGIIQGFLGSDTVRIGNLVSPEQSFGLSLEEYGFDSLPFDGILGLAFPAMGIEDTIPIFDNLWSHGAFSEPVFAFYLNTISMNGTVTACSCGCEALLDTGTSMIYGPTKLVTNIHKLMNARLENSEYVVSCDAVKTLPPVIFNINGIDYPLRPQAYIIKIQNSCRSVFQGGTENSSLNTWILGDIFLRQYFSVFDRKNRRIGLAPAV\n+>gi|156523214|ref|NP_001096021.1| protein KHNYN [Bos 
taurus]\n+MPTWGAGSPSPDRFAVSAEAEDKVREQLPRVERIFRVGMSVLPKDCPENPHIWLQLEGPKENASRAKEYLKGLCSPELQNEIHYPPKLHCIFLGAQGFFLDCLTWSTSAHLVPGVPGSLMVSGLTEAFVMVQSRVEELVERLSWDFRLGPSPGASQCAGVLREFSALLQARGDAHTEALLQLPQAVQEELLSLVQEASRGQGPQAFPSWGWGGPGPLGAQQQGVRTPLGDGGVSLDTGPTGWQESRGERHAVEKEGTKQGGAREMDLGWKEWPGEEAWERQVAFRPQSGGGEASGGGEAGQAGPPKGKALGKEGVPQERGRLGVQGQPPSTQGPYQRASQLRGASLLQRLHNGEASPPRVPSPPPAPEPPWHCGDRGDRGDRADKQLVVARGRGSPWKRGTRGGNLVTGTQRFQEALQDPFTLCLANVPGKPDLRHIVIDGSNVAMVHGLQHYFSSRGIAIAVQYFWDRGHRDITVFVPQWRFSKDSKVREGHFLHKLYSLSLLSLTPSRVLDGKRISSYDDRFMVKLAEETDGIIVSNDQFRDLAEESEKWMAIIRERLLPFTFVGNLFMVPDDPLGRNGPTLDEFLKKPVRAPGSSKPQQSARGVTEHSNQQQGRKEEEKGNGGIRKTRETERLRRQLLEVFWGQDHKVDFILQREPYCRDINQLSEALLSLNF\n+>gi|28849951|ref|NP_788787.1| pregnancy-associated glycoprotein 2 precursor [Bos taurus]\n+MKWLVLLGLVALSECIVILPLKKMKTLRETLREKNLLNNFLEEQAYRLSKNDSKITIHPLRNYLDTAYVGNITIGTPPQEFRVVFDTGSANLWVPCITCTSPACYTHKTFNPQNSSSFREVGSPITIFYGSGIIQGFLGSDTVRIGNLVSPEQSFGLSLEEYGFDSLPFDGILGLAFPAMGIEDTIPIFDNLWSHGAFSEPVFAFYLNTNKPEGSVVMFGGVDHRYYKGELNWIPVSQTSHWQISMNNISMNGTVTACSCGCEALLDTGTSMIYGPTKLVTNIHKLMNARLENSEYVVSCDAVKTLPPVIFNINGIDYPLRPQAYIIKIQNSCRSVFQGGTENSSLNTWILGDIFLRQYFSVFDRKNRRIGLAPAV\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 tools/idpqonvert.xml
--- a/tools/idpqonvert.xml Tue Sep 30 15:31:54 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,189 +0,0 @@\n-<?xml version="1.0"?>\n-<tool id="idpqonvert" version="0.1.0" name="idpQonvert" >\n-  <requirements>\n-    <requirement type="package" version="333">binaries_for_package_idpqonvert</requirement>\n-  </requirements>\n-\n-  <description>\n-    Prepare identification results for IDPicker.\n-  </description>\n-  <command>\n-    #set $db_name = $input_database.display_name.replace(".fasta", "") + ".fasta"\n-    #set $input_name = $input.display_name\n-    #set $output_name = $input_name.split(".")[0] + ".idpDB"\n-    ln -s \'$input\' \'${input_name}\';\n-    ln -s \'$input_database\' \'${db_name}\';\n-    idpQonvert -DecoyPrefix \'${decoy_prefix}\' \\\n-    -WriteQonversionDetails true \\\n-    -ProteinDatabase \'${db_name}\' \\\n-    -MaxFDR $max_fdr \\\n-    -EmbedSpectrumScanTimes false \\\n-    -EmbedSpectrumSources false \\\n-    #if $scoring.override_scoring\n-    #set $score_info = []\n-    #for $engine in ["myrimatch:mvh", "xcorr", "sequest:xcorr", "sequest:deltacn", "mascot:score", "x!tandem:expect", "x!tandem:hyperscore", "ms-gf:spacevalue"]\n-    #set $weight = $getVar("scoring." + $engine.replace(":", "_").replace("!","") + "_weight.value")\n-    #set $type = $getVar("scoring." 
+ $engine.replace(":", "_").replace("!","") + "_type.value")\n-    #set $score_info = $score_info + [str($weight) + " " + $type + " " + $engine]\n-    #continue\n-    #end for\n-    #set $score_str = "; ".join($score_info)\n-    -ScoreInfo \'$score_str\' \\\n-    #end if\n-    #if $advanced_options.use_advanced_options\n-    -MaxImportFDR $advanced_options.max_import_fdr \\\n-    -MaxResultRank $advanced_options.max_result_rank \\\n-    -RerankMatches $advanced_options.rerank_matches \\\n-    -Gamma $advanced_options.gamma \\\n-    -Nu $advanced_options.nu \\\n-    -PolynomialDegree $advanced_options.polynomial_degree \\\n-    -TruePositiveThreshold $advanced_options.true_positive_threshold \\\n-    -MaxTrainingRank $advanced_options.max_training_rank \\\n-    -SVMType $advanced_options.svm_type \\\n-    -Kernel $advanced_options.kernel \\\n-    -ChargeStateHandling $advanced_options.charge_state_handling \\\n-    -QonverterMethod $advanced_options.qonverter_method \\\n-    -MinPartitionSize $advanced_options.min_partition_size \\\n-    #end if\n-    \'${input_name}\';\n-    mv \'$output_name\' output\n-  </command>\n-  <stdio>\n-    <exit_code range="1:" level="fatal" description="Job Failed" />\n-    <regex match="^Could not find the default configuration file.*$"\n-      source="both"\n-      level="warning" />\n-  </stdio>\n-  <inputs>\n-    <conditional name="type">\n-      <param name="input_type" type="select" label="Input Type">\n-        <option value="mzid">mzIdentML</option>\n-        <option value="pepXML">pepXML</option>\n-      </param>\n-      <when value="mzid">\n-        <param format="mzid" name="input" type="data" label="Input mzIdentML"/>\n-      </when>\n-      <when value="pepXML">\n-        <param format="pepXML" name="input" type="data" label="Input pepXML"/>\n-      </when>\n-    </conditional>\n-    <param format="fasta" name="input_database" type="data" label="Protein Database"/>\n-    <param name="decoy_prefix" type="text" label="Decoy 
Prefix"/>\n-    <param name="max_fdr" type="float" label="Max FDR" value="0.05" />\n-    <conditional name="advanced_options">\n-      <param name="use_advanced_options" type="boolean" truevalue="true" falsevalue="false" label="Set Advanced Options" />\n-      <when value="false" />\n-      <when value="true">\n-        <param name="max_import_fdr" type="float" label="Max Import FDR" value="0.25" />\n-        <param name="max_result_rank" type="integer" label="Max Result Rank" value="3" />\n-        <param name="max_training_rank" type="integer" label="Max Training Rank" value="1" />\n-        <param name="rerank_matches" type="boolean" label="Rerank Matches" checked="false" truevalue="true" falsevalue="false" />\n-        <param name="gamma" type="float" label="Gamma" value="5" />\n-        <param name="nu" type="float" label="Nu" value="-0.5" />\n-        <param name="true_positive_t'..b'g" type="select">\n-          <option value="Ignore" selected="true">Ignore</option>\n-          <option value="Feature">Feature</option>\n-        </param>\n-        <param name="qonverter_method" label="Qonverter Method" type="select">\n-          <option value="MonteCarlo" selected="true">Monte Carlo</option>\n-          <option value="SingleSVM">SVM (single)</option>\n-          <option value="PartitionSVM">SVM (parition)</option>\n-          <option value="StaticWeighted">Static Weighted</option>\n-        </param>\n-      </when>\n-    </conditional>\n-    <conditional name="scoring">\n-      <param name="override_scoring" type="boolean" truevalue="true" falsevalue="false" label="Modify Search Application Weights" />\n-      <when value="false" />\n-      <when value="true">\n-        <param name="myrimatch_mvh_weight" label="Myrimatch (mvh) Weight" type="float" value="1" />\n-        <param name="myrimatch_mvh_type" label="Myrimatch (mvh) Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option 
value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="xcorr_weight" label="XCorr Weight" type="float" value="1" />\n-        <param name="xcorr_type" label="XCorr Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="sequest_xcorr_weight" label="Sequest (xcorr) Weight" type="float" value="1" />\n-        <param name="sequest_xcorr_type" label="Sequest (xcorr) Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="sequest_deltacn_weight" label="Sequest (deltacn) Weight" type="float" value="1" />\n-        <param name="sequest_deltacn_type" label="Sequest (deltacn) Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="mascot_score_weight" label="Mascot Score Weight" type="float" value="1" />\n-        <param name="mascot_score_type" label="Mascot Score Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="xtandem_expect_weight" label="X! Tandem (Expectation) Weight" type="float" value="-1" />\n-        <param name="xtandem_expect_type" label="X! 
Tandem (Expectation) Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="xtandem_hyperscore_weight" label="X! Tandem (hyperscore) Weight" type="float" value="1" />\n-        <param name="xtandem_hyperscore_type" label="X! Tandem (hyperscore)Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-        <param name="ms-gf_spacevalue_weight" label="MS-GF (spacevalue) Weight" type="float" value="-1" />\n-        <param name="ms-gf_spacevalue_type" label="MS-GF (spacevalue) Normalization" type="select">\n-          <option value="off" selected="true">None</option>\n-          <option value="quantile">Quantile</option>\n-          <option value="linear">Linear</option>\n-        </param>\n-      </when>\n-    </conditional>\n-  </inputs>\n-  <outputs>\n-    <data format="idpdb" name="output" from_work_dir="output" />\n-  </outputs>\n-  <help>\n-  </help>\n-</tool>\n'
b
diff -r e39c5664b04a -r d1f7b2395bc2 tools/repository_dependencies.xml
--- a/tools/repository_dependencies.xml Tue Sep 30 15:31:54 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<repositories>
-  <repository changeset_revision="f66f8ca7b7b9" name="proteomics_datatypes" owner="iracooke" toolshed="https://toolshed.g2.bx.psu.edu" />
-</repositories>
b
diff -r e39c5664b04a -r d1f7b2395bc2 tools/tool_dependencies.xml
--- a/tools/tool_dependencies.xml Tue Sep 30 15:31:54 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="binaries_for_package_idpqonvert" version="333">
-    <repository changeset_revision="603f4cba7e8f" name="package_idpqonvert" owner="galaxyp" toolshed="https://toolshed.g2.bx.psu.edu" />
-  </package>
-</tool_dependency>