changeset 0:34c5c95740a1 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
author galaxyp
date Mon, 22 May 2017 05:08:23 -0400
parents
children 8a30d6e5b97d
files README.rst delta_pi_calc.xml peptide_pi_annotator.py pi_database_splitter.py pi_db_split.xml test-data/decoy_splitdb_fr1.fasta test-data/decoy_splitdb_fr2.fasta test-data/decoy_splitdb_fr3.fasta test-data/peptable.txt test-data/peptable_deltapi.txt test-data/peptable_missed_ox.txt test-data/predicted_peptides.txt test-data/predicted_peptides_to_split.txt test-data/target_splitdb_fr1.fasta test-data/target_splitdb_fr2.fasta test-data/target_splitdb_fr3.fasta
diffstat 16 files changed, 600 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,41 @@
+GalaxyP - pI database tools
+===========================
+
+- Home: <https://github.com/galaxyproteomics/tools-galaxyp/>
+- Galaxy Tool Shed: <http://toolshed.g2.bx.psu.edu/view/galaxyp/pi_db_tools>
+- Tool ID: `pi_db_split`, `calc_delta_pi`
+
+
+Description
+-----------
+MS sample prefractionation is oftentimes done using isoelectric focusing, as each
+peptide has a specific isoelectric point expressed in pI. The peptides are then
+spread across a strip containing a pI gradient of a certain number of fractions. 
+
+Here are tools that can calculate delta-pI between experimental and predicted 
+or calculated pIs, as well as split a tryptic peptide database in fractions 
+that match the experimental pI gradient.
+
+
+GalaxyP Community
+-----------------
+
+Current governing community policies for GalaxyP_ and other information can be found at:
+
+<https://github.com/galaxyproteomics>
+
+.. _GalaxyP: https://github.com/galaxyproteomics/
+
+
+Contributing
+------------
+
+Contributions to this repository are reviewed through pull requests. If you would like your work acknowledged, please also add yourself to the Authors section. If your pull request is accepted, you will also be acknowledged in <https://github.com/galaxyproteomics/tools-galaxyp/>
+
+
+Authors
+-------
+
+Authors and contributors:
+
+* Jorrit Boekel
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delta_pi_calc.xml	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,146 @@
+<tool id="calc_delta_pi" name="Add delta pI" version="1.0">
+    <requirements>
+        <requirement type="package" version="3.6">python</requirement>
+    </requirements>
+    <description>to peptide table</description>
+    <command>
+	    python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable'
+            --stripcol $stripcol --pepcol $pepcol --fraccol $fraccol --out '$output'
+	    
+	    --strippatterns
+	    #for $strip in $strips
+	        '$strip.pattern'
+	    #end for
+
+	    --intercepts 
+	    #for $strip in $strips
+	        $strip.intercept 
+	    #end for
+	    
+	    --widths
+	    #for $strip in $strips
+		$strip.fr_width
+	    #end for
+	    
+	    #if len($ignoremods) > 0
+	        --ignoremods
+	        #for $mod in $ignoremods
+		    '$mod.regex'
+		#end for
+	    #end if
+    </command>
+    
+    <inputs>
+        <param name="trainingpi" type="data" format="tabular" label="Known peptides with annotation of pI" help="First column is sequence, second pI" />
+        <param name="peptable" type="data" format="tabular" label="Peptide table to determine pI shift from" />
+        <repeat name="ignoremods" title="Peptide modification weights to ignore">
+            <param name="regex" label="Regex to strip modification from peptide table before looking up sequence in predicted pI" type="text" help="Enter e.g. 15.994915 for oxidation, * to ignore all modifications, or a proper regex (however brackets are stripped by galaxy). See help below.">
+                <sanitizer>
+                    <valid>
+                        <remove preset="string.whitespace" />
+                    </valid>
+                </sanitizer>
+            </param>
+        </repeat>
+        <param name="pepcol" type="integer" value="" label="Peptide sequence column in peptide table" />
+        <param name="fraccol" type="integer" value="" label="Fraction number column in peptide table" />
+        <param name="stripcol" type="integer" value="" label="Strip pattern column in peptide table" help="E.g. column with filename to derive strip name from"/>
+        <repeat name="strips" title="pI separation strip data">
+            <param name="pattern" type="text" label="Strip regex detection pattern" help="Regex (see help below) that identifies the pI strip from the column in the above field.">
+                <sanitizer>
+                    <valid>
+                        <remove preset="string.whitespace" />
+                    </valid>
+                </sanitizer>
+            </param>
+            <!-- Numeric values: they are placed unquoted on the command line and the script parses them as floats -->
+            <param name="intercept" type="float" value="" label="pI intercept of strip" />
+            <param name="fr_width" type="float" value="" label="fraction widths" />
+        </repeat>
+    </inputs>
+    
+    <outputs>
+        <data format="tabular" name="output"/>
+    </outputs>
+    <tests>
+        <test> 
+            <param name="trainingpi" value="predicted_peptides.txt" />
+            <param name="peptable" value="peptable.txt" />
+            <repeat name="ignoremods">
+                <param name="regex" value="*" />
+            </repeat>
+            <param name="pepcol" value="1" />
+            <param name="fraccol" value="4" />
+            <param name="stripcol" value="2" />
+            <repeat name="strips">
+                <param name="pattern" value="strip1" />
+                <param name="intercept" value="8.21" />
+                <param name="fr_width" value="0.013" />
+            </repeat>
+            <repeat name="strips">
+                <param name="pattern" value="strip2" />
+                <param name="intercept" value="6.11" />
+                <param name="fr_width" value="0.04" />
+            </repeat>
+            <output name="output" value="peptable_deltapi.txt" />
+        </test>
+        <test>
+            <param name="trainingpi" value="predicted_peptides.txt" />
+            <param name="peptable" value="peptable.txt" />
+            <repeat name="ignoremods">
+                <param name="regex" value="15.994915" />
+            </repeat>
+            <param name="pepcol" value="1" />
+            <param name="fraccol" value="4" />
+            <param name="stripcol" value="2" />
+            <repeat name="strips">
+               <param name="pattern" value="strip1" />
+               <param name="intercept" value="8.21" />
+               <param name="fr_width" value="0.013" />
+            </repeat>
+            <repeat name="strips">
+               <param name="pattern" value="strip2" />
+               <param name="intercept" value="6.11" />
+               <param name="fr_width" value="0.04" />
+            </repeat>
+            <output name="output" value="peptable_deltapi.txt" />
+        </test>
+        <test>
+            <param name="trainingpi" value="predicted_peptides.txt" />
+            <param name="peptable" value="peptable.txt" />
+            <param name="pepcol" value="1" />
+            <param name="fraccol" value="4" />
+            <param name="stripcol" value="2" />
+            <repeat name="strips">
+                <param name="pattern" value="strip1" />
+                <param name="intercept" value="8.21" />
+                <param name="fr_width" value="0.013" />
+            </repeat>
+            <repeat name="strips">
+                <param name="pattern" value="strip2" />
+                <param name="intercept" value="6.11" />
+                <param name="fr_width" value="0.04" />
+            </repeat>
+            <output name="output" value="peptable_missed_ox.txt" />
+        </test>
+    </tests>
+
+    <help>
+	    In case you have no pI calculation method but a large table with 
+	    peptides and their predicted pIs available. This tool adds a column 
+	    with delta-pI values to a peptide or PSM table for each peptide it 
+	    can find in the predicted collection. Needs a tab-separated file 
+	    with peptide-sequences and their predicted pI, and a PSM/peptide table 
+	    with at least peptide sequences.
+
+	    Regexes, or regular expressions, are sequences of characters that
+	    are used to find a certain pattern in a string of text. For example 
+	    the regex "peptide" will find the word "peptide" in the text
+	    "thisisa peptide in my sample". More advanced regexes can allow for
+	    finding for example specific but variable pieces of text, e.g.
+	    "[a-c].*" will match a string "acbcba" in "yxyzyxacbcbayxzyxyzxy".
+	    Much more elaborate regexes exist. Since this is a python script,
+	    python regexes are described here:
+	    https://docs.python.org/3/library/re.html
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peptide_pi_annotator.py	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+import re
+import sys
+import argparse
+
+
+def main():
+    """Command-line entry point: annotate a peptide table with pI columns.
+
+    Shows the argparse help when called without arguments. Otherwise each
+    strip pattern is paired with the intercept and fraction width given at
+    the same position on the command line, and the annotated rows are
+    written to the output path as tab-separated lines.
+    """
+    if not sys.argv[1:]:
+        sys.argv.append('-h')
+    args = parse_commandline()
+    strips = {pattern: {'intercept': args.intercepts[idx],
+                        'fr_width': args.fr_width[idx]}
+              for idx, pattern in enumerate(args.pipatterns)}
+    with open(args.outpeptable, 'w') as out_fp:
+        annotated_rows = annotate_peptable(args.pipeps, args.peptable,
+                                           args.pepcol, args.frac_col,
+                                           args.stripcol, strips,
+                                           args.ignoremods)
+        for row in annotated_rows:
+            out_fp.write('\t'.join(str(field) for field in row) + '\n')
+
+
+def get_first_matching_pattern(patterns, string):
+    """Return the first regex in `patterns` found in `string`, else False.
+
+    Patterns are tried in iteration order with `re.search`, so a match
+    anywhere in `string` counts.
+    """
+    hits = (candidate for candidate in patterns
+            if re.search(candidate, string))
+    return next(hits, False)
+
+
+def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
+                      strips, ignoremods):
+    """Yield the rows of a peptide table extended with three pI columns.
+
+    The first row yielded is the header of `peptable` plus the column names
+    'Experimental pI', 'Predicted pI' and 'Delta pI'. Every following row is
+    the split input line plus the values for those columns; the predicted
+    and delta values are 'NA' when the (modification-stripped) sequence is
+    absent from the predicted pI file. A summary of found/not found
+    peptides is printed once all rows have been consumed.
+
+    Args:
+        predicted_peps_fn: path to a tab-separated file; first column is the
+            peptide sequence, second column its predicted pI.
+        peptable: path to a tab-separated peptide/PSM table with a header.
+        seqcol: 1-based column number of the peptide sequence.
+        frac_col: column number of the fraction number; positive values are
+            1-based, other values are used as a Python index unchanged.
+        stripcol: 1-based column number of the field the strip patterns are
+            matched against.
+        strips: dict mapping a strip regex pattern to a dict with the keys
+            'intercept' and 'fr_width'.
+        ignoremods: iterable of modification weights (regex fragments) to
+            remove from sequences before lookup; '*' removes any signed
+            decimal number.
+
+    Raises:
+        ValueError: when no strip pattern matches the strip field of a row
+            (previously this surfaced as an opaque ``KeyError: False``).
+    """
+    if frac_col > 0:
+        frac_col -= 1
+    predicted_peps = {}
+    with open(predicted_peps_fn) as fp:
+        for line in fp:
+            line = line.strip('\n').split('\t')
+            predicted_peps[line[0]] = line[1]
+    # The modification regexes do not depend on the row, so build them once.
+    # The raw string avoids the invalid "\d" / "\." escape sequences.
+    mod_regexes = [r'[+-]\d*\.\d*' if weight == '*'
+                   else '[+-]{}'.format(weight)
+                   for weight in ignoremods]
+    not_predicted_count, predicted_count = 0, 0
+    with open(peptable) as fp:
+        header = next(fp).strip('\n').split('\t')
+        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
+        for line in fp:
+            line = line.strip('\n').split('\t')
+            strip_field = line[stripcol - 1]
+            strip_pattern = get_first_matching_pattern(strips.keys(),
+                                                       strip_field)
+            # Compare with `is False`: an empty pattern is falsy but valid.
+            if strip_pattern is False:
+                raise ValueError('No strip pattern matches "{}", cannot '
+                                 'determine experimental pI'.format(
+                                     strip_field))
+            strip = strips[strip_pattern]
+            # Experimental pI is linear in the fraction number of the strip.
+            exp_pi = (strip['fr_width'] * int(line[frac_col]) +
+                      strip['intercept'])
+
+            sequence = line[seqcol - 1]
+            for regex in mod_regexes:
+                sequence = re.sub(regex, '', sequence)
+            try:
+                pred_pi = float(predicted_peps[sequence])
+            except KeyError:
+                print('CANNOT PREDICT', sequence)
+                not_predicted_count += 1
+                pred_pi, delta_pi = 'NA', 'NA'
+            else:
+                delta_pi = exp_pi - pred_pi
+                predicted_count += 1
+            yield line + [exp_pi, pred_pi, delta_pi]
+    print('Number of peptides without pI prediction: {}\n'
+          'Number of peptides with predicion: {}\n'.format(not_predicted_count,
+                                                           predicted_count))
+
+
+def parse_commandline():
+    """Parse the command line of the delta pI annotator.
+
+    Returns the argparse namespace with the attributes outpeptable,
+    peptable, pipeps, pepcol, frac_col, ignoremods, stripcol, pipatterns,
+    intercepts and fr_width. The strip patterns, intercepts and widths are
+    lists; intercepts and widths are parsed as floats.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('--out', dest='outpeptable', help='Output peptide '
+                        'table')
+    # Trailing space after "calculate": adjacent literals are concatenated
+    # verbatim, the help used to read "calculatepI shift."
+    parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
+                        'peptides, FDR, fraction numbers. Used to calculate '
+                        'pI shift.')
+    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
+                        'with peptide seq, pI value')
+    parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
+                        'column number in peptide table. First column is 1.',
+                        default=False, type=int)
+    parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
+                        'column number in peptide table. First column is 1.',
+                        type=int)
+    parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
+                        'identify modification weights to be ignored.',
+                        default=[], nargs='+', type=str)
+    parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
+                        'column number in peptide table. Will be used to '
+                        'detect strips if multiple are present using pattern '
+                        'passed with --strippatterns. First column is nr. 1.',
+                        default=False, type=int)
+    parser.add_argument('--strippatterns', dest='pipatterns',
+                        help='Patterns to detect different pI ranges from e.g.'
+                        ' file name in peptide table', nargs='+')
+    parser.add_argument('--intercepts', dest='intercepts',
+                        help='pI Intercept of strips', nargs='+', type=float)
+    parser.add_argument('--widths', dest='fr_width', nargs='+',
+                        help='Strip fraction widths in pI', type=float)
+    return parser.parse_args(sys.argv[1:])
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pi_database_splitter.py	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+import sys
+import argparse
+from numpy import median
+from contextlib import ExitStack
+
+
+def main():
+    """Command-line entry point: split a peptide database into pI fractions.
+
+    Shows the argparse help when called without arguments. Otherwise the
+    pI shift is determined from the training peptide table, the pI bins of
+    the fractions are computed and the peptides are written to per-fraction
+    target and decoy FASTA files.
+    """
+    if sys.argv[1:] == []:
+        sys.argv.append('-h')
+    args = parse_commandline()
+    locfun = {False: locatefraction,
+              True: reverse_locatefraction}[args.reverse]
+    # Column nrs should start from 0
+    # If negative, -1 is last item in list, etc
+    if args.fdrcol > 0:
+        args.fdrcol -= 1
+    if args.deltapicol > 0:
+        args.deltapicol -= 1
+    if args.train_peptable:
+        pishift = get_pishift(args.train_peptable, args.fdrcol,
+                              args.deltapicol, args.fdrcutoff, args.picutoff)
+    else:
+        # The help of -p promises "no shift" when no peptide table is
+        # passed; calling get_pishift would fail on opening None.
+        pishift = 0
+    binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept,
+                             args.tolerance, pishift)
+    write_fractions(args.pipeps, args.fr_amount, args.prefix,
+                    binarray, locfun, args.minlen, args.maxlen)
+
+
+def locatefraction(pep_pi, bins):
+    """Return the fraction numbers of the bins that contain `pep_pi`.
+
+    Each bin is indexed as (fraction nr, left edge, right edge) and both
+    edges are inclusive. Bins whose right edge is below `pep_pi` are
+    skipped; the scan stops at the first bin whose left edge is above it.
+    """
+    fractions = []
+    for pibin in bins:
+        if pep_pi > pibin[2]:
+            continue
+        if not pep_pi >= pibin[1]:
+            break
+        fractions.append(pibin[0])
+    return fractions
+
+
+def reverse_locatefraction(pep_pi, bins):
+    """Return the fraction numbers of matching bins for a reversed strip.
+
+    Each bin is indexed as (fraction nr, edge 1, edge 2). Bins whose edge 1
+    is above `pep_pi` are skipped, bins with `pep_pi` below edge 2 are
+    collected, and the scan stops at the first bin matching neither.
+    """
+    fractions = []
+    for pibin in bins:
+        if pep_pi < pibin[1]:
+            continue
+        if not pep_pi < pibin[2]:
+            break
+        fractions.append(pibin[0])
+    return fractions
+
+
+def parse_commandline():
+    """Parse the command line of the pI database splitter.
+
+    Returns the argparse namespace with the attributes train_peptable,
+    deltapicol, picutoff, fdrcol, fdrcutoff, pipeps, prefix, tolerance,
+    fr_amount, reverse, intercept, fr_width, minlen and maxlen.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('-p', dest='train_peptable', help='Peptide table with '
+                        'peptides, FDR, and fraction numbers. Used to '
+                        'calculate pI shift. Leave empty for no shift. '
+                        'Tab separated file.')
+    parser.add_argument('--deltacol', dest='deltapicol', help='Delta pI column'
+                        ' number in peptide table. First column is nr. 1. '
+                        'Negative number for counting from last col '
+                        '(-1 is last).', default=False, type=int)
+    parser.add_argument('--picutoff', dest='picutoff',
+                        help='delta pI value to filter experimental peptides'
+                        ' when calculating pi shift.', default=0.2, type=float)
+    parser.add_argument('--fdrcol', dest='fdrcol', help='FDR column number in '
+                        'peptide table. First column is nr. 1. Empty includes '
+                        'all peptides', default=False, type=int)
+    parser.add_argument('--fdrcutoff', dest='fdrcutoff',
+                        help='FDR cutoff value to filter experimental peptides'
+                        ' when calculating pi shift.', default=0, type=float)
+    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
+                        'with accession, peptide seq, pI value')
+    parser.add_argument('--prefix', dest='prefix', default='pisep',
+                        help='Prefix for target/decoy output files')
+    parser.add_argument('--tolerance', dest='tolerance',
+                        help='Strip fraction tolerance pi tolerance represents'
+                        ' 2.5/97.5 percentile', type=float)
+    parser.add_argument('--amount', dest='fr_amount',
+                        help='Strip fraction amount', type=int)
+    parser.add_argument('--reverse', dest='reverse', help='Strip is reversed',
+                        action='store_const', const=True, default=False)
+    parser.add_argument('--intercept', dest='intercept',
+                        help='pI Intercept of strip', type=float)
+    parser.add_argument('--width', dest='fr_width',
+                        help='Strip fraction width in pI', type=float)
+    # Default of 0 keeps every peptide; without a default the value is None
+    # and the length comparison in write_fractions raises a TypeError.
+    parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length',
+                        type=int, default=0)
+    parser.add_argument('--maxlen', dest='maxlen', help='Maximal peptide length',
+                        type=int, default=False)
+    return parser.parse_args(sys.argv[1:])
+
+
+def get_pishift(peptable, fdrcol, deltapicol, fdrcutoff, delta_pi_cutoff):
+    """Return the median delta pI of the peptides that pass the filters.
+
+    Args:
+        peptable: path to a tab-separated peptide table with a header line.
+        fdrcol: 0-based index of the FDR column, or False/None to skip FDR
+            filtering.
+        deltapicol: index of the delta pI column (negative counts from the
+            last column).
+        fdrcutoff: rows with an FDR above this value are ignored.
+        delta_pi_cutoff: only delta pI values below this value are used.
+
+    Rows whose FDR or delta pI field is not a number are skipped.
+
+    Raises:
+        ValueError: when no row passes the filters, since the median of
+            nothing would otherwise propagate as NaN into every bin.
+    """
+    # Index 0 is a valid column (FDR in the first column), so test for the
+    # "not given" values explicitly instead of relying on truthiness.
+    use_fdr = fdrcol is not False and fdrcol is not None
+    delta_pis = []
+    with open(peptable) as fp:
+        next(fp)  # skip header
+        for line in fp:
+            line = line.strip('\n').split('\t')
+            if use_fdr:
+                try:
+                    fdr = float(line[fdrcol])
+                except ValueError:
+                    continue
+                if fdr > fdrcutoff:
+                    continue
+            try:
+                delta_pi = float(line[deltapicol])
+            except ValueError:
+                continue
+            # NOTE(review): the comparison is signed, so large negative
+            # delta pIs pass the cutoff; confirm this is intended.
+            if delta_pi < delta_pi_cutoff:
+                delta_pis.append(delta_pi)
+    if not delta_pis:
+        raise ValueError('No peptides passed the FDR/delta pI filters, '
+                         'cannot determine pI shift')
+    shift = median(delta_pis)
+    print('pI shift (median of delta pIs): {}'.format(shift))
+    return shift
+
+
+def get_bin_array(amount_fractions, fr_width, intercept, tolerance, pi_shift):
+    """Return the pI bin of every fraction as (fraction nr, left, right).
+
+    Fractions are numbered from 1 to `amount_fractions`. The centre of a bin
+    is `fr_width * fraction nr + intercept`; the bin extends half a fraction
+    width plus `tolerance` to either side and is moved by `-pi_shift`. The
+    edges of each bin are also printed.
+    """
+    bin_array = []
+    for frnr in range(1, amount_fractions + 1):
+        pi_center = fr_width * frnr + intercept
+        bin_left = pi_center - fr_width / 2 - tolerance - pi_shift
+        bin_right = pi_center + fr_width / 2 + tolerance - pi_shift
+        print('Bins in fraction', frnr, bin_left, bin_right)
+        bin_array.append((frnr, bin_left, bin_right))
+    return bin_array
+
+
+def write_fractions(pi_peptides_fn, amount_fractions, out_prefix,
+                    bin_array, locate_function, minlen, maxlen):
+    """Write the peptides of every pI fraction to target and decoy FASTA files.
+
+    For each fraction number from 1 to `amount_fractions` two files are
+    opened: '<out_prefix>_fr<nr>.fasta' and 'decoy_<out_prefix>_fr<nr>.fasta',
+    where <nr> is zero-padded to the number of digits of `amount_fractions`.
+
+    Each line of `pi_peptides_fn` must hold exactly three tab-separated
+    fields: accession, peptide sequence and pI. Peptides longer than
+    `maxlen` (when `maxlen` is truthy) or shorter than `minlen` are skipped.
+    `locate_function(pi, bin_array)` returns the fraction numbers a peptide
+    is written to; a peptide can end up in several fractions.
+    """
+    amountpad = len(str(amount_fractions))
+    with ExitStack() as stack:
+        # Per fraction: (buffer of pending FASTA entries, open file handle).
+        # The ExitStack closes all handles when the block is left.
+        target_out_fp = {frnr: ([], stack.enter_context(
+            open('{p}_fr{i:0{pad}}.fasta'.format(p=out_prefix, i=frnr,
+                                                 pad=amountpad), 'w')))
+            for frnr in range(1, amount_fractions + 1)}
+        decoy_out_fp = {frnr: ([], stack.enter_context(
+            open('decoy_{p}_fr{i:0{pad}}.fasta'.format(p=out_prefix, i=frnr,
+                                                       pad=amountpad), 'w')))
+            for frnr in range(1, amount_fractions + 1)}
+        input_fp = stack.enter_context(open(pi_peptides_fn))
+        pepcount = 0
+        for line in input_fp:
+            accs, pep, pi = line.strip().split("\t")
+            pi = float(pi)
+            if maxlen and len(pep) > maxlen:
+                continue 
+            elif len(pep) >= minlen:
+                pepcount += 1
+                # Decoy is the reversed peptide; a C-terminal K or R stays
+                # at the C-terminus and only the rest is reversed.
+                if pep[-1] in {'K', 'R'}:
+                    rev_pep = pep[::-1][1:] + pep[-1]
+                else:
+                    rev_pep = pep[::-1]
+                for i in locate_function(pi, bin_array):
+                    target_out_fp[i][0].append('>{}\n{}\n'.format(accs, pep))
+                    # write pseudoReversed decoy peptide at the same time
+                    decoy_out_fp[i][0].append('>decoy_{}\n{}\n'.format(
+                        accs, rev_pep))
+            if pepcount > 1000000:
+                # write in chunks to make it go faster
+                pepcount = 0
+                [fp.write(''.join(peps)) for peps, fp in
+                 target_out_fp.values()]
+                [fp.write(''.join(peps)) for peps, fp in decoy_out_fp.values()]
+                # Start new empty buffers, keeping the same file handles.
+                target_out_fp = {fr: ([], pep_fp[1])
+                                 for fr, pep_fp in target_out_fp.items()}
+                decoy_out_fp = {fr: ([], pep_fp[1])
+                                for fr, pep_fp in decoy_out_fp.items()}
+        # Flush whatever is still buffered after the last input line.
+        [fp.write(''.join(peps)) for peps, fp in target_out_fp.values()]
+        [fp.write(''.join(peps)) for peps, fp in decoy_out_fp.values()]
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pi_db_split.xml	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,83 @@
+<tool id="pi_db_split" name="Split peptide database" version="1.0">
+    <description>into pI separated fractions</description>
+    <requirements>
+        <requirement type="package">numpy</requirement>
+        <requirement type="package" version="3.6">python</requirement>
+    </requirements>
+    <command>
+        <![CDATA[
+        mkdir pi_fr_out && cd pi_fr_out &&
+        python '$__tool_directory__/pi_database_splitter.py' -i '$pipeptides' -p '$peptable'
+            --intercept $intercept --width $fr_width --tolerance $tolerance --amount $fr_amount --prefix pisplit
+            --deltacol $deltacol
+            ## picutoff is an optional parameter: when it is left empty the flag
+            ## must be omitted so the script falls back to its own default.
+            ## Compared as a string so that a cutoff of 0 is still passed on.
+            #if str($picutoff) != ''
+                --picutoff $picutoff
+            #end if
+            #if $fdrcol
+                --fdrcol $fdrcol --fdrcutoff $fdrcutoff
+            #end if
+            #if $reverse
+                --reverse
+            #end if
+            #if $maxlen
+                --maxlen $maxlen
+            #end if
+            --minlen $minlen
+        ]]>
+    </command>
+    
+    <inputs>
+      <param name="pipeptides" type="data" format="tabular" label="Target peptides with pI and accession" help="First col accession, second sequence, third pI" />
+      <param name="peptable" type="data" format="tabular" label="Peptide table to determine pI shift from" help="Should have delta pI as a column" />
+      <param name="fdrcol" type="integer" value="" optional="true" label="FDR (q-value) column in peptide table" />
+      <param name="fdrcutoff" type="float" value="0.0" help="Not used when no FDR column specified" label="FDR value cutoff for inclusion in shift determination" />
+      <param name="deltacol" type="integer" value="" label="Delta pI column in peptide table" />
+      <param name="picutoff" type="float" value="0.2" optional="true" label="delta-pI cutoff for inclusion in shift determination" />
+      <param name="minlen" type="integer" value="8" label="Minimum length of peptide to include in split DB" />
+      <param name="maxlen" type="integer" optional="true" value="" label="Max. length of peptide to include in split DB" />
+      <param name="intercept" type="float" value="" label="Intercept of pI strip" />
+      <param name="fr_width" type="float" value="" label="Fraction width" />
+      <param name="tolerance" type="float" value="" label="pI tolerance" />
+      <param name="fr_amount" type="integer" value="" label="Fraction amount" />
+      <param name="reverse" type="boolean" label="Strip is reversed (high-to-low pI)?" />
+    </inputs>
+    
+    <outputs>
+	<collection name="target_pi_db" type="list" label="target pI separated db">
+            <discover_datasets pattern="pisplit_(?P&lt;designation&gt;.+)\.fasta" ext="fasta" directory="pi_fr_out" />
+	</collection>
+	<collection name="decoy_pi_db" type="list" label="decoy pI separated db">
+            <discover_datasets pattern="decoy_pisplit_(?P&lt;designation&gt;.+)\.fasta" ext="fasta" directory="pi_fr_out" />
+	</collection>
+    </outputs>
+    <tests>
+	    <test>
+		    <param name="pipeptides" value="predicted_peptides_to_split.txt" />
+		    <param name="peptable" value="peptable_deltapi.txt" />
+		    <param name="fdrcol" value="3" />
+		    <param name="fdrcutoff" value="0.2" />
+		    <param name="deltacol" value="-1" />
+		    <param name="picutoff" value="10" />
+		    <param name="minlen" value="8" />
+		    <param name="intercept" value="5.6" />
+		    <param name="fr_width" value="1.3" />
+		    <param name="tolerance" value="0.1" />
+		    <param name="fr_amount" value="3" />
+		    <param name="reverse" value="false" />
+		    <output_collection name="target_pi_db" type="list">
+			    <element name="fr1" value="target_splitdb_fr1.fasta" />
+			    <element name="fr2" value="target_splitdb_fr2.fasta" />
+			    <element name="fr3" value="target_splitdb_fr3.fasta" />
+		    </output_collection>
+		    <output_collection name="decoy_pi_db" type="list">
+			    <element name="fr1" value="decoy_splitdb_fr1.fasta" />
+			    <element name="fr2" value="decoy_splitdb_fr2.fasta" />
+			    <element name="fr3" value="decoy_splitdb_fr3.fasta" />
+		    </output_collection>
+	    </test>
+    </tests>
+
+    <help>
+	    Creates a pI separated database collection from a pI-determined input
+	    file of peptide/protein mappings. Outputs one db for target, one
+	    for decoy.
+    </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/decoy_splitdb_fr1.fasta	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,4 @@
+>decoy_protein1
+TFSLFGCSIPNTNVEFSIKLFDVCLLLCNCLFSLIIMIYVII
+>decoy_protein2
+TFSLFGCSIPNTNVEFSI
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/decoy_splitdb_fr2.fasta	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,4 @@
+>decoy_protein1
+LNLSKPILSEST
+>decoy_protein3
+LFDVCLLLCNCLFSLIIMIYVIIK
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/decoy_splitdb_fr3.fasta	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,2 @@
+>decoy_protein2
+LFDVCLLLCNCLFSLIIMIYVIIKLWLFK
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/peptable.txt	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,5 @@
+Sequence	Filename	FDR	Fraction
+TSESLIPKSLNL	strip1_fr20	0.1	20
+FLWLKIIVYIM+15.994915IILSFLCNCLLLCVDFLK	strip1_fr50	0.3	50
+IIVYIMIILSFLCNCLLLCVDFLK	strip2_fr50	0	50
+IIVYIMIILSFLCNCLLLCVDFLKISFEVNTNPISCGFLSFT	strip2_fr43	0.01	43
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/peptable_deltapi.txt	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,5 @@
+Sequence	Filename	FDR	Fraction	Experimental pI	Predicted pI	Delta pI
+TSESLIPKSLNL	strip1_fr20	0.1	20	8.47	6.13955	2.330450000000001
+FLWLKIIVYIM+15.994915IILSFLCNCLLLCVDFLK	strip1_fr50	0.3	50	8.860000000000001	7.6171	1.2429000000000014
+IIVYIMIILSFLCNCLLLCVDFLK	strip2_fr50	0	50	8.11	5.99038	2.1196199999999994
+IIVYIMIILSFLCNCLLLCVDFLKISFEVNTNPISCGFLSFT	strip2_fr43	0.01	43	7.83	4.55361	3.27639
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/peptable_missed_ox.txt	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,5 @@
+Sequence	Filename	FDR	Fraction	Experimental pI	Predicted pI	Delta pI
+TSESLIPKSLNL	strip1_fr20	0.1	20	8.47	6.13955	2.330450000000001
+FLWLKIIVYIM+15.994915IILSFLCNCLLLCVDFLK	strip1_fr50	0.3	50	8.860000000000001	NA	NA
+IIVYIMIILSFLCNCLLLCVDFLK	strip2_fr50	0	50	8.11	5.99038	2.1196199999999994
+IIVYIMIILSFLCNCLLLCVDFLKISFEVNTNPISCGFLSFT	strip2_fr43	0.01	43	7.83	4.55361	3.27639
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predicted_peptides.txt	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,5 @@
+TSESLIPKSLNL	6.13955
+FLWLKIIVYIMIILSFLCNCLLLCVDFLK	7.6171
+IIVYIMIILSFLCNCLLLCVDFLK	5.99038
+IIVYIMIILSFLCNCLLLCVDFLKISFEVNTNPISCGFLSFT	4.55361
+ISFEVNTNPISCGFLSFT	4.08563	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predicted_peptides_to_split.txt	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,5 @@
+protein1	TSESLIPKSLNL	6.13955
+protein2	FLWLKIIVYIMIILSFLCNCLLLCVDFLK	7.6171
+protein3	IIVYIMIILSFLCNCLLLCVDFLK	5.99038
+protein1	IIVYIMIILSFLCNCLLLCVDFLKISFEVNTNPISCGFLSFT	4.55361
+protein2	ISFEVNTNPISCGFLSFT	4.08563	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/target_splitdb_fr1.fasta	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,4 @@
+>protein1
+IIVYIMIILSFLCNCLLLCVDFLKISFEVNTNPISCGFLSFT
+>protein2
+ISFEVNTNPISCGFLSFT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/target_splitdb_fr2.fasta	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,4 @@
+>protein1
+TSESLIPKSLNL
+>protein3
+IIVYIMIILSFLCNCLLLCVDFLK
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/target_splitdb_fr3.fasta	Mon May 22 05:08:23 2017 -0400
@@ -0,0 +1,2 @@
+>protein2
+FLWLKIIVYIMIILSFLCNCLLLCVDFLK