changeset 1:8a30d6e5b97d draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
author galaxyp
date Mon, 24 Jul 2017 05:25:22 -0400
parents 34c5c95740a1
children 77ddaee887a8
files README.rst __pycache__/peptide_pi_annotator.cpython-36.pyc delta_pi_calc.xml peptide_pi_annotator.py pi_database_splitter.py pi_db_split.xml
diffstat 6 files changed, 181 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Mon May 22 05:08:23 2017 -0400
+++ b/README.rst	Mon Jul 24 05:25:22 2017 -0400
@@ -1,4 +1,4 @@
-GalaxyP - Percolator
+GalaxyP - HiRIEF tools
 =======================
 
 - Home: <https://github.com/galaxyproteomics/tools-galaxyp/>
Binary file __pycache__/peptide_pi_annotator.cpython-36.pyc has changed
--- a/delta_pi_calc.xml	Mon May 22 05:08:23 2017 -0400
+++ b/delta_pi_calc.xml	Mon Jul 24 05:25:22 2017 -0400
@@ -1,11 +1,25 @@
-<tool id="calc_delta_pi" name="Add delta pI" version="1.0">
+<tool id="calc_delta_pi" name="Add delta pI" version="1.1">
     <requirements>
         <requirement type="package" version="3.6">python</requirement>
     </requirements>
     <description>to peptide table</description>
     <command>
-	    python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable'
-            --stripcol $stripcol --pepcol $pepcol --fraccol $fraccol --out '$output'
+	    python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable' --out '$output'
+	    #if $stripcol
+	        --stripcol $stripcol
+	    #else if $stripcolpattern
+	        --stripcolpattern '$stripcolpattern'
+	    #end if
+	    #if $pepcol
+                --pepcol $pepcol 
+	    #else if $pepcolpattern
+                --pepcolpattern '$pepcolpattern'
+	    #end if
+	    #if $fraccol
+                --fraccol $fraccol
+	    #else if $fraccolpattern
+                --fraccolpattern '$fraccolpattern'
+	    #end if
 	    
 	    --strippatterns
 	    #for $strip in $strips
@@ -42,9 +56,12 @@
 	      </sanitizer>
           </param>
       </repeat>
-      <param name="pepcol" type="integer" value="" label="Peptide sequence column in peptide table" />
-      <param name="fraccol" type="integer" value="" label="Fraction number column in peptide table" />
-      <param name="stripcol" type="integer" value="" label="Strip pattern column in peptide table" help="E.g. column with filename to derive strip name from"/>
+      <param name="pepcolpattern" type="text" value="" optional="true" label="Peptide sequence pattern for column header field in peptide table." />
+      <param name="pepcol" type="integer" value="" optional="true" label="Peptide sequence column number in peptide table. First column is 1. Overrides column pattern." />
+      <param name="fraccolpattern" type="text" value="" optional="true" label="Fraction number column header pattern in peptide table." />
+      <param name="fraccol" type="integer" optional="true" value="" label="Fraction number column number in peptide table. First column is 1. Overrides column pattern." />
+      <param name="stripcolpattern" type="text" optional="true" value="" label="Strip pattern header column pattern in peptide table" help="E.g. column with filename to derive strip name from"/>
+      <param name="stripcol" type="integer" optional="true" value="" label="Strip pattern column number in peptide table" help="E.g. column with filename to derive strip name from. First column is 1. Overrides column pattern"/>
       <repeat name="strips" title="pI separation strip data">
 	      <param name="pattern" type="text" label="Strip regex detection pattern" help="Regex (see help below) that identifies the pI strip from the column in the above field.">
                   <sanitizer>
@@ -83,6 +100,27 @@
             </repeat>
             <output name="output" value="peptable_deltapi.txt" />
         </test>
+        <test> 
+            <param name="trainingpi" value="predicted_peptides.txt" />
+            <param name="peptable" value="peptable.txt" />
+            <repeat name="ignoremods">
+                <param name="regex" value="*" />
+            </repeat>
+            <param name="pepcolpattern" value="Sequence" />
+            <param name="fraccolpattern" value="Fraction" />
+            <param name="stripcolpattern" value="Filename" />
+            <repeat name="strips">
+                <param name="pattern" value="strip1" />
+                <param name="intercept" value="8.21" />
+                <param name="fr_width" value="0.013" />
+            </repeat>
+            <repeat name="strips">
+                <param name="pattern" value="strip2" />
+                <param name="intercept" value="6.11" />
+                <param name="fr_width" value="0.04" />
+            </repeat>
+            <output name="output" value="peptable_deltapi.txt" />
+        </test>
         <test>
             <param name="trainingpi" value="predicted_peptides.txt" />
             <param name="peptable" value="peptable.txt" />
--- a/peptide_pi_annotator.py	Mon May 22 05:08:23 2017 -0400
+++ b/peptide_pi_annotator.py	Mon Jul 24 05:25:22 2017 -0400
@@ -10,13 +10,34 @@
         sys.argv.append('-h')
     args = parse_commandline()
     strips = {}
+    if args.frac_col > 0:
+        frac_col = args.frac_col - 1
+    elif args.frac_col:
+        frac_col = args.frac_col
+    elif args.frac_colpattern:
+        frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern)
+    else:
+        raise RuntimeError('Must define fraction column')
+    if args.stripcol > 0:
+        stripcol = args.stripcol - 1
+    elif args.stripcol:
+        stripcol = args.stripcol
+    elif args.stripcolpattern:
+        stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern)
+    else:
+        raise RuntimeError('Must define strip column')
+    if args.pepcol:
+        pepcol = args.pepcol - 1
+    elif args.pepcolpattern:
+        pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern)
+    else:
+        raise RuntimeError('Must define peptide sequence column')
     for i, strip in enumerate(args.pipatterns):
         strips[strip] = {'intercept': args.intercepts[i],
                          'fr_width': args.fr_width[i]}
     with open(args.outpeptable, 'w') as fp:
-        for outline in annotate_peptable(args.pipeps, args.peptable,
-                                         args.pepcol, args.frac_col,
-                                         args.stripcol, strips,
+        for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
+                                         frac_col, stripcol, strips,
                                          args.ignoremods):
             fp.write('\t'.join([str(x) for x in outline]))
             fp.write('\n')
@@ -29,10 +50,16 @@
     return False
 
 
+def get_col_by_pattern(peptable, colpattern):
+    with open(peptable) as fp:
+        header = next(fp).strip('\n').split('\t')
+    for ix, field in enumerate(header):
+        if colpattern in field:
+            return ix
+
+
 def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                       strips, ignoremods):
-    if frac_col > 0:
-        frac_col -= 1
     predicted_peps = {}
     with open(predicted_peps_fn) as fp:
         for line in fp:
@@ -45,11 +72,11 @@
         for line in fp:
             line = line.strip('\n').split('\t')
             strip = strips[get_first_matching_pattern(strips.keys(),
-                                                      line[stripcol - 1])]
+                                                      line[stripcol])]
             exp_pi = (strip['fr_width'] * int(line[frac_col]) +
                       strip['intercept'])
 
-            sequence = line[seqcol - 1]
+            sequence = line[seqcol]
             for weight in ignoremods:
                 if weight == '*':
                     regex = '[+-]\d*\.\d*'
@@ -81,15 +108,24 @@
                         'pI shift.')
     parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
                         'with peptide seq, pI value')
+    parser.add_argument('--pepcolpattern', dest='pepcolpattern',
+                        help='Peptide sequence column pattern in peptide '
+                        'table.', default=False, type=str)
     parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
                         'column number in peptide table. First column is 1.',
                         default=False, type=int)
+    parser.add_argument('--fraccolpattern', dest='frac_colpattern',
+                        help='Fraction number column pattern in peptide '
+                        'table.', default=False, type=str)
     parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
                         'column number in peptide table. First column is 1.',
-                        type=int)
+                        default=False, type=int)
     parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
                         'identify modification weights to be ignored.',
                         default=[], nargs='+', type=str)
+    parser.add_argument('--stripcolpattern', dest='stripcolpattern',
+                        help='Strip name column pattern in peptide '
+                        'table.', type=str, default=False)
     parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
                         'column number in peptide table. Will be used to '
                         'detect strips if multiple are present using pattern '
--- a/pi_database_splitter.py	Mon May 22 05:08:23 2017 -0400
+++ b/pi_database_splitter.py	Mon Jul 24 05:25:22 2017 -0400
@@ -4,6 +4,8 @@
 from numpy import median
 from contextlib import ExitStack
 
+from peptide_pi_annotator import get_col_by_pattern
+
 
 def main():
     if sys.argv[1:] == []:
@@ -14,10 +16,23 @@
     # Column nrs should start from 0
     # If negative, -1 is last item in list, etc
     if args.fdrcol > 0:
-        args.fdrcol -= 1
+        fdrcol = args.fdrcol - 1
+    elif args.fdrcol:
+        fdrcol = args.fdrcol
+    elif args.fdrcolpattern:
+        fdrcol = get_col_by_pattern(args.train_peptable, args.fdrcolpattern)
+    else:
+        fdrcol = False
     if args.deltapicol > 0:
-        args.deltapicol -= 1
-    pishift = get_pishift(args.train_peptable, args.fdrcol, args.deltapicol,
+        deltapicol = args.deltapicol - 1
+    elif args.deltapicol:
+        deltapicol = args.deltapicol
+    elif args.deltapicolpattern:
+        deltapicol = get_col_by_pattern(args.train_peptable,
+                                        args.deltapicolpattern)
+    else:
+        deltapicol = False
+    pishift = get_pishift(args.train_peptable, fdrcol, deltapicol,
                           args.fdrcutoff, args.picutoff)
     binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept,
                              args.tolerance, pishift)
@@ -60,9 +75,15 @@
                         ' number in peptide table. First column is nr. 1. '
                         'Negative number for counting from last col '
                         '(-1 is last).', default=False, type=int)
+    parser.add_argument('--deltacolpattern', dest='deltapicolpattern',
+                        help='Delta pI column header pattern in peptide '
+                        'table.', default=False, type=str)
     parser.add_argument('--picutoff', dest='picutoff',
                         help='delta pI value to filter experimental peptides'
                         ' when calculating pi shift.', default=0.2, type=float)
+    parser.add_argument('--fdrcolpattern', dest='fdrcolpattern',
+                        help='FDR column header pattern in peptide table.',
+                        default=False, type=str)
     parser.add_argument('--fdrcol', dest='fdrcol', help='FDR column number in '
                         'peptide table. First column is nr. 1. Empty includes '
                         'all peptides', default=False, type=int)
@@ -84,7 +105,7 @@
                         help='pI Intercept of strip', type=float)
     parser.add_argument('--width', dest='fr_width',
                         help='Strip fraction width in pI', type=float)
-    parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length', 
+    parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length',
                         type=int)
     parser.add_argument('--maxlen', dest='maxlen', help='Maximal peptide length',
                         type=int, default=False)
@@ -146,7 +167,7 @@
             accs, pep, pi = line.strip().split("\t")
             pi = float(pi)
             if maxlen and len(pep) > maxlen:
-                continue 
+                continue
             elif len(pep) >= minlen:
                 pepcount += 1
                 if pep[-1] in {'K', 'R'}:
--- a/pi_db_split.xml	Mon May 22 05:08:23 2017 -0400
+++ b/pi_db_split.xml	Mon Jul 24 05:25:22 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="pi_db_split" name="Split peptide database" version="1.0">
+<tool id="pi_db_split" name="Split peptide database" version="1.1">
     <description>into pI separated fractions</description>
     <requirements>
         <requirement type="package">numpy</requirement>
@@ -8,14 +8,26 @@
 	    <![CDATA[
 	    mkdir pi_fr_out && cd pi_fr_out &&
 	    python '$__tool_directory__/pi_database_splitter.py' -i '$pipeptides' -p '$peptable'
-            --intercept $intercept --width $fr_width --tolerance $tolerance --amount $fr_amount --prefix pisplit
-	    --deltacol $deltacol --picutoff $picutoff 
+	    #for $strip in $strips
+	        #if not $strip.peptable_pattern or str($strip.peptable_pattern) in $peptable.element_identifier
+                    --intercept $strip.intercept --width $strip.fr_width --tolerance $strip.tolerance --amount $strip.fr_amount --prefix pisplit --picutoff $strip.picutoff 
+                    #if $strip.reverse
+                        --reverse
+                    #end if
+	            #break
+                #end if 
+            #end for
+
+	    #if $deltacol
+	        --deltacol $deltacol 
+            #else if $deltacolpattern
+		--deltacolpattern '$deltacolpattern'
+            #end if
 	    #if $fdrcol
 	        --fdrcol $fdrcol --fdrcutoff $fdrcutoff 
+            #else if $fdrcolpattern
+                --fdrcolpattern '$fdrcolpattern' --fdrcutoff $fdrcutoff 
 	    #end if
-            #if $reverse
-                --reverse
-            #end if
             #if $maxlen
                 --maxlen $maxlen
             #end if
@@ -26,17 +38,22 @@
     <inputs>
       <param name="pipeptides" type="data" format="tabular" label="Target peptides with pI and accession" help="First col accession, second sequence, third pI" />
       <param name="peptable" type="data" format="tabular" label="Peptide table to determine pI shift from" help="Should have delta pI as a column" />
-      <param name="fdrcol" type="integer" value="" optional="true" label="FDR (q-value) column in peptide table" />
+      <param name="fdrcolpattern" type="text" optional="true" label="FDR (q-value) column pattern in peptide table" />
+      <param name="fdrcol" type="integer" value="" optional="true" label="FDR (q-value) column number in peptide table" help="Overrides column pattern if filled. First column is 1" />
       <param name="fdrcutoff" type="float" value="0.0" help="Not used when no FDR column specified" label="FDR value cutoff for inclusion in shift determination" />
-      <param name="deltacol" type="integer" value="" label="Delta pI column in peptide table" />
-      <param name="picutoff" type="float" value="0.2" optional="true" label="delta-pI cutoff for inclusion in shift determination" />
+      <param name="deltacolpattern" type="text" value="" label="Delta pI column pattern in peptide table" />
+      <param name="deltacol" type="integer" optional="true" value="" label="Delta pI column number in peptide table" help="Overrides column pattern if filled. First column is 1"/>
       <param name="minlen" type="integer" value="8" label="Minimum length of peptide to include in split DB" />
       <param name="maxlen" type="integer" optional="true" value="" label="Max. length of peptide to include in split DB" />
-      <param name="intercept" type="float" value="" label="Intercept of pI strip" />
-      <param name="fr_width" type="float" value="" label="Fraction width" />
-      <param name="tolerance" type="float" value="" label="pI tolerance" />
-      <param name="fr_amount" type="integer" value="" label="Fraction amount" />
-      <param name="reverse" type="boolean" label="Strip is reversed (high-to-low pI)?" />
+      <repeat name="strips" title="pI separation strip data">
+          <param name="peptable_pattern" type="text" label="Pattern to find correct peptide table for a strip, for when multiple peptide tables have different strips" help="Will match against peptide table's name. Leave blank for single peptide table or when using same strip in all tables" />
+          <param name="intercept" type="float" value="" label="Intercept of pI strip" />
+          <param name="fr_width" type="float" value="" label="Fraction width" />
+          <param name="tolerance" type="float" value="" label="pI tolerance" />
+          <param name="fr_amount" type="integer" value="" label="Fraction amount" />
+          <param name="reverse" type="boolean" label="Strip is reversed (high-to-low pI)?" />
+          <param name="picutoff" type="float" value="0.2" optional="true" label="delta-pI cutoff for inclusion in shift determination" />
+      </repeat>
     </inputs>
     
     <outputs>
@@ -54,13 +71,42 @@
 		    <param name="fdrcol" value="3" />
 		    <param name="fdrcutoff" value="0.2" />
 		    <param name="deltacol" value="-1" />
-		    <param name="picutoff" value="10" />
 		    <param name="minlen" value="8" />
-		    <param name="intercept" value="5.6" />
-		    <param name="fr_width" value="1.3" />
-		    <param name="tolerance" value="0.1" />
-		    <param name="fr_amount" value="3" />
-		    <param name="reverse" value="false" />
+                    <repeat name="strips">
+		        <param name="peptable_pattern" value="deltapi" />
+		        <param name="intercept" value="5.6" />
+		        <param name="fr_width" value="1.3" />
+		        <param name="tolerance" value="0.1" />
+		        <param name="fr_amount" value="3" />
+		        <param name="reverse" value="false" />
+		        <param name="picutoff" value="10" />
+                    </repeat>
+		    <output_collection name="target_pi_db" type="list">
+			    <element name="fr1" value="target_splitdb_fr1.fasta" />
+			    <element name="fr2" value="target_splitdb_fr2.fasta" />
+			    <element name="fr3" value="target_splitdb_fr3.fasta" />
+		    </output_collection>
+		    <output_collection name="decoy_pi_db" type="list">
+			    <element name="fr1" value="decoy_splitdb_fr1.fasta" />
+			    <element name="fr2" value="decoy_splitdb_fr2.fasta" />
+			    <element name="fr3" value="decoy_splitdb_fr3.fasta" />
+		    </output_collection>
+	    </test>
+	    <test>
+		    <param name="pipeptides" value="predicted_peptides_to_split.txt" />
+		    <param name="peptable" value="peptable_deltapi.txt" />
+		    <param name="fdrcolpattern" value="FDR" />
+		    <param name="fdrcutoff" value="0.2" />
+		    <param name="deltacolpattern" value="Delta" />
+		    <param name="minlen" value="8" />
+                    <repeat name="strips">
+		        <param name="intercept" value="5.6" />
+		        <param name="fr_width" value="1.3" />
+		        <param name="tolerance" value="0.1" />
+		        <param name="fr_amount" value="3" />
+		        <param name="reverse" value="false" />
+		        <param name="picutoff" value="10" />
+                    </repeat>
 		    <output_collection name="target_pi_db" type="list">
 			    <element name="fr1" value="target_splitdb_fr1.fasta" />
 			    <element name="fr2" value="target_splitdb_fr2.fasta" />