annotate pi_database_splitter.py @ 3:78afc81ab244 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit decb06dc90d7069d317968b979f649a04720b264
author galaxyp
date Thu, 14 Sep 2017 11:55:02 -0400
parents 8a30d6e5b97d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
2 import sys
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
3 import argparse
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
4 from numpy import median
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
5 from contextlib import ExitStack
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
6
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
7 from peptide_pi_annotator import get_col_by_pattern
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
8
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
9
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
10 def main():
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
11 if sys.argv[1:] == []:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
12 sys.argv.append('-h')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
13 args = parse_commandline()
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
14 locfun = {False: locatefraction,
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
15 True: reverse_locatefraction}[args.reverse]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
16 # Column nrs should start from 0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
17 # If negative, -1 is last item in list, etc
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
18 if args.fdrcol > 0:
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
19 fdrcol = args.fdrcol - 1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
20 elif args.fdrcol:
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
21 fdrcol = args.fdrcol
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
22 elif args.fdrcolpattern:
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
23 fdrcol = get_col_by_pattern(args.train_peptable, args.fdrcolpattern)
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
24 else:
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
25 fdrcol = False
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
26 if args.deltapicol > 0:
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
27 deltapicol = args.deltapicol - 1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
28 elif args.deltapicol:
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
29 deltapicol = args.deltapicol
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
30 elif args.deltapicolpattern:
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
31 deltapicol = get_col_by_pattern(args.train_peptable,
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
32 args.deltapicolpattern)
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
33 else:
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
34 deltapicol = False
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
35 pishift = get_pishift(args.train_peptable, fdrcol, deltapicol,
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
36 args.fdrcutoff, args.picutoff)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
37 binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept,
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
38 args.tolerance, pishift)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
39 write_fractions(args.pipeps, args.fr_amount, args.prefix,
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
40 binarray, locfun, args.minlen, args.maxlen)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
41
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
42
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
43 def locatefraction(pep_pi, bins):
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
44 index = []
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
45 for pibin in bins:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
46 if pep_pi > pibin[2]:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
47 continue
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
48 elif pep_pi >= pibin[1]:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
49 index.append(pibin[0])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
50 else:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
51 return index
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
52 return index
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
53
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
54
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
55 def reverse_locatefraction(pep_pi, bins):
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
56 index = []
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
57 for pibin in bins:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
58 if pep_pi < pibin[1]:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
59 continue
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
60 elif pep_pi < pibin[2]:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
61 index.append(pibin[0])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
62 else:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
63 return index
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
64 return index
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
65
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
66
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
67 def parse_commandline():
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
68 parser = argparse.ArgumentParser(
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
69 formatter_class=argparse.RawTextHelpFormatter)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
70 parser.add_argument('-p', dest='train_peptable', help='Peptide table with '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
71 'peptides, FDR, and fraction numbers. Used to '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
72 'calculate pI shift. Leave emtpy for no shift. '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
73 'Tab separated file.')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
74 parser.add_argument('--deltacol', dest='deltapicol', help='Delta pI column'
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
75 ' number in peptide table. First column is nr. 1. '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
76 'Negative number for counting from last col '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
77 '(-1 is last).', default=False, type=int)
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
78 parser.add_argument('--deltacolpattern', dest='deltapicolpattern',
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
79 help='Delta pI column header pattern in peptide '
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
80 'table.', default=False, type=str)
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
81 parser.add_argument('--picutoff', dest='picutoff',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
82 help='delta pI value to filter experimental peptides'
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
83 ' when calculating pi shift.', default=0.2, type=float)
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
84 parser.add_argument('--fdrcolpattern', dest='fdrcolpattern',
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
85 help='FDR column header pattern in peptide table.',
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
86 default=False, type=str)
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
87 parser.add_argument('--fdrcol', dest='fdrcol', help='FDR column number in '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
88 'peptide table. First column is nr. 1. Empty includes '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
89 'all peptides', default=False, type=int)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
90 parser.add_argument('--fdrcutoff', dest='fdrcutoff',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
91 help='FDR cutoff value to filter experimental peptides'
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
92 ' when calculating pi shift.', default=0, type=float)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
93 parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
94 'with accession, peptide seq, pI value')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
95 parser.add_argument('--prefix', dest='prefix', default='pisep',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
96 help='Prefix for target/decoy output files')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
97 parser.add_argument('--tolerance', dest='tolerance',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
98 help='Strip fraction tolerance pi tolerance represents'
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
99 ' 2.5/97.5 percentile', type=float)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
100 parser.add_argument('--amount', dest='fr_amount',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
101 help='Strip fraction amount', type=int)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
102 parser.add_argument('--reverse', dest='reverse', help='Strip is reversed',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
103 action='store_const', const=True, default=False)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
104 parser.add_argument('--intercept', dest='intercept',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
105 help='pI Intercept of strip', type=float)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
106 parser.add_argument('--width', dest='fr_width',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
107 help='Strip fraction width in pI', type=float)
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
108 parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length',
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
109 type=int)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
110 parser.add_argument('--maxlen', dest='maxlen', help='Maximal peptide length',
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
111 type=int, default=False)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
112 return parser.parse_args(sys.argv[1:])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
113
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
114
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
115 def get_pishift(peptable, fdrcol, deltapicol, fdrcutoff, delta_pi_cutoff):
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
116 delta_pis = []
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
117 with open(peptable) as fp:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
118 next(fp) # skip header
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
119 for line in fp:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
120 line = line.strip('\n').split('\t')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
121 if fdrcol:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
122 try:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
123 fdr = float(line[fdrcol])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
124 except ValueError:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
125 continue
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
126 if fdr > fdrcutoff:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
127 continue
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
128 try:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
129 delta_pi = float(line[deltapicol])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
130 except ValueError:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
131 continue
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
132 if delta_pi < delta_pi_cutoff:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
133 delta_pis.append(delta_pi)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
134 shift = median(delta_pis)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
135 print('pI shift (median of delta pIs): {}'.format(shift))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
136 return shift
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
137
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
138
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
139 def get_bin_array(amount_fractions, fr_width, intercept, tolerance, pi_shift):
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
140 frnr = 1
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
141 bin_array = []
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
142 while frnr <= amount_fractions:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
143 pi_center = fr_width * frnr + intercept
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
144 bin_left = pi_center - fr_width / 2 - tolerance - pi_shift
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
145 bin_right = pi_center + fr_width / 2 + tolerance - pi_shift
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
146 print('Bins in fraction', frnr, bin_left, bin_right)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
147 bin_array.append((frnr, bin_left, bin_right))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
148 frnr += 1
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
149 return bin_array
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
150
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
151
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
152 def write_fractions(pi_peptides_fn, amount_fractions, out_prefix,
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
153 bin_array, locate_function, minlen, maxlen):
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
154 amountpad = len(str(amount_fractions))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
155 with ExitStack() as stack:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
156 target_out_fp = {frnr: ([], stack.enter_context(
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
157 open('{p}_fr{i:0{pad}}.fasta'.format(p=out_prefix, i=frnr,
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
158 pad=amountpad), 'w')))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
159 for frnr in range(1, amount_fractions + 1)}
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
160 decoy_out_fp = {frnr: ([], stack.enter_context(
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
161 open('decoy_{p}_fr{i:0{pad}}.fasta'.format(p=out_prefix, i=frnr,
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
162 pad=amountpad), 'w')))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
163 for frnr in range(1, amount_fractions + 1)}
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
164 input_fp = stack.enter_context(open(pi_peptides_fn))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
165 pepcount = 0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
166 for line in input_fp:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
167 accs, pep, pi = line.strip().split("\t")
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
168 pi = float(pi)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
169 if maxlen and len(pep) > maxlen:
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
170 continue
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
171 elif len(pep) >= minlen:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
172 pepcount += 1
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
173 if pep[-1] in {'K', 'R'}:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
174 rev_pep = pep[::-1][1:] + pep[-1]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
175 else:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
176 rev_pep = pep[::-1]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
177 for i in locate_function(pi, bin_array):
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
178 target_out_fp[i][0].append('>{}\n{}\n'.format(accs, pep))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
179 # write pseudoReversed decoy peptide at the same time
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
180 decoy_out_fp[i][0].append('>decoy_{}\n{}\n'.format(
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
181 accs, rev_pep))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
182 if pepcount > 1000000:
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
183 # write in chunks to make it go faster
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
184 pepcount = 0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
185 [fp.write(''.join(peps)) for peps, fp in
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
186 target_out_fp.values()]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
187 [fp.write(''.join(peps)) for peps, fp in decoy_out_fp.values()]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
188 target_out_fp = {fr: ([], pep_fp[1])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
189 for fr, pep_fp in target_out_fp.items()}
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
190 decoy_out_fp = {fr: ([], pep_fp[1])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
191 for fr, pep_fp in decoy_out_fp.items()}
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
192 [fp.write(''.join(peps)) for peps, fp in target_out_fp.values()]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
193 [fp.write(''.join(peps)) for peps, fp in decoy_out_fp.values()]
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
194
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
195
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
196 if __name__ == '__main__':
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
197 main()