Mercurial > repos > proteore > filter_keywords_values
annotate filter_kw_val.py @ 1:d29e469b6b20 draft
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author | proteore |
---|---|
date | Fri, 16 Feb 2018 03:27:43 -0500 |
parents | 6a45ccfc0e4c |
children | 1e9911190142 |
rev | line source |
---|---|
0
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
1 import argparse |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
2 import re |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
3 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
4 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def options():
    """
    Parse the command-line options and run the filters.

    Builds the argument parser, parses the process arguments and passes
    the resulting namespace to filters(). Returns nothing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", required=True,
                        help="Input file, written as PATH,HEADER where "
                             "HEADER is 'true' when the first line of the "
                             "file is a header line")
    # NOTE(review): filters() in this file never reads args.match; the
    # exact/partial switch is given per --kw / --kw_file occurrence instead.
    parser.add_argument("-m", "--match", help="Exact match")
    # The three filter options may be repeated; every occurrence is stored
    # as one list of values (action="append" combined with nargs="+").
    parser.add_argument("--kw", nargs="+", action="append",
                        help="Keyword filter: KEYWORDS COLUMN EXACT, where "
                             "KEYWORDS are separated by ';', COLUMN looks "
                             "like c1 and EXACT is 'false' for partial "
                             "matching")
    parser.add_argument("--kw_file", nargs="+", action="append",
                        help="Keyword filter read from a file: "
                             "FILE COLUMN EXACT, with one keyword per line "
                             "in FILE")
    parser.add_argument("--value", nargs="+", action="append",
                        help="Value filter: NUMBER COLUMN OPERATOR, where "
                             "OPERATOR is one of <, <=, >, >= and anything "
                             "else means equality")
    parser.add_argument("-o", "--output", default="output.txt")
    parser.add_argument("--trash_file", default="trash_MQfilter.txt")

    args = parser.parse_args()

    filters(args)
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
21 |
1
d29e469b6b20
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
0
diff
changeset
|
# Example:
# python filter_kw_val.py -i "/projet/galaxydev/galaxy/tools/proteore_uc1/proteinGroups_Maud.txt,true"
# --kw "A2A288;A8K2U0" c1 true --value 20 c5 "=" -o "test-data/output_MQfilter.txt"
d29e469b6b20
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
0
diff
changeset
|
24 |
0
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
25 |
1
d29e469b6b20
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
0
diff
changeset
|
def isnumber(number_format, n):
    """
    Check whether the string n is written as an integer or a decimal number.

    number_format: "int" or "float"; any other value never matches.
    n: the string to test.

    "int" accepts an optional minus sign followed by digits that do not
    start with zero, so "0" is rejected (the callers in this file use it
    to validate 1-based column numbers).
    "float" accepts an optional minus sign, one or more digits and an
    optional fractional part, e.g. "2", "0.5" or "-12.75".

    Returns True when n matches the requested format, False otherwise.
    """
    patterns = {
        "int": r"^-?[1-9][0-9]*$",
        # The integer part may be a single digit or zero; the previous
        # pattern required at least two digits and a non-zero first one.
        "float": r"^-?[0-9]+(\.[0-9]+)?$",
    }
    pattern = patterns.get(number_format)
    if pattern is None:
        return False
    return re.match(pattern, n) is not None
0
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
41 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def filters(args):
    """
    Filter the document.

    Reads the input table, applies the keyword filters (--kw, then
    --kw_file) and the value filters (--value) in that order, then writes
    the kept lines to args.output and the removed lines to args.trash_file.

    args: namespace produced by the argument parser. args.input is
    "PATH,HEADER"; when the ",HEADER" part is missing the file is treated
    as having no header line.

    Raises ValueError when a --value filter is not given a number.
    """
    input_parts = args.input.split(",")
    MQfilename = input_parts[0]
    header = input_parts[1] if len(input_parts) > 1 else "false"

    # results is always [kept lines, removed lines]; the removed list
    # stays None until a first filter has run.
    results = [readMQ(MQfilename), None]

    for k in args.kw or []:
        results = filter_keyword(results[0], header, results[1],
                                 k[0], k[1], k[2])

    for kf in args.kw_file or []:
        ids = readOption(kf[0])
        results = filter_keyword(results[0], header, results[1],
                                 ids, kf[1], kf[2])

    for v in args.value or []:
        if not isnumber("float", v[0]):
            raise ValueError("Please enter a number in filter by value")
        results = filter_value(results[0], header, results[1],
                               v[0], v[1], v[2])

    kept_lines = results[0]
    # No filter requested: nothing was removed, write an empty trash file.
    removed_lines = results[1] or []

    # Lines may or may not still carry their trailing newline depending on
    # which filter touched them; strip it so the join never produces
    # blank rows.
    with open(args.output, "w") as output:
        output.write("\n".join(line.rstrip("\n") for line in kept_lines))

    with open(args.trash_file, "w") as trash:
        trash.write("\n".join(line.rstrip("\n") for line in removed_lines))
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
76 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def readOption(filename):
    """
    Read a keyword file and return its content as one ';'-separated string.

    filename: path of a text file holding one keyword per line.

    Every line break becomes a ';', so empty lines show up as empty
    entries in the returned string.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, "r") as f:
        file_content = f.read()
    # Replacing each "\n" is the same as splitting on "\n" and joining
    # the pieces with ";".
    return file_content.replace("\n", ";")
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
86 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def readMQ(MQfilename):
    """
    Read the input file and return its non-blank lines.

    MQfilename: path of the text file to read.

    Returns the lines in file order, each one still carrying its trailing
    newline when it had one. Lines that are empty or contain only
    whitespace are dropped.
    """
    with open(MQfilename, "r") as mqfile:
        # Build a new list instead of removing from the list being
        # iterated: removing in place skips the element that follows each
        # removal, so runs of consecutive blank lines were not fully
        # cleaned.
        return [line for line in mqfile if line.strip()]
1
d29e469b6b20
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
0
diff
changeset
|
94 |
0
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def filter_keyword(MQfile, header, filtered_lines, ids, ncol, match):
    """
    Remove the rows whose column ncol contains one of the given keywords.

    MQfile: list of tab-separated lines; the first one is the header when
        header == "true".
    header: "true" when MQfile starts with a header line.
    filtered_lines: rows already removed by earlier filters, or None /
        empty when this is the first filter.
    ids: keywords separated by ";"; matching is case-insensitive.
    ncol: column to test, 1-based, written like "c3" or "3".
    match: "false" for substring matching, any other value for exact
        matching.

    The tested cell may hold several ';'-separated values (double quotes
    are ignored). A row is removed when any of them matches. In every
    row, kept or removed, the cell is reduced to its first value.

    Returns (kept_lines, removed_lines). Lines are returned without their
    trailing newline. The header line, when present, stays first in
    kept_lines and is put first in removed_lines when that list starts
    empty. The input lists are not modified.

    Raises ValueError when ncol is not a valid column specification.
    """
    if ncol and isnumber("int", ncol.replace("c", "")):
        id_index = int(ncol.replace("c", "")) - 1
    else:
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")

    # Normalise the keywords: upper-case, trimmed, blanks dropped. An
    # empty keyword must never survive, because "" is a substring of
    # every value and would remove every row in substring mode.
    keywords = [keyword.strip() for keyword in ids.upper().split(";")]
    keywords = [keyword for keyword in keywords if keyword]
    keyword_set = set(keywords)
    exact = match != "false"

    rows = list(MQfile)
    has_header = header == "true" and len(rows) > 0
    if has_header:
        header_line = rows[0].replace("\n", "")
        content = rows[1:]
    else:
        header_line = ""
        content = rows

    kept = [header_line] if has_header else []
    # Keep what earlier filters already removed; start a fresh list
    # (headed by the header line) only for the first filter.
    removed = list(filtered_lines) if filtered_lines else []
    if not removed and has_header:
        removed.append(header_line)

    for raw_line in content:
        line = raw_line.replace("\n", "")
        cells = line.split("\t")
        try:
            cell = cells[id_index]
        except IndexError:
            # The row has no such column, so no keyword can match it.
            kept.append(line)
            continue

        id_inline = cell.replace('"', "").split(";")
        # Take only the first ID. Rewriting the cell by position leaves
        # the other columns untouched even when they contain the same
        # text.
        cells[id_index] = id_inline[0]
        one_id_line = "\t".join(cells)

        if exact:
            found = any(pid.upper() in keyword_set for pid in id_inline)
        else:
            found = any(keyword in pid.upper()
                        for pid in id_inline for keyword in keywords)

        if found:
            removed.append(one_id_line)
        else:
            kept.append(one_id_line)

    return kept, removed
1
d29e469b6b20
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
0
diff
changeset
|
144 |
0
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def filter_value(MQfile, header, filtered_prots, filter_value, ncol, opt):
    """
    Remove the rows whose numeric column ncol fails a comparison.

    MQfile: list of tab-separated lines; the first one is the header when
        header == "true".
    header: "true" when MQfile starts with a header line.
    filtered_prots: rows already removed by earlier filters, or None /
        empty when this is the first filter.
    filter_value: the number to compare with (string or number).
    ncol: column to test, 1-based, written like "c3" or "3".
    opt: "<", "<=", ">" or ">=" keeps the rows whose value satisfies that
        comparison with filter_value; any other value keeps the rows
        whose value equals filter_value.

    Rows whose cell is missing, is not a number or is NaN are kept.
    Negative numbers and scientific notation are compared like any other
    number.

    Returns (kept_lines, removed_lines). Lines are returned without their
    trailing newline. The header line, when present, stays first in
    kept_lines and is put first in removed_lines when that list starts
    empty. The input lists are not modified.

    Raises ValueError when ncol is not a valid column specification or
    when filter_value cannot be converted to a float.
    """
    import math

    if ncol and isnumber("int", ncol.replace("c", "")):
        index = int(ncol.replace("c", "")) - 1
    else:
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")

    # Convert once, outside the loop.
    threshold = float(filter_value)

    # For each operator, the test that sends a row to the removed list:
    # the opposite of the comparison the user asked to keep.
    rejects = {
        "<": lambda value: value >= threshold,
        "<=": lambda value: value > threshold,
        ">": lambda value: value <= threshold,
        ">=": lambda value: value < threshold,
    }
    reject = rejects.get(opt, lambda value: value != threshold)

    rows = list(MQfile)
    has_header = header == "true" and len(rows) > 0
    if has_header:
        header_line = rows[0].replace("\n", "")
        content = rows[1:]
    else:
        header_line = ""
        content = rows

    kept = [header_line] if has_header else []
    # Keep what earlier filters already removed; start a fresh list
    # (headed by the header line) only for the first filter.
    removed = list(filtered_prots) if filtered_prots else []
    if not removed and has_header:
        removed.append(header_line)

    for raw_line in content:
        line = raw_line.replace("\n", "")
        cells = line.split("\t")
        try:
            value = float(cells[index].replace('"', ""))
        except (IndexError, ValueError):
            # Missing or non-numeric cell: the filter does not apply.
            kept.append(line)
            continue

        if math.isnan(value):
            # NaN cannot be compared meaningfully; leave the row alone.
            kept.append(line)
        elif reject(value):
            removed.append(line)
        else:
            kept.append(line)

    return kept, removed
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
191 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
# Script entry point: parse the command line and run the filters only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    options()