Mercurial > repos > proteore > filter_keywords_values
annotate filter_kw_val.py @ 0:6a45ccfc0e4c draft
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
author | proteore |
---|---|
date | Sun, 26 Nov 2017 18:36:43 -0500 |
parents | |
children | d29e469b6b20 |
rev | line source |
---|---|
0
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
1 import argparse |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
2 import re |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
3 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
4 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def options():
    """Parse the command-line arguments and run the filters.

    Every ``--kw``, ``--kw_file`` and ``--value`` option may be repeated; each
    occurrence collects its own list of values, and ``filters`` reads the
    first three values of every occurrence.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True,
        help="Input file path followed by ',true' or ',false' to tell "
             "whether the first line is a header (e.g. 'table.txt,true')")
    # NOTE(review): args.match is parsed but not read anywhere in this file.
    parser.add_argument("-m", "--match", help="Exact match")
    parser.add_argument(
        "--kw", nargs="+", action="append",
        help="Keyword filter: colon-separated keywords, column (e.g. c1), "
             "then 'false' for partial match or any other value for exact "
             "match")
    parser.add_argument(
        "--kw_file", nargs="+", action="append",
        help="Keyword filter read from a file: path of a file with one "
             "keyword per line, column (e.g. c1), then 'false' for partial "
             "match or any other value for exact match")
    parser.add_argument(
        "--value", nargs="+", action="append",
        help="Value filter: number, column (e.g. c1), then one of the "
             "operators <, <=, >, >= (anything else means equality)")
    parser.add_argument("-o", "--output", default="output.txt",
                        help="File receiving the lines that are kept")
    parser.add_argument("--trash_file", default="trash_MQfilter.txt",
                        help="File receiving the lines that are removed")

    args = parser.parse_args()

    filters(args)
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
18 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
19 # Example (column numbers are illustrative): python filter_kw_val.py -i "/projet/galaxydev/galaxy/tools/proteore_uc1/proteinGroups_Maud.txt,true" --kw "A2A288:A8K2U0" c1 true --value 2.0 c5 "=" -o "test-data/output_MQfilter.txt" |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
20 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
21 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
# Plain decimal number: optional sign, integer part without leading zeros
# (a lone "0" is allowed), optional fractional part.
_FLOAT_FORMAT = re.compile(r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?$")
# Optionally signed integer without leading zeros; "0" is rejected, which
# suits the callers in this file that use it for 1-based column numbers.
_INT_FORMAT = re.compile(r"^-?[1-9][0-9]*$")


def isnumber(format, n):
    """Tell whether the string ``n`` is written as a number of the given kind.

    Args:
        format: "int" or "float"; any other value makes the function
            return False.
        n: the string to check.

    Returns:
        True when ``n`` matches the requested format, False otherwise.
        For "float", integers and decimals such as "2", "0", "0.5" and
        "-3.25" are all accepted.
    """
    if format == "int":
        pattern = _INT_FORMAT
    elif format == "float":
        pattern = _FLOAT_FORMAT
    else:
        return False
    return pattern.match(n) is not None
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
34 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def filters(args):
    """Apply every requested filter and write the kept and removed lines.

    ``args.input`` is "<path>,<header>" where header is "true" when the first
    line of the file is a header; a missing header part is treated as "false".
    Keyword filters run first, then keyword-file filters, then value filters,
    each one working on the lines left by the previous one.

    Args:
        args: the namespace produced by the argument parser in ``options``.

    Raises:
        ValueError: when the number given to a value filter is not numeric.
    """
    input_parts = args.input.split(",")
    MQfilename = input_parts[0]
    header = input_parts[1] if len(input_parts) > 1 else "false"
    MQfile = readMQ(MQfilename)
    # results[0]: lines still kept, results[1]: lines removed so far
    # (None until a filter has run).
    results = [MQfile, None]

    if args.kw:
        for k in args.kw:
            results = filter_keyword(results[0], header, results[1], k[0], k[1], k[2])
    if args.kw_file:
        for kf in args.kw_file:
            ids = readOption(kf[0])
            results = filter_keyword(results[0], header, results[1], ids, kf[1], kf[2])
    if args.value:
        for v in args.value:
            if not isnumber("float", v[0]):
                raise ValueError("Please enter a number in filter by value")
            results = filter_value(results[0], header, results[1], v[0], v[1], v[2])

    # Write results to output
    with open(args.output, "w") as output:
        output.write("".join(results[0]))

    # Write deleted lines to trash_file; when no filter ran there is nothing
    # removed, so the trash file is simply left empty.
    with open(args.trash_file, "w") as trash:
        trash.write("".join(results[1] or []))
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
67 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def readOption(filename):
    """Read a keyword file and return its lines joined with ":".

    The file content is split on newlines and the pieces are joined with a
    colon, so a trailing newline in the file yields a trailing colon in the
    result (e.g. "A1\\nB2\\n" gives "A1:B2:").

    Args:
        filename: path of the file to read, one keyword per line.

    Returns:
        The colon-separated keywords as a single string.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, "r") as handle:
        content = handle.read()
    return ":".join(content.split("\n"))
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
80 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def readMQ(MQfilename):
    """Read the input file and return its non-blank lines.

    Args:
        MQfilename: path of the file to read.

    Returns:
        A new list with the lines of the file (line endings kept), without
        the lines that are empty or contain only whitespace.
    """
    with open(MQfilename, "r") as mqfile:
        # Build a new list instead of removing items from the list being
        # iterated, which skipped every second consecutive blank line.
        return [line for line in mqfile if line.strip()]
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
88 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def filter_keyword(MQfile, header, filtered_lines, ids, ncol, match):
    """Move the lines whose column holds one of the keywords to the removed list.

    The chosen column may hold several ";"-separated entries (double quotes
    are ignored). A line is removed when any entry matches a keyword,
    case-insensitively. Every line, kept or removed, is rewritten so that
    the column only holds its first entry.

    Args:
        MQfile: list of lines to filter; it is updated in place.
        header: "true" when the first line of ``MQfile`` is a header that
            must always be kept.
        filtered_lines: lines removed by earlier filters, or None/empty.
        ids: colon-separated keywords.
        ncol: 1-based column number, optionally written with a "c" (e.g. "c1").
        match: "false" for partial (substring) match, anything else for
            exact match.

    Returns:
        A tuple ``(kept_lines, removed_lines)``; ``kept_lines`` is the same
        list object as ``MQfile``.

    Raises:
        ValueError: when ``ncol`` is not a valid positive column number.
        IndexError: when a line has fewer columns than ``ncol``.
    """
    mq = MQfile
    column = ncol.replace("c", "")
    if not isnumber("int", column):
        raise ValueError("Please specify the column where you would like to apply the filter with valid format")
    id_index = int(column) - 1
    if id_index < 0:
        # A negative number would silently count columns from the end.
        raise ValueError("Please specify the column where you would like to apply the filter with valid format")

    # Drop empty keywords: an empty string is a substring of everything and
    # would make the partial match remove every line.
    keywords = [kw for kw in ids.upper().split(":") if kw.strip()]
    keyword_set = set(keywords)

    has_header = header == "true" and bool(mq)
    header_line = mq[0] if has_header else ""
    content = mq[1:] if has_header else mq[:]

    if not filtered_lines:  # No line removed yet by another filter
        filtered_lines = []
        if header_line != "":
            filtered_lines.append(header_line)

    exact = match != "false"
    kept = [header_line] if has_header else []
    for line in content:
        # Set the line ending aside so that it is neither compared with the
        # keywords nor lost when the column is the last one of the line.
        body = line.rstrip("\r\n")
        ending = line[len(body):]
        columns = body.split("\t")
        entries = columns[id_index].replace('"', "").split(";")
        columns[id_index] = entries[0]  # Take only first IDs
        one_id_line = "\t".join(columns) + ending

        upper_entries = [entry.upper() for entry in entries]
        if exact:
            found = any(entry in keyword_set for entry in upper_entries)
        else:
            found = any(kw in entry for entry in upper_entries for kw in keywords)

        if found:
            filtered_lines.append(one_id_line)
        else:
            kept.append(one_id_line)

    # Update the caller's list in place, as the original implementation did.
    mq[:] = kept
    return mq, filtered_lines
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
132 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def _value_passes(value, threshold, opt):
    """Return True when ``value <opt> threshold`` holds (unknown opt means ==)."""
    if opt == "<":
        return value < threshold
    if opt == "<=":
        return value <= threshold
    if opt == ">":
        return value > threshold
    if opt == ">=":
        return value >= threshold
    return value == threshold


def filter_value(MQfile, header, filtered_prots, filter_value, ncol, opt):
    """Keep the lines whose column satisfies a numeric comparison.

    A line stays in ``MQfile`` when ``<column value> <opt> <filter_value>``
    is true and is moved to the removed list when it is false. Lines whose
    column is not a number (including NaN) are kept untouched. Negative
    numbers and scientific notation in the column are compared as numbers.

    Args:
        MQfile: list of lines to filter; it is updated in place.
        header: "true" when the first line of ``MQfile`` is a header that
            must always be kept.
        filtered_prots: lines removed by earlier filters, or None/empty.
        filter_value: the number to compare with (string or number).
        ncol: 1-based column number, optionally written with a "c" (e.g. "c1").
        opt: "<", "<=", ">" or ">="; any other value means equality.

    Returns:
        A tuple ``(kept_lines, removed_lines)``; ``kept_lines`` is the same
        list object as ``MQfile``.

    Raises:
        ValueError: when ``ncol`` is not a valid positive column number or
            ``filter_value`` cannot be converted to a float.
        IndexError: when a line has fewer columns than ``ncol``.
    """
    mq = MQfile
    if not ncol or not isnumber("int", ncol.replace("c", "")):
        raise ValueError("Please specify the column where you would like to apply the filter with valid format")
    index = int(ncol.replace("c", "")) - 1
    if index < 0:
        # A negative number would silently count columns from the end.
        raise ValueError("Please specify the column where you would like to apply the filter with valid format")

    # Convert once instead of on every line.
    threshold = float(filter_value)

    has_header = header == "true" and bool(mq)
    header_line = mq[0] if has_header else ""
    content = mq[1:] if has_header else mq[:]

    if not filtered_prots:  # No line removed yet by another filter
        filtered_prots = []
        if header_line != "":
            filtered_prots.append(header_line)

    kept = [header_line] if has_header else []
    for prot in content:
        # Strip the line ending first so that a value in the last column is
        # still recognised as a number.
        cell = prot.rstrip("\r\n").split("\t")[index].replace('"', "")
        try:
            value = float(cell)
        except ValueError:
            value = None
        # value != value is only true for NaN, which cannot be compared.
        if value is None or value != value:
            kept.append(prot)
        elif _value_passes(value, threshold, opt):
            kept.append(prot)
        else:
            filtered_prots.append(prot)

    # Update the caller's list in place, as the original implementation did.
    mq[:] = kept
    return mq, filtered_prots
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
178 |
6a45ccfc0e4c
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    options()