annotate filter_kw_val.py @ 8:98cb671a92eb draft default tip

"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
author proteore
date Mon, 10 May 2021 12:27:04 +0000
parents b4641c0f8a82
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
1 import argparse
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
2 import csv
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
3 import re
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
4 import sys
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
5
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
6
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def options():
    """
    Parse the command-line options and run the filters.

    Options taking several values receive them as separate (space-separated)
    command-line tokens, because they are declared with nargs="+".

    -i, --input        Input filename and a boolean telling whether the file
                       has a header line, comma-separated:
                       "filename,true/false"
    --kw               Keyword filter: keyword, column and a boolean telling
                       whether the keyword must match exactly.
                       Repeatable: --kw kw1 c1 true --kw kw2 c1 false
    --kw_file          Keyword-file filter, five values: keywords filename,
                       boolean telling whether that file has a header, column
                       of the keywords, column of the input where the filter
                       applies, exact-match boolean. Repeatable.
    --value            Value filter: value, column and operator name, one of
                       Equal, Higher, Equal-or-higher, Lower, Equal-or-lower,
                       Different. Repeatable.
    --values_range     Range filter: lower bound, upper bound, column and a
                       boolean flag forwarded to filter_values_range,
                       example: --values_range 5 20 c1 true. Repeatable.
    --operation        'keep' or 'discard' lines concerned by the filter(s)
    --operator         How several filters are combined: AND or OR
    -o, --output       The output filename
    --discarded_lines  The file receiving the lines left out of the output
    -s, --sort_col     Column used to sort the files, followed by ",true" for
                       reverse sorting or ",false" otherwise, example:
                       c1,false
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True,
        help='Input file and header flag: "filename,true/false"')
    parser.add_argument(
        "--kw", nargs="+", action="append",
        help="Keyword filter: KEYWORD COLUMN EXACT_MATCH (repeatable)")
    parser.add_argument(
        "--kw_file", nargs="+", action="append",
        help="Keyword-file filter: FILE HEADER FILE_COLUMN COLUMN "
             "EXACT_MATCH (repeatable)")
    parser.add_argument(
        "--value", nargs="+", action="append",
        help="Value filter: VALUE COLUMN OPERATOR, with OPERATOR one of "
             "Equal, Higher, Equal-or-higher, Lower, Equal-or-lower, "
             "Different (repeatable)")
    parser.add_argument(
        "--values_range", nargs="+", action="append",
        help="Range filter: BOTTOM TOP COLUMN FLAG (repeatable)")
    parser.add_argument(
        "--operation", default="keep", type=str,
        choices=['keep', 'discard'],
        help="Keep or discard the lines concerned by the filter(s)")
    parser.add_argument(
        "--operator", default="OR", type=str, choices=['AND', 'OR'],
        help="Operator used to combine several filters")
    parser.add_argument(
        "-o", "--output", default="output.txt",
        help="Output filename")
    parser.add_argument(
        "--discarded_lines", default="filtered_output.txt",
        help="File receiving the lines left out of the output")
    parser.add_argument(
        "-s", "--sort_col",
        help='Sort column and reverse flag, example: "c1,false"')

    args = parser.parse_args()

    filters(args)
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
40
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
41
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def str_to_bool(v):
    """
    Convert a textual boolean flag into a bool.

    Accepted spellings (case-insensitive, surrounding whitespace ignored):
    yes/true/t/y/1 for True and no/false/f/n/0 for False. A value that is
    already a bool is returned unchanged.

    Raises argparse.ArgumentTypeError for any other value.
    """
    if isinstance(v, bool):
        return v

    # Flags are often cut out of comma-separated options ("file, true"),
    # so stray whitespace around the token must not make it invalid.
    token = v.strip().lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
49
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
50
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def proper_ncol(ncol, file):
    """
    Check that the 0-based column index ncol exists in the table.

    file is a list of rows; the width of its first row is the reference.
    An empty table has no column at all. When the column is missing, a
    message using the 1-based column number is printed and the program
    exits with status 1. Returns None when the column exists.
    """
    # An empty table must be reported as a missing column rather than
    # crash with an IndexError on file[0].
    width = len(file[0]) if file else 0
    if ncol not in range(width):
        print("Column "+str(ncol+1)+" not found in input file")
        sys.exit(1)
b4641c0f8a82 planemo upload commit 77279e994f5751c6cd9aa165aa0604db3d241271-dirty
proteore
parents: 5
diff changeset
56
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
57 # Check if a variable is a float or an integer
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
58
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
59
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def is_number(number_format, n):
    """
    Tell whether the string n is written as a number of the given format.

    number_format:
      "int"   - optional minus sign followed by digits only.
      "float" - optional minus sign, digits and an optional fractional part
                introduced by a literal dot (plain integers are accepted
                too), or a number in scientific notation such as "1.5e-3".
    Any other number_format yields False.

    Returns True or False.
    """
    # The dot is a literal decimal separator: left unescaped it would match
    # any character and let values such as "1a5" pass as floats.
    float_format = re.compile(r"^-?[0-9]+(?:\.[0-9]+)?$")
    int_format = re.compile(r"^-?[0-9]+$")
    # The exponent needs at least one digit, otherwise "1e" would be
    # accepted although float("1e") fails.
    scientific_number = re.compile(r"^[-+]?\d+\.?\d*[Ee][-+]?\d+$")

    if number_format == "int":
        return int_format.match(n) is not None
    if number_format == "float":
        return (float_format.match(n) is not None
                or scientific_number.match(n) is not None)
    return False
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
76
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
77 # Filter the document
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
78
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
79
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def filters(args):
    """
    Apply every requested filter to the input file and write the results.

    args is the namespace produced by options(). The input file is read
    once, blank cells are replaced by NA, then the --kw, --kw_file, --value
    and --values_range filters are applied in that order. Each filter
    updates results_dict, which is indexed by line number and holds the
    per-filter results of that line. Lines are then split into the lines
    matching the filters (any of them for OR, all of them for AND) and the
    other lines, optionally sorted, and written as tab-separated files:
    args.output receives the matching lines when args.operation is "keep"
    and the other lines otherwise; args.discarded_lines receives the rest.

    Raises ValueError when a --value filter is not given a number and
    KeyError when its operator name is unknown.
    """
    input_parts = args.input.split(",")
    filename = input_parts[0]
    header = str_to_bool(input_parts[1])
    csv_file = blank_to_NA(read_file(filename))
    results_dict = {}
    operator_dict = {"Equal": "=", "Higher": ">", "Equal-or-higher": ">=", "Lower": "<", "Equal-or-lower": "<=", "Different": "!="}  # noqa 501

    if args.kw:
        for k in args.kw:
            results_dict = filter_keyword(csv_file,
                                          header,
                                          results_dict,
                                          k[0],
                                          k[1],
                                          k[2])

    if args.kw_file:
        for kf in args.kw_file:
            # The header flag of the keywords file is kept in its own
            # variable: it must not replace the header flag of the input
            # file, which is still needed by every later step.
            kw_header = str_to_bool(kf[1])
            ncol = column_from_txt(kf[2], csv_file)
            keywords = read_keywords_file(kf[0], kw_header, ncol)
            results_dict = filter_keyword(csv_file, header, results_dict,
                                          keywords, kf[3], kf[4])

    if args.value:
        for v in args.value:
            v[0] = v[0].replace(",", ".")
            v[2] = operator_dict[v[2]]
            # Same acceptance rule as for the bounds of --values_range.
            if is_number("float", v[0]) or is_number("int", v[0]):
                csv_file = comma_number_to_float(csv_file,
                                                 column_from_txt(
                                                     v[1], csv_file), header)
                results_dict = filter_value(csv_file, header,
                                            results_dict, v[0], v[1], v[2])
            else:
                raise ValueError("Please enter a number in filter by value")

    if args.values_range:
        for vr in args.values_range:
            vr[:2] = [value.replace(",", ".") for value in vr[:2]]
            csv_file = comma_number_to_float(csv_file,
                                             column_from_txt(
                                                 vr[2], csv_file), header)
            # NOTE(review): a range whose bounds are not numbers is ignored
            # silently, whereas --value raises ValueError - confirm that
            # this difference is intended.
            if (is_number("float", vr[0]) or is_number("int", vr[0])) and (is_number("float", vr[1]) or is_number("int", vr[1])):  # noqa 501
                results_dict = filter_values_range(csv_file,
                                                   header, results_dict,
                                                   vr[0], vr[1], vr[2], vr[3])

    remaining_lines = []
    filtered_lines = []

    # The header line goes to both files (nothing to copy if the input is
    # empty).
    if header and csv_file:
        remaining_lines.append(csv_file[0])
        filtered_lines.append(csv_file[0])

    if results_dict == {}:  # no filter used
        # Without a header the first line is data and must be kept.
        remaining_lines.extend(csv_file[1:] if header else csv_file)
    else:
        for id_line, line in enumerate(csv_file):
            if id_line not in results_dict:  # skip header and empty lines
                continue
            if args.operator == 'OR':
                if any(results_dict[id_line]):
                    filtered_lines.append(line)
                else:
                    remaining_lines.append(line)
            elif args.operator == "AND":
                if all(results_dict[id_line]):
                    filtered_lines.append(line)
                else:
                    remaining_lines.append(line)

    # sort of results by column
    if args.sort_col:
        sort_parts = args.sort_col.split(",")
        sort_col = column_from_txt(sort_parts[0], csv_file)
        reverse = str_to_bool(sort_parts[1])
        remaining_lines = sort_by_column(remaining_lines, sort_col,
                                         reverse, header)
        filtered_lines = sort_by_column(filtered_lines, sort_col,
                                        reverse, header)

    # swap lists of lines (files) if 'keep' option selected
    if args.operation == "keep":
        remaining_lines, filtered_lines = filtered_lines, remaining_lines

    # Write results to output
    with open(args.output, "w") as output:
        writer = csv.writer(output, delimiter="\t")
        writer.writerows(remaining_lines)

    # Write filtered lines to filtered_output
    with open(args.discarded_lines, "w") as filtered_output:
        writer = csv.writer(filtered_output, delimiter="\t")
        writer.writerows(filtered_lines)
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
179
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
180 # function to sort the csv_file by value in a specific column
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
181
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
182
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def sort_by_column(tab, sort_col, reverse, header):
    """
    Return the rows of tab sorted on the column of index sort_col.

    tab      - list of rows (lists of cells).
    sort_col - 0-based index of the column used as sort key.
    reverse  - True to sort in descending order.
    header   - true when the first row of tab is a header line; that row
               stays first in the result.

    Rows whose sort cell is empty or 'NA' cannot be ordered: they keep
    their relative order and are placed after the sorted rows. The other
    rows are sorted as floats, as integers or as text, depending on what
    only_number and any_float report for the column. A table of at most
    one row is returned as is.
    """
    if len(tab) <= 1:  # nothing more than a header or a single row
        return tab

    # The same truthiness test is used to detach and to restore the header,
    # so the header row can never be dropped.
    head = None
    if header:
        head = tab[0]
        tab = tab[1:]

    # Split the rows in one pass instead of searching a list of indexes
    # for every row.
    sortable = []
    unsortable = []
    for line in tab:
        if line[sort_col] == '' or line[sort_col] == 'NA':
            unsortable.append(line)
        else:
            sortable.append(line)

    # No need to probe the type of the column when nothing can be sorted.
    if sortable:
        if only_number(sortable, sort_col):
            if any_float(sortable, sort_col):
                sortable = comma_number_to_float(sortable, sort_col, False)
                sortable = sorted(sortable,
                                  key=lambda row: float(row[sort_col]),
                                  reverse=reverse)
            else:
                sortable = sorted(sortable,
                                  key=lambda row: int(row[sort_col]),
                                  reverse=reverse)
        else:
            sortable = sorted(sortable, key=lambda row: row[sort_col],
                              reverse=reverse)

    sortable.extend(unsortable)
    if header:
        sortable = [head] + sortable

    return sortable
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
210
2
52a7afd01c6d planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
211
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
212 # replace all blank cells to NA
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
213
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
214
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def blank_to_NA(csv_file):
    """Return the rows of *csv_file* with blank-like cells replaced by "NA".

    A cell is replaced when it is exactly the empty string, a single space
    or the text "NaN"; every other cell is copied unchanged.

    csv_file: iterable of rows, each row being an iterable of cells.
    Returns a new list of new row lists; the input rows are not modified.
    """
    blank_markers = ("", " ", "NaN")
    return [
        ["NA" if cell in blank_markers else cell for cell in row]
        for row in csv_file
    ]
52a7afd01c6d planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
223
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
224 # replace decimal commas with dots in a column so its values parse as floats
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
225
2
52a7afd01c6d planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
226
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def comma_number_to_float(csv_file, ncol, header):
    """Replace every "," with "." in column *ncol* of the data rows.

    csv_file: list of rows (lists of strings).
    ncol: index of the column to rewrite.
    header: when truthy, the first row is passed through untouched.

    Returns a new list holding the same row objects; the data rows are
    modified in place, so the caller's rows change as well.
    """
    converted = []
    data_rows = csv_file
    if header:
        # Keep the header row as is and only rewrite what follows it.
        converted.append(csv_file[0])
        data_rows = csv_file[1:]

    for row in data_rows:
        row[ncol] = row[ncol].replace(",", ".")
        converted.append(row)

    return converted
52a7afd01c6d planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
239
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
240 # return True if there is at least one float in the column
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
241
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
242
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def any_float(tab, col):
    """Tell whether at least one row of *tab* has a float in column *col*.

    Commas are replaced by dots before each cell is handed to
    ``is_number("float", ...)``; scanning stops at the first match.

    Returns True on the first match, False when no cell matches (including
    when *tab* is empty).
    """
    return any(
        is_number("float", row[col].replace(",", "."))
        for row in tab
    )
52a7afd01c6d planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
250
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
251
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def only_number(tab, col):
    """Tell whether every row of *tab* holds a number in column *col*.

    A cell counts as a number when, after replacing commas by dots,
    ``is_number`` accepts it as a "float" or as an "int".

    Returns False at the first cell that is neither, True otherwise
    (including when *tab* is empty).
    """
    def _is_numeric(text):
        return is_number("float", text) or is_number("int", text)

    return all(_is_numeric(row[col].replace(",", ".")) for row in tab)
52a7afd01c6d planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents: 0
diff changeset
257
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
258 # Read the keywords file to extract the list of keywords
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
259
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
260
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def read_keywords_file(filename, header, ncol):
    """Read a tab-separated keywords file and return its distinct keywords.

    Blank cells are first normalised with ``blank_to_NA``.  When the first
    row has more than one column, the keyword of each row is the cell at
    index *ncol*; otherwise the cells of each row are joined into a single
    keyword.

    filename: path of the tab-separated file to read.
    header: when truthy, the first row is dropped from the result.
    ncol: list index of the keyword column (only used for multi-column
        files).

    Returns a list of unique keywords in arbitrary order (it is built from
    a set), or an empty list when the file contains no rows.
    """
    with open(filename, "r") as csv_file:
        rows = list(csv.reader(csv_file, delimiter='\t'))

    # An empty file has no first row to inspect; without this guard the
    # column-count check below would raise IndexError.
    if not rows:
        return []

    rows = blank_to_NA(rows)
    if len(rows[0]) > 1:
        keywords = [row[ncol] for row in rows]
    else:
        keywords = ["".join(row) for row in rows]

    if header:
        keywords = keywords[1:]

    return list(set(keywords))
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
274
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
275 # Read input file
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
276
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
277
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def read_file(filename):
    """Load a tab-separated file as a list of rows, skipping empty lines.

    A row is considered empty when the concatenation of its cells contains
    nothing but space characters (or nothing at all).

    filename: path of the tab-separated file to read.
    Returns a list of rows, each row being a list of cell strings.
    """
    with open(filename, "r") as handle:
        rows = list(csv.reader(handle, delimiter="\t"))

    kept_rows = []
    for row in rows:
        # Drop rows whose cells hold only spaces or no text at all.
        if "".join(row).replace(" ", ""):
            kept_rows.append(row)

    return kept_rows
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
288
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
289 # search for keywords in rows of csvfile, return a dictionary of booleans
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
290 # (true if keyword found, false otherwise)
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
291
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
292
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def filter_keyword(csv_file, header, results_dict, keywords, ncol, match):
    """Flag the rows of *csv_file* whose column *ncol* contains a keyword.

    The cell is stripped of double quotes and split on ";"; each piece is
    compared case-insensitively against the keywords.

    csv_file: list of rows (lists of strings).
    header: when it is exactly True, the first row is skipped.
    results_dict: dict mapping row index -> list of booleans; one boolean
        is appended per processed row (the list is created if missing).
    keywords: either a list of keywords, or a whitespace-separated string
        of keywords.
    ncol: column specification, resolved through ``column_from_txt``.
    match: exact-match switch, converted with ``str_to_bool``; when True a
        piece must equal a keyword, otherwise a keyword only has to be a
        substring of a piece.

    Returns *results_dict* (also updated in place).
    """
    match = str_to_bool(match)
    ncol = column_from_txt(ncol, csv_file)

    # Cell values are upper-cased before comparison, so the keywords must
    # be upper-cased too, whether given as a string or as a list.
    # Previously list keywords were left untouched and any lower-case
    # keyword could never match.
    if isinstance(keywords, list):
        keywords = [keyword.upper() for keyword in keywords]
    else:
        keywords = keywords.upper().split()  # Split list of filter keyword
    exact_keywords = set(keywords)

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        cell_values = [
            piece.upper() for piece in line[ncol].replace('"', "").split(";")
        ]

        # Perfect match or not
        if match is True:
            found_in_line = any(
                piece in exact_keywords for piece in cell_values
            )
        else:
            found_in_line = any(
                keyword in piece
                for piece in cell_values
                for keyword in keywords
            )

        results_dict.setdefault(id_line, []).append(found_in_line)

    return results_dict
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
317
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
318 # filter by a determined value in rows of csvfile, return a dictionary
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
319 # of boolean (true if value filtered, false otherwise)
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
320
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
321
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
def filter_value(csv_file, header, results_dict, filter_value, ncol, opt):
    """Flag the rows of *csv_file* whose column *ncol* meets a comparison.

    Each cell is stripped of double quotes and surrounding whitespace and
    has commas turned into dots.  Numeric cells are compared with
    ``value_compare(value, filter_value, opt)``; cells that are not finite
    numbers get False, i.e. the line is kept by default.

    csv_file: list of rows (lists of strings).
    header: when it is exactly True, the first row is skipped.
    results_dict: dict mapping row index -> list of booleans; one boolean
        is appended per processed row (the list is created if missing).
    filter_value: threshold, converted with ``float``.
    ncol: column specification, resolved through ``column_from_txt``.
    opt: comparison operator, passed on to ``value_compare``.

    Prints a notice when no numeric value was found in the column.
    Returns *results_dict* (also updated in place).
    """
    import math  # local import: only needed for the finiteness check

    filter_value = float(filter_value)
    ncol = column_from_txt(ncol, csv_file)
    nb_string = 0

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        value = line[ncol].replace('"', "").replace(",", ".").strip()

        # Parse with float() rather than a digits-only test, so negative
        # and scientific-notation values (e.g. "-1.5", "1e-05") count as
        # numbers.  "nan"/"inf" are treated as non-numeric.
        try:
            is_numeric = math.isfinite(float(value))
        except ValueError:
            is_numeric = False

        if is_numeric:
            to_filter = value_compare(value, filter_value, opt)
        else:
            # impossible to treat (ex : "" instead of a number),
            # we keep the line by default
            nb_string += 1
            to_filter = False

        # adding the result to the dictionary
        results_dict.setdefault(id_line, []).append(to_filter)

    # number of lines in the csv file
    if header:
        nb_lines = len(csv_file) - 1
    else:
        nb_lines = len(csv_file)

    # if there's no numeric value in the column
    if nb_string == nb_lines:
        print('No numeric values found in the column '+str(ncol+1))
        print('The filter "'+str(opt)+' '+str(filter_value)+'" can not be applied on the column '+str(ncol+1))  # noqa 501

    return results_dict
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
362
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
363 # filter by a determined range of values in rows of csvfile, return a dictionary
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
364 # of boolean (true if value filtered, false otherwise)
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
365
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
366
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def filter_values_range(csv_file, header, results_dict, bottom_value, top_value, ncol, inclusive):  # noqa 501
    """Flag the rows of *csv_file* whose column *ncol* is outside a range.

    Each cell is stripped of double quotes and surrounding whitespace and
    has commas turned into dots.  For numeric cells the appended boolean
    is True when the value lies outside the range; cells that are not
    finite numbers get False, i.e. the line is kept by default.

    csv_file: list of rows (lists of strings).
    header: when it is exactly True, the first row is skipped.
    results_dict: dict mapping row index -> list of booleans; one boolean
        is appended per processed row (the list is created if missing).
    bottom_value, top_value: range bounds, converted with ``float``.
    ncol: column specification, resolved through ``column_from_txt``.
    inclusive: converted with ``str_to_bool``; when True the bounds
        themselves belong to the range.

    Prints a notice when no numeric value was found in the column.
    Returns *results_dict* (also updated in place).
    """
    import math  # local import: only needed for the finiteness check

    inclusive = str_to_bool(inclusive)
    bottom_value = float(bottom_value)
    top_value = float(top_value)
    ncol = column_from_txt(ncol, csv_file)
    nb_string = 0

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        value = line[ncol].replace('"', "").replace(",", ".").strip()

        # Parse with float() rather than a digits-only test, so negative
        # and scientific-notation values (e.g. "-1.5", "1e-05") count as
        # numbers.  "nan"/"inf" are treated as non-numeric.
        try:
            number = float(value)
        except ValueError:
            number = None
        if number is not None and not math.isfinite(number):
            number = None

        if number is None:
            # impossible to treat (ex : "" instead of a number),
            # we keep the line by default
            nb_string += 1
            out_of_range = False
        elif inclusive is True:
            out_of_range = not (bottom_value <= number <= top_value)
        else:
            out_of_range = not (bottom_value < number < top_value)

        # adding the result to the dictionary
        results_dict.setdefault(id_line, []).append(out_of_range)

    # number of lines in the csv file
    if header:
        nb_lines = len(csv_file) - 1
    else:
        nb_lines = len(csv_file)

    # if there's no numeric value in the column
    if nb_string == nb_lines:
        print('No numeric values found in the column '+str(ncol+1))
        if inclusive:
            print('The filter "'+str(bottom_value)+' <= x <= '+str(top_value)+'" can not be applied on the column '+str(ncol+1))  # noqa 501
        else:
            print('The filter "'+str(bottom_value)+' < x < '+str(top_value)+'" can not be applied on the column '+str(ncol+1))  # noqa 501

    return results_dict
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
415
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
416
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def column_from_txt(ncol, file):
    """Turn a column specification such as "c3" into a zero-based index.

    Every "c" is removed from *ncol*; the remainder must be accepted by
    ``is_number("int", ...)``.  The resulting index is handed to
    ``proper_ncol`` together with *file* before being returned
    (presumably a bounds check - confirm against ``proper_ncol``).

    ncol: column specification string.
    file: the table the column refers to, passed on to ``proper_ncol``.

    Returns the integer value of the specification minus one.
    Raises ValueError when the specification is not an integer.
    """
    digits = ncol.replace("c", "")
    if not is_number("int", digits):
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")

    index = int(digits) - 1
    proper_ncol(index, file)

    return index
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
428
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
429 # return True if value is in the determined values, false otherwise
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
430
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
431
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
def value_compare(value, filter_value, opt):
    """
    Tell whether value satisfies the comparison opt against filter_value.

    value is converted with float() before comparing; filter_value is used
    as given. opt is one of "<", "<=", ">", ">=", "=" or "!=". Any other
    operator yields False without attempting the conversion.

    Returns True when the comparison holds, False otherwise. A value that
    float() cannot convert raises the corresponding exception.
    """
    comparisons = {
        "<": lambda number, limit: number < limit,
        "<=": lambda number, limit: number <= limit,
        ">": lambda number, limit: number > limit,
        ">=": lambda number, limit: number >= limit,
        "=": lambda number, limit: number == limit,
        "!=": lambda number, limit: number != limit,
    }

    compare = comparisons.get(opt)
    if compare is None:
        # Unknown operator: nothing matches, and value is left unconverted.
        return False

    return bool(compare(float(value), filter_value))
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
455
8
98cb671a92eb "planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents: 6
diff changeset
456
0
a55e8b137c6b planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff changeset
# Script entry point: run the command-line option handling only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    options()