Mercurial > repos > proteore > proteore_filter_keywords_values
annotate filter_kw_val.py @ 8:98cb671a92eb draft default tip
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
author | proteore |
---|---|
date | Mon, 10 May 2021 12:27:04 +0000 |
parents | b4641c0f8a82 |
children |
rev | line source |
---|---|
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
1 import argparse |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
2 import csv |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
3 import re |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
4 import sys |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
5 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
6 |
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
def options():
    """
    Parse the command-line options and run the filters on the input file.

    Multi-value options take space-separated values (argparse ``nargs="+"``)
    and can be repeated on the command line.

    Options:
        -i, --input        Input filename and boolean value telling whether
                           the file contains a header: "filename,true/false"
        --kw               Keyword filter, three values: the keyword, the
                           column where this filter applies and a boolean
                           value telling whether the keyword must match
                           exactly, e.g. --kw kw1 c1 true --kw kw2 c1 false
        --kw_file          Keyword-file filter, five values: the keywords
                           filename, a boolean value telling whether that
                           file has a header, the column to read in that
                           file, then the column where this filter applies
                           and the exact-match boolean (as for --kw)
        --value            Value filter, three values: the value, the column
                           where this filter applies and the operator name:
                           Equal, Higher, Equal-or-higher, Lower,
                           Equal-or-lower or Different
        --values_range     Range filter, four values: the two bounds, the
                           column and a boolean flag forwarded to the range
                           filter, e.g. --values_range 5 20 c1 true
        --operation        'keep' or 'discard' lines concerned by filter(s)
        --operator         The operator used to combine several
                           keywords/values filters: AND or OR
        -o, --output       The output filename
        --discarded_lines  The file that receives the removed lines
        -s, --sort_col     Column used to sort the file, followed by ",true"
                           for reverse sorting or ",false" otherwise,
                           e.g. c1,false
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True,
        help='Input file and header flag: "filename,true/false"')
    parser.add_argument(
        "--kw", nargs="+", action="append",
        help="Keyword filter: KEYWORD COLUMN EXACT_MATCH (repeatable)")
    parser.add_argument(
        "--kw_file", nargs="+", action="append",
        help="Keyword-file filter: FILE HEADER FILE_COLUMN COLUMN "
             "EXACT_MATCH (repeatable)")
    parser.add_argument(
        "--value", nargs="+", action="append",
        help="Value filter: VALUE COLUMN OPERATOR, OPERATOR being Equal, "
             "Higher, Equal-or-higher, Lower, Equal-or-lower or Different "
             "(repeatable)")
    parser.add_argument(
        "--values_range", nargs="+", action="append",
        help="Range filter: BOUND BOUND COLUMN FLAG, "
             "e.g. 5 20 c1 true (repeatable)")
    parser.add_argument(
        "--operation", default="keep", type=str,
        choices=['keep', 'discard'],
        help="Keep or discard the lines concerned by the filter(s)")
    parser.add_argument(
        "--operator", default="OR", type=str, choices=['AND', 'OR'],
        help="How several filters are combined")
    parser.add_argument(
        "-o", "--output", default="output.txt",
        help="Output filename")
    parser.add_argument(
        "--discarded_lines", default="filtered_output.txt",
        help="File receiving the removed lines")
    parser.add_argument(
        "-s", "--sort_col",
        help='Sort column and reverse flag: "column,true/false"')

    args = parser.parse_args()

    filters(args)
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
40 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
41 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
def str_to_bool(v):
    """
    Convert a textual flag into a boolean, ignoring case.

    'yes', 'true', 't', 'y' and '1' give True; 'no', 'false', 'f', 'n' and
    '0' give False. Any other text raises argparse.ArgumentTypeError.
    """
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')

    lowered = v.lower()
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
49 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
50 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def proper_ncol(ncol, file):
    """
    Stop the program when a column index does not exist in the table.

    ncol is a zero-based column index and file is a list of rows; the width
    of the first row is the reference. When ncol is outside that width, a
    message with the one-based column number is printed and the process
    exits with status 1. Otherwise nothing happens.
    """
    column_count = len(file[0])
    if ncol in range(column_count):
        return

    print(f"Column {ncol + 1} not found in input file")
    sys.exit(1)
b4641c0f8a82
planemo upload commit 77279e994f5751c6cd9aa165aa0604db3d241271-dirty
proteore
parents:
5
diff
changeset
|
56 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
57 # Check if a variable is a float or an integer |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
58 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
59 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
# Patterns are compiled once at import time instead of on every call.
# Integer: optional minus sign followed by digits.
_INT_FORMAT = re.compile(r"^-?[0-9]+$")
# Decimal: digits with an optional ".digits" part. The dot is escaped so
# that it only matches a literal decimal point, and a single digit ("5")
# is accepted like any other integer-looking value.
_FLOAT_FORMAT = re.compile(r"^-?[0-9]+(?:\.[0-9]+)?$")
# Scientific notation: the exponent digits are mandatory, because float()
# cannot parse a bare exponent marker such as "1e".
_SCIENTIFIC_FORMAT = re.compile(r"^[-+]?[\d]+\.?[\d]*[Ee][-+]?[\d]+$")


def is_number(number_format, n):
    """
    Tell whether the string n is written as a number of the given format.

    number_format -- "int" or "float"; any other value gives False
    n             -- the string to check

    With "int", n must be an optionally negative run of digits. With
    "float", n must be an optionally negative integer or decimal number,
    or a number in scientific notation (e.g. "1.5e-3").

    Returns True when n matches, False otherwise.
    """
    if number_format == "int":
        return _INT_FORMAT.match(n) is not None

    if number_format == "float":
        if _FLOAT_FORMAT.match(n) is not None:
            return True
        # Plain decimal notation failed: fall back to scientific notation.
        return _SCIENTIFIC_FORMAT.match(n) is not None

    return False
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
76 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
77 # Filter the document |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
78 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
79 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
def filters(args):
    """
    Apply the requested filters to the input file and write both outputs.

    args is the namespace built by the argument parser. The input file is
    read, every --kw, --kw_file, --value and --values_range filter is
    applied in that order, and the per-line results are combined with
    args.operator (any for 'OR', all for 'AND'). Lines are then optionally
    sorted (args.sort_col) and written tab-separated to args.output and
    args.discarded_lines. With args.operation == "keep" the lines matching
    the filters go to args.output; otherwise the non-matching lines do.

    Raises ValueError when a --value filter is not given a number.
    """
    input_parts = args.input.split(",")
    filename = input_parts[0]
    header = str_to_bool(input_parts[1])
    csv_file = blank_to_NA(read_file(filename))
    results_dict = {}
    operator_dict = {
        "Equal": "=",
        "Higher": ">",
        "Equal-or-higher": ">=",
        "Lower": "<",
        "Equal-or-lower": "<=",
        "Different": "!=",
    }

    if args.kw:
        for k in args.kw:
            results_dict = filter_keyword(csv_file,
                                          header,
                                          results_dict,
                                          k[0],
                                          k[1],
                                          k[2])

    if args.kw_file:
        for kf in args.kw_file:
            # Header flag of the keywords file. It is kept in its own
            # variable so that it does not overwrite the input file's
            # header flag, which is still needed below.
            kw_header = str_to_bool(kf[1])
            ncol = column_from_txt(kf[2], csv_file)
            keywords = read_keywords_file(kf[0], kw_header, ncol)
            results_dict = filter_keyword(csv_file, header, results_dict,
                                          keywords, kf[3], kf[4])

    if args.value:
        for v in args.value:
            # Accept a decimal comma in the value given by the user.
            v[0] = v[0].replace(",", ".")
            v[2] = operator_dict[v[2]]
            # Integers are accepted as well, as for --values_range.
            if is_number("float", v[0]) or is_number("int", v[0]):
                csv_file = comma_number_to_float(
                    csv_file, column_from_txt(v[1], csv_file), header)
                results_dict = filter_value(csv_file, header,
                                            results_dict, v[0], v[1], v[2])
            else:
                raise ValueError("Please enter a number in filter by value")

    if args.values_range:
        for vr in args.values_range:
            vr[:2] = [value.replace(",", ".") for value in vr[:2]]
            csv_file = comma_number_to_float(
                csv_file, column_from_txt(vr[2], csv_file), header)
            bounds_are_numbers = all(
                is_number("float", bound) or is_number("int", bound)
                for bound in vr[:2])
            # A range whose bounds are not numbers is skipped silently.
            if bounds_are_numbers:
                results_dict = filter_values_range(csv_file,
                                                   header, results_dict,
                                                   vr[0], vr[1], vr[2], vr[3])

    remaining_lines = []
    filtered_lines = []

    # The header line, when present, is copied to both outputs.
    if header is True and csv_file:
        remaining_lines.append(csv_file[0])
        filtered_lines.append(csv_file[0])

    if not results_dict:  # no filter used
        # Skip the first line only when it is a header (already copied
        # above); without a header it is a data line and must be kept.
        first_data_line = 1 if header else 0
        remaining_lines.extend(csv_file[first_data_line:])
    else:
        combine = {"OR": any, "AND": all}.get(args.operator)
        for id_line, line in enumerate(csv_file):
            # Lines without results (header, empty lines) are skipped.
            if combine is None or id_line not in results_dict:
                continue
            if combine(results_dict[id_line]):
                filtered_lines.append(line)
            else:
                remaining_lines.append(line)

    # sort of results by column
    if args.sort_col:
        sort_parts = args.sort_col.split(",")
        sort_col = column_from_txt(sort_parts[0], csv_file)
        reverse = str_to_bool(sort_parts[1])
        remaining_lines = sort_by_column(remaining_lines, sort_col,
                                         reverse, header)
        filtered_lines = sort_by_column(filtered_lines, sort_col,
                                        reverse, header)

    # swap lists of lines (files) if 'keep' option selected
    if args.operation == "keep":
        remaining_lines, filtered_lines = filtered_lines, remaining_lines

    # Write results to output. newline="" lets the csv writer control the
    # line endings itself, as the csv module documentation requires.
    with open(args.output, "w", newline="") as output:
        writer = csv.writer(output, delimiter="\t")
        writer.writerows(remaining_lines)

    # Write filtered lines to filtered_output
    with open(args.discarded_lines, "w", newline="") as filtered_output:
        writer = csv.writer(filtered_output, delimiter="\t")
        writer.writerows(filtered_lines)
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
179 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
180 # function to sort the csv_file by value in a specific column |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
181 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
182 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def sort_by_column(tab, sort_col, reverse, header):
    """
    Sort the rows of a table on one column and return the sorted table.

    tab      -- list of rows (each row is a list of cells)
    sort_col -- zero-based index of the column to sort on
    reverse  -- True to sort in descending order
    header   -- True when the first row of tab is a header line

    A table with at most one row is returned unchanged. Otherwise the
    header line (if any) stays first, rows whose sort cell is '' or 'NA'
    are moved to the end in their original order, and the other rows are
    sorted as floats, as integers or as text depending on what
    only_number() and any_float() report for the column.
    """
    if len(tab) <= 1:  # nothing to sort: empty, header only or one row
        return tab

    head = None
    if header:
        head, tab = tab[0], tab[1:]

    # Split the rows in a single pass: rows with an empty cell in the sort
    # column cannot be ordered and are appended after the sorted rows.
    sortable_rows = []
    unsortable_rows = []
    for line in tab:
        if line[sort_col] == '' or line[sort_col] == 'NA':
            unsortable_rows.append(line)
        else:
            sortable_rows.append(line)
    tab = sortable_rows

    if only_number(tab, sort_col) and any_float(tab, sort_col):
        tab = comma_number_to_float(tab, sort_col, False)
        tab = sorted(tab, key=lambda row: float(row[sort_col]),
                     reverse=reverse)
    elif only_number(tab, sort_col):
        tab = sorted(tab, key=lambda row: int(row[sort_col]),
                     reverse=reverse)
    else:
        tab = sorted(tab, key=lambda row: row[sort_col], reverse=reverse)

    tab.extend(unsortable_rows)
    # Same test as when the header was removed, so a header that was taken
    # off is always put back.
    if header:
        tab = [head] + tab

    return tab
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
210 |
2
52a7afd01c6d
planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents:
0
diff
changeset
|
211 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
212 # replace all blank cells with NA |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
213 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
214 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def blank_to_NA(csv_file):
    """Return a copy of the table in which blank cells are replaced by "NA".

    A cell is blank when, once surrounding whitespace is stripped, it is
    empty or equal to "NaN". Cells made of several spaces are therefore
    handled the same way as "" and " ".

    Args:
        csv_file: iterable of rows, each row being an iterable of string
            cells (a ``csv.reader`` object or a list of lists).

    Returns:
        A new list of rows (lists of strings); the input rows are left
        untouched.
    """
    blank_values = ("", "NaN")
    return [
        ["NA" if cell.strip() in blank_values else cell for cell in line]
        for line in csv_file
    ]
52a7afd01c6d
planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents:
0
diff
changeset
|
223 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
224 # replace decimal commas by dots in a column so its values can be read as floats |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
225 |
2
52a7afd01c6d
planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents:
0
diff
changeset
|
226 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def comma_number_to_float(csv_file, ncol, header):
    """Replace "," by "." in column *ncol* of every data row.

    Args:
        csv_file: list of rows (lists of strings).
        ncol: zero-based index of the column to rewrite.
        header: when truthy, the first row is copied as is and not rewritten.

    Returns:
        A new list holding the same row objects; the data rows are modified
        in place.
    """
    converted = []
    rows = csv_file
    if header:
        # keep the header row untouched at the top of the result
        converted.append(csv_file[0])
        rows = csv_file[1:]

    for row in rows:
        row[ncol] = row[ncol].replace(",", ".")
        converted.append(row)

    return converted
52a7afd01c6d
planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents:
0
diff
changeset
|
239 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
240 # return True if there is at least one float in the column |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
241 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
242 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def any_float(tab, col):
    """Tell whether at least one cell of column *col* is a float.

    Each cell has its "," replaced by "." before being checked with
    ``is_number("float", ...)``.

    Args:
        tab: iterable of rows (sequences of strings).
        col: zero-based index of the column to inspect.

    Returns:
        True as soon as one cell passes the check, False otherwise.
    """
    return any(
        is_number("float", row[col].replace(",", ".")) for row in tab
    )
52a7afd01c6d
planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents:
0
diff
changeset
|
250 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
251 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def only_number(tab, col):
    """Tell whether every cell of column *col* is a number.

    Each cell has its "," replaced by "." and must be accepted by
    ``is_number`` either as a "float" or as an "int".

    Args:
        tab: iterable of rows (sequences of strings).
        col: zero-based index of the column to inspect.

    Returns:
        False as soon as one cell is rejected, True otherwise (including
        for an empty table).
    """
    def _numeric(cell):
        text = cell.replace(",", ".")
        return is_number("float", text) or is_number("int", text)

    return all(_numeric(row[col]) for row in tab)
52a7afd01c6d
planemo upload commit 9af2cf12c26c94e7206751ccf101a3368f92d0ba
proteore
parents:
0
diff
changeset
|
257 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
258 # Read the keywords file to extract the list of keywords |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
259 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
260 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def read_keywords_file(filename, header, ncol):
    """Read the distinct keywords stored in a tab-separated file.

    Args:
        filename: path of the tab-separated keywords file.
        header: when truthy, the first non-blank row is skipped.
        ncol: zero-based index of the keyword column; only used when the
            first row holds more than one column.

    Returns:
        The list of distinct keywords, in no particular order (duplicates
        are removed through a set). An empty list when the file holds no
        non-blank row.
    """
    with open(filename, "r") as csv_file:
        reader = csv.reader(csv_file, delimiter='\t')
        # csv.reader yields [] for blank lines: drop them so they neither
        # break the column indexing nor produce an empty keyword, which
        # would match every row in partial-match mode
        lines = [line for line in blank_to_NA(reader) if line]

    # nothing to read: avoid the IndexError on lines[0]
    if not lines:
        return []

    if len(lines[0]) > 1:
        keywords = [line[ncol] for line in lines]
    else:
        keywords = ["".join(key) for key in lines]
    if header:
        keywords = keywords[1:]

    return list(set(keywords))
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
274 |
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
275 # Read input file |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
276 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
277 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
def read_file(filename):
    """Load a tab-separated file as a list of rows.

    Rows whose cells, once joined and stripped of space characters, are
    empty are dropped.

    Args:
        filename: path of the tab-separated file to read.

    Returns:
        A list of rows, each row being a list of strings.
    """
    with open(filename, "r") as handle:
        rows = [row for row in csv.reader(handle, delimiter="\t")]

    # keep only the rows holding something else than spaces
    return [row for row in rows if "".join(row).replace(" ", "")]
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
288 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
289 # seek for keywords in rows of csvfile, return a dictionary of boolean |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
290 # (true if keyword found, false otherwise) |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
291 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
292 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
def filter_keyword(csv_file, header, results_dict, keywords, ncol, match):
    """Flag the rows whose column *ncol* contains one of the keywords.

    The comparison is case-insensitive: both the cell content and the
    keywords are upper-cased, whether the keywords come as a string or as
    a list.

    Args:
        csv_file: list of rows (lists of strings).
        header: when True, the first row is skipped and gets no entry.
        results_dict: dict mapping a row index to a list of booleans; it is
            updated in place with one more boolean per processed row.
        keywords: either a whitespace-separated string of keywords or a
            list of keywords.
        ncol: column specification, resolved with ``column_from_txt``.
        match: exact-match flag, converted with ``str_to_bool``. When it is
            True a cell item must equal a keyword; otherwise a keyword only
            has to be contained in a cell item.

    Returns:
        ``results_dict``, where True means a keyword was found in the row.
    """
    match = str_to_bool(match)
    ncol = column_from_txt(ncol, csv_file)
    # Cell items are upper-cased below, so the keywords must be upper-cased
    # too, otherwise a list holding lowercase keywords can never match
    if isinstance(keywords, list):
        keywords = [keyword.upper() for keyword in keywords]
    else:
        keywords = keywords.upper().split()  # Split list of filter keyword

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        # a cell may hold several items separated by ";"
        cell_items = line[ncol].replace('"', "").split(";")
        keyword_inline = [pid.upper() for pid in cell_items]

        # Perfect match or not
        if match is True:
            found_in_line = any(pid in keywords for pid in keyword_inline)
        else:
            found_in_line = any(
                ft in pid for pid in keyword_inline for ft in keywords
            )

        # record the result for this line
        results_dict.setdefault(id_line, []).append(found_in_line)

    return results_dict
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
317 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
318 # filter by a determined value in rows of csvfile, return a dictionary |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
319 # of boolean (true if value filtered, false otherwise) |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
320 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
321 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
def filter_value(csv_file, header, results_dict, filter_value, ncol, opt):
    """Compare the numeric cells of column *ncol* with a threshold.

    Cells are cleaned (double quotes removed, "," turned into ".", stripped)
    and then parsed with ``float``, so negative numbers and scientific
    notation (e.g. "-1.5", "1e-5") are recognised as numeric.

    Args:
        csv_file: list of rows (lists of strings).
        header: when True, the first row is skipped and gets no entry.
        results_dict: dict mapping a row index to a list of booleans; it is
            updated in place with one more boolean per processed row.
        filter_value: threshold, converted to float.
        ncol: column specification, resolved with ``column_from_txt``.
        opt: comparison operator, forwarded to ``value_compare``.

    Returns:
        ``results_dict``. Numeric cells get the result of
        ``value_compare``; non-numeric cells get False, which keeps the row
        by default.

    Raises:
        ValueError: if ``filter_value`` cannot be converted to float.
    """
    filter_value = float(filter_value)
    ncol = column_from_txt(ncol, csv_file)
    nb_string = 0

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        value = line[ncol].replace('"', "").replace(",", ".").strip()
        try:
            number = float(value)
            # NaN is the only float different from itself; it can not be
            # compared, so it is handled like a non-numeric cell
            is_numeric = number == number
        except ValueError:
            is_numeric = False

        if is_numeric:
            to_filter = value_compare(value, filter_value, opt)
        else:
            # impossible to treat (ex : "" instead of a number),
            # we keep the line by default
            nb_string += 1
            to_filter = False

        # adding the result to the dictionary
        results_dict.setdefault(id_line, []).append(to_filter)

    # number of data lines in the csv file
    nb_lines = len(csv_file) - 1 if header else len(csv_file)

    # if there's no numeric value in the column
    if nb_string == nb_lines:
        print('No numeric values found in the column '+str(ncol+1))
        print('The filter "'+str(opt)+' '+str(filter_value)+'" can not be applied on the column '+str(ncol+1))  # noqa 501

    return results_dict
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
362 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
363 # filter by a range of values in rows of csvfile, return a dictionary |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
364 # of boolean (true if value filtered, false otherwise) |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
365 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
366 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def filter_values_range(csv_file, header, results_dict, bottom_value, top_value, ncol, inclusive):  # noqa 501
    """Flag the rows whose value in column *ncol* lies outside a range.

    Cells are cleaned (double quotes removed, "," turned into ".", stripped)
    and then parsed with ``float``, so negative numbers and scientific
    notation (e.g. "-1.5", "1e-5") are recognised as numeric.

    Args:
        csv_file: list of rows (lists of strings).
        header: when True, the first row is skipped and gets no entry.
        results_dict: dict mapping a row index to a list of booleans; it is
            updated in place with one more boolean per processed row.
        bottom_value: lower bound of the range, converted to float.
        top_value: upper bound of the range, converted to float.
        ncol: column specification, resolved with ``column_from_txt``.
        inclusive: flag converted with ``str_to_bool``; when True the
            bounds themselves belong to the range.

    Returns:
        ``results_dict``. A numeric cell gets True when it is NOT inside
        the range; a non-numeric cell gets False, which keeps the row by
        default.

    Raises:
        ValueError: if a bound cannot be converted to float.
    """
    inclusive = str_to_bool(inclusive)
    bottom_value = float(bottom_value)
    top_value = float(top_value)
    ncol = column_from_txt(ncol, csv_file)
    nb_string = 0

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        value = line[ncol].replace('"', "").replace(",", ".").strip()
        try:
            number = float(value)
            # NaN is the only float different from itself; it can not be
            # compared, so it is handled like a non-numeric cell
            is_numeric = number == number
        except ValueError:
            is_numeric = False

        if is_numeric:
            if inclusive is True:
                out_of_range = not (bottom_value <= number <= top_value)
            else:
                out_of_range = not (bottom_value < number < top_value)
        else:
            # impossible to treat (ex : "" instead of a number),
            # we keep the line by default
            nb_string += 1
            out_of_range = False

        # adding the result to the dictionary
        results_dict.setdefault(id_line, []).append(out_of_range)

    # number of data lines in the csv file
    nb_lines = len(csv_file) - 1 if header else len(csv_file)

    # if there's no numeric value in the column
    if nb_string == nb_lines:
        print('No numeric values found in the column '+str(ncol+1))
        if inclusive:
            print('The filter "'+str(bottom_value)+' <= x <= '+str(top_value)+'" can not be applied on the column '+str(ncol+1))  # noqa 501
        else:
            print('The filter "'+str(bottom_value)+' < x < '+str(top_value)+'" can not be applied on the column '+str(ncol+1))  # noqa 501

    return results_dict
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
415 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
416 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def column_from_txt(ncol, file):
    """Convert a textual column specifier into a zero-based column index.

    Every "c" character is removed from ``ncol`` and the remainder is
    checked with ``is_number("int", ...)``. When the check passes, the
    remainder is converted with ``int`` and decremented by one.

    :param ncol: column specifier as text (e.g. "c3" -- presumably the
        Galaxy column notation; confirm against callers).
    :param file: passed through unchanged to ``proper_ncol``.
    :return: the zero-based column index as an ``int``.
    :raises ValueError: if the specifier, once stripped of "c", is not
        accepted by ``is_number("int", ...)``.
    """
    stripped = ncol.replace("c", "")

    # Guard clause: reject anything that is not an integer specifier.
    if not is_number("int", stripped):
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")

    column_index = int(stripped) - 1

    # NOTE(review): proper_ncol presumably validates the index against the
    # file content; its return value is ignored here -- confirm.
    proper_ncol(column_index, file)

    return column_index
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
428 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
429 # Return True if value satisfies the comparison given by opt against filter_value, False otherwise |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
430 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
431 |
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
def value_compare(value, filter_value, opt):
    """Tell whether ``value`` satisfies a comparison against ``filter_value``.

    :param value: the value to test; it is converted with ``float`` before
        being compared.
    :param filter_value: the reference value; it is compared as given,
        without any conversion.
    :param opt: the comparison operator as text, one of "<", "<=", ">",
        ">=", "=" or "!=".
    :return: ``True`` when ``float(value) <opt> filter_value`` holds,
        ``False`` otherwise. An unrecognised ``opt`` yields ``False``.
    :raises ValueError: if ``opt`` is recognised and ``value`` cannot be
        converted by ``float``.
    """
    comparisons = {
        "<": lambda number, reference: number < reference,
        "<=": lambda number, reference: number <= reference,
        ">": lambda number, reference: number > reference,
        ">=": lambda number, reference: number >= reference,
        "=": lambda number, reference: number == reference,
        "!=": lambda number, reference: number != reference,
    }

    compare = comparisons.get(opt)
    if compare is None:
        # Unknown operator: value is deliberately left unconverted, so a
        # non-numeric value does not raise in this case.
        return False

    return bool(compare(float(value), filter_value))
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
455 |
8
98cb671a92eb
"planemo upload commit 80e3e50ca52b5b232f91e6dd6850da606d9c4c5f-dirty"
proteore
parents:
6
diff
changeset
|
456 |
0
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
457 if __name__ == "__main__": |
a55e8b137c6b
planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
proteore
parents:
diff
changeset
|
458 options() |