comparison scripts/table_compute.py @ 1:dddadbbac949 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit 6820ec9431a22576f3716c40feeb27f0b8cf5e83"
author    iuc
date      Fri, 30 Aug 2019 05:28:18 -0400
parents   1b0f96ed73f2
children  02c3e335a695
0:1b0f96ed73f2 (parent, left column) | 1:dddadbbac949 (this revision, right column)
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 """ | 2 """ |
3 Table Compute tool - a wrapper around pandas with parameter input validation. | 3 Table Compute tool - a wrapper around pandas with parameter input validation. |
4 """ | 4 """ |
5 | 5 |
6 __version__ = "0.8" | 6 |
7 __version__ = "0.9.1" |
7 | 8 |
8 import csv | 9 import csv |
9 import math | 10 import math |
10 from sys import argv | 11 from sys import argv |
11 | 12 |
12 import numpy as np | 13 import numpy as np |
13 import pandas as pd | 14 import pandas as pd |
14 import userconfig as uc | |
15 from safety import Safety | 15 from safety import Safety |
16 # This should be generated in the same directory | 16 |
17 | |
18 # Version command should not need to copy the config | |
19 if len(argv) == 2 and argv[1] == "--version": | 17 if len(argv) == 2 and argv[1] == "--version": |
20 print(__version__) | 18 print(__version__) |
21 exit(-1) | 19 exit(-1) |
22 | 20 |
21 # The import below should be generated in the same directory as | |
22 # the table_compute.py script. | |
23 # It is placed here so that the --version switch does not fail | |
24 import userconfig as uc # noqa: I100,I202 | |
25 | |
23 | 26 |
24 class Utils: | 27 class Utils: |
25 @staticmethod | 28 @staticmethod |
26 def getOneValueMathOp(op_name): | 29 def getOneValueMathOp(op_name): |
27 "Returns a simple one value math operator such as log, sqrt, etc" | 30 "Returns a simple one value math operator such as log, sqrt, etc" |
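The body of getOneValueMathOp is elided from this hunk; per its docstring it maps an operator name to a unary function from math or numpy, presumably along these lines (an assumption for illustration, not the tool's exact code):

    import math

    op = getattr(math, "sqrt")   # e.g. for op_name == "sqrt"
    print(op(16.0))              # 4.0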
35 @staticmethod | 38 @staticmethod |
36 def getTwoValuePandaOp(op_name, pd_obj): | 39 def getTwoValuePandaOp(op_name, pd_obj): |
37 "Returns a valid two value DataFrame or Series operator" | 40 "Returns a valid two value DataFrame or Series operator" |
38 return getattr(type(pd_obj), "__" + op_name + "__") | 41 return getattr(type(pd_obj), "__" + op_name + "__") |
39 | 42 |
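getTwoValuePandaOp resolves a binary operator by its dunder name on the pandas class of the object it is given. A minimal sketch of the same lookup, using an illustrative DataFrame rather than the tool's input:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 5], "b": [3, 2]})
    op = getattr(type(df), "__lt__")   # what getTwoValuePandaOp("lt", df) returns
    print(op(df, 3))                   # element-wise comparison, same as df < 3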
40 | 43 @staticmethod |
41 # Math is imported but not directly used because users | 44 def readcsv(filedict, narm): |
42 # may specify a "math.<function>" when inserting a custom | 45 data = pd.read_csv( |
43 # function. To remove linting errors, which break CI testing | 46 filedict["file"], |
44 # we will just use an arbitrary math statement here. | 47 header=filedict["header"], |
45 __ = math.log | 48 index_col=filedict["row_names"], |
49 keep_default_na=narm, | |
50 nrows=filedict["nrows"], | |
51 skipfooter=filedict["skipfooter"], | |
52 skip_blank_lines=filedict["skip_blank_lines"], | |
53 sep='\t' | |
54 ) | |
55 # Fix whitespace issues in index or column names | |
56 data.columns = [col.strip() if type(col) is str else col | |
57 for col in data.columns] | |
58 data.index = [row.strip() if type(row) is str else row | |
59 for row in data.index] | |
60 return(data) | |
61 | |
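The new readcsv helper takes a dict describing one input table plus the NA-handling flag. A sketch of a call; the key values shown are assumptions for illustration, not what the generated userconfig actually contains:

    filedict = {
        "file": "input.tsv",       # hypothetical path to a TSV file
        "header": 0,               # row holding the column names, or None
        "row_names": 0,            # column holding the row index, or None
        "nrows": None,             # read every row
        "skipfooter": 0,           # drop no trailing lines
        "skip_blank_lines": True,
    }
    data = Utils.readcsv(filedict, narm=True)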
62 @staticmethod | |
63 def rangemaker(tab): | |
64 # e.g. "1:3,2:-2" specifies columns "1,2,3,2,1,-1,-2" to give [0,1,2,1,0,-1,-2] |
65 # Positive indices are decremented by 1 to reference 0-base numbering | |
66 # Negative indices are unaltered, so that -1 refers to the last column | |
67 out = [] | |
68 err_mess = None | |
69 for ranges in tab.split(","): | |
70 nums = ranges.split(":") | |
71 if len(nums) == 1: | |
72 numb = int(nums[0]) | |
73 # Positive numbers get decremented. | |
74 # i.e. column "3" refers to index 2 | |
75 # column "-1" still refers to index -1 | |
76 if numb != 0: | |
77 out.append(numb if (numb < 0) else (numb - 1)) | |
78 else: | |
79 err_mess = "Please do not use 0 as an index" | |
80 elif len(nums) == 2: | |
81 left, right = map(int, nums) | |
82 if 0 in (left, right): | |
83 err_mess = "Please do not use 0 as an index" | |
84 elif left < right: | |
85 if left > 0: # and right > 0 too | |
86 # 1:3 to 0,1,2 | |
87 out.extend(range(left - 1, right)) | |
88 elif right < 0: # and left < 0 too | |
89 # -3:-1 to -3,-2,-1 | |
90 out.extend(range(left, right + 1)) | |
91 elif left < 0 and right > 0: | |
92 # -2:2 to -2,-1,0,1 | |
93 out.extend(range(left, 0)) | |
94 out.extend(range(0, right)) | |
95 elif right < left: | |
96 if right > 0: # and left > 0 | |
97 # 3:1 to 2,1,0 | |
98 out.extend(range(left - 1, right - 2, -1)) | |
99 elif left < 0: # and right < 0 | |
100 # -1:-3 to -1,-2,-3 | |
101 out.extend(range(left, right - 1, -1)) | |
102 elif right < 0 and left > 0: | |
103 # 2:-2 to 1,0,-1,-2 | |
104 out.extend(range(left - 1, right - 1, -1)) | |
105 else: | |
106 err_mess = "%s should not be equal or contain a zero" % nums | |
107 if err_mess: | |
108 print(err_mess) | |
109 return(None) | |
110 return(out) | |
46 | 111 |
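rangemaker converts the 1-based (optionally negative) row/column ranges entered on the tool form into 0-based indices for pandas. For example, following the branches above:

    Utils.rangemaker("1:3")        # [0, 1, 2]
    Utils.rangemaker("1:3,2:-2")   # [0, 1, 2, 1, 0, -1, -2]
    Utils.rangemaker("0")          # prints an error message and returns None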
47 | 112 |
48 # Set decimal precision | 113 # Set decimal precision |
49 pd.options.display.precision = uc.Default["precision"] | 114 pd.options.display.precision = uc.Default["precision"] |
50 | 115 |
53 out_table = None | 118 out_table = None |
54 params = uc.Data["params"] | 119 params = uc.Data["params"] |
55 | 120 |
56 if user_mode == "single": | 121 if user_mode == "single": |
57 # Read in TSV file | 122 # Read in TSV file |
58 data = pd.read_csv( | 123 data = Utils.readcsv(uc.Data["tables"][0], uc.Default["narm"]) |
59 uc.Data["tables"][0]["reader_file"], | |
60 header=uc.Data["tables"][0]["reader_header"], | |
61 index_col=uc.Data["tables"][0]["reader_row_col"], | |
62 keep_default_na=uc.Default["narm"], | |
63 sep='\t' | |
64 ) | |
65 # Fix whitespace issues in index or column names | |
66 data.columns = [col.strip() if type(col) is str else col | |
67 for col in data.columns] | |
68 data.index = [row.strip() if type(row) is str else row | |
69 for row in data.index] | |
70 | |
71 user_mode_single = params["user_mode_single"] | 124 user_mode_single = params["user_mode_single"] |
72 | 125 |
73 if user_mode_single == "precision": | 126 if user_mode_single == "precision": |
74 # Useful for changing decimal precision on write out | 127 # Useful for changing decimal precision on write out |
75 out_table = data | 128 out_table = data |
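The write-out itself happens outside this hunk; the precision option set earlier controls how many decimal places pandas renders, e.g.:

    import pandas as pd

    pd.options.display.precision = 2
    print(pd.Series([3.14159, 2.71828]))   # values display as 3.14 and 2.72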
77 elif user_mode_single == "select": | 130 elif user_mode_single == "select": |
78 cols_specified = params["select_cols_wanted"] | 131 cols_specified = params["select_cols_wanted"] |
79 rows_specified = params["select_rows_wanted"] | 132 rows_specified = params["select_rows_wanted"] |
80 | 133 |
81 # Select all indexes if empty array of values | 134 # Select all indexes if empty array of values |
82 if not cols_specified: | 135 if cols_specified: |
136 cols_specified = Utils.rangemaker(cols_specified) | |
137 else: | |
83 cols_specified = range(len(data.columns)) | 138 cols_specified = range(len(data.columns)) |
84 if not rows_specified: | 139 if rows_specified: |
140 rows_specified = Utils.rangemaker(rows_specified) | |
141 else: | |
85 rows_specified = range(len(data)) | 142 rows_specified = range(len(data)) |
86 | 143 |
87 # do not use duplicate indexes | 144 # do not use duplicate indexes |
88 # e.g. [2,3,2,5,5,4,2] to [2,3,5,4] | 145 # e.g. [2,3,2,5,5,4,2] to [2,3,5,4] |
89 nodupes_col = not params["select_cols_unique"] | 146 nodupes_col = not params["select_cols_unique"] |
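The deduplication itself falls outside this hunk; one order-preserving way to go from [2,3,2,5,5,4,2] to [2,3,5,4], shown purely as an illustration:

    spec = [2, 3, 2, 5, 5, 4, 2]
    deduped = list(dict.fromkeys(spec))   # [2, 3, 5, 4]: first occurrences, order kept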
159 out_table = op(data, axis) | 216 out_table = op(data, axis) |
160 | 217 |
161 elif user_mode_single == "element": | 218 elif user_mode_single == "element": |
162 # lt, gt, ge, etc. | 219 # lt, gt, ge, etc. |
163 operation = params["element_op"] | 220 operation = params["element_op"] |
221 bool_mat = None | |
164 if operation is not None: | 222 if operation is not None: |
165 op = Utils.getTwoValuePandaOp(operation, data) | 223 if operation == "rowcol": |
166 value = params["element_value"] | 224 # Select all indexes if empty array of values |
167 try: | 225 if "element_cols" in params: |
168 # Could be numeric | 226 cols_specified = Utils.rangemaker(params["element_cols"]) |
169 value = float(value) | 227 else: |
170 except ValueError: | 228 cols_specified = range(len(data.columns)) |
171 pass | 229 if "element_rows" in params: |
172 # generate filter matrix of True/False values | 230 rows_specified = Utils.rangemaker(params["element_rows"]) |
173 bool_mat = op(data, value) | 231 else: |
232 rows_specified = range(len(data)) | |
233 | |
234 # Inclusive selection: | |
235 # - True: Giving a row or column will match all elements in that row or column | |
236 # - False: Giving a row or column will match only elements at the intersection of the given rows and columns |
237 inclusive = params["element_inclusive"] | |
238 | |
239 # Create a bool matrix (initialised to False) with selected |
240 # rows and columns set to True | |
241 bool_mat = data.copy() | |
242 bool_mat[:] = False | |
243 if inclusive: | |
244 bool_mat.iloc[rows_specified, :] = True | |
245 bool_mat.iloc[:, cols_specified] = True | |
246 else: | |
247 bool_mat.iloc[rows_specified, cols_specified] = True | |
248 | |
249 else: | |
250 op = Utils.getTwoValuePandaOp(operation, data) | |
251 value = params["element_value"] | |
252 try: | |
253 # Could be numeric | |
254 value = float(value) | |
255 except ValueError: | |
256 pass | |
257 # generate filter matrix of True/False values | |
258 bool_mat = op(data, value) | |
174 else: | 259 else: |
175 # implement no filtering through a filter matrix filled with | 260 # implement no filtering through a filter matrix filled with |
176 # True values. | 261 # True values. |
177 bool_mat = np.full(data.shape, True) | 262 bool_mat = np.full(data.shape, True) |
178 | 263 |
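To illustrate the inclusive flag on a hypothetical 3x3 table with rows_specified = [0] and cols_specified = [1] (the mask is built explicitly here rather than by copying the data, a minor departure from the code above):

    import numpy as np
    import pandas as pd

    data = pd.DataFrame(np.arange(9).reshape(3, 3))
    rows_specified, cols_specified = [0], [1]

    # start from an all-False mask with the same shape as the data
    bool_mat = pd.DataFrame(False, index=data.index, columns=data.columns)

    # inclusive=True marks the whole of row 0 and the whole of column 1 (5 cells)
    bool_mat.iloc[rows_specified, :] = True
    bool_mat.iloc[:, cols_specified] = True

    # inclusive=False would instead mark only the intersection, the cell at (0, 1):
    # bool_mat.iloc[rows_specified, cols_specified] = True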
263 # Actual 0-based references "table[0]", "table[1]", etc. | 348 # Actual 0-based references "table[0]", "table[1]", etc. |
264 table_names_real = [] | 349 table_names_real = [] |
265 | 350 |
266 # Read and populate tables | 351 # Read and populate tables |
267 for x, t_sect in enumerate(table_sections): | 352 for x, t_sect in enumerate(table_sections): |
268 tmp = pd.read_csv( | 353 tmp = Utils.readcsv(t_sect, uc.Default["narm"]) |
269 t_sect["file"], | |
270 header=t_sect["header"], | |
271 index_col=t_sect["row_names"], | |
272 keep_default_na=uc.Default["narm"], | |
273 sep="\t" | |
274 ) | |
275 table.append(tmp) | 354 table.append(tmp) |
276 table_names.append("table" + str(x + 1)) | 355 table_names.append("table" + str(x + 1)) |
277 table_names_real.append("table[" + str(x) + "]") | 356 table_names_real.append("table[" + str(x) + "]") |
278 | 357 |
279 custom_op = params["fulltable_customop"] | 358 custom_op = params["fulltable_customop"] |
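For two input tables, the loop above yields the user-facing names and their positional references. A custom full-table expression written in terms of table1/table2 presumably ends up operating on table[0]/table[1]; the substitution itself is outside this hunk:

    table_names      = ["table1", "table2"]
    table_names_real = ["table[0]", "table[1]"]
    # e.g. a user expression "table1 - table2" acts on table[0] and table[1]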