Comparison of scripts/table_compute.py @ 1:dddadbbac949 (draft)
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit 6820ec9431a22576f3716c40feeb27f0b8cf5e83"
| author | iuc |
|---|---|
| date | Fri, 30 Aug 2019 05:28:18 -0400 |
| parents | 1b0f96ed73f2 |
| children | 02c3e335a695 |

| 0:1b0f96ed73f2 (before) | 1:dddadbbac949 (after) |
|---|---|
| 1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
| 2 """ | 2 """ |
| 3 Table Compute tool - a wrapper around pandas with parameter input validation. | 3 Table Compute tool - a wrapper around pandas with parameter input validation. |
| 4 """ | 4 """ |
| 5 | 5 |
| 6 __version__ = "0.8" | 6 |
| 7 __version__ = "0.9.1" | |
| 7 | 8 |
| 8 import csv | 9 import csv |
| 9 import math | 10 import math |
| 10 from sys import argv | 11 from sys import argv |
| 11 | 12 |
| 12 import numpy as np | 13 import numpy as np |
| 13 import pandas as pd | 14 import pandas as pd |
| 14 import userconfig as uc | |
| 15 from safety import Safety | 15 from safety import Safety |
| 16 # This should be generated in the same directory | 16 |
| 17 | |
| 18 # Version command should not need to copy the config | |
| 19 if len(argv) == 2 and argv[1] == "--version": | 17 if len(argv) == 2 and argv[1] == "--version": |
| 20 print(__version__) | 18 print(__version__) |
| 21 exit(-1) | 19 exit(-1) |
| 22 | 20 |
| | 21 # The import below should be generated in the same directory as |
| | 22 # the table_compute.py script. |
| | 23 # It is placed here so that the --version switch does not fail |
| | 24 import userconfig as uc # noqa: I100,I202 |
| | 25 |
| 23 | 26 |
| 24 class Utils: | 27 class Utils: |
| 25 @staticmethod | 28 @staticmethod |
| 26 def getOneValueMathOp(op_name): | 29 def getOneValueMathOp(op_name): |
| 27 "Returns a simple one value math operator such as log, sqrt, etc" | 30 "Returns a simple one value math operator such as log, sqrt, etc" |
| 35 @staticmethod | 38 @staticmethod |
| 36 def getTwoValuePandaOp(op_name, pd_obj): | 39 def getTwoValuePandaOp(op_name, pd_obj): |
| 37 "Returns a valid two value DataFrame or Series operator" | 40 "Returns a valid two value DataFrame or Series operator" |
| 38 return getattr(type(pd_obj), "__" + op_name + "__") | 41 return getattr(type(pd_obj), "__" + op_name + "__") |
| 39 | 42 |
| 40 | 43 @staticmethod |
| 41 # Math is imported but not directly used because users | 44 def readcsv(filedict, narm): |
| 42 # may specify a "math.<function>" when inserting a custom | 45 data = pd.read_csv( |
| 43 # function. To remove linting errors, which break CI testing | 46 filedict["file"], |
| 44 # we will just use an arbitrary math statement here. | 47 header=filedict["header"], |
| 45 __ = math.log | 48 index_col=filedict["row_names"], |
| | 49 keep_default_na=narm, |
| | 50 nrows=filedict["nrows"], |
| | 51 skipfooter=filedict["skipfooter"], |
| | 52 skip_blank_lines=filedict["skip_blank_lines"], |
| | 53 sep='\t' |
| | 54 ) |
| | 55 # Fix whitespace issues in index or column names |
| | 56 data.columns = [col.strip() if type(col) is str else col |
| | 57 for col in data.columns] |
| | 58 data.index = [row.strip() if type(row) is str else row |
| | 59 for row in data.index] |
| | 60 return(data) |
| | 61 |
| | 62 @staticmethod |
| | 63 def rangemaker(tab): |
| | 64 # e.g. "1:3,2:-2" specifies "1,2,3,2,1,0,-1,-2" to give [0,1,2,1,0,-1,-2] |
| | 65 # Positive indices are decremented by 1 to reference 0-base numbering |
| | 66 # Negative indices are unaltered, so that -1 refers to the last column |
| | 67 out = [] |
| | 68 err_mess = None |
| | 69 for ranges in tab.split(","): |
| | 70 nums = ranges.split(":") |
| | 71 if len(nums) == 1: |
| | 72 numb = int(nums[0]) |
| | 73 # Positive numbers get decremented. |
| | 74 # i.e. column "3" refers to index 2 |
| | 75 # column "-1" still refers to index -1 |
| | 76 if numb != 0: |
| | 77 out.append(numb if (numb < 0) else (numb - 1)) |
| | 78 else: |
| | 79 err_mess = "Please do not use 0 as an index" |
| | 80 elif len(nums) == 2: |
| | 81 left, right = map(int, nums) |
| | 82 if 0 in (left, right): |
| | 83 err_mess = "Please do not use 0 as an index" |
| | 84 elif left < right: |
| | 85 if left > 0: # and right > 0 too |
| | 86 # 1:3 to 0,1,2 |
| | 87 out.extend(range(left - 1, right)) |
| | 88 elif right < 0: # and left < 0 too |
| | 89 # -3:-1 to -3,-2,-1 |
| | 90 out.extend(range(left, right + 1)) |
| | 91 elif left < 0 and right > 0: |
| | 92 # -2:2 to -2,-1,0,1 |
| | 93 out.extend(range(left, 0)) |
| | 94 out.extend(range(0, right)) |
| | 95 elif right < left: |
| | 96 if right > 0: # and left > 0 |
| | 97 # 3:1 to 2,1,0 |
| | 98 out.extend(range(left - 1, right - 2, -1)) |
| | 99 elif left < 0: # and right < 0 |
| | 100 # -1:-3 to -1,-2,-3 |
| | 101 out.extend(range(left, right - 1, -1)) |
| | 102 elif right < 0 and left > 0: |
| | 103 # 2:-2 to 1,0,-1,-2 |
| | 104 out.extend(range(left - 1, right - 1, -1)) |
| | 105 else: |
| | 106 err_mess = "%s should not be equal or contain a zero" % nums |
| | 107 if err_mess: |
| | 108 print(err_mess) |
| | 109 return(None) |
| | 110 return(out) |
| 46 | 111 |
| 47 | 112 |
| 48 # Set decimal precision | 113 # Set decimal precision |
| 49 pd.options.display.precision = uc.Default["precision"] | 114 pd.options.display.precision = uc.Default["precision"] |
| 50 | 115 |
| 53 out_table = None | 118 out_table = None |
| 54 params = uc.Data["params"] | 119 params = uc.Data["params"] |
| 55 | 120 |
| 56 if user_mode == "single": | 121 if user_mode == "single": |
| 57 # Read in TSV file | 122 # Read in TSV file |
| 58 data = pd.read_csv( | 123 data = Utils.readcsv(uc.Data["tables"][0], uc.Default["narm"]) |
| 59 uc.Data["tables"][0]["reader_file"], | |
| 60 header=uc.Data["tables"][0]["reader_header"], | |
| 61 index_col=uc.Data["tables"][0]["reader_row_col"], | |
| 62 keep_default_na=uc.Default["narm"], | |
| 63 sep='\t' | |
| 64 ) | |
| 65 # Fix whitespace issues in index or column names | |
| 66 data.columns = [col.strip() if type(col) is str else col | |
| 67 for col in data.columns] | |
| 68 data.index = [row.strip() if type(row) is str else row | |
| 69 for row in data.index] | |
| 70 | |
| 71 user_mode_single = params["user_mode_single"] | 124 user_mode_single = params["user_mode_single"] |
| 72 | 125 |
| 73 if user_mode_single == "precision": | 126 if user_mode_single == "precision": |
| 74 # Useful for changing decimal precision on write out | 127 # Useful for changing decimal precision on write out |
| 75 out_table = data | 128 out_table = data |
| 77 elif user_mode_single == "select": | 130 elif user_mode_single == "select": |
| 78 cols_specified = params["select_cols_wanted"] | 131 cols_specified = params["select_cols_wanted"] |
| 79 rows_specified = params["select_rows_wanted"] | 132 rows_specified = params["select_rows_wanted"] |
| 80 | 133 |
| 81 # Select all indexes if empty array of values | 134 # Select all indexes if empty array of values |
| 82 if not cols_specified: | 135 if cols_specified: |
| | 136 cols_specified = Utils.rangemaker(cols_specified) |
| | 137 else: |
| 83 cols_specified = range(len(data.columns)) | 138 cols_specified = range(len(data.columns)) |
| 84 if not rows_specified: | 139 if rows_specified: |
| | 140 rows_specified = Utils.rangemaker(rows_specified) |
| | 141 else: |
| 85 rows_specified = range(len(data)) | 142 rows_specified = range(len(data)) |
| 86 | 143 |
| 87 # do not use duplicate indexes | 144 # do not use duplicate indexes |
| 88 # e.g. [2,3,2,5,5,4,2] to [2,3,5,4] | 145 # e.g. [2,3,2,5,5,4,2] to [2,3,5,4] |
| 89 nodupes_col = not params["select_cols_unique"] | 146 nodupes_col = not params["select_cols_unique"] |
| 159 out_table = op(data, axis) | 216 out_table = op(data, axis) |
| 160 | 217 |
| 161 elif user_mode_single == "element": | 218 elif user_mode_single == "element": |
| 162 # lt, gt, ge, etc. | 219 # lt, gt, ge, etc. |
| 163 operation = params["element_op"] | 220 operation = params["element_op"] |
| | 221 bool_mat = None |
| 164 if operation is not None: | 222 if operation is not None: |
| 165 op = Utils.getTwoValuePandaOp(operation, data) | 223 if operation == "rowcol": |
| 166 value = params["element_value"] | 224 # Select all indexes if empty array of values |
| 167 try: | 225 if "element_cols" in params: |
| 168 # Could be numeric | 226 cols_specified = Utils.rangemaker(params["element_cols"]) |
| 169 value = float(value) | 227 else: |
| 170 except ValueError: | 228 cols_specified = range(len(data.columns)) |
| 171 pass | 229 if "element_rows" in params: |
| 172 # generate filter matrix of True/False values | 230 rows_specified = Utils.rangemaker(params["element_rows"]) |
| 173 bool_mat = op(data, value) | 231 else: |
| | 232 rows_specified = range(len(data)) |
| | 233 |
| | 234 # Inclusive selection: |
| | 235 # - True: Giving a row or column will match all elements in that row or column |
| | 236 # - False: Give a row or column will match only elements in both those rows or columns |
| | 237 inclusive = params["element_inclusive"] |
| | 238 |
| | 239 # Create a bool matrix (intialised to False) with selected |
| | 240 # rows and columns set to True |
| | 241 bool_mat = data.copy() |
| | 242 bool_mat[:] = False |
| | 243 if inclusive: |
| | 244 bool_mat.iloc[rows_specified, :] = True |
| | 245 bool_mat.iloc[:, cols_specified] = True |
| | 246 else: |
| | 247 bool_mat.iloc[rows_specified, cols_specified] = True |
| | 248 |
| | 249 else: |
| | 250 op = Utils.getTwoValuePandaOp(operation, data) |
| | 251 value = params["element_value"] |
| | 252 try: |
| | 253 # Could be numeric |
| | 254 value = float(value) |
| | 255 except ValueError: |
| | 256 pass |
| | 257 # generate filter matrix of True/False values |
| | 258 bool_mat = op(data, value) |
| 174 else: | 259 else: |
| 175 # implement no filtering through a filter matrix filled with | 260 # implement no filtering through a filter matrix filled with |
| 176 # True values. | 261 # True values. |
| 177 bool_mat = np.full(data.shape, True) | 262 bool_mat = np.full(data.shape, True) |
| 178 | 263 |
| 263 # Actual 0-based references "table[0]", "table[1]", etc. | 348 # Actual 0-based references "table[0]", "table[1]", etc. |
| 264 table_names_real = [] | 349 table_names_real = [] |
| 265 | 350 |
| 266 # Read and populate tables | 351 # Read and populate tables |
| 267 for x, t_sect in enumerate(table_sections): | 352 for x, t_sect in enumerate(table_sections): |
| 268 tmp = pd.read_csv( | 353 tmp = Utils.readcsv(t_sect, uc.Default["narm"]) |
| 269 t_sect["file"], | |
| 270 header=t_sect["header"], | |
| 271 index_col=t_sect["row_names"], | |
| 272 keep_default_na=uc.Default["narm"], | |
| 273 sep="\t" | |
| 274 ) | |
| 275 table.append(tmp) | 354 table.append(tmp) |
| 276 table_names.append("table" + str(x + 1)) | 355 table_names.append("table" + str(x + 1)) |
| 277 table_names_real.append("table[" + str(x) + "]") | 356 table_names_real.append("table[" + str(x) + "]") |
| 278 | 357 |
| 279 custom_op = params["fulltable_customop"] | 358 custom_op = params["fulltable_customop"] |
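
A note on the new `Utils.readcsv` helper added in this revision: it centralises the `pandas.read_csv` call shared by the single-table and multi-table modes and strips stray whitespace from row and column labels. The sketch below shows the kind of dictionary it expects; the file name and values are made up for illustration, since the real dictionary is generated into `userconfig.py` by the Galaxy tool wrapper.

```python
# Hypothetical input; keys match those read by Utils.readcsv above.
filedict = {
    "file": "input.tsv",         # tab-separated input table (made-up name)
    "header": 0,                 # row 0 holds column names (or None for no header row)
    "row_names": 0,              # column 0 holds row names (or None)
    "nrows": None,               # read all rows
    "skipfooter": 0,             # drop no trailing lines
    "skip_blank_lines": True,
}
data = Utils.readcsv(filedict, narm=True)  # narm is passed through as keep_default_na
```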
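
`Utils.rangemaker` converts the 1-based, comma/colon-delimited range strings coming from the tool form into 0-based indices, leaving negative indices untouched. A minimal sketch of the expected conversions, following the rules spelled out in the method's own comments (illustrative only):

```python
print(Utils.rangemaker("1:3,6"))   # [0, 1, 2, 5]    "1:3" ascends, "6" is a single column
print(Utils.rangemaker("2:-2"))    # [1, 0, -1, -2]  descends through the end of the axis
print(Utils.rangemaker("-3:-1"))   # [-3, -2, -1]    negative indices are kept as-is
print(Utils.rangemaker("0:3"))     # prints an error and returns None: 0 is not a valid index
```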
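
`Utils.getTwoValuePandaOp` turns an operator name such as `lt` or `ge` into the matching dunder method on the pandas class, which the element branch then applies to build a True/False filter matrix. A rough equivalent with a toy DataFrame (`df` and the threshold are invented for the example):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 5], "b": [8, 2]})

op = getattr(type(df), "__lt__")   # same lookup as getTwoValuePandaOp("lt", df)
bool_mat = op(df, 4)               # equivalent to df < 4
#        a      b
# 0   True  False
# 1  False   True
```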
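
The new `rowcol` element operation marks whole rows and columns in the filter matrix, with `element_inclusive` switching between the union (any selected row or column) and the intersection (only cells in both a selected row and a selected column). The sketch below builds the mask directly as a boolean frame for brevity, rather than copying the data frame as the tool does; the 3×3 table and the selections are invented.

```python
import pandas as pd

data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("abc"))
rows_specified, cols_specified = [0], [2]   # e.g. rangemaker("1") and rangemaker("3")

# All-False mask with the same shape as the data.
bool_mat = pd.DataFrame(False, index=data.index, columns=data.columns)

inclusive = True
if inclusive:
    # union: every cell in row 0 plus every cell in column "c"
    bool_mat.iloc[rows_specified, :] = True
    bool_mat.iloc[:, cols_specified] = True
else:
    # intersection: only the single cell at row 0, column "c"
    bool_mat.iloc[rows_specified, cols_specified] = True
```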
