comparison scripts/table_compute.py @ 1:dddadbbac949 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit 6820ec9431a22576f3716c40feeb27f0b8cf5e83"
author iuc
date Fri, 30 Aug 2019 05:28:18 -0400
parents 1b0f96ed73f2
children 02c3e335a695
--- scripts/table_compute.py (0:1b0f96ed73f2)
+++ scripts/table_compute.py (1:dddadbbac949)
@@ -1,27 +1,30 @@
 #!/usr/bin/env python3
 """
 Table Compute tool - a wrapper around pandas with parameter input validation.
 """
 
-__version__ = "0.8"
+
+__version__ = "0.9.1"
 
 import csv
 import math
 from sys import argv
 
 import numpy as np
 import pandas as pd
-import userconfig as uc
 from safety import Safety
-# This should be generated in the same directory
-
-# Version command should not need to copy the config
+
 if len(argv) == 2 and argv[1] == "--version":
     print(__version__)
     exit(-1)
 
+# The import below should be generated in the same directory as
+# the table_compute.py script.
+# It is placed here so that the --version switch does not fail
+import userconfig as uc  # noqa: I100,I202
+
 
 class Utils:
     @staticmethod
     def getOneValueMathOp(op_name):
         "Returns a simple one value math operator such as log, sqrt, etc"
@@ -35,16 +38,78 @@
     @staticmethod
     def getTwoValuePandaOp(op_name, pd_obj):
         "Returns a valid two value DataFrame or Series operator"
         return getattr(type(pd_obj), "__" + op_name + "__")
 
-
-# Math is imported but not directly used because users
-# may specify a "math.<function>" when inserting a custom
-# function. To remove linting errors, which break CI testing
-# we will just use an arbitrary math statement here.
-__ = math.log
+    @staticmethod
+    def readcsv(filedict, narm):
+        data = pd.read_csv(
+            filedict["file"],
+            header=filedict["header"],
+            index_col=filedict["row_names"],
+            keep_default_na=narm,
+            nrows=filedict["nrows"],
+            skipfooter=filedict["skipfooter"],
+            skip_blank_lines=filedict["skip_blank_lines"],
+            sep='\t'
+        )
+        # Fix whitespace issues in index or column names
+        data.columns = [col.strip() if type(col) is str else col
+                        for col in data.columns]
+        data.index = [row.strip() if type(row) is str else row
+                      for row in data.index]
+        return(data)
+
+    @staticmethod
+    def rangemaker(tab):
+        # e.g. "1:3,2:-2" specifies "1,2,3,2,1,0,-1,-2" to give [0,1,2,1,0,-1,-2]
+        # Positive indices are decremented by 1 to reference 0-base numbering
+        # Negative indices are unaltered, so that -1 refers to the last column
+        out = []
+        err_mess = None
+        for ranges in tab.split(","):
+            nums = ranges.split(":")
+            if len(nums) == 1:
+                numb = int(nums[0])
+                # Positive numbers get decremented.
+                # i.e. column "3" refers to index 2
+                # column "-1" still refers to index -1
+                if numb != 0:
+                    out.append(numb if (numb < 0) else (numb - 1))
+                else:
+                    err_mess = "Please do not use 0 as an index"
+            elif len(nums) == 2:
+                left, right = map(int, nums)
+                if 0 in (left, right):
+                    err_mess = "Please do not use 0 as an index"
+                elif left < right:
+                    if left > 0:  # and right > 0 too
+                        # 1:3 to 0,1,2
+                        out.extend(range(left - 1, right))
+                    elif right < 0:  # and left < 0 too
+                        # -3:-1 to -3,-2,-1
+                        out.extend(range(left, right + 1))
+                    elif left < 0 and right > 0:
+                        # -2:2 to -2,-1,0,1
+                        out.extend(range(left, 0))
+                        out.extend(range(0, right))
+                elif right < left:
+                    if right > 0:  # and left > 0
+                        # 3:1 to 2,1,0
+                        out.extend(range(left - 1, right - 2, -1))
+                    elif left < 0:  # and right < 0
+                        # -1:-3 to -1,-2,-3
+                        out.extend(range(left, right - 1, -1))
+                    elif right < 0 and left > 0:
+                        # 2:-2 to 1,0,-1,-2
+                        out.extend(range(left - 1, right - 1, -1))
+            else:
+                err_mess = "%s should not be equal or contain a zero" % nums
+        if err_mess:
+            print(err_mess)
+            return(None)
+        return(out)
 
 
 # Set decimal precision
 pd.options.display.precision = uc.Default["precision"]
 
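The new Utils.rangemaker helper converts the tool's 1-based, comma-separated row/column notation into the 0-based index lists that the selection code below passes to iloc. A minimal sketch of its expected behaviour, derived only from the branches in the hunk above (a hypothetical interactive session, not output from the tool's tests):

    >>> Utils.rangemaker("1:3")          # ascending, all positive: columns 1,2,3
    [0, 1, 2]
    >>> Utils.rangemaker("2:-2")         # descending into from-the-end indices: 2,1,-1,-2
    [1, 0, -1, -2]
    >>> Utils.rangemaker("1:3,2:-2")     # comma-separated ranges are concatenated in order
    [0, 1, 2, 1, 0, -1, -2]
    >>> Utils.rangemaker("0")            # zero is rejected: prints a message, returns None
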
@@ -53,23 +118,11 @@
 out_table = None
 params = uc.Data["params"]
 
 if user_mode == "single":
     # Read in TSV file
-    data = pd.read_csv(
-        uc.Data["tables"][0]["reader_file"],
-        header=uc.Data["tables"][0]["reader_header"],
-        index_col=uc.Data["tables"][0]["reader_row_col"],
-        keep_default_na=uc.Default["narm"],
-        sep='\t'
-    )
-    # Fix whitespace issues in index or column names
-    data.columns = [col.strip() if type(col) is str else col
-                    for col in data.columns]
-    data.index = [row.strip() if type(row) is str else row
-                  for row in data.index]
-
+    data = Utils.readcsv(uc.Data["tables"][0], uc.Default["narm"])
     user_mode_single = params["user_mode_single"]
 
     if user_mode_single == "precision":
         # Useful for changing decimal precision on write out
         out_table = data
@@ -77,13 +130,17 @@
     elif user_mode_single == "select":
         cols_specified = params["select_cols_wanted"]
         rows_specified = params["select_rows_wanted"]
 
         # Select all indexes if empty array of values
-        if not cols_specified:
+        if cols_specified:
+            cols_specified = Utils.rangemaker(cols_specified)
+        else:
             cols_specified = range(len(data.columns))
-        if not rows_specified:
+        if rows_specified:
+            rows_specified = Utils.rangemaker(rows_specified)
+        else:
             rows_specified = range(len(data))
 
         # do not use duplicate indexes
         # e.g. [2,3,2,5,5,4,2] to [2,3,5,4]
         nodupes_col = not params["select_cols_unique"]
@@ -159,20 +216,48 @@
         out_table = op(data, axis)
 
     elif user_mode_single == "element":
         # lt, gt, ge, etc.
         operation = params["element_op"]
+        bool_mat = None
         if operation is not None:
-            op = Utils.getTwoValuePandaOp(operation, data)
-            value = params["element_value"]
-            try:
-                # Could be numeric
-                value = float(value)
-            except ValueError:
-                pass
-            # generate filter matrix of True/False values
-            bool_mat = op(data, value)
+            if operation == "rowcol":
+                # Select all indexes if empty array of values
+                if "element_cols" in params:
+                    cols_specified = Utils.rangemaker(params["element_cols"])
+                else:
+                    cols_specified = range(len(data.columns))
+                if "element_rows" in params:
+                    rows_specified = Utils.rangemaker(params["element_rows"])
+                else:
+                    rows_specified = range(len(data))
+
+                # Inclusive selection:
+                # - True: Giving a row or column will match all elements in that row or column
+                # - False: Give a row or column will match only elements in both those rows or columns
+                inclusive = params["element_inclusive"]
+
+                # Create a bool matrix (intialised to False) with selected
+                # rows and columns set to True
+                bool_mat = data.copy()
+                bool_mat[:] = False
+                if inclusive:
+                    bool_mat.iloc[rows_specified, :] = True
+                    bool_mat.iloc[:, cols_specified] = True
+                else:
+                    bool_mat.iloc[rows_specified, cols_specified] = True
+
+            else:
+                op = Utils.getTwoValuePandaOp(operation, data)
+                value = params["element_value"]
+                try:
+                    # Could be numeric
+                    value = float(value)
+                except ValueError:
+                    pass
+                # generate filter matrix of True/False values
+                bool_mat = op(data, value)
         else:
            # implement no filtering through a filter matrix filled with
            # True values.
            bool_mat = np.full(data.shape, True)
 
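In the new "rowcol" branch the filter matrix is built by flagging whole rows and columns rather than by comparing values. The sketch below reproduces that masking logic on a small hypothetical 3x3 frame with row 1 and column 2 selected (0-based lists [0] and [1]); only the logic mirrors the hunk above, the data is made up:

    # Hypothetical illustration of the inclusive vs. exclusive masks.
    import pandas as pd

    data = pd.DataFrame([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    rows_specified, cols_specified = [0], [1]

    bool_mat = data.copy()
    bool_mat[:] = False

    # inclusive=True: the whole of each selected row and column is marked,
    # so all of row 0 and all of column 1 are selected here.
    bool_mat.iloc[rows_specified, :] = True
    bool_mat.iloc[:, cols_specified] = True

    # inclusive=False would instead mark only the intersection,
    # i.e. the single element at row 0, column 1.
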
@@ -263,17 +348,11 @@
     # Actual 0-based references "table[0]", "table[1]", etc.
     table_names_real = []
 
     # Read and populate tables
     for x, t_sect in enumerate(table_sections):
-        tmp = pd.read_csv(
-            t_sect["file"],
-            header=t_sect["header"],
-            index_col=t_sect["row_names"],
-            keep_default_na=uc.Default["narm"],
-            sep="\t"
-        )
+        tmp = Utils.readcsv(t_sect, uc.Default["narm"])
         table.append(tmp)
         table_names.append("table" + str(x + 1))
         table_names_real.append("table[" + str(x) + "]")
 
     custom_op = params["fulltable_customop"]
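Both reading sites (the single-table branch earlier and the multi-table loop here) now delegate to Utils.readcsv, so each table section only has to supply the reader options as a plain dict. A hypothetical section of that shape, with illustrative values rather than ones taken from a generated userconfig.py:

    # Hypothetical table section; the keys match the pd.read_csv arguments
    # used by the new Utils.readcsv helper.
    t_sect = {
        "file": "input1.tsv",          # path to the TSV file to load
        "header": 0,                   # row to use for column names, or None
        "row_names": 0,                # column to use as the index, or None
        "nrows": None,                 # read only the first N rows if set
        "skipfooter": 0,               # drop the last N rows if set
        "skip_blank_lines": True,
    }
    tmp = Utils.readcsv(t_sect, uc.Default["narm"])   # narm -> keep_default_na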