Comparison of scripts/table_compute.py @ 1:dddadbbac949 (draft)
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit 6820ec9431a22576f3716c40feeb27f0b8cf5e83"
| author | iuc |
|---|---|
| date | Fri, 30 Aug 2019 05:28:18 -0400 |
| parents | 1b0f96ed73f2 |
| children | 02c3e335a695 |

| 0:1b0f96ed73f2 (before) | 1:dddadbbac949 (after) |
|---|---|
| 1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
| 2 """ | 2 """ |
| 3 Table Compute tool - a wrapper around pandas with parameter input validation. | 3 Table Compute tool - a wrapper around pandas with parameter input validation. |
| 4 """ | 4 """ |
| 5 | 5 |
| 6 __version__ = "0.8" | 6 |
| 7 __version__ = "0.9.1" | |
| 7 | 8 |
| 8 import csv | 9 import csv |
| 9 import math | 10 import math |
| 10 from sys import argv | 11 from sys import argv |
| 11 | 12 |
| 12 import numpy as np | 13 import numpy as np |
| 13 import pandas as pd | 14 import pandas as pd |
| 14 import userconfig as uc | |
| 15 from safety import Safety | 15 from safety import Safety |
| 16 # This should be generated in the same directory | 16 |
| 17 | |
| 18 # Version command should not need to copy the config | |
| 19 if len(argv) == 2 and argv[1] == "--version": | 17 if len(argv) == 2 and argv[1] == "--version": |
| 20 print(__version__) | 18 print(__version__) |
| 21 exit(-1) | 19 exit(-1) |
| 22 | 20 |
| | 21 # The import below should be generated in the same directory as |
| | 22 # the table_compute.py script. |
| | 23 # It is placed here so that the --version switch does not fail |
| | 24 import userconfig as uc # noqa: I100,I202 |
| | 25 |
| 23 | 26 |
| 24 class Utils: | 27 class Utils: |
| 25 @staticmethod | 28 @staticmethod |
| 26 def getOneValueMathOp(op_name): | 29 def getOneValueMathOp(op_name): |
| 27 "Returns a simple one value math operator such as log, sqrt, etc" | 30 "Returns a simple one value math operator such as log, sqrt, etc" |
| 35 @staticmethod | 38 @staticmethod |
| 36 def getTwoValuePandaOp(op_name, pd_obj): | 39 def getTwoValuePandaOp(op_name, pd_obj): |
| 37 "Returns a valid two value DataFrame or Series operator" | 40 "Returns a valid two value DataFrame or Series operator" |
| 38 return getattr(type(pd_obj), "__" + op_name + "__") | 41 return getattr(type(pd_obj), "__" + op_name + "__") |
| 39 | 42 |
| 40 | 43 @staticmethod |
| 41 # Math is imported but not directly used because users | 44 def readcsv(filedict, narm): |
| 42 # may specify a "math.<function>" when inserting a custom | 45 data = pd.read_csv( |
| 43 # function. To remove linting errors, which break CI testing | 46 filedict["file"], |
| 44 # we will just use an arbitrary math statement here. | 47 header=filedict["header"], |
| 45 __ = math.log | 48 index_col=filedict["row_names"], |
| | 49 keep_default_na=narm, |
| | 50 nrows=filedict["nrows"], |
| | 51 skipfooter=filedict["skipfooter"], |
| | 52 skip_blank_lines=filedict["skip_blank_lines"], |
| | 53 sep='\t' |
| | 54 ) |
| | 55 # Fix whitespace issues in index or column names |
| | 56 data.columns = [col.strip() if type(col) is str else col |
| | 57 for col in data.columns] |
| | 58 data.index = [row.strip() if type(row) is str else row |
| | 59 for row in data.index] |
| | 60 return(data) |
| | 61 |
| | 62 @staticmethod |
| | 63 def rangemaker(tab): |
| | 64 # e.g. "1:3,2:-2" specifies "1,2,3,2,1,0,-1,-2" to give [0,1,2,1,0,-1,-2] |
| | 65 # Positive indices are decremented by 1 to reference 0-base numbering |
| | 66 # Negative indices are unaltered, so that -1 refers to the last column |
| | 67 out = [] |
| | 68 err_mess = None |
| | 69 for ranges in tab.split(","): |
| | 70 nums = ranges.split(":") |
| | 71 if len(nums) == 1: |
| | 72 numb = int(nums[0]) |
| | 73 # Positive numbers get decremented. |
| | 74 # i.e. column "3" refers to index 2 |
| | 75 # column "-1" still refers to index -1 |
| | 76 if numb != 0: |
| | 77 out.append(numb if (numb < 0) else (numb - 1)) |
| | 78 else: |
| | 79 err_mess = "Please do not use 0 as an index" |
| | 80 elif len(nums) == 2: |
| | 81 left, right = map(int, nums) |
| | 82 if 0 in (left, right): |
| | 83 err_mess = "Please do not use 0 as an index" |
| | 84 elif left < right: |
| | 85 if left > 0: # and right > 0 too |
| | 86 # 1:3 to 0,1,2 |
| | 87 out.extend(range(left - 1, right)) |
| | 88 elif right < 0: # and left < 0 too |
| | 89 # -3:-1 to -3,-2,-1 |
| | 90 out.extend(range(left, right + 1)) |
| | 91 elif left < 0 and right > 0: |
| | 92 # -2:2 to -2,-1,0,1 |
| | 93 out.extend(range(left, 0)) |
| | 94 out.extend(range(0, right)) |
| | 95 elif right < left: |
| | 96 if right > 0: # and left > 0 |
| | 97 # 3:1 to 2,1,0 |
| | 98 out.extend(range(left - 1, right - 2, -1)) |
| | 99 elif left < 0: # and right < 0 |
| | 100 # -1:-3 to -1,-2,-3 |
| | 101 out.extend(range(left, right - 1, -1)) |
| | 102 elif right < 0 and left > 0: |
| | 103 # 2:-2 to 1,0,-1,-2 |
| | 104 out.extend(range(left - 1, right - 1, -1)) |
| | 105 else: |
| | 106 err_mess = "%s should not be equal or contain a zero" % nums |
| | 107 if err_mess: |
| | 108 print(err_mess) |
| | 109 return(None) |
| | 110 return(out) |
| 46 | 111 |
| 47 | 112 |
| 48 # Set decimal precision | 113 # Set decimal precision |
| 49 pd.options.display.precision = uc.Default["precision"] | 114 pd.options.display.precision = uc.Default["precision"] |
| 50 | 115 |
| 53 out_table = None | 118 out_table = None |
| 54 params = uc.Data["params"] | 119 params = uc.Data["params"] |
| 55 | 120 |
| 56 if user_mode == "single": | 121 if user_mode == "single": |
| 57 # Read in TSV file | 122 # Read in TSV file |
| 58 data = pd.read_csv( | 123 data = Utils.readcsv(uc.Data["tables"][0], uc.Default["narm"]) |
| 59 uc.Data["tables"][0]["reader_file"], | |
| 60 header=uc.Data["tables"][0]["reader_header"], | |
| 61 index_col=uc.Data["tables"][0]["reader_row_col"], | |
| 62 keep_default_na=uc.Default["narm"], | |
| 63 sep='\t' | |
| 64 ) | |
| 65 # Fix whitespace issues in index or column names | |
| 66 data.columns = [col.strip() if type(col) is str else col | |
| 67 for col in data.columns] | |
| 68 data.index = [row.strip() if type(row) is str else row | |
| 69 for row in data.index] | |
| 70 | |
| 71 user_mode_single = params["user_mode_single"] | 124 user_mode_single = params["user_mode_single"] |
| 72 | 125 |
| 73 if user_mode_single == "precision": | 126 if user_mode_single == "precision": |
| 74 # Useful for changing decimal precision on write out | 127 # Useful for changing decimal precision on write out |
| 75 out_table = data | 128 out_table = data |
| 77 elif user_mode_single == "select": | 130 elif user_mode_single == "select": |
| 78 cols_specified = params["select_cols_wanted"] | 131 cols_specified = params["select_cols_wanted"] |
| 79 rows_specified = params["select_rows_wanted"] | 132 rows_specified = params["select_rows_wanted"] |
| 80 | 133 |
| 81 # Select all indexes if empty array of values | 134 # Select all indexes if empty array of values |
| 82 if not cols_specified: | 135 if cols_specified: |
| | 136 cols_specified = Utils.rangemaker(cols_specified) |
| | 137 else: |
| 83 cols_specified = range(len(data.columns)) | 138 cols_specified = range(len(data.columns)) |
| 84 if not rows_specified: | 139 if rows_specified: |
| | 140 rows_specified = Utils.rangemaker(rows_specified) |
| | 141 else: |
| 85 rows_specified = range(len(data)) | 142 rows_specified = range(len(data)) |
| 86 | 143 |
| 87 # do not use duplicate indexes | 144 # do not use duplicate indexes |
| 88 # e.g. [2,3,2,5,5,4,2] to [2,3,5,4] | 145 # e.g. [2,3,2,5,5,4,2] to [2,3,5,4] |
| 89 nodupes_col = not params["select_cols_unique"] | 146 nodupes_col = not params["select_cols_unique"] |
| 159 out_table = op(data, axis) | 216 out_table = op(data, axis) |
| 160 | 217 |
| 161 elif user_mode_single == "element": | 218 elif user_mode_single == "element": |
| 162 # lt, gt, ge, etc. | 219 # lt, gt, ge, etc. |
| 163 operation = params["element_op"] | 220 operation = params["element_op"] |
| | 221 bool_mat = None |
| 164 if operation is not None: | 222 if operation is not None: |
| 165 op = Utils.getTwoValuePandaOp(operation, data) | 223 if operation == "rowcol": |
| 166 value = params["element_value"] | 224 # Select all indexes if empty array of values |
| 167 try: | 225 if "element_cols" in params: |
| 168 # Could be numeric | 226 cols_specified = Utils.rangemaker(params["element_cols"]) |
| 169 value = float(value) | 227 else: |
| 170 except ValueError: | 228 cols_specified = range(len(data.columns)) |
| 171 pass | 229 if "element_rows" in params: |
| 172 # generate filter matrix of True/False values | 230 rows_specified = Utils.rangemaker(params["element_rows"]) |
| 173 bool_mat = op(data, value) | 231 else: |
| | 232 rows_specified = range(len(data)) |
| | 233 |
| | 234 # Inclusive selection: |
| | 235 # - True: Giving a row or column will match all elements in that row or column |
| | 236 # - False: Give a row or column will match only elements in both those rows or columns |
| | 237 inclusive = params["element_inclusive"] |
| | 238 |
| | 239 # Create a bool matrix (intialised to False) with selected |
| | 240 # rows and columns set to True |
| | 241 bool_mat = data.copy() |
| | 242 bool_mat[:] = False |
| | 243 if inclusive: |
| | 244 bool_mat.iloc[rows_specified, :] = True |
| | 245 bool_mat.iloc[:, cols_specified] = True |
| | 246 else: |
| | 247 bool_mat.iloc[rows_specified, cols_specified] = True |
| | 248 |
| | 249 else: |
| | 250 op = Utils.getTwoValuePandaOp(operation, data) |
| | 251 value = params["element_value"] |
| | 252 try: |
| | 253 # Could be numeric |
| | 254 value = float(value) |
| | 255 except ValueError: |
| | 256 pass |
| | 257 # generate filter matrix of True/False values |
| | 258 bool_mat = op(data, value) |
| 174 else: | 259 else: |
| 175 # implement no filtering through a filter matrix filled with | 260 # implement no filtering through a filter matrix filled with |
| 176 # True values. | 261 # True values. |
| 177 bool_mat = np.full(data.shape, True) | 262 bool_mat = np.full(data.shape, True) |
| 178 | 263 |
| 263 # Actual 0-based references "table[0]", "table[1]", etc. | 348 # Actual 0-based references "table[0]", "table[1]", etc. |
| 264 table_names_real = [] | 349 table_names_real = [] |
| 265 | 350 |
| 266 # Read and populate tables | 351 # Read and populate tables |
| 267 for x, t_sect in enumerate(table_sections): | 352 for x, t_sect in enumerate(table_sections): |
| 268 tmp = pd.read_csv( | 353 tmp = Utils.readcsv(t_sect, uc.Default["narm"]) |
| 269 t_sect["file"], | |
| 270 header=t_sect["header"], | |
| 271 index_col=t_sect["row_names"], | |
| 272 keep_default_na=uc.Default["narm"], | |
| 273 sep="\t" | |
| 274 ) | |
| 275 table.append(tmp) | 354 table.append(tmp) |
| 276 table_names.append("table" + str(x + 1)) | 355 table_names.append("table" + str(x + 1)) |
| 277 table_names_real.append("table[" + str(x) + "]") | 356 table_names_real.append("table[" + str(x) + "]") |
| 278 | 357 |
| 279 custom_op = params["fulltable_customop"] | 358 custom_op = params["fulltable_customop"] |
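
A note on the new `Utils.readcsv` helper added in this revision: it centralises the `pandas.read_csv` call shared by the single-table and multi-table modes and strips stray whitespace from row and column labels. The sketch below shows the kind of dictionary it expects; the file name and values are made up for illustration, since the real dictionary is generated into `userconfig.py` by the Galaxy tool wrapper.

```python
# Hypothetical input; keys match those read by Utils.readcsv above.
filedict = {
    "file": "input.tsv",         # tab-separated input table (made-up name)
    "header": 0,                 # row 0 holds column names (or None for no header row)
    "row_names": 0,              # column 0 holds row names (or None)
    "nrows": None,               # read all rows
    "skipfooter": 0,             # drop no trailing lines
    "skip_blank_lines": True,
}
data = Utils.readcsv(filedict, narm=True)  # narm is passed through as keep_default_na
```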
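
`Utils.rangemaker` converts the 1-based, comma/colon-delimited range strings coming from the tool form into 0-based indices, leaving negative indices untouched. A minimal sketch of the expected conversions, following the rules spelled out in the method's own comments (illustrative only):

```python
print(Utils.rangemaker("1:3,6"))   # [0, 1, 2, 5]    "1:3" ascends, "6" is a single column
print(Utils.rangemaker("2:-2"))    # [1, 0, -1, -2]  descends through the end of the axis
print(Utils.rangemaker("-3:-1"))   # [-3, -2, -1]    negative indices are kept as-is
print(Utils.rangemaker("0:3"))     # prints an error and returns None: 0 is not a valid index
```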
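
`Utils.getTwoValuePandaOp` turns an operator name such as `lt` or `ge` into the matching dunder method on the pandas class, which the element branch then applies to build a True/False filter matrix. A rough equivalent with a toy DataFrame (`df` and the threshold are invented for the example):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 5], "b": [8, 2]})

op = getattr(type(df), "__lt__")   # same lookup as getTwoValuePandaOp("lt", df)
bool_mat = op(df, 4)               # equivalent to df < 4
#        a      b
# 0   True  False
# 1  False   True
```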
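
The new `rowcol` element operation marks whole rows and columns in the filter matrix, with `element_inclusive` switching between the union (any selected row or column) and the intersection (only cells in both a selected row and a selected column). The sketch below builds the mask directly as a boolean frame for brevity, rather than copying the data frame as the tool does; the 3×3 table and the selections are invented.

```python
import pandas as pd

data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("abc"))
rows_specified, cols_specified = [0], [2]   # e.g. rangemaker("1") and rangemaker("3")

# All-False mask with the same shape as the data.
bool_mat = pd.DataFrame(False, index=data.index, columns=data.columns)

inclusive = True
if inclusive:
    # union: every cell in row 0 plus every cell in column "c"
    bool_mat.iloc[rows_specified, :] = True
    bool_mat.iloc[:, cols_specified] = True
else:
    # intersection: only the single cell at row 0, column "c"
    bool_mat.iloc[rows_specified, cols_specified] = True
```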
