Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/rule_parsing.py @ 4:41f35c2f0c7b draft
Uploaded
| author | luca_milaz |
|---|---|
| date | Wed, 18 Sep 2024 10:59:10 +0000 |
| parents | |
| children | a6e45049c1b9 |
comparison
equal
deleted
inserted
replaced
| 3:1f3ac6fd9867 | 4:41f35c2f0c7b |
|---|---|
| 1 from enum import Enum | |
| 2 import utils.general_utils as utils | |
| 3 from typing import List, Union, Optional | |
| 4 | |
| 5 class RuleErr(utils.CustomErr): | |
| 6 """ | |
| 7 CustomErr subclass for rule syntax errors. | |
| 8 """ | |
| 9 errName = "Rule Syntax Error" | |
| 10 def __init__(self, rule :str, msg = "no further details provided") -> None: | |
| 11 super().__init__( | |
| 12 f"rule \"{rule}\" is malformed, {msg}", | |
| 13 "please verify your input follows the validity guidelines") | |
| 14 | |
| 15 class RuleOp(Enum): | |
| 16 """ | |
| 17 Encodes all operators valid in gene rules. | |
| 18 """ | |
| 19 OR = "or" | |
| 20 AND = "and" | |
| 21 | |
| 22 @classmethod | |
| 23 def isOperator(cls, op :str) -> bool: | |
| 24 return op.upper() in cls.__members__ | |
| 25 | |
| 26 def __str__(self) -> str: return self.value | |
| 27 | |
| 28 class OpList(List[Union[str, "OpList"]]): | |
| 29 """ | |
| 30 Represents a parsed rule and each of its nesting levels, including the operator that level uses. | |
| 31 """ | |
| 32 def __init__(self, op :Optional[RuleOp] = None) -> None: | |
| 33 """ | |
| 34 (Private) Initializes an instance of OpList. | |
| 35 | |
| 36 Args: | |
| 37 op (str): Operator to be assigned to the OpList. Defaults to "". | |
| 38 | |
| 39 Returns: | |
| 40 None : practically, an OpList instance. | |
| 41 """ | |
| 42 self.op = op | |
| 43 | |
| 44 def setOpIfMissing(self, op :RuleOp) -> None: | |
| 45 """ | |
| 46 Sets the operator of the OpList if it's missing. | |
| 47 | |
| 48 Args: | |
| 49 op (str): Operator to be assigned to the OpList. | |
| 50 | |
| 51 Returns: | |
| 52 None | |
| 53 """ | |
| 54 if not self.op: self.op = op | |
| 55 | |
| 56 def __repr__(self, indent = "") -> str: | |
| 57 """ | |
| 58 (Private) Returns a string representation of the current OpList instance. | |
| 59 | |
| 60 Args: | |
| 61 indent (str): Indentation level . Defaults to "". | |
| 62 | |
| 63 Returns: | |
| 64 str: A string representation of the current OpList instance. | |
| 65 """ | |
| 66 nextIndent = indent + " " | |
| 67 return f"<{self.op}>[\n" + ",\n".join([ | |
| 68 f"{nextIndent}{item.__repr__(nextIndent) if isinstance(item, OpList) else item}" | |
| 69 for item in self ]) + f"\n{indent}]" | |
| 70 | |
| 71 class RuleStack: | |
| 72 """ | |
| 73 FILO stack structure to save the intermediate representation of a Rule during parsing, with the | |
| 74 current nesting level at the top of the stack. | |
| 75 """ | |
| 76 def __init__(self) -> None: | |
| 77 """ | |
| 78 (Private) initializes an instance of RuleStack. | |
| 79 | |
| 80 Returns: | |
| 81 None : practically, a RuleStack instance. | |
| 82 """ | |
| 83 self.__stack = [OpList()] # the stack starts out with the result list already allocated | |
| 84 self.__updateCurrent() | |
| 85 | |
| 86 def pop(self) -> None: | |
| 87 """ | |
| 88 Removes the OpList on top of the stack, also flattening it once when possible. | |
| 89 | |
| 90 Side Effects: | |
| 91 self : mut | |
| 92 | |
| 93 Returns: | |
| 94 None | |
| 95 """ | |
| 96 oldTop = self.__stack.pop() | |
| 97 if len(oldTop) == 1 and isinstance(oldTop[0], OpList): self.__stack[-1][-1] = oldTop[0] | |
| 98 self.__updateCurrent() | |
| 99 | |
| 100 def push(self, operator = "") -> None: | |
| 101 """ | |
| 102 Adds a new nesting level, in the form of a new OpList on top of the stack. | |
| 103 | |
| 104 Args: | |
| 105 operator : the operator assigned to the new OpList. | |
| 106 | |
| 107 Side Effects: | |
| 108 self : mut | |
| 109 | |
| 110 Returns: | |
| 111 None | |
| 112 """ | |
| 113 newLevel = OpList(operator) | |
| 114 self.current.append(newLevel) | |
| 115 self.__stack.append(newLevel) | |
| 116 self.__updateCurrent() | |
| 117 | |
| 118 def popForward(self) -> None: | |
| 119 """ | |
| 120 Moves the last "actual" item from the 2nd to last list to the beginning of the top list, as per | |
| 121 the example below: | |
| 122 stack : [list_a, list_b] | |
| 123 list_a : [item1, item2, list_b] --> [item1, list_b] | |
| 124 list_b : [item3, item4] --> [item2, item3, item4] | |
| 125 | |
| 126 This is essentially a "give back as needed" operation. | |
| 127 | |
| 128 Side Effects: | |
| 129 self : mut | |
| 130 | |
| 131 Returns: | |
| 132 None | |
| 133 """ | |
| 134 self.current.insert(0, self.__stack[-2].pop(-2)) | |
| 135 | |
| 136 def currentIsAnd(self) -> bool: | |
| 137 """ | |
| 138 Checks if the current OpList's assigned operator is "and". | |
| 139 | |
| 140 Returns: | |
| 141 bool : True if the current OpList's assigned operator is "and", False otherwise. | |
| 142 """ | |
| 143 return self.current.op is RuleOp.AND | |
| 144 | |
| 145 def obtain(self, err :Optional[utils.CustomErr] = None) -> Optional[OpList]: | |
| 146 """ | |
| 147 Obtains the first OpList on the stack, only if it's the only element. | |
| 148 | |
| 149 Args: | |
| 150 err : The error to raise if obtaining the result is not possible. | |
| 151 | |
| 152 Side Effects: | |
| 153 self : mut | |
| 154 | |
| 155 Raises: | |
| 156 err: If given, otherwise None is returned. | |
| 157 | |
| 158 Returns: | |
| 159 Optional[OpList]: The first OpList on the stack, only if it's the only element. | |
| 160 """ | |
| 161 | |
| 162 if len(self.__stack) == 1: return self.__stack.pop() | |
| 163 if err: raise err | |
| 164 return None | |
| 165 | |
| 166 def __updateCurrent(self) -> None: | |
| 167 """ | |
| 168 (Private) Updates the current OpList to the one on top of the stack. | |
| 169 | |
| 170 Side Effects: | |
| 171 self : mut | |
| 172 | |
| 173 Returns: | |
| 174 None | |
| 175 """ | |
| 176 self.current = self.__stack[-1] | |
| 177 | |
| 178 def parseRuleToNestedList(rule :str) -> OpList: | |
| 179 """ | |
| 180 Parse a single rule from its string representation to an OpList, making all priority explicit | |
| 181 through nesting levels. | |
| 182 | |
| 183 Args: | |
| 184 rule : the string representation of a rule to be parsed. | |
| 185 | |
| 186 Raises: | |
| 187 RuleErr : whenever something goes wrong during parsing. | |
| 188 | |
| 189 Returns: | |
| 190 OpList : the parsed rule. | |
| 191 """ | |
| 192 source = iter(rule | |
| 193 .replace("(", "( ").replace(")", " )") # Single out parens as words | |
| 194 .strip() # remove whitespace at extremities | |
| 195 .split()) # split by spaces | |
| 196 | |
| 197 stack = RuleStack() | |
| 198 nestingErr = RuleErr(rule, "mismatch between open and closed parentheses") | |
| 199 try: | |
| 200 while True: # keep reading until source ends | |
| 201 while True: | |
| 202 operand = next(source, None) # expected name or rule opening | |
| 203 if operand is None: raise RuleErr(rule, "found trailing open parentheses") | |
| 204 if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic | |
| 205 raise RuleErr(rule, f"found \"{operand}\" in unexpected position") | |
| 206 | |
| 207 if operand != "(": break # found name | |
| 208 | |
| 209 # found rule opening, we add new nesting level but don't know the operator | |
| 210 stack.push() | |
| 211 | |
| 212 stack.current.append(operand) | |
| 213 | |
| 214 while True: # keep reading until operator is found or source ends | |
| 215 operator = next(source, None) # expected operator or rule closing | |
| 216 if operator and operator != ")": break # found operator | |
| 217 | |
| 218 if stack.currentIsAnd(): stack.pop() # we close the "and" chain | |
| 219 | |
| 220 if not operator: break | |
| 221 stack.pop() # we close the parentheses | |
| 222 | |
| 223 # we proceed with operator: | |
| 224 if not operator: break # there is no such thing as a double loop break.. yet | |
| 225 | |
| 226 if not RuleOp.isOperator(operator): raise RuleErr( | |
| 227 rule, f"found \"{operator}\" in unexpected position, expected operator") | |
| 228 | |
| 229 operator = RuleOp(operator) | |
| 230 if operator is RuleOp.OR and stack.currentIsAnd(): | |
| 231 stack.pop() | |
| 232 | |
| 233 elif operator is RuleOp.AND and not stack.currentIsAnd(): | |
| 234 stack.push(operator) | |
| 235 stack.popForward() | |
| 236 | |
| 237 stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had | |
| 238 | |
| 239 except RuleErr as err: raise err # bubble up proper errors | |
| 240 except: raise nestingErr # everything else is interpreted as a nesting error. | |
| 241 | |
| 242 parsedRule = stack.obtain(nestingErr) | |
| 243 return parsedRule[0] if len(parsedRule) == 1 and isinstance(parsedRule[0], list) else parsedRule |
