Mercurial > repos > bimib > cobraxy
diff COBRAxy/utils/rule_parsing.py @ 456:a6e45049c1b9 draft default tip
Uploaded
author | francesco_lapi |
---|---|
date | Fri, 12 Sep 2025 17:28:45 +0000 |
parents | 41f35c2f0c7b |
children |
line wrap: on
line diff
--- a/COBRAxy/utils/rule_parsing.py Fri Sep 12 15:05:54 2025 +0000 +++ b/COBRAxy/utils/rule_parsing.py Fri Sep 12 17:28:45 2025 +0000 @@ -1,10 +1,20 @@ +""" +Parsing utilities for gene rules (GPRs). + +This module provides: +- RuleErr: structured errors for malformed rules +- RuleOp: valid logical operators (AND/OR) +- OpList: nested list structure representing parsed rules with explicit operator +- RuleStack: helper stack to build nested OpLists during parsing +- parseRuleToNestedList: main entry to parse a rule string into an OpList +""" from enum import Enum import utils.general_utils as utils from typing import List, Union, Optional class RuleErr(utils.CustomErr): """ - CustomErr subclass for rule syntax errors. + Error type for rule syntax errors. """ errName = "Rule Syntax Error" def __init__(self, rule :str, msg = "no further details provided") -> None: @@ -14,7 +24,7 @@ class RuleOp(Enum): """ - Encodes all operators valid in gene rules. + Valid logical operators for gene rules. """ OR = "or" AND = "and" @@ -27,7 +37,7 @@ class OpList(List[Union[str, "OpList"]]): """ - Represents a parsed rule and each of its nesting levels, including the operator that level uses. + Parsed rule structure: a list with an associated operator for that level. """ def __init__(self, op :Optional[RuleOp] = None) -> None: """ @@ -70,8 +80,7 @@ class RuleStack: """ - FILO stack structure to save the intermediate representation of a Rule during parsing, with the - current nesting level at the top of the stack. + FILO stack used during parsing to build nested OpLists; the top is the current level. """ def __init__(self) -> None: """ @@ -177,51 +186,49 @@ def parseRuleToNestedList(rule :str) -> OpList: """ - Parse a single rule from its string representation to an OpList, making all priority explicit - through nesting levels. + Parse a rule string into an OpList, making operator precedence explicit via nesting. Args: - rule : the string representation of a rule to be parsed. + rule: Rule string to parse (supports parentheses, 'and', 'or'). Raises: - RuleErr : whenever something goes wrong during parsing. + RuleErr: If the rule is malformed (e.g., mismatched parentheses or misplaced operators). Returns: - OpList : the parsed rule. + OpList: Parsed rule as an OpList structure. """ source = iter(rule - .replace("(", "( ").replace(")", " )") # Single out parens as words - .strip() # remove whitespace at extremities + .replace("(", "( ").replace(")", " )") # single out parentheses as words + .strip() # trim edges .split()) # split by spaces stack = RuleStack() nestingErr = RuleErr(rule, "mismatch between open and closed parentheses") try: - while True: # keep reading until source ends + while True: # read until source ends while True: - operand = next(source, None) # expected name or rule opening + operand = next(source, None) # expect operand or '(' if operand is None: raise RuleErr(rule, "found trailing open parentheses") - if operand == "and" or operand == "or" or operand == ")": # found operator instead, panic + if operand in ("and", "or", ")"): # unexpected operator position raise RuleErr(rule, f"found \"{operand}\" in unexpected position") - if operand != "(": break # found name + if operand != "(": break # got a name - # found rule opening, we add new nesting level but don't know the operator + # found rule opening: add a new nesting level stack.push() stack.current.append(operand) - while True: # keep reading until operator is found or source ends - operator = next(source, None) # expected operator or rule closing - if operator and operator != ")": break # found operator + while True: # read until operator found or source ends + operator = next(source, None) # expect operator or ')' + if operator and operator != ")": break # got operator - if stack.currentIsAnd(): stack.pop() # we close the "and" chain + if stack.currentIsAnd(): stack.pop() # close current AND chain if not operator: break - stack.pop() # we close the parentheses + stack.pop() # close parentheses - # we proceed with operator: - if not operator: break # there is no such thing as a double loop break.. yet + if not operator: break if not RuleOp.isOperator(operator): raise RuleErr( rule, f"found \"{operator}\" in unexpected position, expected operator") @@ -234,7 +241,7 @@ stack.push(operator) stack.popForward() - stack.current.setOpIfMissing(operator) # buffer now knows what operator its data had + stack.current.setOpIfMissing(operator) except RuleErr as err: raise err # bubble up proper errors except: raise nestingErr # everything else is interpreted as a nesting error.