view COBRAxy/utils/reaction_parsing.py @ 456:a6e45049c1b9 draft default tip

Uploaded
author francesco_lapi
date Fri, 12 Sep 2025 17:28:45 +0000
parents 4a385fdb9e58
children
line wrap: on
line source

"""
Helpers to parse reaction strings into structured dictionaries.

Features:
- Reaction direction detection (forward, backward, reversible)
- Parsing of custom reaction strings into stoichiometric maps
- Conversion of a dict of raw reactions into a directional reactions dict
- Loading custom reactions from a tabular file (TSV)
"""
from enum import Enum
import utils.general_utils as utils
from typing import Dict
import re

# Reaction direction encoding:
class ReactionDir(Enum):
  """
  A reaction can go forward, backward, or be reversible (both directions).
  Cobrapy-style formulas encode direction using specific arrows handled here.
  """
  FORWARD    = "-->"
  BACKWARD   = "<--"
  REVERSIBLE = "<=>"

  @classmethod
  def fromReaction(cls, reaction :str) -> 'ReactionDir':
    """
    Takes a whole reaction formula string and looks for one of the arrows, returning the
    corresponding reaction direction.

    Args:
      reaction : the reaction's formula.
    
    Raises:
      ValueError : if no valid arrow is found.
    
    Returns:
      ReactionDir : the corresponding reaction direction.
    """
    for member in cls:
      if member.value in reaction: return member

    raise ValueError("No valid arrow found within reaction string.")

ReactionsDict = Dict[str, Dict[str, float]]


def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
  """
  Add one reaction entry to reactionsDict.

  The entry maps each substrate ID to its stoichiometric coefficient.
  If a substrate appears without an explicit coefficient, 1.0 is assumed.

  Args:
    reactionsDict: Dict to update in place.
    rId: Unique reaction ID.
    reaction: Reaction formula string.
  
  Returns:
    None

  Side effects: updates reactionsDict in place.
  """
  reaction = reaction.strip()
  if not reaction: return

  reactionsDict[rId] = {}
  # Assumes ' + ' is spaced to avoid confusion with charge symbols.
  for word in reaction.split(" + "):
    metabId, stoichCoeff = word, 1.0
    # Coefficient can be integer or float (dot decimal) and must be space-separated.
    foundCoeff = re.search(r"\d+(\.\d+)? ", word)
    if foundCoeff:
      wholeMatch  = foundCoeff.group(0)
      metabId     = word[len(wholeMatch):].strip()
      stoichCoeff = float(wholeMatch.strip())

    reactionsDict[rId][metabId] = stoichCoeff

  if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed.


def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
  Parse a dict of raw reaction strings into a directional reactions dict.

    Args:
    unparsed_reactions: Mapping reaction ID -> raw reaction string.

    Returns:
    ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)
        left, right = reaction.split(f" {reactionDir.value} ")

    # Reversible reactions are split into two: forward (_F) and backward (_B).
        reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
        if reactionDir is not ReactionDir.BACKWARD:
            add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left)
        
        if reactionDir is not ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right)
    
    return reactionsDict


def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
  """
  Load custom reactions from a tabular file and parse into a reactions dict.

  Args:
    customReactionsPath: Path to the reactions file (TSV or CSV-like).
  
  Returns:
    ReactionsDict: Parsed reactions dictionary.
  """
  try:
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
      raise ValueError("The custom reactions file must contain at least one reaction.")

    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")

  except Exception as e:
    # Fallback re-read with same settings; preserves original behavior
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
      raise ValueError("The custom reactions file must contain at least one reaction.")
    
    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")
  
  reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]}
  
  return create_reaction_dict(reactionsData)