diff COBRAxy/utils/reaction_parsing.py @ 456:a6e45049c1b9 draft default tip

Uploaded
author francesco_lapi
date Fri, 12 Sep 2025 17:28:45 +0000
parents 4a385fdb9e58
children
line wrap: on
line diff
--- a/COBRAxy/utils/reaction_parsing.py	Fri Sep 12 15:05:54 2025 +0000
+++ b/COBRAxy/utils/reaction_parsing.py	Fri Sep 12 17:28:45 2025 +0000
@@ -1,15 +1,22 @@
+"""
+Helpers to parse reaction strings into structured dictionaries.
+
+Features:
+- Reaction direction detection (forward, backward, reversible)
+- Parsing of custom reaction strings into stoichiometric maps
+- Conversion of a dict of raw reactions into a directional reactions dict
+- Loading custom reactions from a tabular file (TSV)
+"""
 from enum import Enum
 import utils.general_utils as utils
 from typing import Dict
-import csv
 import re
 
 # Reaction direction encoding:
 class ReactionDir(Enum):
   """
-  A reaction can go forwards, backwards or be reversible (able to proceed in both directions).
-  Models created / managed with cobrapy encode this information within the reaction's
-  formula using the arrows this enum keeps as values.
+  A reaction can go forward, backward, or be reversible (both directions).
+  Cobrapy-style formulas encode direction with the arrow strings stored as this enum's values.
   """
   FORWARD    = "-->"
   BACKWARD   = "<--"
@@ -40,34 +47,29 @@
 
 def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
   """
-  Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id
-  (key) and a :dict (value) matching each substrate in the reaction to its stoichiometric coefficient.
-  Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id)
-  appears without an explicit coeff, the value 1.0 will be used instead.
+  Add one reaction entry to reactionsDict.
+
+  The entry maps each substrate ID to its stoichiometric coefficient.
+  If a substrate appears without an explicit coefficient, 1.0 is assumed.
 
   Args:
-    reactionsDict : dictionary encoding custom reactions information.
-    rId : unique reaction id.
-    reaction : the reaction's formula.
+    reactionsDict: Dict to update in place.
+    rId: Unique reaction ID.
+    reaction: Reaction formula string.
   
   Returns:
     None
 
-  Side effects:
-    reactionsDict : mut
+  Side effects: updates reactionsDict in place.
   """
   reaction = reaction.strip()
   if not reaction: return
 
   reactionsDict[rId] = {}
-  # We assume the '+' separating consecutive metabs in a reaction is spaced from them,
-  # to avoid confusing it for electrical charge:
+  # Metabolites must be joined by ' + ' (with surrounding spaces) so that a bare '+' is not mistaken for a charge symbol.
   for word in reaction.split(" + "):
     metabId, stoichCoeff = word, 1.0
-    # Implicit stoichiometric coeff is equal to 1, some coeffs are floats.
-
-    # Accepted coeffs can be integer or floats with a dot (.) decimal separator
-    # and must be separated from the metab with a space:
+    # A coefficient may be an integer or a float (dot as decimal separator) and must be followed by a space before the metabolite ID.
     foundCoeff = re.search(r"\d+(\.\d+)? ", word)
     if foundCoeff:
       wholeMatch  = foundCoeff.group(0)
@@ -81,48 +83,39 @@
 
 def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
     """
-    Parses the given dictionary into the correct format.
+    Parse a dict of raw reaction strings into a directional reactions dict.
 
     Args:
-        unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings.
+        unparsed_reactions: Mapping of reaction ID -> raw reaction string.
 
     Returns:
-        ReactionsDict: The correctly parsed dict.
+        ReactionsDict: Parsed dict. A reversible reaction yields two entries, keyed with _F and _B suffixes.
     """
     reactionsDict :ReactionsDict = {}
     for rId, reaction in unparsed_reactions.items():
         reactionDir = ReactionDir.fromReaction(reaction)
         left, right = reaction.split(f" {reactionDir.value} ")
 
-        # Reversible reactions are split into distinct reactions, one for each direction.
-        # In general we only care about substrates, the product information is lost.
+        # A reversible reaction is split into two entries: forward (_F) and backward (_B).
         reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
         if reactionDir is not ReactionDir.BACKWARD:
             add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left)
         
         if reactionDir is not ReactionDir.FORWARD:
             add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right)
-        
-        # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B
-        # and whichever direction we DO keep (forward if --> and backward if <--) loses this information.
-        # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow
-        # through with a similar convention on ALL reactions and correctly encode direction based on their
-        # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps,
-        # which involves re-writing the "reactions" dictionary.
     
     return reactionsDict
 
 
 def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
   """
-  Creates a custom dictionary encoding reactions information from a csv file containing
-  data about these reactions, the path of which is given as input.
+  Load custom reactions from a tabular file and parse into a reactions dict.
 
   Args:
-    customReactionsPath : path to the reactions information file.
+    customReactionsPath: Path to the tab-separated (TSV) reactions file.
   
   Returns:
-    ReactionsDict : dictionary encoding custom reactions information.
+    ReactionsDict: Parsed reactions dictionary.
   """
   try:
     rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
@@ -132,7 +125,7 @@
     id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")
 
   except Exception as e:
-        
+    # Fallback re-read with same settings; preserves original behavior
     rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
     if len(rows) <= 1:
       raise ValueError("The custom reactions file must contain at least one reaction.")