comparison COBRAxy/utils/reaction_parsing.py @ 456:a6e45049c1b9 draft default tip

Uploaded
author francesco_lapi
date Fri, 12 Sep 2025 17:28:45 +0000
parents 4a385fdb9e58
children
comparison
equal deleted inserted replaced
455:4e2bc80764b6 456:a6e45049c1b9
1 """
2 Helpers to parse reaction strings into structured dictionaries.
3
4 Features:
5 - Reaction direction detection (forward, backward, reversible)
6 - Parsing of custom reaction strings into stoichiometric maps
7 - Conversion of a dict of raw reactions into a directional reactions dict
8 - Loading custom reactions from a tabular file (TSV)
9 """
1 from enum import Enum 10 from enum import Enum
2 import utils.general_utils as utils 11 import utils.general_utils as utils
3 from typing import Dict 12 from typing import Dict
4 import csv
5 import re 13 import re
6 14
7 # Reaction direction encoding: 15 # Reaction direction encoding:
8 class ReactionDir(Enum): 16 class ReactionDir(Enum):
9 """ 17 """
10 A reaction can go forwards, backwards or be reversible (able to proceed in both directions). 18 A reaction can go forward, backward, or be reversible (both directions).
11 Models created / managed with cobrapy encode this information within the reaction's 19 Cobrapy-style formulas encode direction using specific arrows handled here.
12 formula using the arrows this enum keeps as values.
13 """ 20 """
14 FORWARD = "-->" 21 FORWARD = "-->"
15 BACKWARD = "<--" 22 BACKWARD = "<--"
16 REVERSIBLE = "<=>" 23 REVERSIBLE = "<=>"
17 24
38 ReactionsDict = Dict[str, Dict[str, float]] 45 ReactionsDict = Dict[str, Dict[str, float]]
39 46
40 47
41 def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None: 48 def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
42 """ 49 """
43 Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id 50 Add one reaction entry to reactionsDict.
44 (key) and a :dict (value) matching each substrate in the reaction to its stoichiometric coefficient. 51
45 Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id) 52 The entry maps each substrate ID to its stoichiometric coefficient.
46 appears without an explicit coeff, the value 1.0 will be used instead. 53 If a substrate appears without an explicit coefficient, 1.0 is assumed.
47 54
48 Args: 55 Args:
49 reactionsDict : dictionary encoding custom reactions information. 56 reactionsDict: Dict to update in place.
50 rId : unique reaction id. 57 rId: Unique reaction ID.
51 reaction : the reaction's formula. 58 reaction: Reaction formula string.
52 59
53 Returns: 60 Returns:
54 None 61 None
55 62
56 Side effects: 63 Side effects: updates reactionsDict in place.
57 reactionsDict : mut
58 """ 64 """
59 reaction = reaction.strip() 65 reaction = reaction.strip()
60 if not reaction: return 66 if not reaction: return
61 67
62 reactionsDict[rId] = {} 68 reactionsDict[rId] = {}
63 # We assume the '+' separating consecutive metabs in a reaction is spaced from them, 69 # Assumes ' + ' is spaced to avoid confusion with charge symbols.
64 # to avoid confusing it for electrical charge:
65 for word in reaction.split(" + "): 70 for word in reaction.split(" + "):
66 metabId, stoichCoeff = word, 1.0 71 metabId, stoichCoeff = word, 1.0
67 # Implicit stoichiometric coeff is equal to 1, some coeffs are floats. 72 # Coefficient can be integer or float (dot decimal) and must be space-separated.
68
69 # Accepted coeffs can be integer or floats with a dot (.) decimal separator
70 # and must be separated from the metab with a space:
71 foundCoeff = re.search(r"\d+(\.\d+)? ", word) 73 foundCoeff = re.search(r"\d+(\.\d+)? ", word)
72 if foundCoeff: 74 if foundCoeff:
73 wholeMatch = foundCoeff.group(0) 75 wholeMatch = foundCoeff.group(0)
74 metabId = word[len(wholeMatch):].strip() 76 metabId = word[len(wholeMatch):].strip()
75 stoichCoeff = float(wholeMatch.strip()) 77 stoichCoeff = float(wholeMatch.strip())
79 if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed. 81 if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed.
80 82
81 83
82 def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict: 84 def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
83 """ 85 """
84 Parses the given dictionary into the correct format. 86 Parse a dict of raw reaction strings into a directional reactions dict.
85 87
86 Args: 88 Args:
87 unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings. 89 unparsed_reactions: Mapping reaction ID -> raw reaction string.
88 90
89 Returns: 91 Returns:
90 ReactionsDict: The correctly parsed dict. 92 ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
91 """ 93 """
92 reactionsDict :ReactionsDict = {} 94 reactionsDict :ReactionsDict = {}
93 for rId, reaction in unparsed_reactions.items(): 95 for rId, reaction in unparsed_reactions.items():
94 reactionDir = ReactionDir.fromReaction(reaction) 96 reactionDir = ReactionDir.fromReaction(reaction)
95 left, right = reaction.split(f" {reactionDir.value} ") 97 left, right = reaction.split(f" {reactionDir.value} ")
96 98
97 # Reversible reactions are split into distinct reactions, one for each direction. 99 # Reversible reactions are split into two: forward (_F) and backward (_B).
98 # In general we only care about substrates, the product information is lost.
99 reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE 100 reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
100 if reactionDir is not ReactionDir.BACKWARD: 101 if reactionDir is not ReactionDir.BACKWARD:
101 add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left) 102 add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left)
102 103
103 if reactionDir is not ReactionDir.FORWARD: 104 if reactionDir is not ReactionDir.FORWARD:
104 add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right) 105 add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right)
105
106 # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B
107 # and whichever direction we DO keep (forward if --> and backward if <--) loses this information.
108 # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow
109 # through with a similar convention on ALL reactions and correctly encode direction based on their
110 # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps,
111 # which involves re-writing the "reactions" dictionary.
112 106
113 return reactionsDict 107 return reactionsDict
114 108
115 109
116 def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict: 110 def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
117 """ 111 """
118 Creates a custom dictionary encoding reactions information from a csv file containing 112 Load custom reactions from a tabular file and parse into a reactions dict.
119 data about these reactions, the path of which is given as input.
120 113
121 Args: 114 Args:
122 customReactionsPath : path to the reactions information file. 115 customReactionsPath: Path to the reactions file (TSV or CSV-like).
123 116
124 Returns: 117 Returns:
125 ReactionsDict : dictionary encoding custom reactions information. 118 ReactionsDict: Parsed reactions dictionary.
126 """ 119 """
127 try: 120 try:
128 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) 121 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
129 if len(rows) <= 1: 122 if len(rows) <= 1:
130 raise ValueError("The custom reactions file must contain at least one reaction.") 123 raise ValueError("The custom reactions file must contain at least one reaction.")
131 124
132 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") 125 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")
133 126
134 except Exception as e: 127 except Exception as e:
135 128 # Fallback re-read with same settings; preserves original behavior
136 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) 129 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
137 if len(rows) <= 1: 130 if len(rows) <= 1:
138 raise ValueError("The custom reactions file must contain at least one reaction.") 131 raise ValueError("The custom reactions file must contain at least one reaction.")
139 132
140 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") 133 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")