annotate COBRAxy/utils/reaction_parsing.py @ 459:f8c3d9f28f64 draft default tip

Uploaded
author francesco_lapi
date Wed, 17 Sep 2025 14:52:40 +0000
parents a6e45049c1b9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
456
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
1 """
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
2 Helpers to parse reaction strings into structured dictionaries.
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
3
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
4 Features:
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
5 - Reaction direction detection (forward, backward, reversible)
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
6 - Parsing of custom reaction strings into stoichiometric maps
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
7 - Conversion of a dict of raw reactions into a directional reactions dict
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
8 - Loading custom reactions from a tabular file (TSV)
a6e45049c1b9 Uploaded
francesco_lapi
parents: 427
diff changeset
9 """
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
10 from enum import Enum
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
11 import utils.general_utils as utils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
12 from typing import Dict
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
13 import re
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
14
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
15 # Reaction direction encoding:
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
class ReactionDir(Enum):
    """
    Direction of a reaction, identified by the arrow used in its formula.

    Each member's value is the arrow substring searched for in a formula string:
    forward ("-->"), backward ("<--") or reversible ("<=>").
    """
    FORWARD = "-->"
    BACKWARD = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction: str) -> 'ReactionDir':
        """
        Determine a reaction's direction from its whole formula string.

        Members are tried in declaration order; the first one whose arrow occurs
        anywhere in the formula is returned.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if no valid arrow is found.

        Returns:
            ReactionDir : the corresponding reaction direction.
        """
        direction = next(
            (candidate for candidate in cls if candidate.value in reaction),
            None,
        )
        if direction is None:
            raise ValueError("No valid arrow found within reaction string.")

        return direction
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
44
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
ReactionsDict = Dict[str, Dict[str, float]]

# A token of the form "<coefficient> <metabolite id>". The coefficient is an
# integer or dot-decimal number, optionally in scientific notation (e.g. "1e-05"),
# and must be separated from the metabolite ID by whitespace. Used with fullmatch
# so the number is only ever read from the very start of the token.
_COEFF_AND_METABOLITE = re.compile(r"(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)\s+(.+)")


def add_custom_reaction(reactionsDict: ReactionsDict, rId: str, reaction: str) -> None:
    """
    Add one reaction entry to reactionsDict.

    The entry maps each substrate ID to its stoichiometric coefficient.
    If a substrate appears without an explicit coefficient, 1.0 is assumed.
    If the same substrate appears more than once, the last occurrence wins.

    Args:
        reactionsDict: Dict to update in place.
        rId: Unique reaction ID.
        reaction: One side of a reaction formula, i.e. substrates joined by ' + '.

    Returns:
        None

    Side effects: updates reactionsDict in place. A blank `reaction` leaves the
    dict untouched; a formula that yields no substrates removes any entry
    already stored under `rId`.
    """
    reaction = reaction.strip()
    if not reaction:
        return

    stoichiometry: Dict[str, float] = {}
    # Assumes ' + ' is spaced to avoid confusion with charge symbols.
    for word in reaction.split(" + "):
        # Tolerate irregular spacing around a token and skip empty ones
        # (e.g. produced by "A +  + B") instead of storing a "" substrate.
        word = word.strip()
        if not word:
            continue

        metabId, stoichCoeff = word, 1.0
        # The coefficient is only recognised at the start of the token, so digits
        # inside a metabolite ID or an exponent are never mistaken for it.
        found = _COEFF_AND_METABOLITE.fullmatch(word)
        if found:
            stoichCoeff = float(found.group(1))
            metabId = found.group(2).strip()

        stoichiometry[metabId] = stoichCoeff

    if stoichiometry:
        reactionsDict[rId] = stoichiometry
    else:
        # Empty reactions are removed.
        reactionsDict.pop(rId, None)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
82
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
83
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parse a dict of raw reaction strings into a directional reactions dict.

    For every reaction the substrate side is stored: the left-hand side for a
    forward reaction, the right-hand side for a backward one, and both sides
    (as two separate entries) for a reversible one.

    Args:
        unparsed_reactions: Mapping reaction ID -> raw reaction string.

    Raises:
        ValueError: if a formula contains no valid arrow, or if the arrow does not
            split the formula into exactly two sides.

    Returns:
        ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
    """
    reactionsDict: ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)

        # The arrow must be delimited by whitespace or by the start/end of the
        # formula. Accepting the string boundaries keeps one-sided formulas such
        # as "A -->" parseable even when trailing whitespace has been stripped.
        arrow = re.escape(reactionDir.value)
        sides = re.split(rf"(?:^|\s){arrow}(?:\s|$)", reaction)
        if len(sides) != 2:
            raise ValueError(
                f"Reaction '{rId}' must contain exactly one whitespace-delimited "
                f"'{reactionDir.value}' arrow: {reaction!r}"
            )
        left, right = sides

        # Reversible reactions are split into two: forward (_F) and backward (_B).
        reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
        forwardSuffix = "_F" if reactionIsReversible else ""
        backwardSuffix = "_B" if reactionIsReversible else ""

        if reactionDir is not ReactionDir.BACKWARD:
            add_custom_reaction(reactionsDict, rId + forwardSuffix, left)

        if reactionDir is not ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, rId + backwardSuffix, right)

    return reactionsDict
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
108
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
109
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def parse_custom_reactions(customReactionsPath: str) -> ReactionsDict:
    """
    Load custom reactions from a tabular file and parse into a reactions dict.

    The file is read as tab-separated text. Its first row is treated as the header
    and is searched for the "Formula" column; every following non-blank row
    contributes one reaction ID -> formula pair.

    Args:
        customReactionsPath: Path to the tab-separated reactions file.

    Raises:
        ValueError: if the file has no rows besides the header, or if a formula
            cannot be parsed by create_reaction_dict.

    Returns:
        ReactionsDict: Parsed reactions dictionary.
    """
    # The file is read exactly once. Any failure propagates to the caller rather
    # than being swallowed and followed by an identical second attempt.
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
        raise ValueError("The custom reactions file must contain at least one reaction.")

    # NOTE(review): findIdxByName presumably returns the (ID column, Formula column)
    # index pair for the header row -- confirm against utils.general_utils.
    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")

    # Rows with no non-empty cell (e.g. trailing blank lines) carry no reaction
    # and are skipped instead of failing on the column lookup.
    reactionsData = {
        row[id_idx]: row[idx_formula]
        for row in rows[1:]
        if any(row)
    }

    return create_reaction_dict(reactionsData)
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
138