Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/reaction_parsing.py @ 456:a6e45049c1b9 draft default tip
Uploaded
author | francesco_lapi |
---|---|
date | Fri, 12 Sep 2025 17:28:45 +0000 |
parents | 4a385fdb9e58 |
children |
comparison
equal
deleted
inserted
replaced
455:4e2bc80764b6 | 456:a6e45049c1b9 |
---|---|
1 """ | |
2 Helpers to parse reaction strings into structured dictionaries. | |
3 | |
4 Features: | |
5 - Reaction direction detection (forward, backward, reversible) | |
6 - Parsing of custom reaction strings into stoichiometric maps | |
7 - Conversion of a dict of raw reactions into a directional reactions dict | |
8 - Loading custom reactions from a tabular file (TSV) | |
9 """ | |
1 from enum import Enum | 10 from enum import Enum |
2 import utils.general_utils as utils | 11 import utils.general_utils as utils |
3 from typing import Dict | 12 from typing import Dict |
4 import csv | |
5 import re | 13 import re |
6 | 14 |
7 # Reaction direction encoding: | 15 # Reaction direction encoding: |
8 class ReactionDir(Enum): | 16 class ReactionDir(Enum): |
9 """ | 17 """ |
10 A reaction can go forwards, backwards or be reversible (able to proceed in both directions). | 18 A reaction can go forward, backward, or be reversible (both directions). |
11 Models created / managed with cobrapy encode this information within the reaction's | 19 Cobrapy-style formulas encode direction using specific arrows handled here. |
12 formula using the arrows this enum keeps as values. | |
13 """ | 20 """ |
14 FORWARD = "-->" | 21 FORWARD = "-->" |
15 BACKWARD = "<--" | 22 BACKWARD = "<--" |
16 REVERSIBLE = "<=>" | 23 REVERSIBLE = "<=>" |
17 | 24 |
38 ReactionsDict = Dict[str, Dict[str, float]] | 45 ReactionsDict = Dict[str, Dict[str, float]] |
39 | 46 |
40 | 47 |
41 def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None: | 48 def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None: |
42 """ | 49 """ |
43 Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id | 50 Add one reaction entry to reactionsDict. |
44 (key) and a :dict (value) matching each substrate in the reaction to its stoichiometric coefficient. | 51 |
45 Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id) | 52 The entry maps each substrate ID to its stoichiometric coefficient. |
46 appears without an explicit coeff, the value 1.0 will be used instead. | 53 If a substrate appears without an explicit coefficient, 1.0 is assumed. |
47 | 54 |
48 Args: | 55 Args: |
49 reactionsDict : dictionary encoding custom reactions information. | 56 reactionsDict: Dict to update in place. |
50 rId : unique reaction id. | 57 rId: Unique reaction ID. |
51 reaction : the reaction's formula. | 58 reaction: Reaction formula string. |
52 | 59 |
53 Returns: | 60 Returns: |
54 None | 61 None |
55 | 62 |
56 Side effects: | 63 Side effects: updates reactionsDict in place. |
57 reactionsDict : mut | |
58 """ | 64 """ |
59 reaction = reaction.strip() | 65 reaction = reaction.strip() |
60 if not reaction: return | 66 if not reaction: return |
61 | 67 |
62 reactionsDict[rId] = {} | 68 reactionsDict[rId] = {} |
63 # We assume the '+' separating consecutive metabs in a reaction is spaced from them, | 69 # Assumes ' + ' is spaced to avoid confusion with charge symbols. |
64 # to avoid confusing it for electrical charge: | |
65 for word in reaction.split(" + "): | 70 for word in reaction.split(" + "): |
66 metabId, stoichCoeff = word, 1.0 | 71 metabId, stoichCoeff = word, 1.0 |
67 # Implicit stoichiometric coeff is equal to 1, some coeffs are floats. | 72 # Coefficient can be integer or float (dot decimal) and must be space-separated. |
68 | |
69 # Accepted coeffs can be integer or floats with a dot (.) decimal separator | |
70 # and must be separated from the metab with a space: | |
71 foundCoeff = re.search(r"\d+(\.\d+)? ", word) | 73 foundCoeff = re.search(r"\d+(\.\d+)? ", word) |
72 if foundCoeff: | 74 if foundCoeff: |
73 wholeMatch = foundCoeff.group(0) | 75 wholeMatch = foundCoeff.group(0) |
74 metabId = word[len(wholeMatch):].strip() | 76 metabId = word[len(wholeMatch):].strip() |
75 stoichCoeff = float(wholeMatch.strip()) | 77 stoichCoeff = float(wholeMatch.strip()) |
79 if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed. | 81 if not reactionsDict[rId]: del reactionsDict[rId] # Empty reactions are removed. |
80 | 82 |
81 | 83 |
82 def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict: | 84 def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict: |
83 """ | 85 """ |
84 Parses the given dictionary into the correct format. | 86 Parse a dict of raw reaction strings into a directional reactions dict. |
85 | 87 |
86 Args: | 88 Args: |
87 unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings. | 89 unparsed_reactions: Mapping reaction ID -> raw reaction string. |
88 | 90 |
89 Returns: | 91 Returns: |
90 ReactionsDict: The correctly parsed dict. | 92 ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes. |
91 """ | 93 """ |
92 reactionsDict :ReactionsDict = {} | 94 reactionsDict :ReactionsDict = {} |
93 for rId, reaction in unparsed_reactions.items(): | 95 for rId, reaction in unparsed_reactions.items(): |
94 reactionDir = ReactionDir.fromReaction(reaction) | 96 reactionDir = ReactionDir.fromReaction(reaction) |
95 left, right = reaction.split(f" {reactionDir.value} ") | 97 left, right = reaction.split(f" {reactionDir.value} ") |
96 | 98 |
97 # Reversible reactions are split into distinct reactions, one for each direction. | 99 # Reversible reactions are split into two: forward (_F) and backward (_B). |
98 # In general we only care about substrates, the product information is lost. | |
99 reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE | 100 reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE |
100 if reactionDir is not ReactionDir.BACKWARD: | 101 if reactionDir is not ReactionDir.BACKWARD: |
101 add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left) | 102 add_custom_reaction(reactionsDict, rId + "_F" * reactionIsReversible, left) |
102 | 103 |
103 if reactionDir is not ReactionDir.FORWARD: | 104 if reactionDir is not ReactionDir.FORWARD: |
104 add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right) | 105 add_custom_reaction(reactionsDict, rId + "_B" * reactionIsReversible, right) |
105 | |
106 # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B | |
107 # and whichever direction we DO keep (forward if --> and backward if <--) loses this information. | |
108 # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow | |
109 # through with a similar convention on ALL reactions and correctly encode direction based on their | |
110 # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps, | |
111 # which involves re-writing the "reactions" dictionary. | |
112 | 106 |
113 return reactionsDict | 107 return reactionsDict |
114 | 108 |
115 | 109 |
116 def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict: | 110 def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict: |
117 """ | 111 """ |
118 Creates a custom dictionary encoding reactions information from a csv file containing | 112 Load custom reactions from a tabular file and parse into a reactions dict. |
119 data about these reactions, the path of which is given as input. | |
120 | 113 |
121 Args: | 114 Args: |
122 customReactionsPath : path to the reactions information file. | 115 customReactionsPath: Path to the reactions file (TSV or CSV-like). |
123 | 116 |
124 Returns: | 117 Returns: |
125 ReactionsDict : dictionary encoding custom reactions information. | 118 ReactionsDict: Parsed reactions dictionary. |
126 """ | 119 """ |
127 try: | 120 try: |
128 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) | 121 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) |
129 if len(rows) <= 1: | 122 if len(rows) <= 1: |
130 raise ValueError("The custom reactions file must contain at least one reaction.") | 123 raise ValueError("The custom reactions file must contain at least one reaction.") |
131 | 124 |
132 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") | 125 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") |
133 | 126 |
134 except Exception as e: | 127 except Exception as e: |
135 | 128 # Fallback re-read with same settings; preserves original behavior |
136 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) | 129 rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) |
137 if len(rows) <= 1: | 130 if len(rows) <= 1: |
138 raise ValueError("The custom reactions file must contain at least one reaction.") | 131 raise ValueError("The custom reactions file must contain at least one reaction.") |
139 | 132 |
140 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") | 133 id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") |