Mercurial > repos > bimib > cobraxy
comparison COBRAxy/src/utils/reaction_parsing.py @ 539:2fb97466e404 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Sat, 25 Oct 2025 14:55:13 +0000 |
| parents | |
| children | fcdbc81feb45 |
comparison
equal
deleted
inserted
replaced
| 538:fd53d42348bd | 539:2fb97466e404 |
|---|---|
| 1 """ | |
| 2 Helpers to parse reaction strings into structured dictionaries. | |
| 3 | |
| 4 Features: | |
| 5 - Reaction direction detection (forward, backward, reversible) | |
| 6 - Parsing of custom reaction strings into stoichiometric maps | |
| 7 - Conversion of a dict of raw reactions into a directional reactions dict | |
| 8 - Loading custom reactions from a tabular file (TSV) | |
| 9 """ | |
| 10 from enum import Enum | |
| 11 import utils.general_utils as utils | |
| 12 from typing import Dict | |
| 13 import re | |
| 14 | |
| 15 # Reaction direction encoding: | |
class ReactionDir(Enum):
    """
    Direction of a reaction, identified by the arrow used in its formula string.

    Each member's value is the arrow token marking that direction:
    forward ("-->"), backward ("<--") or reversible ("<=>").
    """
    FORWARD = "-->"
    BACKWARD = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction :str) -> 'ReactionDir':
        """
        Determine a reaction's direction from the arrow contained in its formula.

        Members are tested in declaration order and the first one whose arrow
        occurs anywhere in the string wins.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if none of the known arrows occurs in the formula.

        Returns:
            ReactionDir : the direction whose arrow was found.
        """
        matchingDir = next((direction for direction in cls if direction.value in reaction), None)
        if matchingDir is None:
            raise ValueError("No valid arrow found within reaction string.")

        return matchingDir
| 44 | |
# Parsed reactions table: reaction ID -> {metabolite ID -> stoichiometric coefficient}.
ReactionsDict = Dict[str, Dict[str, float]]

# Optional leading coefficient of one reaction term: an integer or dot-decimal
# number (optionally in scientific notation, e.g. "1e-05"), followed by
# whitespace and then the metabolite ID.
_COEFF_PREFIX = re.compile(r"(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)\s+(.*)")


def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
    """
    Add one reaction entry to reactionsDict.

    The entry maps each metabolite ID found in the formula to its stoichiometric
    coefficient. If a metabolite appears without an explicit coefficient, 1.0 is
    assumed. If the same metabolite appears more than once, the last occurrence wins.

    Args:
        reactionsDict: Dict to update in place.
        rId: Unique reaction ID, used as the key of the new entry.
        reaction: One side of a reaction formula, with terms separated by ' + '
            (e.g. "2 h2o + co2"). A blank string is ignored.

    Returns:
        None

    Side effects: updates reactionsDict in place; nothing is added when the
    formula is blank or yields no metabolites.
    """
    reaction = reaction.strip()
    if not reaction: return

    metabolites :Dict[str, float] = {}
    # Assumes ' + ' is spaced to avoid confusion with charge symbols.
    for word in reaction.split(" + "):
        # Strip first so that extra spaces around ' + ' neither leak into the ID
        # nor make an ID ending in a digit look like a coefficient.
        word = word.strip()
        if not word: continue

        metabId, stoichCoeff = word, 1.0
        # re.match anchors at the start of the term: digits that belong to the
        # metabolite ID itself (e.g. "co2", "10fthf_c") are never taken as a coefficient.
        foundCoeff = _COEFF_PREFIX.match(word)
        if foundCoeff:
            stoichCoeff = float(foundCoeff.group(1))
            metabId = foundCoeff.group(2).strip()

        if metabId: metabolites[metabId] = stoichCoeff

    # Reactions that yield no metabolites are not stored.
    if metabolites: reactionsDict[rId] = metabolites
| 82 | |
| 83 | |
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parse a dict of raw reaction strings into a directional reactions dict.

    For each reaction only the consumed side is stored: the left-hand side for
    forward reactions, the right-hand side for backward ones. Reversible
    reactions produce two entries, rId + "_F" (left-hand side) and rId + "_B"
    (right-hand side). A side that is blank produces no entry.

    Args:
        unparsed_reactions: Mapping reaction ID -> raw reaction string.

    Raises:
        ValueError: if a reaction string contains no valid arrow.

    Returns:
        ReactionsDict: Parsed dict. Reversible reactions produce two entries with _F and _B suffixes.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)
        # Partition on the bare arrow (the same token fromReaction detects) rather
        # than on the space-padded arrow: formulas with an empty side such as
        # "a <=>" or with an unspaced arrow would otherwise fail to unpack.
        # add_custom_reaction strips each side and ignores blank ones.
        left, _, right = reaction.partition(reactionDir.value)

        if reactionDir is ReactionDir.REVERSIBLE:
            # Reversible reactions are split into two: forward (_F) and backward (_B).
            add_custom_reaction(reactionsDict, rId + "_F", left)
            add_custom_reaction(reactionsDict, rId + "_B", right)
        elif reactionDir is ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, rId, left)
        else:
            add_custom_reaction(reactionsDict, rId, right)

    return reactionsDict
| 108 | |
| 109 | |
def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
    """
    Load custom reactions from a tabular file and parse them into a reactions dict.

    The file is read as tab-separated text with its header row included; the
    header is used to locate the reaction ID column and the "Formula" column.

    Args:
        customReactionsPath: Path to the tab-separated reactions file.

    Raises:
        ValueError: if the file holds no rows besides the header.

    Returns:
        ReactionsDict: Parsed reactions dictionary.
    """
    # The file is read once: retrying the very same read and header lookup after
    # a failure cannot succeed where the first attempt did not, so any error
    # from the helpers propagates directly.
    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
    if len(rows) <= 1:
        raise ValueError("The custom reactions file must contain at least one reaction.")

    # NOTE(review): findIdxByName is assumed to return the pair
    # (ID column index, index of the named column) — confirm in utils.general_utils.
    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")

    # If a reaction ID is repeated, the last row with that ID wins.
    reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]}

    return create_reaction_dict(reactionsData)
| 138 |
