Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/reaction_parsing.py @ 4:41f35c2f0c7b draft
Uploaded
| author | luca_milaz |
|---|---|
| date | Wed, 18 Sep 2024 10:59:10 +0000 |
| parents | |
| children | 0a3ca20848f3 |
comparison
equal
deleted
inserted
replaced
| 3:1f3ac6fd9867 | 4:41f35c2f0c7b |
|---|---|
| 1 from enum import Enum | |
| 2 import utils.general_utils as utils | |
| 3 from typing import Dict | |
| 4 import csv | |
| 5 import re | |
| 6 | |
| 7 # Reaction direction encoding: | |
class ReactionDir(Enum):
    """
    Direction in which a reaction may proceed: forwards, backwards, or both ways (reversible).
    Models created / managed with cobrapy write this information inside the reaction's formula
    as an arrow; each member's value is the arrow standing for that direction.
    """
    FORWARD    = "-->"
    BACKWARD   = "<--"
    REVERSIBLE = "<=>"

    @classmethod
    def fromReaction(cls, reaction :str) -> 'ReactionDir':
        """
        Scans a full reaction formula for one of the known arrows and reports the direction
        it stands for. Members are tried in declaration order and the first one whose arrow
        occurs anywhere in the formula wins.

        Args:
            reaction : the reaction's formula.

        Raises:
            ValueError : if the formula contains none of the arrows.

        Returns:
            ReactionDir : the direction matching the arrow that was found.
        """
        candidates = (direction for direction in cls if direction.value in reaction)
        found = next(candidates, None)
        if found is None:
            raise ValueError("No valid arrow found within reaction string.")

        return found
| 37 | |
# Parsed reactions: reaction id -> {substrate id -> stoichiometric coefficient}.
ReactionsDict = Dict[
    str,               # reaction id
    Dict[str, float],  # substrate (metabolite) id -> stoichiometric coefficient
]
| 39 | |
| 40 | |
def add_custom_reaction(reactionsDict :ReactionsDict, rId :str, reaction :str) -> None:
    """
    Adds an entry to the given reactionsDict. Each entry consists of a given unique reaction id
    (key) and a :dict (value) matching each substrate in the reaction to its stoichiometric coefficient.
    Keys and values are both obtained from the reaction's formula: if a substrate (custom metabolite id)
    appears without an explicit coeff, the value 1.0 will be used instead.

    A coefficient is only recognised at the very start of a term, written as an integer or as a
    float with a dot (.) decimal separator, and separated from the metabolite id by whitespace.
    Digits that belong to the metabolite id itself (e.g. "10fthf", "cpd123") are never read as a coeff.
    Terms are stripped of surrounding whitespace and empty terms are ignored.

    Args:
        reactionsDict : dictionary encoding custom reactions information.
        rId : unique reaction id.
        reaction : the reaction's formula (one side of it: a " + "-separated list of terms).

    Returns:
        None

    Side effects:
        reactionsDict : mut (any previous entry stored under rId is replaced; nothing is stored
        when the formula is blank)
    """
    reaction = reaction.strip()
    if not reaction: return

    substrates = {}
    # We assume the '+' separating consecutive metabs in a reaction is spaced from them,
    # to avoid confusing it for electrical charge:
    for word in reaction.split(" + "):
        word = word.strip()
        if not word: continue # e.g. "a +  + b": nothing to record for the empty term.

        # Anchored at the start of the term: <coeff><whitespace><metab id>.
        foundCoeff = re.match(r"(\d+(?:\.\d+)?)\s+(\S.*)", word)
        if foundCoeff:
            metabId, stoichCoeff = foundCoeff.group(2), float(foundCoeff.group(1))
        else:
            # Implicit stoichiometric coeff is equal to 1.
            metabId, stoichCoeff = word, 1.0

        substrates[metabId] = stoichCoeff

    if substrates:
        reactionsDict[rId] = substrates
    else:
        reactionsDict.pop(rId, None) # Empty reactions are removed.
| 80 | |
| 81 | |
def create_reaction_dict(unparsed_reactions: Dict[str, str]) -> ReactionsDict:
    """
    Parses the given dictionary into the correct format.

    Each formula is cut at its direction arrow and only the substrate side(s) are kept:
    the left side for forward reactions, the right side for backward ones, and both for
    reversible reactions, which are stored as two entries named "<id>_F" and "<id>_B".
    A side with no metabolites (e.g. "glc_e <=>") simply produces no entry.

    Args:
        unparsed_reactions (Dict[str, str]): A dictionary where keys are reaction IDs and values are unparsed reaction strings.

    Raises:
        ValueError: if a reaction string contains no valid arrow.

    Returns:
        ReactionsDict: The correctly parsed dict.
    """
    reactionsDict :ReactionsDict = {}
    for rId, reaction in unparsed_reactions.items():
        reactionDir = ReactionDir.fromReaction(reaction)
        # Cut at the first occurrence of the arrow itself rather than at " <arrow> ": a formula
        # with an empty side whose padding was trimmed would otherwise fail to unpack.
        # Leftover whitespace around each side is stripped by add_custom_reaction.
        left, _, right = reaction.partition(reactionDir.value)

        # Reversible reactions are split into distinct reactions, one for each direction.
        # In general we only care about substrates, the product information is lost.
        reactionIsReversible = reactionDir is ReactionDir.REVERSIBLE
        forwardId  = f"{rId}_F" if reactionIsReversible else rId
        backwardId = f"{rId}_B" if reactionIsReversible else rId

        if reactionDir is not ReactionDir.BACKWARD:
            add_custom_reaction(reactionsDict, forwardId, left)

        if reactionDir is not ReactionDir.FORWARD:
            add_custom_reaction(reactionsDict, backwardId, right)

        # ^^^ to further clarify: if a reaction is NOT reversible it will not be marked as _F or _B
        # and whichever direction we DO keep (forward if --> and backward if <--) loses this information.
        # This IS a small problem when coloring the map in marea.py because the arrow IDs in the map follow
        # a similar convention on ALL reactions and correctly encode direction based on their
        # model of origin. TODO: a proposed solution is to unify the standard in RPS to fully mimic the maps,
        # which involves re-writing the "reactions" dictionary.

    return reactionsDict
| 114 | |
| 115 | |
def parse_custom_reactions(customReactionsPath :str) -> ReactionsDict:
    """
    Builds the custom reactions dictionary from a csv file, located at the given path,
    whose rows carry a reaction id in the first column and that reaction's formula in the second.

    Args:
        customReactionsPath : path to the reactions information file.

    Returns:
        ReactionsDict : dictionary encoding custom reactions information.
    """
    reactionsFile = utils.FilePath.fromStrPath(customReactionsPath)

    # Later rows overwrite earlier ones that share the same reaction id.
    formulasById :Dict[str, str] = {}
    for row in utils.readCsv(reactionsFile):
        formulasById[row[0]] = row[1]

    return create_reaction_dict(formulasById)
| 130 |
