changeset 499:a2f7a6dd9d0b draft

Uploaded
author francesco_lapi
date Tue, 30 Sep 2025 16:19:55 +0000
parents df90f40a156c
children 4e7e67693ce7
files COBRAxy/utils/model_utils.py
diffstat 1 files changed, 31 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/COBRAxy/utils/model_utils.py	Tue Sep 30 16:13:08 2025 +0000
+++ b/COBRAxy/utils/model_utils.py	Tue Sep 30 16:19:55 2025 +0000
@@ -358,21 +358,40 @@
 
 
 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore)
+#def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]:
+#    """
+#    Extract metabolite IDs from a reaction formula.
+#    Robust pattern: tokens ending with _<compartment> (e.g., _c, _m, _e),
+#    allowing leading digits/underscores.
+#    """
+#    metabolites = set()
+#    # optional coefficient followed by a token ending with _<letters>
+#    if reaction_formula[-1] == ']' and reaction_formula[-3] == '[':
+#        pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+[[A-Za-z0-9]]+)'
+#    else:
+#        pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[A-Za-z0-9]+)'
+#    matches = re.findall(pattern, reaction_formula)
+#    metabolites.update(matches)
+#    return metabolites
+
+import re
+from typing import Set
+
+# Extract every metabolite ID in the formula (handles digit-led IDs, underscores and [comp] suffixes)
 def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]:
     """
-    Extract metabolite IDs from a reaction formula.
-    Robust pattern: tokens ending with _<compartment> (e.g., _c, _m, _e),
-    allowing leading digits/underscores.
+    Extract the metabolite IDs that appear in a reaction formula.
+    Handles:
+      - optional stoichiometric coefficients (integer or decimal) followed by whitespace
+      - compartments written either as [c] or as _c, always at the end of the ID
+    Returns the IDs with the compartment suffix exactly as written, e.g. {'13dpg_c', 'h2o[c]'}.
     """
-    metabolites = set()
-    # optional coefficient followed by a token ending with _<letters>
-    if reaction_formula[-1] == ']' and reaction_formula[-3] == '[':
-        pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+[[A-Za-z0-9]]+)'
-    else:
-        pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[A-Za-z0-9]+)'
-    matches = re.findall(pattern, reaction_formula)
-    metabolites.update(matches)
-    return metabolites
+    pattern = re.compile(
+        r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))'              # left boundary: start of string or a separator
+        r'(?:\d+(?:\.\d+)?\s+)?'                                   # optional coefficient; \s+ (not \s*) so digit-led IDs like 13dpg_c stay whole
+        r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))'       # metabolite ID + compartment suffix
+    )
+    return {m.group(1) for m in pattern.finditer(reaction_formula)}
 
 
 def extract_compartment_from_metabolite(metabolite_id: str) -> str: