changeset 501:9bfd1ec3ae6f draft

Uploaded
author francesco_lapi
date Tue, 30 Sep 2025 17:06:37 +0000
parents 4e7e67693ce7
children 054c872e3880
files COBRAxy/utils/model_utils.py
diffstat 1 files changed, 53 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/COBRAxy/utils/model_utils.py	Tue Sep 30 16:39:30 2025 +0000
+++ b/COBRAxy/utils/model_utils.py	Tue Sep 30 17:06:37 2025 +0000
@@ -279,7 +279,18 @@
         cobra.Model: The constructed COBRApy model.
     """
     
-    df = pd.read_csv(csv_path, sep='\t')
+    # Try to detect separator
+    with open(csv_path, 'r') as f:
+        first_line = f.readline()
+        sep = '\t' if '\t' in first_line else ','
+    
+    df = pd.read_csv(csv_path, sep=sep)
+    
+    # Check required columns
+    required_cols = ['ReactionID', 'Formula']
+    missing_cols = [col for col in required_cols if col not in df.columns]
+    if missing_cols:
+        raise ValueError(f"Missing required columns: {missing_cols}. Available columns: {list(df.columns)}")
     
     model = cobraModel(model_id)
     
@@ -387,8 +398,8 @@
     """
     pattern = re.compile(
         r'(?:^|(?<=\s)|(?<=\+)|(?<=,)|(?<==)|(?<=:))'              # left boundary (start, space, +, comma, =, :)
-        r'(?:\d+(?:\.\d+)?\s*)?'                                   # optional coefficient
-        r'([A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))'       # metabolite + compartment
+        r'(?:\d+(?:\.\d+)?\s+)?'                                   # optional coefficient (requires space after)
+        r'([A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+))'  # metabolite + compartment (can start with number)
     )
     return {m.group(1) for m in pattern.finditer(reaction_formula)}
 
@@ -407,22 +418,28 @@
     """Parse a reaction formula and set metabolites with their coefficients."""
 
     if '<=>' in formula:
-        left, right = formula.split('<=>')
+        parts = formula.split('<=>')
         reversible = True
     elif '<--' in formula:
-        left, right = formula.split('<--')
+        parts = formula.split('<--')
         reversible = False
     elif '-->' in formula:
-        left, right = formula.split('-->')
+        parts = formula.split('-->')
         reversible = False
     elif '<-' in formula:
-        left, right = formula.split('<-')
+        parts = formula.split('<-')
         reversible = False
     else:
         raise ValueError(f"Unrecognized reaction format: {formula}")
     
-    reactants = parse_metabolites_side(left.strip())
-    products = parse_metabolites_side(right.strip())
+    # Handle cases where one side might be empty (exchange reactions)
+    if len(parts) != 2:
+        raise ValueError(f"Invalid reaction format, expected 2 parts: {formula}")
+    
+    left, right = parts[0].strip(), parts[1].strip()
+    
+    reactants = parse_metabolites_side(left) if left else {}
+    products = parse_metabolites_side(right) if right else {}
     
     metabolites_to_add = {}
     
@@ -449,12 +466,26 @@
         if not term:
             continue
 
-        # optional coefficient + id ending with _<compartment>
-        match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term)
-        if match:
-            coeff_str, met_id = match.groups()
-            coeff = float(coeff_str) if coeff_str else 1.0
-            metabolites[met_id] = coeff
+        # First check if term has space-separated coefficient and metabolite
+        parts = term.split()
+        if len(parts) == 2:
+            # Two parts: potential coefficient + metabolite
+            try:
+                coeff = float(parts[0])
+                met_id = parts[1]
+                # Verify the second part looks like a metabolite with compartment
+                if re.match(r'[A-Za-z0-9_]+(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', met_id):
+                    metabolites[met_id] = coeff
+                    continue
+            except ValueError:
+                pass
+        
+        # Single term - check if it's a metabolite (no coefficient)  
+        # Updated pattern to include metabolites starting with numbers
+        if re.match(r'[A-Za-z0-9][A-Za-z0-9_]*(?:\[[A-Za-z0-9]+\]|_[A-Za-z0-9]+)', term):
+            metabolites[term] = 1.0
+        else:
+            print(f"Warning: Could not parse metabolite term: '{term}'")
 
     return metabolites
 
@@ -487,20 +518,24 @@
 
 def set_medium_from_data(model: cobraModel, df: pd.DataFrame):
     """Set the medium based on the 'InMedium' column in the dataframe."""
+    if 'InMedium' not in df.columns:
+        print("No 'InMedium' column found, skipping medium setup")
+        return
+        
     medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist()
     
     medium_dict = {}
     for rxn_id in medium_reactions:
         if rxn_id in [r.id for r in model.reactions]:
             reaction = model.reactions.get_by_id(rxn_id)
-            if reaction.lower_bound < 0:  # Solo reazioni di uptake
+            if reaction.lower_bound < 0: 
                 medium_dict[rxn_id] = abs(reaction.lower_bound)
     
     if medium_dict:
         model.medium = medium_dict
         print(f"Medium set with {len(medium_dict)} components")
-
-
+    else:
+        print("No medium components found")
 def validate_model(model: cobraModel) -> Dict[str, any]:
     """Validate the model and return basic statistics."""
     validation = {