cobraxy: COBRAxy/utils/CBS_backend.py comparison

comparison COBRAxy/utils/CBS_backend.py @ 456:a6e45049c1b9 draft default tip

Uploaded

author	francesco_lapi
date	Fri, 12 Sep 2025 17:28:45 +0000
parents	0a3ca20848f3
children

comparison

equal deleted inserted replaced

-:4e2bc80764b6
+:a6e45049c1b9
+"""
+CBS backend utilities using GLPK for constraint-based sampling.
+This module builds and solves LP problems from COBRA models, supports random
+objective function generation (CBS), and provides both swiglpk-based and
+COBRApy-based sampling fallbacks.
+"""
 from swiglpk import *
 import random
 import pandas as pd
 import numpy as np
 import cobra as cb
 # Initialize LP problem
 def initialize_lp_problem(S):
+"""
+Prepare sparse matrix structures for GLPK given a stoichiometric matrix.
+Args:
+S: Stoichiometric matrix (DOK or similar) as returned by cobra.util.create_stoichiometric_matrix.
+Returns:
+tuple: (len_vector, values, indexes, ia, ja, ar, nrows, ncol)
+- len_vector: number of non-zero entries
+- values: list of non-zero values
+- indexes: list of (row, col) indices for non-zero entries
+- ia, ja, ar: GLPK-ready arrays for the sparse matrix
+- nrows, ncol: matrix shape
+"""
 len_vector=len(S.keys())
 values=list(S.values())
 indexes=list(S.keys())
 ia = intArray(len_vector+1);
 return len_vector, values, indexes, ia, ja, ar, nrows, ncol
-# Solve LP problem from the structure of the metabolic model
 def create_and_solve_lp_problem(lb,ub,nrows, ncol, len_vector, ia, ja, ar,
 obj_coefs,reactions,return_lp=False):
+"""
+Create and solve a GLPK LP problem for a metabolic model.
+Args:
+lb, ub: Lower/upper bounds per reaction (lists of floats).
+nrows, ncol: Dimensions of the S matrix.
+len_vector, ia, ja, ar: Sparse matrix data prepared for GLPK.
+obj_coefs: Objective function coefficients (list of floats).
+reactions: Reaction identifiers (list of str), used for output mapping.
+return_lp: If True, also return the GLPK problem object (caller must delete).
+Returns:
+tuple: (fluxes, Z) or (fluxes, Z, lp) if return_lp=True.
+"""
 lp = glp_create_prob();
 glp_set_prob_name(lp, "sample");
 glp_set_obj_dir(lp, GLP_MAX);
 except Exception as e:
 glp_delete_prob(lp)
 raise Exception(e)
-# Solve LP problem from the structure of the metabolic model
 def solve_lp_problem(lp,obj_coefs,reactions):
+"""
+Configure and solve an LP with GLPK using provided objective coefficients.
+Args:
+lp: GLPK problem handle.
+obj_coefs: Objective coefficients.
+reactions: Reaction identifiers (unused in computation; length used for output).
+Returns:
+tuple: (fluxes, Z) where fluxes is a list of primal column values and Z is the objective value.
+"""
 # Set the coefficients of the objective function
 i=1
 for ind_coef in obj_coefs:
 glp_set_obj_coef(lp, i, ind_coef);
 raise Exception(e)
 finally:
 # Re-enable terminal output after solving
 glp_term_out(GLP_ON)
-# Create LP structure
 def create_lp_structure(model):
+"""
+Extract the LP structure (S matrix, bounds, objective) from a COBRA model.
+Args:
+model (cobra.Model): The COBRA model.
+Returns:
+tuple: (S, lb, ub, coefs_obj, reactions)
+"""
 reactions=[el.id for el in model.reactions]
 coefs_obj=[reaction.objective_coefficient for reaction in model.reactions]
 # Lower and upper bounds
 # Create S matrix
 S=cb.util.create_stoichiometric_matrix(model,array_type="dok")
 return S,lb,ub,coefs_obj,reactions
-# CBS sampling interface
 def randomObjectiveFunctionSampling(model, nsample, coefficients_df, df_sample):
+"""
+Sample fluxes using GLPK by iterating over random objective functions.
+Args:
+model: COBRA model.
+nsample: Number of samples to generate.
+coefficients_df: DataFrame of objective coefficients (columns are samples; last row is type_of_problem).
+df_sample: Output DataFrame to fill with flux samples (index: sample, columns: reactions).
+Returns:
+None
+"""
 S,lb,ub,coefs_obj,reactions = create_lp_structure(model)
 len_vector, values, indexes, ia, ja, ar, nrow, ncol = initialize_lp_problem(S)
 for i in range(nsample):
 coefs_obj=coefficients_df.iloc[:,i].values
-if coefs_obj[-1]==1: #minimize
+if coefs_obj[-1]==1: # minimize
 coefs_obj= coefs_obj[0:-1] * -1
 else:
 coefs_obj=coefs_obj[0:-1]
 fluxes,Z = create_and_solve_lp_problem(lb,ub, nrow, ncol, len_vector,
 ia, ja, ar, coefs_obj,reactions,return_lp=False)
 df_sample.loc[i] = fluxes
-pass
+return
 def randomObjectiveFunctionSampling_cobrapy(model, nsample, coefficients_df, df_sample):
+"""
+Fallback sampling using COBRApy's optimize with per-sample randomized objectives.
+Args:
+model: COBRA model.
+nsample: Number of samples to generate.
+coefficients_df: DataFrame of objective coefficients (columns are samples; last row is type_of_problem).
+df_sample: Output DataFrame to fill with flux samples (index: sample, columns: reactions).
+Returns:
+None
+"""
 for i in range(nsample):
 dict_coeff={}
 if(coefficients_df.iloc[-1][i]==1):
-type_problem = -1 #minimize
+type_problem = -1 # minimize
 else:
-type_problem = 1
+type_problem = 1 # maximize
 for rxn in [reaction.id for reaction in model.reactions]:
 dict_coeff[model.reactions.get_by_id(rxn)] = coefficients_df.loc[rxn][i] * type_problem
 model.objective = dict_coeff
 solution =  model.optimize().fluxes
 for rxn, flux in solution.items():
 df_sample.loc[i][rxn] = flux
-pass
+return
-# Create random coefficients for CBS
 def randomObjectiveFunction(model, n_samples, df_fva, seed=0):
+"""
+Create random objective function coefficients for CBS sampling.
-#reactions = model.reactions
-reactions = [reaction.id for reaction in model.reactions]
+The last row 'type_of_problem' encodes 0 for maximize and 1 for minimize.
-cont=seed
-list_ex=reactions.copy()
+Args:
-list_ex.append("type_of_problem")
+model: COBRA model.
-coefficients_df = pd.DataFrame(index=list_ex,columns=[str(i) for i in range(n_samples)])
+n_samples: Number of random objectives to generate.
+df_fva: Flux Variability Analysis results with 'minimum' and 'maximum' per reaction.
-for i in range(0, n_samples):
+seed: Seed for reproducibility.
-cont=cont+1
+Returns:
+pandas.DataFrame: Coefficients DataFrame indexed by reaction IDs plus 'type_of_problem'.
+"""
+# reactions = model.reactions
+reactions = [reaction.id for reaction in model.reactions]
+cont = seed
+list_ex = reactions.copy()
+list_ex.append("type_of_problem")
+coefficients_df = pd.DataFrame(index=list_ex, columns=[str(i) for i in range(n_samples)])
+for i in range(0, n_samples):
+cont = cont + 1
+random.seed(cont)
+# Generate a random threshold in [0, 1]
+threshold = random.random()
+for reaction in reactions:
+cont = cont + 1
 random.seed(cont)
-# Genera un numero casuale tra 0 e 1
+val = random.random()
-threshold = random.random() #coefficiente tra 0 e 1
+if val > threshold:
-for reaction in reactions:
+cont = cont + 1
-cont=cont+1
 random.seed(cont)
-val=random.random()
+# Coefficient in [-1, 1]
+c = 2 * random.random() - 1
-if val>threshold:
+val_max = np.max([abs(df_fva.loc[reaction, "minimum"]), abs(df_fva.loc[reaction, "maximum"])])
-cont=cont+1
-random.seed(cont)
+if val_max != 0: # only if FVA is non-zero
+coefficients_df.loc[reaction, str(i)] = c / val_max # scale by FVA
-c=2*random.random()-1 #coefficiente tra -1 e 1
-val_max = np.max([abs(df_fva.loc[reaction,"minimum"]), abs(df_fva.loc[reaction,"maximum"])])
-if val_max!=0: #solo se la fva è diversa da zero
-coefficients_df.loc[reaction,str(i)] = c/val_max #divido per la fva
-else:
-coefficients_df.loc[reaction,str(i)] = 0
 else:
-coefficients_df.loc[reaction,str(i)] = 0
+coefficients_df.loc[reaction, str(i)] = 0
-cont=cont+1
-random.seed(cont)
-if random.random()<0.5:
-coefficients_df.loc["type_of_problem",str(i)] = 0 #maximize
 else:
-coefficients_df.loc["type_of_problem",str(i)] = 1 #minimize
+coefficients_df.loc[reaction, str(i)] = 0
-return coefficients_df
+cont = cont + 1
+random.seed(cont)
+if random.random() < 0.5:
+coefficients_df.loc["type_of_problem", str(i)] = 0 # maximize
+else:
+coefficients_df.loc["type_of_problem", str(i)] = 1 # minimize
+return coefficients_df

Mercurial > repos > bimib > cobraxy

comparison COBRAxy/utils/CBS_backend.py @ 456:a6e45049c1b9 draft default tip