456
|
1 """
|
|
2 CBS backend utilities using GLPK for constraint-based sampling.
|
|
3
|
|
4 This module builds and solves LP problems from COBRA models, supports random
|
|
5 objective function generation (CBS), and provides both swiglpk-based and
|
|
6 COBRApy-based sampling fallbacks.
|
|
7 """
|
381
|
8 from swiglpk import *
|
|
9 import random
|
|
10 import pandas as pd
|
|
11 import numpy as np
|
|
12 import cobra as cb
|
|
13
|
|
14 # Initialize LP problem
|
|
15 def initialize_lp_problem(S):
|
456
|
16 """
|
|
17 Prepare sparse matrix structures for GLPK given a stoichiometric matrix.
|
|
18
|
|
19 Args:
|
|
20 S: Stoichiometric matrix (DOK or similar) as returned by cobra.util.create_stoichiometric_matrix.
|
|
21
|
|
22 Returns:
|
|
23 tuple: (len_vector, values, indexes, ia, ja, ar, nrows, ncol)
|
|
24 - len_vector: number of non-zero entries
|
|
25 - values: list of non-zero values
|
|
26 - indexes: list of (row, col) indices for non-zero entries
|
|
27 - ia, ja, ar: GLPK-ready arrays for the sparse matrix
|
|
28 - nrows, ncol: matrix shape
|
|
29 """
|
381
|
30
|
|
31 len_vector=len(S.keys())
|
|
32 values=list(S.values())
|
|
33 indexes=list(S.keys())
|
|
34 ia = intArray(len_vector+1);
|
|
35 ja = intArray(len_vector+1);
|
|
36 ar = doubleArray(len_vector+1);
|
|
37
|
|
38 i=0
|
|
39 ind_row=[indexes[i][0]+1 for i in range(0, len(values) )]
|
|
40 ind_col=[indexes[i][1]+1 for i in range(0, len(values) )]
|
|
41 for i in range(1, len(values) + 1):
|
|
42 ia[i]=ind_row[i-1]
|
|
43 ja[i]=ind_col[i-1]
|
|
44 ar[i] = values[i-1]
|
|
45
|
|
46 nrows=S.shape[0]
|
|
47 ncol=S.shape[1]
|
|
48
|
|
49 return len_vector, values, indexes, ia, ja, ar, nrows, ncol
|
|
50
|
|
51
|
|
52
|
|
53 def create_and_solve_lp_problem(lb,ub,nrows, ncol, len_vector, ia, ja, ar,
|
|
54 obj_coefs,reactions,return_lp=False):
|
456
|
55 """
|
|
56 Create and solve a GLPK LP problem for a metabolic model.
|
|
57
|
|
58 Args:
|
|
59 lb, ub: Lower/upper bounds per reaction (lists of floats).
|
|
60 nrows, ncol: Dimensions of the S matrix.
|
|
61 len_vector, ia, ja, ar: Sparse matrix data prepared for GLPK.
|
|
62 obj_coefs: Objective function coefficients (list of floats).
|
|
63 reactions: Reaction identifiers (list of str), used for output mapping.
|
|
64 return_lp: If True, also return the GLPK problem object (caller must delete).
|
|
65
|
|
66 Returns:
|
|
67 tuple: (fluxes, Z) or (fluxes, Z, lp) if return_lp=True.
|
|
68 """
|
381
|
69
|
|
70
|
|
71 lp = glp_create_prob();
|
|
72 glp_set_prob_name(lp, "sample");
|
|
73 glp_set_obj_dir(lp, GLP_MAX);
|
|
74 glp_add_rows(lp, nrows);
|
|
75 eps = 1e-16
|
|
76 for i in range(nrows):
|
|
77 glp_set_row_name(lp, i+1, "constrain_"+str(i+1));
|
|
78 glp_set_row_bnds(lp, i+1, GLP_FX, 0.0, 0.0);
|
|
79 glp_add_cols(lp, ncol);
|
|
80 for i in range(ncol):
|
|
81 glp_set_col_name(lp, i+1, "flux_"+str(i+1));
|
|
82 glp_set_col_bnds(lp, i+1, GLP_DB,lb[i]-eps,ub[i]+eps);
|
|
83 glp_load_matrix(lp, len_vector, ia, ja, ar);
|
|
84
|
|
85 try:
|
|
86 fluxes,Z=solve_lp_problem(lp,obj_coefs,reactions)
|
|
87 if return_lp:
|
|
88 return fluxes,Z,lp
|
|
89 else:
|
|
90 glp_delete_prob(lp);
|
|
91 return fluxes,Z
|
|
92 except Exception as e:
|
|
93 glp_delete_prob(lp)
|
|
94 raise Exception(e)
|
|
95
|
|
96
|
|
97 def solve_lp_problem(lp,obj_coefs,reactions):
|
456
|
98 """
|
|
99 Configure and solve an LP with GLPK using provided objective coefficients.
|
|
100
|
|
101 Args:
|
|
102 lp: GLPK problem handle.
|
|
103 obj_coefs: Objective coefficients.
|
|
104 reactions: Reaction identifiers (unused in computation; length used for output).
|
|
105
|
|
106 Returns:
|
|
107 tuple: (fluxes, Z) where fluxes is a list of primal column values and Z is the objective value.
|
|
108 """
|
381
|
109
|
|
110 # Set the coefficients of the objective function
|
|
111 i=1
|
|
112 for ind_coef in obj_coefs:
|
|
113 glp_set_obj_coef(lp, i, ind_coef);
|
|
114 i+=1
|
|
115
|
|
116 # Initialize the parameters
|
|
117 params=glp_smcp()
|
|
118 params.presolve=GLP_ON
|
|
119 params.msg_lev = GLP_MSG_ERR
|
|
120 params.tm_lim=4000
|
|
121 glp_init_smcp(params)
|
|
122
|
|
123 glp_term_out(GLP_OFF)
|
|
124
|
|
125 try:
|
|
126
|
|
127 # Solve the problem
|
|
128 glp_scale_prob(lp,GLP_SF_AUTO)
|
|
129
|
|
130 value=glp_simplex(lp, params)
|
|
131
|
|
132 Z = glp_get_obj_val(lp);
|
|
133
|
|
134 if value == 0:
|
|
135 fluxes = []
|
|
136 for i in range(len(reactions)): fluxes.append(glp_get_col_prim(lp, i+1))
|
|
137 return fluxes,Z
|
|
138 else:
|
|
139 raise Exception("error in LP problem. Problem:",str(value))
|
|
140 except Exception as e:
|
|
141 # Re-enable terminal output for error reporting
|
|
142 glp_term_out(GLP_ON)
|
|
143 raise Exception(e)
|
|
144 finally:
|
|
145 # Re-enable terminal output after solving
|
|
146 glp_term_out(GLP_ON)
|
|
147
|
|
148 def create_lp_structure(model):
|
456
|
149 """
|
|
150 Extract the LP structure (S matrix, bounds, objective) from a COBRA model.
|
|
151
|
|
152 Args:
|
|
153 model (cobra.Model): The COBRA model.
|
|
154
|
|
155 Returns:
|
|
156 tuple: (S, lb, ub, coefs_obj, reactions)
|
|
157 """
|
381
|
158
|
|
159 reactions=[el.id for el in model.reactions]
|
|
160 coefs_obj=[reaction.objective_coefficient for reaction in model.reactions]
|
|
161
|
|
162 # Lower and upper bounds
|
|
163 lb=[reaction.lower_bound for reaction in model.reactions]
|
|
164 ub=[reaction.upper_bound for reaction in model.reactions]
|
|
165
|
|
166 # Create S matrix
|
|
167 S=cb.util.create_stoichiometric_matrix(model,array_type="dok")
|
|
168
|
|
169 return S,lb,ub,coefs_obj,reactions
|
|
170
|
|
171 def randomObjectiveFunctionSampling(model, nsample, coefficients_df, df_sample):
|
456
|
172 """
|
|
173 Sample fluxes using GLPK by iterating over random objective functions.
|
|
174
|
|
175 Args:
|
|
176 model: COBRA model.
|
|
177 nsample: Number of samples to generate.
|
|
178 coefficients_df: DataFrame of objective coefficients (columns are samples; last row is type_of_problem).
|
|
179 df_sample: Output DataFrame to fill with flux samples (index: sample, columns: reactions).
|
|
180
|
|
181 Returns:
|
|
182 None
|
|
183 """
|
381
|
184
|
|
185 S,lb,ub,coefs_obj,reactions = create_lp_structure(model)
|
|
186 len_vector, values, indexes, ia, ja, ar, nrow, ncol = initialize_lp_problem(S)
|
|
187
|
|
188 for i in range(nsample):
|
|
189
|
|
190 coefs_obj=coefficients_df.iloc[:,i].values
|
|
191
|
456
|
192 if coefs_obj[-1]==1: # minimize
|
381
|
193 coefs_obj= coefs_obj[0:-1] * -1
|
|
194 else:
|
|
195 coefs_obj=coefs_obj[0:-1]
|
|
196
|
|
197 fluxes,Z = create_and_solve_lp_problem(lb,ub, nrow, ncol, len_vector,
|
|
198 ia, ja, ar, coefs_obj,reactions,return_lp=False)
|
|
199 df_sample.loc[i] = fluxes
|
456
|
200 return
|
381
|
201
|
|
202 def randomObjectiveFunctionSampling_cobrapy(model, nsample, coefficients_df, df_sample):
|
456
|
203 """
|
|
204 Fallback sampling using COBRApy's optimize with per-sample randomized objectives.
|
|
205
|
|
206 Args:
|
|
207 model: COBRA model.
|
|
208 nsample: Number of samples to generate.
|
|
209 coefficients_df: DataFrame of objective coefficients (columns are samples; last row is type_of_problem).
|
|
210 df_sample: Output DataFrame to fill with flux samples (index: sample, columns: reactions).
|
|
211
|
|
212 Returns:
|
|
213 None
|
|
214 """
|
381
|
215
|
|
216 for i in range(nsample):
|
|
217
|
|
218 dict_coeff={}
|
|
219 if(coefficients_df.iloc[-1][i]==1):
|
456
|
220 type_problem = -1 # minimize
|
381
|
221 else:
|
456
|
222 type_problem = 1 # maximize
|
381
|
223
|
|
224 for rxn in [reaction.id for reaction in model.reactions]:
|
|
225 dict_coeff[model.reactions.get_by_id(rxn)] = coefficients_df.loc[rxn][i] * type_problem
|
|
226
|
|
227 model.objective = dict_coeff
|
|
228 solution = model.optimize().fluxes
|
|
229 for rxn, flux in solution.items():
|
|
230 df_sample.loc[i][rxn] = flux
|
|
231
|
456
|
232 return
|
|
233
|
|
234 def randomObjectiveFunction(model, n_samples, df_fva, seed=0):
|
|
235 """
|
|
236 Create random objective function coefficients for CBS sampling.
|
381
|
237
|
456
|
238 The last row 'type_of_problem' encodes 0 for maximize and 1 for minimize.
|
|
239
|
|
240 Args:
|
|
241 model: COBRA model.
|
|
242 n_samples: Number of random objectives to generate.
|
|
243 df_fva: Flux Variability Analysis results with 'minimum' and 'maximum' per reaction.
|
|
244 seed: Seed for reproducibility.
|
381
|
245
|
456
|
246 Returns:
|
|
247 pandas.DataFrame: Coefficients DataFrame indexed by reaction IDs plus 'type_of_problem'.
|
|
248 """
|
|
249 # reactions = model.reactions
|
|
250 reactions = [reaction.id for reaction in model.reactions]
|
|
251 cont = seed
|
|
252 list_ex = reactions.copy()
|
|
253 list_ex.append("type_of_problem")
|
|
254 coefficients_df = pd.DataFrame(index=list_ex, columns=[str(i) for i in range(n_samples)])
|
|
255
|
|
256 for i in range(0, n_samples):
|
381
|
257
|
456
|
258 cont = cont + 1
|
|
259 random.seed(cont)
|
|
260
|
|
261 # Generate a random threshold in [0, 1]
|
|
262 threshold = random.random()
|
|
263
|
|
264 for reaction in reactions:
|
|
265
|
|
266 cont = cont + 1
|
381
|
267 random.seed(cont)
|
456
|
268
|
|
269 val = random.random()
|
381
|
270
|
456
|
271 if val > threshold:
|
|
272
|
|
273 cont = cont + 1
|
381
|
274 random.seed(cont)
|
456
|
275
|
|
276 # Coefficient in [-1, 1]
|
|
277 c = 2 * random.random() - 1
|
381
|
278
|
456
|
279 val_max = np.max([abs(df_fva.loc[reaction, "minimum"]), abs(df_fva.loc[reaction, "maximum"])])
|
|
280
|
|
281 if val_max != 0: # only if FVA is non-zero
|
|
282 coefficients_df.loc[reaction, str(i)] = c / val_max # scale by FVA
|
|
283 else:
|
|
284 coefficients_df.loc[reaction, str(i)] = 0
|
381
|
285
|
456
|
286 else:
|
|
287 coefficients_df.loc[reaction, str(i)] = 0
|
381
|
288
|
456
|
289 cont = cont + 1
|
|
290 random.seed(cont)
|
381
|
291
|
456
|
292 if random.random() < 0.5:
|
|
293 coefficients_df.loc["type_of_problem", str(i)] = 0 # maximize
|
|
294 else:
|
|
295 coefficients_df.loc["type_of_problem", str(i)] = 1 # minimize
|
|
296
|
|
297 return coefficients_df
|