comparison COBRAxy/flux_simulation.py @ 489:97eea560a10f draft

Uploaded
author francesco_lapi
date Mon, 29 Sep 2025 10:33:26 +0000
parents c561c060a55f
1 """
2 Flux sampling and analysis utilities for COBRA models.
3
4 This script supports two modes:
5 - Mode 1 (model_and_bounds=True): load a base model and apply bounds from
6 separate files before sampling.
7 - Mode 2 (model_and_bounds=False): load complete models and sample directly.
8
9 Sampling algorithms supported: OPTGP and CBS. Outputs include flux samples
10 and optional analyses (pFBA, FVA, sensitivity), saved as tabular files.
11 """

import argparse
import utils.general_utils as utils
from typing import List
import os
import pandas as pd
import numpy as np
import cobra
import utils.CBS_backend as CBS_backend
from joblib import Parallel, delayed, cpu_count
from cobra.sampling import OptGPSampler
import sys
import utils.model_utils as model_utils

################################# process args ###############################
def process_args(args: List[str] = None) -> argparse.Namespace:
    """
    Processes command-line arguments.

    Args:
        args (list): List of command-line arguments.

    Returns:
        Namespace: An object containing parsed arguments.
    """
    parser = argparse.ArgumentParser(usage='%(prog)s [options]',
                                     description='process some values')

    parser.add_argument("-mo", "--model_upload", type=str,
                        help="path to input file with custom rules, if provided")

    parser.add_argument("-mab", "--model_and_bounds", type=str,
                        choices=['True', 'False'],
                        required=True,
                        help="upload mode: True for model+bounds, False for complete models")

    parser.add_argument("-ens", "--sampling_enabled", type=str,
                        choices=['true', 'false'],
                        required=True,
                        help="enable sampling: 'true' for sampling, 'false' for no sampling")

    parser.add_argument('-ol', '--out_log',
                        help="Output log")

    parser.add_argument('-td', '--tool_dir',
                        type=str,
                        required=True,
                        help='your tool directory')

    parser.add_argument('-in', '--input',
                        required=True,
                        type=str,
                        help='input bounds files or complete model files')

    parser.add_argument('-ni', '--name',
                        required=True,
                        type=str,
                        help='cell names')

    parser.add_argument('-a', '--algorithm',
                        type=str,
                        choices=['OPTGP', 'CBS'],
                        required=True,
                        help='choose sampling algorithm')

    parser.add_argument('-th', '--thinning',
                        type=int,
                        default=100,
                        required=True,
                        help='choose thinning')

    parser.add_argument('-ns', '--n_samples',
                        type=int,
                        required=True,
                        help='choose how many samples (set to 0 for optimization only)')

    parser.add_argument('-sd', '--seed',
                        type=int,
                        required=True,
                        help='seed for random number generation')

    parser.add_argument('-nb', '--n_batches',
                        type=int,
                        required=True,
                        help='choose how many batches')

    parser.add_argument('-opt', '--perc_opt',
                        type=float,
                        default=0.9,
                        required=False,
                        help='choose the fraction of optimality for FVA (0-1)')

    parser.add_argument('-ot', '--output_type',
                        type=str,
                        required=True,
                        help='output type for sampling results')

    parser.add_argument('-ota', '--output_type_analysis',
                        type=str,
                        required=False,
                        help='output type analysis (optimization methods)')

    parser.add_argument('-idop', '--output_path',
                        type=str,
                        default='flux_simulation',
                        help='output path for maps')

    ARGS = parser.parse_args(args)
    return ARGS

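
# A minimal usage sketch (hypothetical paths and cell names) of how the
# frontend might invoke the parser above for Mode 1; every flag below
# corresponds to an add_argument call in process_args:
#
#   ARGS = process_args([
#       "-mab", "True", "-ens", "true",
#       "-mo", "model_rules.csv",
#       "-td", "/path/to/tool",
#       "-in", "boundsA.tsv,boundsB.tsv",
#       "-ni", "cellA,cellB",
#       "-a", "CBS", "-th", "100", "-ns", "1000",
#       "-sd", "0", "-nb", "2",
#       "-ot", "mean,median,quantiles",
#       "-ota", "pFBA,FVA",
#       "-idop", "flux_simulation",
#   ])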
########################### warning ###########################################
def warning(s: str) -> None:
    """
    Log a warning message to an output log file and print it to the console.

    # ... (unchanged lines omitted in this comparison)
        log.write(s + "\n\n")
    print(s)

def write_to_file(dataset: pd.DataFrame, name: str, keep_index: bool = False) -> None:
    """
    Write a DataFrame to a TSV file under ARGS.output_path with a given base name.

    Args:
        dataset: The DataFrame to write.
        name: Base file name (without extension).
        keep_index: Whether to keep the DataFrame index in the file.

    Returns:
        None
    """
    dataset.index.name = 'Reactions'
    dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep='\t', index=keep_index)

############################ dataset input ####################################
def read_dataset(data: str, name: str) -> pd.DataFrame:
    # ... (unchanged lines omitted in this comparison)
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    return dataset


def OPTGP_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, thinning: int = 100, n_batches: int = 1, seed: int = 0) -> None:
    """
    Samples the model with the OPTGP algorithm and saves the results to CSV files.

    Args:
        model (cobra.Model): The COBRA model to sample from.
        model_name (str): The name of the model, used in naming output files.
        n_samples (int, optional): Number of samples per batch. Default is 1000.
        thinning (int, optional): Thinning parameter for the sampler. Default is 100.
        n_batches (int, optional): Number of batches to run. Default is 1.
        seed (int, optional): Random seed for reproducibility. Default is 0.

    Returns:
        None
    """
    # Get reaction IDs for consistent column ordering
    reaction_ids = [rxn.id for rxn in model.reactions]

    # Sample and save each batch as a numpy file
    for i in range(n_batches):
        # Pass thinning and seed by keyword: OptGPSampler's second positional
        # parameter is the number of processes, not the thinning interval.
        optgp = OptGPSampler(model, thinning=thinning, seed=seed)
        samples = optgp.sample(n_samples)

        # Save as numpy array (more memory efficient)
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy"
        np.save(batch_filename, samples.to_numpy())

        seed += 1

    # Merge all batches into a single DataFrame
    all_samples = []

    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy"
        batch_data = np.load(batch_filename, allow_pickle=True)
        all_samples.append(batch_data)

    # Concatenate all batches
    samplesTotal_array = np.vstack(all_samples)

    # Convert back to DataFrame with proper column names
    samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids)

    # Save the final merged result as CSV
    write_to_file(samplesTotal.T, model_name, True)

    # Clean up temporary numpy files
    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy"
        if os.path.exists(batch_filename):
            os.remove(batch_filename)
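
# Shape note (follows from the merge above): each batch contributes n_samples
# rows, so the merged matrix is (n_samples * n_batches) x len(reaction_ids);
# after the final transpose, the written table has one row per reaction and
# one column per sample.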

def CBS_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, n_batches: int = 1, seed: int = 0) -> None:
    """
    Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files.

    Args:
        model (cobra.Model): The COBRA model to sample from.
        model_name (str): The name of the model, used in naming output files.
        n_samples (int, optional): Number of samples per batch. Default is 1000.
        n_batches (int, optional): Number of batches to run. Default is 1.
        seed (int, optional): Random seed for reproducibility. Default is 0.

    Returns:
        None
    """
    # Get reaction IDs for consistent column ordering
    reaction_ids = [reaction.id for reaction in model.reactions]

    # Perform FVA analysis once for all batches
    df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0).round(6)

    # Generate random objective functions for all samples across all batches
    df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples * n_batches, df_FVA, seed=seed)

    # Sample and save each batch as a numpy file
    for i in range(n_batches):
        samples = pd.DataFrame(columns=reaction_ids, index=range(n_samples))

        try:
            CBS_backend.randomObjectiveFunctionSampling(
                model,
                n_samples,
                df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples],
                samples
            )
        except Exception as e:
            utils.logWarning(
                f"Warning: GLPK solver has failed for {model_name}. Trying with COBRA interface. Error: {str(e)}",
                ARGS.out_log
            )
            CBS_backend.randomObjectiveFunctionSampling_cobrapy(
                model,
                n_samples,
                df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples],
                samples
            )

        # Save as numpy array (more memory efficient)
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy"
        utils.logWarning(batch_filename, ARGS.out_log)
        np.save(batch_filename, samples.to_numpy())

    # Merge all batches into a single DataFrame
    all_samples = []

    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy"
        batch_data = np.load(batch_filename, allow_pickle=True)
        all_samples.append(batch_data)

    # Concatenate all batches
    samplesTotal_array = np.vstack(all_samples)

    # Convert back to DataFrame with proper column names
    samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids)

    # Save the final merged result as CSV
    write_to_file(samplesTotal.T, model_name, True)

    # Clean up temporary numpy files
    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy"
        if os.path.exists(batch_filename):
            os.remove(batch_filename)


def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]:
    """
    MODE 1: Prepares the model with bounds from a separate bounds file and performs sampling.

    Args:
        model_input_original (cobra.Model): The original COBRA model.
        bounds_path (str): Path to the CSV file containing the bounds dataset.
        cell_name (str): Name of the cell, used to generate filenames for output.

    Returns:
        List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
    """
    model_input = model_input_original.copy()
    bounds_df = read_dataset(bounds_path, "bounds dataset")

    # Apply bounds to model
    for rxn_index, row in bounds_df.iterrows():
        try:
            model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
            model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
        except KeyError:
            warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.")

    return perform_sampling_and_analysis(model_input, cell_name)

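
# Sketch of the bounds-file layout expected by the loop above (inferred from
# row.lower_bound / row.upper_bound keyed by reaction id; the reaction names
# are hypothetical):
#
#   Reactions       lower_bound   upper_bound
#   EX_glc__D_e     -10.0         0.0
#   ATPM            8.39          1000.0
#
# Reaction ids absent from the model are skipped with a warning.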

def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]:
    """
    Common function to perform sampling and analysis on a prepared model.

    Args:
        model_input (cobra.Model): The prepared COBRA model with bounds applied.
        cell_name (str): Name of the cell, used to generate filenames for output.

    Returns:
        List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
    """
    returnList = []

    if ARGS.sampling_enabled == "true":

        if ARGS.algorithm == 'OPTGP':
            OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
        elif ARGS.algorithm == 'CBS':
            CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)

        df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types)

        if "fluxes" not in ARGS.output_types:
            os.remove(ARGS.output_path + "/" + cell_name + '.csv')

        returnList = [df_mean, df_median, df_quantiles]

    df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis)

    if "pFBA" in ARGS.output_type_analysis:
        returnList.append(df_pFBA)
    # ... (remaining analysis appends unchanged, omitted)


# ... (fluxes_statistics unchanged, omitted up to its final line)
    return df_mean, df_median, df_quantiles

def fluxes_analysis(model: cobra.Model, model_name: str, output_types: List) -> List[pd.DataFrame]:
    """
    Performs flux analysis including pFBA, FVA, and sensitivity analysis. The objective function
    is assumed to be already set in the model.

    Args:
        model (cobra.Model): The COBRA model to analyze.
        model_name (str): Name of the model, used in filenames for output.
        output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity).
    # ... (rest of docstring and df_pFBA initialization unchanged, omitted)
    df_FVA = pd.DataFrame()
    df_sensitivity = pd.DataFrame()

    for output_type in output_types:
        if output_type == "pFBA":
            solution = cobra.flux_analysis.pfba(model)
            fluxes = solution.fluxes
            df_pFBA.loc[0, [rxn.id for rxn in model.reactions]] = fluxes.tolist()
            df_pFBA = df_pFBA.reset_index(drop=True)
            df_pFBA.index = [model_name]
            df_pFBA = df_pFBA.astype(float).round(6)
        elif output_type == "FVA":
            fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=ARGS.perc_opt, processes=1).round(8)
            columns = []
            for rxn in fva.index.to_list():
                columns.append(rxn + "_min")
                columns.append(rxn + "_max")
            df_FVA = pd.DataFrame(columns=columns)
            # ... (loop writing the "_min" values unchanged, omitted)
                df_FVA.loc[0, index_rxn + "_max"] = fva.loc[index_rxn, "maximum"]
            df_FVA = df_FVA.reset_index(drop=True)
            df_FVA.index = [model_name]
            df_FVA = df_FVA.astype(float).round(6)
        elif output_type == "sensitivity":
            solution_original = model.optimize().objective_value
            reactions = model.reactions
            single = cobra.flux_analysis.single_reaction_deletion(model)
            newRow = []
            df_sensitivity = pd.DataFrame(columns=[rxn.id for rxn in reactions], index=[model_name])
            # ... (per-reaction sensitivity computation unchanged, omitted)
            df_sensitivity.loc[model_name] = newRow
            df_sensitivity = df_sensitivity.astype(float).round(6)
    return df_pFBA, df_FVA, df_sensitivity

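
# Output layout note (from the code above): df_pFBA and df_sensitivity have one
# row per model_name and one column per reaction id, while df_FVA stores two
# columns per reaction, "<rxn>_min" and "<rxn>_max", all rounded to 6 decimals.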
############################# main ###########################################
def main(args: List[str] = None) -> None:
    """
    Initialize and run sampling/analysis based on the frontend input arguments.

    Returns:
        None
    """
    num_processors = max(1, cpu_count() - 1)

    global ARGS
    ARGS = process_args(args)

    if not os.path.exists(ARGS.output_path):
        os.makedirs(ARGS.output_path)

    # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) ---
    ARGS.input_files = ARGS.input.split(",") if ARGS.input else []
    ARGS.file_names = ARGS.name.split(",")
    # output types (required) -> list
    ARGS.output_types = ARGS.output_type.split(",") if ARGS.output_type else []
    # optional analysis output types -> list or empty
    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if ARGS.output_type_analysis else []

    # Determine if sampling should be performed
    perform_sampling = ARGS.sampling_enabled == "true"

    print("=== INPUT FILES ===")
    print(f"{ARGS.input_files}")
    print(f"{ARGS.file_names}")
    print(f"{ARGS.output_type}")
    print(f"{ARGS.output_types}")
    print(f"{ARGS.output_type_analysis}")
    print(f"Sampling enabled: {perform_sampling} (n_samples: {ARGS.n_samples})")

    if ARGS.model_and_bounds == "True":
        # MODE 1: Model + bounds (separate files)
        print("=== MODE 1: Model + Bounds (separate files) ===")

        # Load base model
        if not ARGS.model_upload:
            sys.exit("Error: model_upload is required for Mode 1")

        base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload)

        validation = model_utils.validate_model(base_model)

        print("\n=== MODEL VALIDATION ===")
        for key, value in validation.items():
            print(f"{key}: {value}")

        # Set solver verbosity to 1 to see warning and error messages only.
        base_model.solver.configuration.verbosity = 1

        # Process each bounds file with the base model
        results = Parallel(n_jobs=num_processors)(
            delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name)
            for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
        )

    else:
        # MODE 2: Multiple complete models
        print("=== MODE 2: Multiple complete models ===")

        # Process each complete model file
        results = Parallel(n_jobs=num_processors)(
            delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name)
            for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
        )

    # Handle sampling outputs (only if sampling was performed)
    if perform_sampling:
        print("=== PROCESSING SAMPLING RESULTS ===")

        all_mean = pd.concat([result[0] for result in results], ignore_index=False)
        all_median = pd.concat([result[1] for result in results], ignore_index=False)
        all_quantiles = pd.concat([result[2] for result in results], ignore_index=False)

        if "mean" in ARGS.output_types:
            all_mean = all_mean.fillna(0.0)
            all_mean = all_mean.sort_index()
            write_to_file(all_mean.T, "mean", True)

        if "median" in ARGS.output_types:
            all_median = all_median.fillna(0.0)
            all_median = all_median.sort_index()
            write_to_file(all_median.T, "median", True)

        if "quantiles" in ARGS.output_types:
            all_quantiles = all_quantiles.fillna(0.0)
            all_quantiles = all_quantiles.sort_index()
            write_to_file(all_quantiles.T, "quantiles", True)
    else:
        print("=== SAMPLING SKIPPED (n_samples = 0 or sampling disabled) ===")

    # Handle optimization analysis outputs (always available)
    print("=== PROCESSING OPTIMIZATION RESULTS ===")

    # Optimization results start at index 3 if sampling was performed
    # (after mean/median/quantiles), otherwise at index 0.
    index_result = 3 if perform_sampling else 0

    if "pFBA" in ARGS.output_type_analysis:
        all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False)
        all_pFBA = all_pFBA.sort_index()
        write_to_file(all_pFBA.T, "pFBA", True)
        index_result += 1

    if "FVA" in ARGS.output_type_analysis:
        all_FVA = pd.concat([result[index_result] for result in results], ignore_index=False)
        all_FVA = all_FVA.sort_index()
        write_to_file(all_FVA.T, "FVA", True)
        index_result += 1

    if "sensitivity" in ARGS.output_type_analysis:
        all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False)
        all_sensitivity = all_sensitivity.sort_index()
        write_to_file(all_sensitivity.T, "sensitivity", True)

    return

##############################################################################
if __name__ == "__main__":
    main()
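
# A hypothetical Mode 2 run (complete model files, sampling disabled, pFBA
# analysis only); file names are placeholders:
#
#   main([
#       "-mab", "False", "-ens", "false",
#       "-td", "/path/to/tool",
#       "-in", "modelA.csv,modelB.csv",
#       "-ni", "cellA,cellB",
#       "-a", "OPTGP", "-th", "100", "-ns", "0",
#       "-sd", "0", "-nb", "1",
#       "-ot", "mean", "-ota", "pFBA",
#       "-idop", "results",
#   ])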