COBRAxy/src/flux_simulation.py @ 539:2fb97466e404 (draft)

| author | francesco_lapi |
|---|---|
| date | Sat, 25 Oct 2025 14:55:13 +0000 |
| parents | |
| children | fcdbc81feb45 |
538:fd53d42348bd → 539:2fb97466e404
| 1 """ | |
| 2 Flux sampling and analysis utilities for COBRA models. | |
| 3 | |
| 4 This script supports two modes: | |
| 5 - Mode 1 (model_and_bounds=True): load a base model and apply bounds from | |
| 6 separate files before sampling. | |
| 7 - Mode 2 (model_and_bounds=False): load complete models and sample directly. | |
| 8 | |
| 9 Sampling algorithms supported: OPTGP and CBS. Outputs include flux samples | |
| 10 and optional analyses (pFBA, FVA, sensitivity), saved as tabular files. | |
| 11 """ | |
| 12 | |
| 13 import argparse | |
| 14 import utils.general_utils as utils | |
| 15 from typing import List | |
| 16 import os | |
| 17 import pandas as pd | |
| 18 import numpy as np | |
| 19 import cobra | |
| 20 import utils.CBS_backend as CBS_backend | |
| 21 from joblib import Parallel, delayed, cpu_count | |
| 22 from cobra.sampling import OptGPSampler | |
| 23 import sys | |
| 24 import utils.model_utils as model_utils | |
| 25 | |
| 26 | |
################################# process args ###############################
def process_args(args: List[str] = None) -> argparse.Namespace:
    """
    Processes command-line arguments.

    Args:
        args (list): List of command-line arguments.

    Returns:
        Namespace: An object containing parsed arguments.
    """
    parser = argparse.ArgumentParser(usage='%(prog)s [options]',
                                     description='Flux sampling and analysis for COBRA models')

    parser.add_argument("-mo", "--model_upload", type=str,
                        help="path to the base model file (required for Mode 1)")

    parser.add_argument("-mab", "--model_and_bounds", type=str,
                        choices=['True', 'False'],
                        required=True,
                        help="upload mode: True for model+bounds, False for complete models")

    parser.add_argument("-ens", "--sampling_enabled", type=str,
                        choices=['true', 'false'],
                        required=True,
                        help="enable sampling: 'true' for sampling, 'false' for no sampling")

    parser.add_argument('-ol', '--out_log',
                        help="Output log")

    parser.add_argument('-td', '--tool_dir',
                        type=str,
                        required=True,
                        help='your tool directory')

    parser.add_argument('-in', '--input',
                        required=True,
                        type=str,
                        help='input bounds files or complete model files')

    parser.add_argument('-ni', '--name',
                        required=True,
                        type=str,
                        help='cell names')

    parser.add_argument('-a', '--algorithm',
                        type=str,
                        choices=['OPTGP', 'CBS'],
                        required=True,
                        help='choose sampling algorithm')

    parser.add_argument('-th', '--thinning',
                        type=int,
                        default=100,
                        help='thinning interval for OPTGP sampling')

    parser.add_argument('-ns', '--n_samples',
                        type=int,
                        required=True,
                        help='choose how many samples (set to 0 for optimization only)')

    parser.add_argument('-sd', '--seed',
                        type=int,
                        required=True,
                        help='seed for random number generation')

    parser.add_argument('-nb', '--n_batches',
                        type=int,
                        required=True,
                        help='choose how many batches')

    parser.add_argument('-opt', '--perc_opt',
                        type=float,
                        default=0.9,
                        required=False,
                        help='choose the fraction of optimality for FVA (0-1)')

    parser.add_argument('-ot', '--output_type',
                        type=str,
                        required=True,
                        help='output type for sampling results')

    parser.add_argument('-ota', '--output_type_analysis',
                        type=str,
                        required=False,
                        help='output type analysis (optimization methods)')

    parser.add_argument('-idop', '--output_path',
                        type=str,
                        default='flux_simulation/',
                        help='output path for fluxes')

    parser.add_argument('-otm', '--out_mean',
                        type=str,
                        required=False,
                        help='output of mean of fluxes')

    parser.add_argument('-otmd', '--out_median',
                        type=str,
                        required=False,
                        help='output of median of fluxes')

    parser.add_argument('-otq', '--out_quantiles',
                        type=str,
                        required=False,
                        help='output of quantiles of fluxes')

    parser.add_argument('-otfva', '--out_fva',
                        type=str,
                        required=False,
                        help='output of FVA results')

    parser.add_argument('-otp', '--out_pfba',
                        type=str,
                        required=False,
                        help='output of pFBA results')

    parser.add_argument('-ots', '--out_sensitivity',
                        type=str,
                        required=False,
                        help='output of sensitivity results')

    ARGS = parser.parse_args(args)
    return ARGS
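
# Example (hedged sketch): one plausible command line for Mode 2 (complete
# models) with CBS sampling. Every path and file name below is a hypothetical
# placeholder, not a value shipped with the tool.
#
#   python flux_simulation.py \
#       -mab False -ens true -td /path/to/COBRAxy \
#       -in modelA.csv,modelB.csv -ni A,B \
#       -a CBS -ns 1000 -sd 0 -nb 2 \
#       -ot mean -otm mean.tsv -ota pFBA -otp pfba.tsv \
#       -idop flux_simulation/ -ol run.log
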
########################### warning ###########################################
def warning(s: str) -> None:
    """
    Log a warning message to the output log file and print it to the console.

    Args:
        s (str): The warning message to be logged and printed.

    Returns:
        None
    """
    with open(ARGS.out_log, 'a') as log:
        log.write(s + "\n\n")
    print(s)


def write_to_file(dataset: pd.DataFrame, path: str, keep_index: bool = False, name: str = None) -> None:
    """
    Write a DataFrame as a tab-separated file, either under a directory with a
    given base name or to an explicit file path.

    Args:
        dataset: The DataFrame to write.
        path: Directory in which to save the file, or the full file path if
            `name` is None.
        keep_index: Whether to keep the DataFrame index in the file.
        name: Base file name (without extension). If None, `path` is treated
            as the full file path.

    Returns:
        None
    """
    dataset.index.name = 'Reactions'
    if name:
        dataset.to_csv(os.path.join(path, name + ".csv"), sep='\t', index=keep_index)
    else:
        dataset.to_csv(path, sep='\t', index=keep_index)

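# Example (hedged): the two dispatch modes of write_to_file, with hypothetical
# file names. Both branches write tab-separated content, even though the
# directory+name branch appends a .csv extension.
#
#   write_to_file(df, "flux_simulation/", keep_index=True, name="cellA")
#   # -> flux_simulation/cellA.csv
#   write_to_file(df, "results/means.tsv", keep_index=True)
#   # -> path used verbatim as the output file
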
############################ dataset input ####################################
def read_dataset(data: str, name: str) -> pd.DataFrame:
    """
    Read a tab-separated dataset file and return it as a pandas DataFrame.

    Args:
        data (str): Path to the file containing the dataset.
        name (str): Name of the dataset, used in error messages.

    Returns:
        pandas.DataFrame: DataFrame containing the dataset.

    Raises:
        sys.exit: If the file is empty or has fewer than two data columns,
            execution is aborted.
    """
    try:
        dataset = pd.read_csv(data, sep='\t', header=0, index_col=0, engine='python')
    except pd.errors.EmptyDataError:
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    if len(dataset.columns) < 2:
        sys.exit('Execution aborted: wrong format of ' + name + '\n')
    return dataset


def OPTGP_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, thinning: int = 100, n_batches: int = 1, seed: int = 0) -> None:
    """
    Sample the model's flux space with cobrapy's OptGPSampler (OPTGP
    algorithm) and save the merged results to a tabular file.

    Args:
        model (cobra.Model): The COBRA model to sample from.
        model_name (str): The name of the model, used in naming output files.
        n_samples (int, optional): Number of samples per batch. Default is 1000.
        thinning (int, optional): Thinning parameter for the sampler. Default is 100.
        n_batches (int, optional): Number of batches to run. Default is 1.
        seed (int, optional): Random seed for reproducibility. Default is 0.

    Returns:
        None
    """
    # Get reaction IDs for consistent column ordering
    reaction_ids = [rxn.id for rxn in model.reactions]

    # Sample and save each batch as a numpy file
    for i in range(n_batches):
        # Keyword arguments are required here: OptGPSampler's second
        # positional parameter is `processes`, not `thinning`.
        optgp = OptGPSampler(model, thinning=thinning, seed=seed)
        samples = optgp.sample(n_samples)

        # Save as numpy array (more memory efficient)
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy"
        np.save(batch_filename, samples.to_numpy())

        seed += 1

    # Merge all batches into a single DataFrame
    all_samples = []

    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy"
        batch_data = np.load(batch_filename, allow_pickle=True)
        all_samples.append(batch_data)

    # Concatenate all batches
    samplesTotal_array = np.vstack(all_samples)

    # Convert back to DataFrame with proper column names
    samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids)

    # Save the final merged result as a tab-separated file
    write_to_file(samplesTotal.T, ARGS.output_path, True, name=model_name)

    # Clean up temporary numpy files
    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy"
        if os.path.exists(batch_filename):
            os.remove(batch_filename)

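# Example (hedged sketch): OptGPSampler used standalone on cobrapy's bundled
# "textbook" E. coli core model, outside this pipeline (OPTGP_sampler itself
# additionally needs the global ARGS.output_path to be set).
#
#   from cobra.io import load_model
#   m = load_model("textbook")
#   s = OptGPSampler(m, thinning=10, seed=0).sample(100)
#   # s is a DataFrame of shape (100, number of reactions)
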

def CBS_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, n_batches: int = 1, seed: int = 0) -> None:
    """
    Sample fluxes with the CBS algorithm, which optimizes randomly generated
    objective functions over the flux polytope, and save the merged results
    to a tabular file.

    Args:
        model (cobra.Model): The COBRA model to sample from.
        model_name (str): The name of the model, used in naming output files.
        n_samples (int, optional): Number of samples per batch. Default is 1000.
        n_batches (int, optional): Number of batches to run. Default is 1.
        seed (int, optional): Random seed for reproducibility. Default is 0.

    Returns:
        None
    """
    # Get reaction IDs for consistent column ordering
    reaction_ids = [reaction.id for reaction in model.reactions]

    # Perform FVA analysis once for all batches
    df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0).round(6)

    # Generate random objective functions for all samples across all batches
    df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples * n_batches, df_FVA, seed=seed)

    # Sample and save each batch as a numpy file
    for i in range(n_batches):
        samples = pd.DataFrame(columns=reaction_ids, index=range(n_samples))

        try:
            CBS_backend.randomObjectiveFunctionSampling(
                model,
                n_samples,
                df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples],
                samples
            )
        except Exception as e:
            utils.logWarning(
                f"Warning: GLPK solver has failed for {model_name}. Trying with COBRA interface. Error: {str(e)}",
                ARGS.out_log
            )
            CBS_backend.randomObjectiveFunctionSampling_cobrapy(
                model,
                n_samples,
                df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples],
                samples
            )

        # Save as numpy array (more memory efficient)
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy"
        utils.logWarning(batch_filename, ARGS.out_log)
        np.save(batch_filename, samples.to_numpy())

    # Merge all batches into a single DataFrame
    all_samples = []

    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy"
        batch_data = np.load(batch_filename, allow_pickle=True)
        all_samples.append(batch_data)

    # Concatenate all batches
    samplesTotal_array = np.vstack(all_samples)

    # Convert back to DataFrame with proper column names
    samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids)

    # Save the final merged result as a tab-separated file
    write_to_file(samplesTotal.T, ARGS.output_path, True, name=model_name)

    # Clean up temporary numpy files
    for i in range(n_batches):
        batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy"
        if os.path.exists(batch_filename):
            os.remove(batch_filename)

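# Batch i consumes objective columns [i*n_samples, (i+1)*n_samples) of
# df_coefficients, so the pool of random objectives generated above is split
# evenly across batches: with n_samples=500 and n_batches=3 the slices are
# [0:500), [500:1000) and [1000:1500).
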

def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]:
    """
    MODE 1: Prepares the model with bounds from a separate bounds file and performs sampling.

    Args:
        model_input_original (cobra.Model): The original COBRA model.
        bounds_path (str): Path to the tab-separated file containing the bounds dataset.
        cell_name (str): Name of the cell, used to generate filenames for output.

    Returns:
        List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
    """

    model_input = model_input_original.copy()
    bounds_df = read_dataset(bounds_path, "bounds dataset")

    # Apply bounds to model
    for rxn_index, row in bounds_df.iterrows():
        try:
            model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
            model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
        except KeyError:
            warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.")

    return perform_sampling_and_analysis(model_input, cell_name)

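# Example (hedged): the bounds file layout implied by the row.lower_bound /
# row.upper_bound accesses above -- a tab-separated table whose first column
# holds reaction IDs. The IDs and values shown are hypothetical.
#
#   Reactions       lower_bound     upper_bound
#   EX_glc__D_e     -10.0           0.0
#   ATPM            8.39            8.39
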

def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]:
    """
    Common function to perform sampling and analysis on a prepared model.

    Args:
        model_input (cobra.Model): The prepared COBRA model with bounds applied.
        cell_name (str): Name of the cell, used to generate filenames for output.

    Returns:
        List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
    """

    returnList = []

    if ARGS.sampling_enabled == "true":

        if ARGS.algorithm == 'OPTGP':
            OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
        elif ARGS.algorithm == 'CBS':
            CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)

        df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types)

        if "fluxes" not in ARGS.output_types:
            os.remove(ARGS.output_path + "/" + cell_name + '.csv')

        returnList = [df_mean, df_median, df_quantiles]

    df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis)

    if "pFBA" in ARGS.output_type_analysis:
        returnList.append(df_pFBA)
    if "FVA" in ARGS.output_type_analysis:
        returnList.append(df_FVA)
    if "sensitivity" in ARGS.output_type_analysis:
        returnList.append(df_sensitivity)

    return returnList

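# Note on ordering: when sampling runs, the returned list is
# [mean, median, quantiles, <requested analyses...>]; when sampling is
# skipped, the analysis frames start at index 0. main() relies on this
# contract through its index_result offset.
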
def fluxes_statistics(model_name: str, output_types: List) -> List[pd.DataFrame]:
    """
    Computes statistics (mean, median, quantiles) for the fluxes.

    Args:
        model_name (str): Name of the model, used in filename for input.
        output_types (List[str]): Types of statistics to compute (mean, median, quantiles).

    Returns:
        List[pd.DataFrame]: List of DataFrames containing mean, median, and quantiles statistics.
    """

    df_mean = pd.DataFrame()
    df_median = pd.DataFrame()
    df_quantiles = pd.DataFrame()

    df_samples = pd.read_csv(ARGS.output_path + "/" + model_name + '.csv', sep='\t', index_col=0).T
    df_samples = df_samples.round(8)

    for output_type in output_types:
        if output_type == "mean":
            df_mean = df_samples.mean()
            df_mean = df_mean.to_frame().T
            df_mean = df_mean.reset_index(drop=True)
            df_mean.index = [model_name]
        elif output_type == "median":
            df_median = df_samples.median()
            df_median = df_median.to_frame().T
            df_median = df_median.reset_index(drop=True)
            df_median.index = [model_name]
        elif output_type == "quantiles":
            newRow = []
            cols = []
            for rxn in df_samples.columns:
                quantiles = df_samples[rxn].quantile([0.25, 0.50, 0.75])
                newRow.append(quantiles[0.25])
                cols.append(rxn + "_q1")
                newRow.append(quantiles[0.5])
                cols.append(rxn + "_q2")
                newRow.append(quantiles[0.75])
                cols.append(rxn + "_q3")
            df_quantiles = pd.DataFrame(columns=cols)
            df_quantiles.loc[0] = newRow
            df_quantiles = df_quantiles.reset_index(drop=True)
            df_quantiles.index = [model_name]

    return df_mean, df_median, df_quantiles

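# Example: for a hypothetical reaction "PGI", the quantiles branch above
# emits three columns, PGI_q1, PGI_q2 and PGI_q3, holding the 25th, 50th and
# 75th percentiles of its sampled flux distribution.
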
def fluxes_analysis(model: cobra.Model, model_name: str, output_types: List) -> List[pd.DataFrame]:
    """
    Performs flux analysis including pFBA, FVA, and sensitivity analysis. The objective function
    is assumed to be already set in the model.

    Args:
        model (cobra.Model): The COBRA model to analyze.
        model_name (str): Name of the model, used in filenames for output.
        output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity).

    Returns:
        List[pd.DataFrame]: List of DataFrames containing pFBA, FVA, and sensitivity analysis results.
    """

    df_pFBA = pd.DataFrame()
    df_FVA = pd.DataFrame()
    df_sensitivity = pd.DataFrame()

    for output_type in output_types:
        if output_type == "pFBA":
            solution = cobra.flux_analysis.pfba(model)
            fluxes = solution.fluxes
            df_pFBA.loc[0, [rxn.id for rxn in model.reactions]] = fluxes.tolist()
            df_pFBA = df_pFBA.reset_index(drop=True)
            df_pFBA.index = [model_name]
            df_pFBA = df_pFBA.astype(float).round(6)
        elif output_type == "FVA":
            fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=ARGS.perc_opt, processes=1).round(8)
            columns = []
            for rxn in fva.index.to_list():
                columns.append(rxn + "_min")
                columns.append(rxn + "_max")
            df_FVA = pd.DataFrame(columns=columns)
            for index_rxn, row in fva.iterrows():
                df_FVA.loc[0, index_rxn + "_min"] = fva.loc[index_rxn, "minimum"]
                df_FVA.loc[0, index_rxn + "_max"] = fva.loc[index_rxn, "maximum"]
            df_FVA = df_FVA.reset_index(drop=True)
            df_FVA.index = [model_name]
            df_FVA = df_FVA.astype(float).round(6)
        elif output_type == "sensitivity":
            solution_original = model.optimize().objective_value
            reactions = model.reactions
            single = cobra.flux_analysis.single_reaction_deletion(model)
            newRow = []
            df_sensitivity = pd.DataFrame(columns=[rxn.id for rxn in reactions], index=[model_name])
            for rxn in reactions:
                newRow.append(single.knockout[rxn.id].growth.values[0] / solution_original)
            df_sensitivity.loc[model_name] = newRow
            df_sensitivity = df_sensitivity.astype(float).round(6)
    return df_pFBA, df_FVA, df_sensitivity

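# The sensitivity value per reaction is the ratio (growth after single
# knockout) / (wild-type objective value): values near 1 mean the deletion
# barely affects the objective, while values near 0 flag essential reactions.
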
############################# main ###########################################
def main(args: List[str] = None) -> None:
    """
    Initialize and run sampling/analysis based on the frontend input arguments.

    Returns:
        None
    """

    num_processors = max(1, cpu_count() - 1)

    global ARGS
    ARGS = process_args(args)

    # Ensure the output directory exists before the samplers write into it
    os.makedirs(ARGS.output_path, exist_ok=True)

    # --- Normalize inputs (the tool may pass comma-separated --input and --name values) ---
    ARGS.input_files = ARGS.input.split(",") if ARGS.input else []
    ARGS.file_names = ARGS.name.split(",")
    # output types (required) -> list
    ARGS.output_types = ARGS.output_type.split(",") if ARGS.output_type else []
    # optional analysis output types -> list or empty
    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if ARGS.output_type_analysis else []

    # Determine if sampling should be performed
    perform_sampling = ARGS.sampling_enabled == "true"

    print("=== INPUT FILES ===")
    print(f"{ARGS.input_files}")
    print(f"{ARGS.file_names}")
    print(f"{ARGS.output_type}")
    print(f"{ARGS.output_types}")
    print(f"{ARGS.output_type_analysis}")
    print(f"Sampling enabled: {perform_sampling} (n_samples: {ARGS.n_samples})")

    if ARGS.model_and_bounds == "True":
        # MODE 1: Model + bounds (separate files)
        print("=== MODE 1: Model + Bounds (separate files) ===")

        # Load base model
        if not ARGS.model_upload:
            sys.exit("Error: model_upload is required for Mode 1")

        base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload)

        validation = model_utils.validate_model(base_model)

        print("\n=== MODEL VALIDATION ===")
        for key, value in validation.items():
            print(f"{key}: {value}")

        # Set solver verbosity to 1 to see warning and error messages only.
        base_model.solver.configuration.verbosity = 1

        # Process each bounds file with the base model
        results = Parallel(n_jobs=num_processors)(
            delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name)
            for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
        )

    else:
        # MODE 2: Multiple complete models
        print("=== MODE 2: Multiple complete models ===")

        # Process each complete model file
        results = Parallel(n_jobs=num_processors)(
            delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name)
            for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
        )

    # Handle sampling outputs (only if sampling was performed)
    if perform_sampling:
        print("=== PROCESSING SAMPLING RESULTS ===")

        all_mean = pd.concat([result[0] for result in results], ignore_index=False)
        all_median = pd.concat([result[1] for result in results], ignore_index=False)
        all_quantiles = pd.concat([result[2] for result in results], ignore_index=False)

        if "mean" in ARGS.output_types:
            all_mean = all_mean.fillna(0.0)
            all_mean = all_mean.sort_index()
            write_to_file(all_mean.T, ARGS.out_mean, True)

        if "median" in ARGS.output_types:
            all_median = all_median.fillna(0.0)
            all_median = all_median.sort_index()
            write_to_file(all_median.T, ARGS.out_median, True)

        if "quantiles" in ARGS.output_types:
            all_quantiles = all_quantiles.fillna(0.0)
            all_quantiles = all_quantiles.sort_index()
            write_to_file(all_quantiles.T, ARGS.out_quantiles, True)
    else:
        print("=== SAMPLING SKIPPED (n_samples = 0 or sampling disabled) ===")

    # Handle optimization analysis outputs (always available)
    print("=== PROCESSING OPTIMIZATION RESULTS ===")

    # Determine the starting index for optimization results:
    # if sampling was performed they start at index 3, otherwise at index 0.
    index_result = 3 if perform_sampling else 0

    if "pFBA" in ARGS.output_type_analysis:
        all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False)
        all_pFBA = all_pFBA.sort_index()
        write_to_file(all_pFBA.T, ARGS.out_pfba, True)
        index_result += 1

    if "FVA" in ARGS.output_type_analysis:
        all_FVA = pd.concat([result[index_result] for result in results], ignore_index=False)
        all_FVA = all_FVA.sort_index()
        write_to_file(all_FVA.T, ARGS.out_fva, True)
        index_result += 1

    if "sensitivity" in ARGS.output_type_analysis:
        all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False)
        all_sensitivity = all_sensitivity.sort_index()
        write_to_file(all_sensitivity.T, ARGS.out_sensitivity, True)

    return

##############################################################################
if __name__ == "__main__":
    main()
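
# Example (hedged): main() accepts an explicit argument list, so the pipeline
# can also be driven programmatically, e.g. from a test harness. All paths
# below are hypothetical.
#
#   main([
#       "-mab", "False", "-ens", "true", "-td", "/path/to/COBRAxy",
#       "-in", "modelA.csv", "-ni", "A", "-a", "OPTGP",
#       "-ns", "500", "-sd", "0", "-nb", "1",
#       "-ot", "mean", "-otm", "mean.tsv", "-ol", "run.log",
#   ])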
