Mercurial > repos > bimib > cobraxy
comparison COBRAxy/flux_simulation.py @ 489:97eea560a10f draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 29 Sep 2025 10:33:26 +0000 |
parents | c561c060a55f |
children |
comparison
equal
deleted
inserted
replaced
488:e0bcc61b2feb | 489:97eea560a10f |
---|---|
1 """ | |
2 Flux sampling and analysis utilities for COBRA models. | |
3 | |
4 This script supports two modes: | |
5 - Mode 1 (model_and_bounds=True): load a base model and apply bounds from | |
6 separate files before sampling. | |
7 - Mode 2 (model_and_bounds=False): load complete models and sample directly. | |
8 | |
9 Sampling algorithms supported: OPTGP and CBS. Outputs include flux samples | |
10 and optional analyses (pFBA, FVA, sensitivity), saved as tabular files. | |
11 """ | |
12 | |
1 import argparse | 13 import argparse |
2 import utils.general_utils as utils | 14 import utils.general_utils as utils |
3 from typing import Optional, List | 15 from typing import List |
4 import os | 16 import os |
17 import pandas as pd | |
5 import numpy as np | 18 import numpy as np |
6 import pandas as pd | |
7 import cobra | 19 import cobra |
8 import utils.CBS_backend as CBS_backend | 20 import utils.CBS_backend as CBS_backend |
9 from joblib import Parallel, delayed, cpu_count | 21 from joblib import Parallel, delayed, cpu_count |
10 from cobra.sampling import OptGPSampler | 22 from cobra.sampling import OptGPSampler |
11 import sys | 23 import sys |
24 import utils.model_utils as model_utils | |
25 | |
12 | 26 |
13 ################################# process args ############################### | 27 ################################# process args ############################### |
14 def process_args(args :List[str] = None) -> argparse.Namespace: | 28 def process_args(args: List[str] = None) -> argparse.Namespace: |
15 """ | 29 """ |
16 Processes command-line arguments. | 30 Processes command-line arguments. |
17 | 31 |
18 Args: | 32 Args: |
19 args (list): List of command-line arguments. | 33 args (list): List of command-line arguments. |
20 | 34 |
21 Returns: | 35 Returns: |
22 Namespace: An object containing parsed arguments. | 36 Namespace: An object containing parsed arguments. |
23 """ | 37 """ |
24 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 38 parser = argparse.ArgumentParser(usage='%(prog)s [options]', |
25 description = 'process some value\'s') | 39 description='process some value\'s') |
26 | 40 |
27 parser.add_argument('-ol', '--out_log', | 41 parser.add_argument("-mo", "--model_upload", type=str, |
28 help = "Output log") | 42 help="path to input file with custom rules, if provided") |
43 | |
44 parser.add_argument("-mab", "--model_and_bounds", type=str, | |
45 choices=['True', 'False'], | |
46 required=True, | |
47 help="upload mode: True for model+bounds, False for complete models") | |
48 | |
49 parser.add_argument("-ens", "--sampling_enabled", type=str, | |
50 choices=['true', 'false'], | |
51 required=True, | |
52 help="enable sampling: 'true' for sampling, 'false' for no sampling") | |
53 | |
54 parser.add_argument('-ol', '--out_log', | |
55 help="Output log") | |
29 | 56 |
30 parser.add_argument('-td', '--tool_dir', | 57 parser.add_argument('-td', '--tool_dir', |
31 type = str, | 58 type=str, |
32 required = True, | 59 required=True, |
33 help = 'your tool directory') | 60 help='your tool directory') |
34 | 61 |
35 parser.add_argument('-in', '--input', | 62 parser.add_argument('-in', '--input', |
36 required = True, | 63 required=True, |
37 type=str, | 64 type=str, |
38 help = 'inputs bounds') | 65 help='input bounds files or complete model files') |
39 | 66 |
40 parser.add_argument('-ni', '--names', | 67 parser.add_argument('-ni', '--name', |
41 required = True, | 68 required=True, |
42 type=str, | 69 type=str, |
43 help = 'cell names') | 70 help='cell names') |
44 | |
45 parser.add_argument( | |
46 '-ms', '--model_selector', | |
47 type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom], | |
48 help = 'chose which type of model you want use') | |
49 | |
50 parser.add_argument("-mo", "--model", type = str) | |
51 | |
52 parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name") | |
53 | 71 |
54 parser.add_argument('-a', '--algorithm', | 72 parser.add_argument('-a', '--algorithm', |
55 type = str, | 73 type=str, |
56 choices = ['OPTGP', 'CBS'], | 74 choices=['OPTGP', 'CBS'], |
57 required = True, | 75 required=True, |
58 help = 'choose sampling algorithm') | 76 help='choose sampling algorithm') |
59 | 77 |
60 parser.add_argument('-th', '--thinning', | 78 parser.add_argument('-th', '--thinning', |
61 type = int, | 79 type=int, |
62 default= 100, | 80 default=100, |
81 required=True, | |
82 help='choose thinning') | |
83 | |
84 parser.add_argument('-ns', '--n_samples', | |
85 type=int, | |
86 required=True, | |
87 help='choose how many samples (set to 0 for optimization only)') | |
88 | |
89 parser.add_argument('-sd', '--seed', | |
90 type=int, | |
91 required=True, | |
92 help='seed for random number generation') | |
93 | |
94 parser.add_argument('-nb', '--n_batches', | |
95 type=int, | |
96 required=True, | |
97 help='choose how many batches') | |
98 | |
99 parser.add_argument('-opt', '--perc_opt', | |
100 type=float, | |
101 default=0.9, | |
63 required=False, | 102 required=False, |
64 help = 'choose thinning') | 103 help='choose the fraction of optimality for FVA (0-1)') |
65 | 104 |
66 parser.add_argument('-ns', '--n_samples', | 105 parser.add_argument('-ot', '--output_type', |
67 type = int, | 106 type=str, |
68 required = True, | 107 required=True, |
69 help = 'choose how many samples') | 108 help='output type for sampling results') |
70 | 109 |
71 parser.add_argument('-sd', '--seed', | 110 parser.add_argument('-ota', '--output_type_analysis', |
72 type = int, | 111 type=str, |
73 required = True, | 112 required=False, |
74 help = 'seed') | 113 help='output type analysis (optimization methods)') |
75 | 114 |
76 parser.add_argument('-nb', '--n_batches', | 115 parser.add_argument('-idop', '--output_path', |
77 type = int, | 116 type=str, |
78 required = True, | |
79 help = 'choose how many batches') | |
80 | |
81 parser.add_argument('-ot', '--output_type', | |
82 type = str, | |
83 required = True, | |
84 help = 'output type') | |
85 | |
86 parser.add_argument('-ota', '--output_type_analysis', | |
87 type = str, | |
88 required = False, | |
89 help = 'output type analysis') | |
90 | |
91 parser.add_argument('-idop', '--output_path', | |
92 type = str, | |
93 default='flux_simulation', | 117 default='flux_simulation', |
94 help = 'output path for maps') | 118 help='output path for maps') |
95 | 119 |
96 ARGS = parser.parse_args(args) | 120 ARGS = parser.parse_args(args) |
97 return ARGS | 121 return ARGS |
98 | |
99 ########################### warning ########################################### | 122 ########################### warning ########################################### |
100 def warning(s :str) -> None: | 123 def warning(s :str) -> None: |
101 """ | 124 """ |
102 Log a warning message to an output log file and print it to the console. | 125 Log a warning message to an output log file and print it to the console. |
103 | 126 |
111 log.write(s + "\n\n") | 134 log.write(s + "\n\n") |
112 print(s) | 135 print(s) |
113 | 136 |
114 | 137 |
115 def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None: | 138 def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None: |
139 """ | |
140 Write a DataFrame to a TSV file under ARGS.output_path with a given base name. | |
141 | |
142 Args: | |
143 dataset: The DataFrame to write. | |
144 name: Base file name (without extension). | |
145 keep_index: Whether to keep the DataFrame index in the file. | |
146 | |
147 Returns: | |
148 None | |
149 """ | |
116 dataset.index.name = 'Reactions' | 150 dataset.index.name = 'Reactions' |
117 dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep = '\t', index = keep_index) | 151 dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep = '\t', index = keep_index) |
118 | 152 |
119 ############################ dataset input #################################### | 153 ############################ dataset input #################################### |
120 def read_dataset(data :str, name :str) -> pd.DataFrame: | 154 def read_dataset(data :str, name :str) -> pd.DataFrame: |
140 sys.exit('Execution aborted: wrong format of ' + name + '\n') | 174 sys.exit('Execution aborted: wrong format of ' + name + '\n') |
141 return dataset | 175 return dataset |
142 | 176 |
143 | 177 |
144 | 178 |
145 def OPTGP_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, thinning:int=100, n_batches:int=1, seed:int=0)-> None: | 179 def OPTGP_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, thinning: int = 100, n_batches: int = 1, seed: int = 0) -> None: |
146 """ | 180 """ |
147 Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files. | 181 Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files. |
148 | 182 |
149 Args: | 183 Args: |
150 model (cobra.Model): The COBRA model to sample from. | 184 model (cobra.Model): The COBRA model to sample from. |
151 model_name (str): The name of the model, used in naming output files. | 185 model_name (str): The name of the model, used in naming output files. |
152 n_samples (int, optional): Number of samples per batch. Default is 1000. | 186 n_samples (int, optional): Number of samples per batch. Default is 1000. |
153 thinning (int, optional): Thinning parameter for the sampler. Default is 100. | 187 thinning (int, optional): Thinning parameter for the sampler. Default is 100. |
154 n_batches (int, optional): Number of batches to run. Default is 1. | 188 n_batches (int, optional): Number of batches to run. Default is 1. |
155 seed (int, optional): Random seed for reproducibility. Default is 0. | 189 seed (int, optional): Random seed for reproducibility. Default is 0. |
156 | 190 |
157 Returns: | 191 Returns: |
158 None | 192 None |
159 """ | 193 """ |
160 | 194 import numpy as np |
161 for i in range(0, n_batches): | 195 |
196 # Get reaction IDs for consistent column ordering | |
197 reaction_ids = [rxn.id for rxn in model.reactions] | |
198 | |
199 # Sample and save each batch as numpy file | |
200 for i in range(n_batches): | |
162 optgp = OptGPSampler(model, thinning, seed) | 201 optgp = OptGPSampler(model, thinning, seed) |
163 samples = optgp.sample(n_samples) | 202 samples = optgp.sample(n_samples) |
164 samples.to_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv', index=False) | 203 |
165 seed+=1 | 204 # Save as numpy array (more memory efficient) |
166 samplesTotal = pd.DataFrame() | 205 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" |
167 for i in range(0, n_batches): | 206 np.save(batch_filename, samples.to_numpy()) |
168 samples_batch = pd.read_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv') | 207 |
169 samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True) | 208 seed += 1 |
170 | 209 |
210 # Merge all batches into a single DataFrame | |
211 all_samples = [] | |
212 | |
213 for i in range(n_batches): | |
214 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" | |
215 batch_data = np.load(batch_filename, allow_pickle=True) | |
216 all_samples.append(batch_data) | |
217 | |
218 # Concatenate all batches | |
219 samplesTotal_array = np.vstack(all_samples) | |
220 | |
221 # Convert back to DataFrame with proper column names | |
222 samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) | |
223 | |
224 # Save the final merged result as CSV | |
171 write_to_file(samplesTotal.T, model_name, True) | 225 write_to_file(samplesTotal.T, model_name, True) |
172 | 226 |
173 for i in range(0, n_batches): | 227 # Clean up temporary numpy files |
174 os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv') | 228 for i in range(n_batches): |
175 pass | 229 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" |
176 | 230 if os.path.exists(batch_filename): |
177 | 231 os.remove(batch_filename) |
178 def CBS_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, n_batches:int=1, seed:int=0)-> None: | 232 |
233 | |
234 def CBS_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, n_batches: int = 1, seed: int = 0) -> None: | |
179 """ | 235 """ |
180 Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files. | 236 Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files. |
181 | 237 |
182 Args: | 238 Args: |
183 model (cobra.Model): The COBRA model to sample from. | 239 model (cobra.Model): The COBRA model to sample from. |
184 model_name (str): The name of the model, used in naming output files. | 240 model_name (str): The name of the model, used in naming output files. |
185 n_samples (int, optional): Number of samples per batch. Default is 1000. | 241 n_samples (int, optional): Number of samples per batch. Default is 1000. |
186 n_batches (int, optional): Number of batches to run. Default is 1. | 242 n_batches (int, optional): Number of batches to run. Default is 1. |
187 seed (int, optional): Random seed for reproducibility. Default is 0. | 243 seed (int, optional): Random seed for reproducibility. Default is 0. |
188 | 244 |
189 Returns: | 245 Returns: |
190 None | 246 None |
191 """ | 247 """ |
192 | 248 import numpy as np |
193 df_FVA = cobra.flux_analysis.flux_variability_analysis(model,fraction_of_optimum=0).round(6) | 249 |
194 | 250 # Get reaction IDs for consistent column ordering |
195 df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples*n_batches, df_FVA, seed=seed) | 251 reaction_ids = [reaction.id for reaction in model.reactions] |
196 | 252 |
197 for i in range(0, n_batches): | 253 # Perform FVA analysis once for all batches |
198 samples = pd.DataFrame(columns =[reaction.id for reaction in model.reactions], index = range(n_samples)) | 254 df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0).round(6) |
255 | |
256 # Generate random objective functions for all samples across all batches | |
257 df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples * n_batches, df_FVA, seed=seed) | |
258 | |
259 # Sample and save each batch as numpy file | |
260 for i in range(n_batches): | |
261 samples = pd.DataFrame(columns=reaction_ids, index=range(n_samples)) | |
262 | |
199 try: | 263 try: |
200 CBS_backend.randomObjectiveFunctionSampling(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], samples) | 264 CBS_backend.randomObjectiveFunctionSampling( |
265 model, | |
266 n_samples, | |
267 df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], | |
268 samples | |
269 ) | |
201 except Exception as e: | 270 except Exception as e: |
202 utils.logWarning( | 271 utils.logWarning( |
203 "Warning: GLPK solver has failed for " + model_name + ". Trying with COBRA interface. Error:" + str(e), | 272 f"Warning: GLPK solver has failed for {model_name}. Trying with COBRA interface. Error: {str(e)}", |
204 ARGS.out_log) | 273 ARGS.out_log |
205 CBS_backend.randomObjectiveFunctionSampling_cobrapy(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], | 274 ) |
206 samples) | 275 CBS_backend.randomObjectiveFunctionSampling_cobrapy( |
207 utils.logWarning(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv', ARGS.out_log) | 276 model, |
208 samples.to_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv', index=False) | 277 n_samples, |
209 | 278 df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], |
210 samplesTotal = pd.DataFrame() | 279 samples |
211 for i in range(0, n_batches): | 280 ) |
212 samples_batch = pd.read_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv') | 281 |
213 samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True) | 282 # Save as numpy array (more memory efficient) |
214 | 283 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" |
284 utils.logWarning(batch_filename, ARGS.out_log) | |
285 np.save(batch_filename, samples.to_numpy()) | |
286 | |
287 # Merge all batches into a single DataFrame | |
288 all_samples = [] | |
289 | |
290 for i in range(n_batches): | |
291 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" | |
292 batch_data = np.load(batch_filename, allow_pickle=True) | |
293 all_samples.append(batch_data) | |
294 | |
295 # Concatenate all batches | |
296 samplesTotal_array = np.vstack(all_samples) | |
297 | |
298 # Convert back to DataFrame with proper column namesq | |
299 samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) | |
300 | |
301 # Save the final merged result as CSV | |
215 write_to_file(samplesTotal.T, model_name, True) | 302 write_to_file(samplesTotal.T, model_name, True) |
216 | 303 |
217 for i in range(0, n_batches): | 304 # Clean up temporary numpy files |
218 os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv') | 305 for i in range(n_batches): |
219 pass | 306 batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" |
220 | 307 if os.path.exists(batch_filename): |
221 | 308 os.remove(batch_filename) |
222 def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]: | 309 |
223 """ | 310 |
224 Prepares the model with bounds from the dataset and performs sampling and analysis based on the selected algorithm. | 311 |
312 def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]: | |
313 """ | |
314 MODE 1: Prepares the model with bounds from separate bounds file and performs sampling. | |
225 | 315 |
226 Args: | 316 Args: |
227 model_input_original (cobra.Model): The original COBRA model. | 317 model_input_original (cobra.Model): The original COBRA model. |
228 bounds_path (str): Path to the CSV file containing the bounds dataset. | 318 bounds_path (str): Path to the CSV file containing the bounds dataset. |
229 cell_name (str): Name of the cell, used to generate filenames for output. | 319 cell_name (str): Name of the cell, used to generate filenames for output. |
232 List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. | 322 List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. |
233 """ | 323 """ |
234 | 324 |
235 model_input = model_input_original.copy() | 325 model_input = model_input_original.copy() |
236 bounds_df = read_dataset(bounds_path, "bounds dataset") | 326 bounds_df = read_dataset(bounds_path, "bounds dataset") |
327 | |
328 # Apply bounds to model | |
237 for rxn_index, row in bounds_df.iterrows(): | 329 for rxn_index, row in bounds_df.iterrows(): |
238 model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound | 330 try: |
239 model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound | 331 model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound |
240 | 332 model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound |
241 | 333 except KeyError: |
242 if ARGS.algorithm == 'OPTGP': | 334 warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.") |
243 OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) | 335 |
244 | 336 return perform_sampling_and_analysis(model_input, cell_name) |
245 elif ARGS.algorithm == 'CBS': | 337 |
246 CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) | 338 |
247 | 339 def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]: |
248 df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) | 340 """ |
249 | 341 Common function to perform sampling and analysis on a prepared model. |
250 if("fluxes" not in ARGS.output_types): | 342 |
251 os.remove(ARGS.output_path + "/" + cell_name + '.csv') | 343 Args: |
344 model_input (cobra.Model): The prepared COBRA model with bounds applied. | |
345 cell_name (str): Name of the cell, used to generate filenames for output. | |
346 | |
347 Returns: | |
348 List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. | |
349 """ | |
252 | 350 |
253 returnList = [] | 351 returnList = [] |
254 returnList.append(df_mean) | 352 |
255 returnList.append(df_median) | 353 if ARGS.sampling_enabled == "true": |
256 returnList.append(df_quantiles) | 354 |
355 if ARGS.algorithm == 'OPTGP': | |
356 OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) | |
357 elif ARGS.algorithm == 'CBS': | |
358 CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) | |
359 | |
360 df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) | |
361 | |
362 if("fluxes" not in ARGS.output_types): | |
363 os.remove(ARGS.output_path + "/" + cell_name + '.csv') | |
364 | |
365 returnList = [df_mean, df_median, df_quantiles] | |
257 | 366 |
258 df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis) | 367 df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis) |
259 | 368 |
260 if("pFBA" in ARGS.output_type_analysis): | 369 if("pFBA" in ARGS.output_type_analysis): |
261 returnList.append(df_pFBA) | 370 returnList.append(df_pFBA) |
314 | 423 |
315 return df_mean, df_median, df_quantiles | 424 return df_mean, df_median, df_quantiles |
316 | 425 |
317 def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]: | 426 def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]: |
318 """ | 427 """ |
319 Performs flux analysis including pFBA, FVA, and sensitivity analysis. | 428 Performs flux analysis including pFBA, FVA, and sensitivity analysis. The objective function |
429 is assumed to be already set in the model. | |
320 | 430 |
321 Args: | 431 Args: |
322 model (cobra.Model): The COBRA model to analyze. | 432 model (cobra.Model): The COBRA model to analyze. |
323 model_name (str): Name of the model, used in filenames for output. | 433 model_name (str): Name of the model, used in filenames for output. |
324 output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity). | 434 output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity). |
331 df_FVA= pd.DataFrame() | 441 df_FVA= pd.DataFrame() |
332 df_sensitivity= pd.DataFrame() | 442 df_sensitivity= pd.DataFrame() |
333 | 443 |
334 for output_type in output_types: | 444 for output_type in output_types: |
335 if(output_type == "pFBA"): | 445 if(output_type == "pFBA"): |
336 model.objective = "Biomass" | |
337 solution = cobra.flux_analysis.pfba(model) | 446 solution = cobra.flux_analysis.pfba(model) |
338 fluxes = solution.fluxes | 447 fluxes = solution.fluxes |
339 df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist() | 448 df_pFBA.loc[0,[rxn.id for rxn in model.reactions]] = fluxes.tolist() |
340 df_pFBA = df_pFBA.reset_index(drop=True) | 449 df_pFBA = df_pFBA.reset_index(drop=True) |
341 df_pFBA.index = [model_name] | 450 df_pFBA.index = [model_name] |
342 df_pFBA = df_pFBA.astype(float).round(6) | 451 df_pFBA = df_pFBA.astype(float).round(6) |
343 elif(output_type == "FVA"): | 452 elif(output_type == "FVA"): |
344 fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) | 453 fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=ARGS.perc_opt, processes=1).round(8) |
345 columns = [] | 454 columns = [] |
346 for rxn in fva.index.to_list(): | 455 for rxn in fva.index.to_list(): |
347 columns.append(rxn + "_min") | 456 columns.append(rxn + "_min") |
348 columns.append(rxn + "_max") | 457 columns.append(rxn + "_max") |
349 df_FVA= pd.DataFrame(columns = columns) | 458 df_FVA= pd.DataFrame(columns = columns) |
352 df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"] | 461 df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"] |
353 df_FVA = df_FVA.reset_index(drop=True) | 462 df_FVA = df_FVA.reset_index(drop=True) |
354 df_FVA.index = [model_name] | 463 df_FVA.index = [model_name] |
355 df_FVA = df_FVA.astype(float).round(6) | 464 df_FVA = df_FVA.astype(float).round(6) |
356 elif(output_type == "sensitivity"): | 465 elif(output_type == "sensitivity"): |
357 model.objective = "Biomass" | |
358 solution_original = model.optimize().objective_value | 466 solution_original = model.optimize().objective_value |
359 reactions = model.reactions | 467 reactions = model.reactions |
360 single = cobra.flux_analysis.single_reaction_deletion(model) | 468 single = cobra.flux_analysis.single_reaction_deletion(model) |
361 newRow = [] | 469 newRow = [] |
362 df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name]) | 470 df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name]) |
365 df_sensitivity.loc[model_name] = newRow | 473 df_sensitivity.loc[model_name] = newRow |
366 df_sensitivity = df_sensitivity.astype(float).round(6) | 474 df_sensitivity = df_sensitivity.astype(float).round(6) |
367 return df_pFBA, df_FVA, df_sensitivity | 475 return df_pFBA, df_FVA, df_sensitivity |
368 | 476 |
369 ############################# main ########################################### | 477 ############################# main ########################################### |
370 def main(args :List[str] = None) -> None: | 478 def main(args: List[str] = None) -> None: |
371 """ | 479 """ |
372 Initializes everything and sets the program in motion based on the fronted input arguments. | 480 Initialize and run sampling/analysis based on the frontend input arguments. |
373 | 481 |
374 Returns: | 482 Returns: |
375 None | 483 None |
376 """ | 484 """ |
377 | 485 |
378 num_processors = cpu_count() | 486 num_processors = max(1, cpu_count() - 1) |
379 | 487 |
380 global ARGS | 488 global ARGS |
381 ARGS = process_args(args) | 489 ARGS = process_args(args) |
382 | 490 |
383 if not os.path.exists(ARGS.output_path): | 491 if not os.path.exists(ARGS.output_path): |
384 os.makedirs(ARGS.output_path) | 492 os.makedirs(ARGS.output_path) |
385 | 493 |
386 model_type :utils.Model = ARGS.model_selector | 494 # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) --- |
387 if model_type is utils.Model.Custom: | 495 ARGS.input_files = ARGS.input.split(",") if ARGS.input else [] |
388 model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) | 496 ARGS.file_names = ARGS.name.split(",") |
497 # output types (required) -> list | |
498 ARGS.output_types = ARGS.output_type.split(",") if ARGS.output_type else [] | |
499 # optional analysis output types -> list or empty | |
500 ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if ARGS.output_type_analysis else [] | |
501 | |
502 # Determine if sampling should be performed | |
503 if ARGS.sampling_enabled == "true": | |
504 perform_sampling = True | |
389 else: | 505 else: |
390 model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) | 506 perform_sampling = False |
391 | 507 |
392 #Set solver verbosity to 1 to see warning and error messages only. | 508 print("=== INPUT FILES ===") |
393 model.solver.configuration.verbosity = 1 | 509 print(f"{ARGS.input_files}") |
394 | 510 print(f"{ARGS.file_names}") |
395 ARGS.bounds = ARGS.input.split(",") | 511 print(f"{ARGS.output_type}") |
396 ARGS.bounds_name = ARGS.names.split(",") | 512 print(f"{ARGS.output_types}") |
397 ARGS.output_types = ARGS.output_type.split(",") | 513 print(f"{ARGS.output_type_analysis}") |
398 ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") | 514 print(f"Sampling enabled: {perform_sampling} (n_samples: {ARGS.n_samples})") |
399 | 515 |
400 | 516 if ARGS.model_and_bounds == "True": |
401 results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name)) | 517 # MODE 1: Model + bounds (separate files) |
402 | 518 print("=== MODE 1: Model + Bounds (separate files) ===") |
403 all_mean = pd.concat([result[0] for result in results], ignore_index=False) | 519 |
404 all_median = pd.concat([result[1] for result in results], ignore_index=False) | 520 # Load base model |
405 all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) | 521 if not ARGS.model_upload: |
406 | 522 sys.exit("Error: model_upload is required for Mode 1") |
407 if("mean" in ARGS.output_types): | 523 |
408 all_mean = all_mean.fillna(0.0) | 524 base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload) |
409 all_mean = all_mean.sort_index() | 525 |
410 write_to_file(all_mean.T, "mean", True) | 526 validation = model_utils.validate_model(base_model) |
411 | 527 |
412 if("median" in ARGS.output_types): | 528 print("\n=== MODEL VALIDATION ===") |
413 all_median = all_median.fillna(0.0) | 529 for key, value in validation.items(): |
414 all_median = all_median.sort_index() | 530 print(f"{key}: {value}") |
415 write_to_file(all_median.T, "median", True) | 531 |
416 | 532 # Set solver verbosity to 1 to see warning and error messages only. |
417 if("quantiles" in ARGS.output_types): | 533 base_model.solver.configuration.verbosity = 1 |
418 all_quantiles = all_quantiles.fillna(0.0) | 534 |
419 all_quantiles = all_quantiles.sort_index() | 535 # Process each bounds file with the base model |
420 write_to_file(all_quantiles.T, "quantiles", True) | 536 results = Parallel(n_jobs=num_processors)( |
421 | 537 delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name) |
422 index_result = 3 | 538 for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names) |
423 if("pFBA" in ARGS.output_type_analysis): | 539 ) |
540 | |
541 else: | |
542 # MODE 2: Multiple complete models | |
543 print("=== MODE 2: Multiple complete models ===") | |
544 | |
545 # Process each complete model file | |
546 results = Parallel(n_jobs=num_processors)( | |
547 delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name) | |
548 for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names) | |
549 ) | |
550 | |
551 # Handle sampling outputs (only if sampling was performed) | |
552 if perform_sampling: | |
553 print("=== PROCESSING SAMPLING RESULTS ===") | |
554 | |
555 all_mean = pd.concat([result[0] for result in results], ignore_index=False) | |
556 all_median = pd.concat([result[1] for result in results], ignore_index=False) | |
557 all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) | |
558 | |
559 if "mean" in ARGS.output_types: | |
560 all_mean = all_mean.fillna(0.0) | |
561 all_mean = all_mean.sort_index() | |
562 write_to_file(all_mean.T, "mean", True) | |
563 | |
564 if "median" in ARGS.output_types: | |
565 all_median = all_median.fillna(0.0) | |
566 all_median = all_median.sort_index() | |
567 write_to_file(all_median.T, "median", True) | |
568 | |
569 if "quantiles" in ARGS.output_types: | |
570 all_quantiles = all_quantiles.fillna(0.0) | |
571 all_quantiles = all_quantiles.sort_index() | |
572 write_to_file(all_quantiles.T, "quantiles", True) | |
573 else: | |
574 print("=== SAMPLING SKIPPED (n_samples = 0 or sampling disabled) ===") | |
575 | |
576 # Handle optimization analysis outputs (always available) | |
577 print("=== PROCESSING OPTIMIZATION RESULTS ===") | |
578 | |
579 # Determine the starting index for optimization results | |
580 # If sampling was performed, optimization results start at index 3 | |
581 # If no sampling, optimization results start at index 0 | |
582 index_result = 3 if perform_sampling else 0 | |
583 | |
584 if "pFBA" in ARGS.output_type_analysis: | |
424 all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False) | 585 all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False) |
425 all_pFBA = all_pFBA.sort_index() | 586 all_pFBA = all_pFBA.sort_index() |
426 write_to_file(all_pFBA.T, "pFBA", True) | 587 write_to_file(all_pFBA.T, "pFBA", True) |
427 index_result+=1 | 588 index_result += 1 |
428 if("FVA" in ARGS.output_type_analysis): | 589 |
429 all_FVA= pd.concat([result[index_result] for result in results], ignore_index=False) | 590 if "FVA" in ARGS.output_type_analysis: |
591 all_FVA = pd.concat([result[index_result] for result in results], ignore_index=False) | |
430 all_FVA = all_FVA.sort_index() | 592 all_FVA = all_FVA.sort_index() |
431 write_to_file(all_FVA.T, "FVA", True) | 593 write_to_file(all_FVA.T, "FVA", True) |
432 index_result+=1 | 594 index_result += 1 |
433 if("sensitivity" in ARGS.output_type_analysis): | 595 |
596 if "sensitivity" in ARGS.output_type_analysis: | |
434 all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False) | 597 all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False) |
435 all_sensitivity = all_sensitivity.sort_index() | 598 all_sensitivity = all_sensitivity.sort_index() |
436 write_to_file(all_sensitivity.T, "sensitivity", True) | 599 write_to_file(all_sensitivity.T, "sensitivity", True) |
437 | 600 |
438 pass | 601 return |
439 | 602 |
440 ############################################################################## | 603 ############################################################################## |
441 if __name__ == "__main__": | 604 if __name__ == "__main__": |
442 main() | 605 main() |