comparison COBRAxy/src/ras_to_bounds.py @ 539:2fb97466e404 draft

Uploaded
author francesco_lapi
date Sat, 25 Oct 2025 14:55:13 +0000
parents
children fcdbc81feb45
comparison
equal deleted inserted replaced
538:fd53d42348bd 539:2fb97466e404
1 """
2 Apply RAS-based scaling to reaction bounds and optionally save updated models.
3
4 Workflow:
5 - Read one or more RAS matrices (patients/samples x reactions)
6 - Normalize and merge them, optionally adding class suffixes to sample IDs
7 - Build a COBRA model from a tabular CSV
8 - Run FVA to initialize bounds, then scale per-sample based on RAS values
9 - Save bounds per sample and optionally export updated models in chosen formats
10 """
11 import argparse
12 import utils.general_utils as utils
13 from typing import Optional, Dict, Set, List, Tuple, Union
14 import os
15 import numpy as np
16 import pandas as pd
17 import cobra
18 from cobra import Model
19 import sys
20 from joblib import Parallel, delayed, cpu_count
21 import utils.model_utils as modelUtils
22
23 ################################# process args ###############################
24 def process_args(args :List[str] = None) -> argparse.Namespace:
25 """
26 Processes command-line arguments.
27
28 Args:
29 args (list): List of command-line arguments.
30
31 Returns:
32 Namespace: An object containing parsed arguments.
33 """
34 parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
35 description = 'process some value\'s')
36
37
38 parser.add_argument("-mo", "--model_upload", type = str,
39 help = "path to input file with custom rules, if provided")
40
41 parser.add_argument('-ol', '--out_log',
42 help = "Output log")
43
44 parser.add_argument('-td', '--tool_dir',
45 type = str,
46 required = True,
47 help = 'your tool directory')
48
49 parser.add_argument('-ir', '--input_ras',
50 type=str,
51 required = False,
52 help = 'input ras')
53
54 parser.add_argument('-rn', '--name',
55 type=str,
56 help = 'ras class names')
57
58 parser.add_argument('-cc', '--cell_class',
59 type = str,
60 help = 'output of cell class')
61 parser.add_argument(
62 '-idop', '--output_path',
63 type = str,
64 default='ras_to_bounds/',
65 help = 'output path for maps')
66
67 parser.add_argument('-sm', '--save_models',
68 type=utils.Bool("save_models"),
69 default=False,
70 help = 'whether to save models with applied bounds')
71
72 parser.add_argument('-smp', '--save_models_path',
73 type = str,
74 default='saved_models/',
75 help = 'output path for saved models')
76
77 parser.add_argument('-smf', '--save_models_format',
78 type = str,
79 default='csv',
80 help = 'format for saved models (csv, xml, json, mat, yaml, tabular)')
81
82
83 ARGS = parser.parse_args(args)
84 return ARGS
85
86 ########################### warning ###########################################
87 def warning(s :str) -> None:
88 """
89 Log a warning message to an output log file and print it to the console.
90
91 Args:
92 s (str): The warning message to be logged and printed.
93
94 Returns:
95 None
96 """
97 if ARGS.out_log:
98 with open(ARGS.out_log, 'a') as log:
99 log.write(s + "\n\n")
100 print(s)
101
102 ############################ dataset input ####################################
103 def read_dataset(data :str, name :str) -> pd.DataFrame:
104 """
105 Read a dataset from a CSV file and return it as a pandas DataFrame.
106
107 Args:
108 data (str): Path to the CSV file containing the dataset.
109 name (str): Name of the dataset, used in error messages.
110
111 Returns:
112 pandas.DataFrame: DataFrame containing the dataset.
113
114 Raises:
115 pd.errors.EmptyDataError: If the CSV file is empty.
116 sys.exit: If the CSV file has the wrong format, the execution is aborted.
117 """
118 try:
119 dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python')
120 except pd.errors.EmptyDataError:
121 sys.exit('Execution aborted: wrong format of ' + name + '\n')
122 if len(dataset.columns) < 2:
123 sys.exit('Execution aborted: wrong format of ' + name + '\n')
124 return dataset
125
126
127 def apply_ras_bounds(bounds, ras_row):
128 """
129 Adjust the bounds of reactions in the model based on RAS values.
130
131 Args:
132 bounds (pd.DataFrame): Model bounds.
133 ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds.
134 Returns:
135 new_bounds (pd.DataFrame): integrated bounds.
136 """
137 new_bounds = bounds.copy()
138 for reaction in ras_row.index:
139 scaling_factor = ras_row[reaction]
140 if not np.isnan(scaling_factor):
141 lower_bound=bounds.loc[reaction, "lower_bound"]
142 upper_bound=bounds.loc[reaction, "upper_bound"]
143 valMax=float((upper_bound)*scaling_factor)
144 valMin=float((lower_bound)*scaling_factor)
145 if upper_bound!=0 and lower_bound==0:
146 new_bounds.loc[reaction, "upper_bound"] = valMax
147 if upper_bound==0 and lower_bound!=0:
148 new_bounds.loc[reaction, "lower_bound"] = valMin
149 if upper_bound!=0 and lower_bound!=0:
150 new_bounds.loc[reaction, "lower_bound"] = valMin
151 new_bounds.loc[reaction, "upper_bound"] = valMax
152 return new_bounds
153
154
155 def save_model(model, filename, output_folder, file_format='csv'):
156 """
157 Save a COBRA model to file in the specified format.
158
159 Args:
160 model (cobra.Model): The model to save.
161 filename (str): Base filename (without extension).
162 output_folder (str): Output directory.
163 file_format (str): File format ('xml', 'json', 'mat', 'yaml', 'tabular', 'csv').
164
165 Returns:
166 None
167 """
168 if not os.path.exists(output_folder):
169 os.makedirs(output_folder)
170
171 try:
172 if file_format == 'tabular' or file_format == 'csv':
173 # Special handling for tabular format using utils functions
174 filepath = os.path.join(output_folder, f"{filename}.csv")
175
176 # Use unified function for tabular export
177 merged = modelUtils.export_model_to_tabular(
178 model=model,
179 output_path=filepath,
180 include_objective=True
181 )
182
183 else:
184 # Standard COBRA formats
185 filepath = os.path.join(output_folder, f"{filename}.{file_format}")
186
187 if file_format == 'xml':
188 cobra.io.write_sbml_model(model, filepath)
189 elif file_format == 'json':
190 cobra.io.save_json_model(model, filepath)
191 elif file_format == 'mat':
192 cobra.io.save_matlab_model(model, filepath)
193 elif file_format == 'yaml':
194 cobra.io.save_yaml_model(model, filepath)
195 else:
196 raise ValueError(f"Unsupported format: {file_format}")
197
198 print(f"Model saved: {filepath}")
199
200 except Exception as e:
201 warning(f"Error saving model {filename}: {str(e)}")
202
203 def apply_bounds_to_model(model, bounds):
204 """
205 Apply bounds from a DataFrame to a COBRA model.
206
207 Args:
208 model (cobra.Model): The metabolic model to modify.
209 bounds (pd.DataFrame): DataFrame with reaction bounds.
210
211 Returns:
212 cobra.Model: Modified model with new bounds.
213 """
214 model_copy = model.copy()
215 for reaction_id in bounds.index:
216 try:
217 reaction = model_copy.reactions.get_by_id(reaction_id)
218 reaction.lower_bound = bounds.loc[reaction_id, "lower_bound"]
219 reaction.upper_bound = bounds.loc[reaction_id, "upper_bound"]
220 except KeyError:
221 # Reaction not found in model, skip
222 continue
223 return model_copy
224
225 def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder, save_models=False, save_models_path='saved_models/', save_models_format='csv'):
226 """
227 Process a single RAS cell, apply bounds, and save the bounds to a CSV file.
228
229 Args:
230 cellName (str): The name of the RAS cell (used for naming the output file).
231 ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds.
232 model (cobra.Model): The metabolic model to be modified.
233 rxns_ids (list of str): List of reaction IDs to which the scaling factors will be applied.
234 output_folder (str): Folder path where the output CSV file will be saved.
235 save_models (bool): Whether to save models with applied bounds.
236 save_models_path (str): Path where to save models.
237 save_models_format (str): Format for saved models.
238
239 Returns:
240 None
241 """
242 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
243 new_bounds = apply_ras_bounds(bounds, ras_row)
244 new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True)
245
246 # Save model if requested
247 if save_models:
248 modified_model = apply_bounds_to_model(model, new_bounds)
249 save_model(modified_model, cellName, save_models_path, save_models_format)
250
251 return
252
253 def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame:
254 """
255 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments.
256
257 Args:
258 model (cobra.Model): The metabolic model for which bounds will be generated.
259 ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None.
260 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'.
261 save_models (bool): Whether to save models with applied bounds.
262 save_models_path (str): Path where to save models.
263 save_models_format (str): Format for saved models.
264
265 Returns:
266 pd.DataFrame: DataFrame containing the bounds of reactions in the model.
267 """
268 rxns_ids = [rxn.id for rxn in model.reactions]
269
270 # Perform Flux Variability Analysis (FVA) on this medium
271 df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8)
272
273 # Set FVA bounds
274 for reaction in rxns_ids:
275 model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"])
276 model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"])
277
278 if ras is not None:
279 Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(
280 cellName, ras_row, model, rxns_ids, output_folder,
281 save_models, save_models_path, save_models_format
282 ) for cellName, ras_row in ras.iterrows())
283 else:
284 raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.")
285 return
286
287 ############################# main ###########################################
288 def main(args:List[str] = None) -> None:
289 """
290 Initialize and execute RAS-to-bounds pipeline based on the frontend input arguments.
291
292 Returns:
293 None
294 """
295 if not os.path.exists('ras_to_bounds'):
296 os.makedirs('ras_to_bounds')
297
298 global ARGS
299 ARGS = process_args(args)
300
301
302 ras_file_list = ARGS.input_ras.split(",")
303 ras_file_names = ARGS.name.split(",")
304 if len(ras_file_names) != len(set(ras_file_names)):
305 error_message = "Duplicated file names in the uploaded RAS matrices."
306 warning(error_message)
307 raise ValueError(error_message)
308
309 ras_class_names = []
310 for file in ras_file_names:
311 ras_class_names.append(file.rsplit(".", 1)[0])
312 ras_list = []
313 class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
314 for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names):
315 ras = read_dataset(ras_matrix, "ras dataset")
316 ras.replace("None", None, inplace=True)
317 ras.set_index("Reactions", drop=True, inplace=True)
318 ras = ras.T
319 ras = ras.astype(float)
320 if(len(ras_file_list)>1):
321 # Append class name to patient id (DataFrame index)
322 ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
323 else:
324 ras.index = [f"{idx}" for idx in ras.index]
325 ras_list.append(ras)
326 for patient_id in ras.index:
327 class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
328
329
330 # Concatenate all RAS DataFrames into a single DataFrame
331 ras_combined = pd.concat(ras_list, axis=0)
332 # Normalize RAS values column-wise by max RAS
333 ras_combined = ras_combined.div(ras_combined.max(axis=0))
334 ras_combined.dropna(axis=1, how='all', inplace=True)
335
336 model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload)
337
338 validation = modelUtils.validate_model(model)
339
340 print("\n=== MODEL VALIDATION ===")
341 for key, value in validation.items():
342 print(f"{key}: {value}")
343
344
345 generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path,
346 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
347 save_models_format=ARGS.save_models_format)
348 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
349
350
351 return
352
353 ##############################################################################
354 if __name__ == "__main__":
355 main()