Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_to_bounds_beta.py @ 456:a6e45049c1b9 draft
Uploaded
author | francesco_lapi |
---|---|
date | Fri, 12 Sep 2025 17:28:45 +0000 |
parents | 0485c4b1943d |
children |
comparison
equal
deleted
inserted
replaced
455:4e2bc80764b6 | 456:a6e45049c1b9 |
---|---|
| 1 """ |
| 2 Apply RAS-based scaling to reaction bounds and optionally save updated models. |
| 3 |
| 4 Workflow: |
| 5 - Read one or more RAS matrices (patients/samples x reactions) |
| 6 - Normalize and merge them, optionally adding class suffixes to sample IDs |
| 7 - Build a COBRA model from a tabular CSV |
| 8 - Run FVA to initialize bounds, then scale per-sample based on RAS values |
| 9 - Save bounds per sample and optionally export updated models in chosen formats |
| 10 """ |
1 import argparse | 11 import argparse |
2 import utils.general_utils as utils | 12 import utils.general_utils as utils |
3 from typing import Optional, Dict, Set, List, Tuple, Union | 13 from typing import Optional, Dict, Set, List, Tuple, Union |
4 import os | 14 import os |
5 import numpy as np | 15 import numpy as np |
6 import pandas as pd | 16 import pandas as pd |
7 import cobra | 17 import cobra |
8 from cobra import Model, Reaction, Metabolite | 18 from cobra import Model |
9 import re | |
10 import sys | 19 import sys |
11 import csv | |
12 from joblib import Parallel, delayed, cpu_count | 20 from joblib import Parallel, delayed, cpu_count |
13 import utils.rule_parsing as rulesUtils | |
14 import utils.reaction_parsing as reactionUtils | |
15 import utils.model_utils as modelUtils | 21 import utils.model_utils as modelUtils |
16 | 22 |
17 ################################# process args ############################### | 23 ################################# process args ############################### |
18 def process_args(args :List[str] = None) -> argparse.Namespace: | 24 def process_args(args :List[str] = None) -> argparse.Namespace: |
19 """ | 25 """ |
179 | 185 |
180 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) | 186 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) |
181 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) | 187 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) |
182 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) | 188 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) |
183 df_medium = medium.rename(columns = {"reaction": "ReactionID"}) | 189 df_medium = medium.rename(columns = {"reaction": "ReactionID"}) |
184 df_medium["InMedium"] = True # flag per indicare la presenza nel medium | 190 df_medium["InMedium"] = True |
185 | 191 |
186 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") | 192 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") |
187 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") | 193 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") |
188 | 194 |
189 # Add compartments only if they exist and model name is ENGRO2 | 195 # Add compartments only if they exist and model name is ENGRO2 |
262 # Save model if requested | 268 # Save model if requested |
263 if save_models: | 269 if save_models: |
264 modified_model = apply_bounds_to_model(model, new_bounds) | 270 modified_model = apply_bounds_to_model(model, new_bounds) |
265 save_model(modified_model, cellName, save_models_path, save_models_format) | 271 save_model(modified_model, cellName, save_models_path, save_models_format) |
266 | 272 |
267 pass | 273 return |
268 | 274 |
269 def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: | 275 def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: |
270 """ | 276 """ |
271 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. | 277 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. |
272 | 278 |
296 cellName, ras_row, model, rxns_ids, output_folder, | 302 cellName, ras_row, model, rxns_ids, output_folder, |
297 save_models, save_models_path, save_models_format | 303 save_models, save_models_path, save_models_format |
298 ) for cellName, ras_row in ras.iterrows()) | 304 ) for cellName, ras_row in ras.iterrows()) |
299 else: | 305 else: |
300 raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.") | 306 raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.") |
301 pass | 307 return |
302 | 308 |
303 ############################# main ########################################### | 309 ############################# main ########################################### |
304 def main(args:List[str] = None) -> None: | 310 def main(args:List[str] = None) -> None: |
305 """ | 311 """ |
306 Initializes everything and sets the program in motion based on the fronted input arguments. | 312 Initialize and execute RAS-to-bounds pipeline based on the frontend input arguments. |
307 | 313 |
308 Returns: | 314 Returns: |
309 None | 315 None |
310 """ | 316 """ |
311 if not os.path.exists('ras_to_bounds'): | 317 if not os.path.exists('ras_to_bounds'): |
319 ras_file_names = ARGS.name.split(",") | 325 ras_file_names = ARGS.name.split(",") |
320 if len(ras_file_names) != len(set(ras_file_names)): | 326 if len(ras_file_names) != len(set(ras_file_names)): |
321 error_message = "Duplicated file names in the uploaded RAS matrices." | 327 error_message = "Duplicated file names in the uploaded RAS matrices." |
322 warning(error_message) | 328 warning(error_message) |
323 raise ValueError(error_message) | 329 raise ValueError(error_message) |
324 pass | 330 |
325 ras_class_names = [] | 331 ras_class_names = [] |
326 for file in ras_file_names: | 332 for file in ras_file_names: |
327 ras_class_names.append(file.rsplit(".", 1)[0]) | 333 ras_class_names.append(file.rsplit(".", 1)[0]) |
328 ras_list = [] | 334 ras_list = [] |
329 class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"]) | 335 class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"]) |
332 ras.replace("None", None, inplace=True) | 338 ras.replace("None", None, inplace=True) |
333 ras.set_index("Reactions", drop=True, inplace=True) | 339 ras.set_index("Reactions", drop=True, inplace=True) |
334 ras = ras.T | 340 ras = ras.T |
335 ras = ras.astype(float) | 341 ras = ras.astype(float) |
336 if(len(ras_file_list)>1): | 342 if(len(ras_file_list)>1): |
337 #append class name to patient id (dataframe index) | 343 # Append class name to patient id (DataFrame index) |
338 ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index] | 344 ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index] |
339 else: | 345 else: |
340 ras.index = [f"{idx}" for idx in ras.index] | 346 ras.index = [f"{idx}" for idx in ras.index] |
341 ras_list.append(ras) | 347 ras_list.append(ras) |
342 for patient_id in ras.index: | 348 for patient_id in ras.index: |
343 class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name] | 349 class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name] |
344 | 350 |
345 | 351 |
346 # Concatenate all ras DataFrames into a single DataFrame | 352 # Concatenate all RAS DataFrames into a single DataFrame |
347 ras_combined = pd.concat(ras_list, axis=0) | 353 ras_combined = pd.concat(ras_list, axis=0) |
348 # Normalize the RAS values by max RAS | 354 # Normalize RAS values column-wise by max RAS |
349 ras_combined = ras_combined.div(ras_combined.max(axis=0)) | 355 ras_combined = ras_combined.div(ras_combined.max(axis=0)) |
350 ras_combined.dropna(axis=1, how='all', inplace=True) | 356 ras_combined.dropna(axis=1, how='all', inplace=True) |
351 | 357 |
352 model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload) | 358 model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload) |
353 | 359 |
354 validation = modelUtils.validate_model(model) | 360 validation = modelUtils.validate_model(model) |
355 | 361 |
356 print("\n=== VALIDAZIONE MODELLO ===") | 362 print("\n=== MODEL VALIDATION ===") |
357 for key, value in validation.items(): | 363 for key, value in validation.items(): |
358 print(f"{key}: {value}") | 364 print(f"{key}: {value}") |
359 | 365 |
360 | 366 |
361 generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path, | 367 generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path, |
362 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, | 368 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, |
363 save_models_format=ARGS.save_models_format) | 369 save_models_format=ARGS.save_models_format) |
364 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) | 370 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) |
365 | 371 |
366 | 372 |
367 pass | 373 return |
368 | 374 |
369 ############################################################################## | 375 ############################################################################## |
370 if __name__ == "__main__": | 376 if __name__ == "__main__": |
371 main() | 377 main() |