comparison COBRAxy/ras_to_bounds_beta.py @ 456:a6e45049c1b9 draft

Uploaded
author francesco_lapi
date Fri, 12 Sep 2025 17:28:45 +0000
parents 0485c4b1943d
children
comparison
equal deleted inserted replaced
455:4e2bc80764b6 456:a6e45049c1b9
1 """
2 Apply RAS-based scaling to reaction bounds and optionally save updated models.
3
4 Workflow:
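5 - Read one or more RAS matrices (a "Reactions" column plus one column per patient/sample) and transpose them to samples x reactions
6 - Merge them, optionally adding class suffixes to sample IDs, then normalize each reaction column by its maximum RAS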
7 - Build a COBRA model from a tabular CSV
8 - Run FVA to initialize bounds, then scale per-sample based on RAS values
9 - Save bounds per sample and optionally export updated models in chosen formats
10 """
1 import argparse 11 import argparse
2 import utils.general_utils as utils 12 import utils.general_utils as utils
3 from typing import Optional, Dict, Set, List, Tuple, Union 13 from typing import Optional, Dict, Set, List, Tuple, Union
4 import os 14 import os
5 import numpy as np 15 import numpy as np
6 import pandas as pd 16 import pandas as pd
7 import cobra 17 import cobra
8 from cobra import Model, Reaction, Metabolite 18 from cobra import Model
9 import re
10 import sys 19 import sys
11 import csv
12 from joblib import Parallel, delayed, cpu_count 20 from joblib import Parallel, delayed, cpu_count
13 import utils.rule_parsing as rulesUtils
14 import utils.reaction_parsing as reactionUtils
15 import utils.model_utils as modelUtils 21 import utils.model_utils as modelUtils
16 22
17 ################################# process args ############################### 23 ################################# process args ###############################
18 def process_args(args :List[str] = None) -> argparse.Namespace: 24 def process_args(args :List[str] = None) -> argparse.Namespace:
19 """ 25 """
179 185
180 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) 186 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"])
181 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) 187 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"])
182 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) 188 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
183 df_medium = medium.rename(columns = {"reaction": "ReactionID"}) 189 df_medium = medium.rename(columns = {"reaction": "ReactionID"})
184 df_medium["InMedium"] = True # flag per indicare la presenza nel medium 190 df_medium["InMedium"] = True
185 191
186 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") 192 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
187 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") 193 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")
188 194
189 # Add compartments only if they exist and model name is ENGRO2 195 # Add compartments only if they exist and model name is ENGRO2
262 # Save model if requested 268 # Save model if requested
263 if save_models: 269 if save_models:
264 modified_model = apply_bounds_to_model(model, new_bounds) 270 modified_model = apply_bounds_to_model(model, new_bounds)
265 save_model(modified_model, cellName, save_models_path, save_models_format) 271 save_model(modified_model, cellName, save_models_path, save_models_format)
266 272
267 pass 273 return
268 274
269 def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: 275 def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame:
270 """ 276 """
271 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. 277 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments.
272 278
296 cellName, ras_row, model, rxns_ids, output_folder, 302 cellName, ras_row, model, rxns_ids, output_folder,
297 save_models, save_models_path, save_models_format 303 save_models, save_models_path, save_models_format
298 ) for cellName, ras_row in ras.iterrows()) 304 ) for cellName, ras_row in ras.iterrows())
299 else: 305 else:
300 raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.") 306 raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.")
301 pass 307 return
302 308
303 ############################# main ########################################### 309 ############################# main ###########################################
304 def main(args:List[str] = None) -> None: 310 def main(args:List[str] = None) -> None:
305 """ 311 """
306 Initializes everything and sets the program in motion based on the fronted input arguments. 312 Initialize and execute RAS-to-bounds pipeline based on the frontend input arguments.
307 313
308 Returns: 314 Returns:
309 None 315 None
310 """ 316 """
311 if not os.path.exists('ras_to_bounds'): 317 if not os.path.exists('ras_to_bounds'):
319 ras_file_names = ARGS.name.split(",") 325 ras_file_names = ARGS.name.split(",")
320 if len(ras_file_names) != len(set(ras_file_names)): 326 if len(ras_file_names) != len(set(ras_file_names)):
321 error_message = "Duplicated file names in the uploaded RAS matrices." 327 error_message = "Duplicated file names in the uploaded RAS matrices."
322 warning(error_message) 328 warning(error_message)
323 raise ValueError(error_message) 329 raise ValueError(error_message)
324 pass 330
325 ras_class_names = [] 331 ras_class_names = []
326 for file in ras_file_names: 332 for file in ras_file_names:
327 ras_class_names.append(file.rsplit(".", 1)[0]) 333 ras_class_names.append(file.rsplit(".", 1)[0])
328 ras_list = [] 334 ras_list = []
329 class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"]) 335 class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"])
332 ras.replace("None", None, inplace=True) 338 ras.replace("None", None, inplace=True)
333 ras.set_index("Reactions", drop=True, inplace=True) 339 ras.set_index("Reactions", drop=True, inplace=True)
334 ras = ras.T 340 ras = ras.T
335 ras = ras.astype(float) 341 ras = ras.astype(float)
336 if(len(ras_file_list)>1): 342 if(len(ras_file_list)>1):
337 #append class name to patient id (dataframe index) 343 # Append class name to patient id (DataFrame index)
338 ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index] 344 ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index]
339 else: 345 else:
340 ras.index = [f"{idx}" for idx in ras.index] 346 ras.index = [f"{idx}" for idx in ras.index]
341 ras_list.append(ras) 347 ras_list.append(ras)
342 for patient_id in ras.index: 348 for patient_id in ras.index:
343 class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name] 349 class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name]
344 350
345 351
346 # Concatenate all ras DataFrames into a single DataFrame 352 # Concatenate all RAS DataFrames into a single DataFrame
347 ras_combined = pd.concat(ras_list, axis=0) 353 ras_combined = pd.concat(ras_list, axis=0)
348 # Normalize the RAS values by max RAS 354 # Normalize RAS values column-wise by max RAS
349 ras_combined = ras_combined.div(ras_combined.max(axis=0)) 355 ras_combined = ras_combined.div(ras_combined.max(axis=0))
350 ras_combined.dropna(axis=1, how='all', inplace=True) 356 ras_combined.dropna(axis=1, how='all', inplace=True)
351 357
352 model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload) 358 model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload)
353 359
354 validation = modelUtils.validate_model(model) 360 validation = modelUtils.validate_model(model)
355 361
356 print("\n=== VALIDAZIONE MODELLO ===") 362 print("\n=== MODEL VALIDATION ===")
357 for key, value in validation.items(): 363 for key, value in validation.items():
358 print(f"{key}: {value}") 364 print(f"{key}: {value}")
359 365
360 366
361 generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path, 367 generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path,
362 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, 368 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
363 save_models_format=ARGS.save_models_format) 369 save_models_format=ARGS.save_models_format)
364 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) 370 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
365 371
366 372
367 pass 373 return
368 374
369 ############################################################################## 375 ##############################################################################
370 if __name__ == "__main__": 376 if __name__ == "__main__":
371 main() 377 main()