Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_to_bounds_beta.py @ 407:6619f237aebe draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 08 Sep 2025 16:52:46 +0000 |
| parents | 187cee1a00e2 |
| children | f413b78d61bf |
comparison
equal
deleted
inserted
replaced
| 406:187cee1a00e2 | 407:6619f237aebe |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import utils.general_utils as utils | 2 import utils.general_utils as utils |
| 3 from typing import Optional, List | 3 from typing import Optional, Dict, Set, List, Tuple |
| 4 import os | 4 import os |
| 5 import numpy as np | 5 import numpy as np |
| 6 import pandas as pd | 6 import pandas as pd |
| 7 import cobra | 7 import cobra |
| 8 from cobra import Model, Reaction, Metabolite | |
| 9 import re | |
| 8 import sys | 10 import sys |
| 9 import csv | 11 import csv |
| 10 from joblib import Parallel, delayed, cpu_count | 12 from joblib import Parallel, delayed, cpu_count |
| 11 | 13 |
| 14 # , medium | |
| 15 | |
| 12 ################################# process args ############################### | 16 ################################# process args ############################### |
| 13 def process_args(args :List[str] = None) -> argparse.Namespace: | 17 def process_args(args :List[str] = None) -> argparse.Namespace: |
| 14 """ | 18 """ |
| 15 Processes command-line arguments. | 19 Processes command-line arguments. |
| 16 | 20 |
| 21 Namespace: An object containing parsed arguments. | 25 Namespace: An object containing parsed arguments. |
| 22 """ | 26 """ |
| 23 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 27 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', |
| 24 description = 'process some value\'s') | 28 description = 'process some value\'s') |
| 25 | 29 |
| 26 parser.add_argument( | 30 |
| 27 '-ms', '--model_selector', | 31 parser.add_argument("-mo", "--model_upload", type = str, |
| 28 type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom], | |
| 29 help = 'chose which type of model you want use') | |
| 30 | |
| 31 parser.add_argument("-mo", "--model", type = str, | |
| 32 help = "path to input file with custom rules, if provided") | 32 help = "path to input file with custom rules, if provided") |
| 33 | |
| 34 parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name") | |
| 35 | |
| 36 parser.add_argument( | |
| 37 '-mes', '--medium_selector', | |
| 38 default = "allOpen", | |
| 39 help = 'chose which type of medium you want use') | |
| 40 | 33 |
| 41 parser.add_argument("-meo", "--medium", type = str, | 34 parser.add_argument("-meo", "--medium", type = str, |
| 42 help = "path to input file with custom medium, if provided") | 35 help = "path to input file with custom medium, if provided") |
| 43 | 36 |
| 44 parser.add_argument('-ol', '--out_log', | 37 parser.add_argument('-ol', '--out_log', |
| 160 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) | 153 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) |
| 161 new_bounds = apply_ras_bounds(bounds, ras_row) | 154 new_bounds = apply_ras_bounds(bounds, ras_row) |
| 162 new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True) | 155 new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True) |
| 163 pass | 156 pass |
| 164 | 157 |
| 165 def generate_bounds(model: cobra.Model, medium: dict, ras=None, output_folder='output/') -> pd.DataFrame: | 158 def generate_bounds(model: cobra.Model, ras=None, output_folder='output/') -> pd.DataFrame: |
| 166 """ | 159 """ |
| 167 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. | 160 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. |
| 168 | 161 |
| 169 Args: | 162 Args: |
| 170 model (cobra.Model): The metabolic model for which bounds will be generated. | 163 model (cobra.Model): The metabolic model for which bounds will be generated. |
| 173 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. | 166 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. |
| 174 | 167 |
| 175 Returns: | 168 Returns: |
| 176 pd.DataFrame: DataFrame containing the bounds of reactions in the model. | 169 pd.DataFrame: DataFrame containing the bounds of reactions in the model. |
| 177 """ | 170 """ |
| 178 rxns_ids = [rxn.id for rxn in model.reactions] | 171 rxns_ids = [rxn.id for rxn in model.reactions] |
| 179 | |
| 180 # Set all reactions to zero in the medium | |
| 181 for rxn_id, _ in model.medium.items(): | |
| 182 model.reactions.get_by_id(rxn_id).lower_bound = float(0.0) | |
| 183 | |
| 184 # Set medium conditions | |
| 185 for reaction, value in medium.items(): | |
| 186 if value is not None: | |
| 187 model.reactions.get_by_id(reaction).lower_bound = -float(value) | |
| 188 | |
| 189 | 172 |
| 190 # Perform Flux Variability Analysis (FVA) on this medium | 173 # Perform Flux Variability Analysis (FVA) on this medium |
| 191 df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) | 174 df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) |
| 192 | 175 |
| 193 # Set FVA bounds | 176 # Set FVA bounds |
| 201 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) | 184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) |
| 202 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) | 185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) |
| 203 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) | 186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) |
| 204 pass | 187 pass |
| 205 | 188 |
| 189 # TODO: VALUTARE QUALI DI QUESTE FUNZIONI METTERE IN UTILS.PY | |
| 190 def build_cobra_model_from_csv(csv_path: str, model_id: str = "ENGRO2_custom") -> cobra.Model: | |
| 191 """ | |
| 192 Costruisce un modello COBRApy a partire da un file CSV con i dati delle reazioni. | |
| 193 | |
| 194 Args: | |
| 195 csv_path: Path al file CSV (separato da tab) | |
| 196 model_id: ID del modello da creare | |
| 197 | |
| 198 Returns: | |
| 199 cobra.Model: Il modello COBRApy costruito | |
| 200 """ | |
| 201 | |
| 202 # Leggi i dati dal CSV | |
| 203 df = pd.read_csv(csv_path, sep='\t') | |
| 204 | |
| 205 # Crea il modello vuoto | |
| 206 model = Model(model_id) | |
| 207 | |
| 208 # Dict per tenere traccia di metaboliti e compartimenti | |
| 209 metabolites_dict = {} | |
| 210 compartments_dict = {} | |
| 211 | |
| 212 print(f"Costruendo modello da {len(df)} reazioni...") | |
| 213 | |
| 214 # Prima passata: estrai metaboliti e compartimenti dalle formule delle reazioni | |
| 215 for idx, row in df.iterrows(): | |
| 216 reaction_formula = str(row['Reaction']).strip() | |
| 217 if not reaction_formula or reaction_formula == 'nan': | |
| 218 continue | |
| 219 | |
| 220 # Estrai metaboliti dalla formula della reazione | |
| 221 metabolites = extract_metabolites_from_reaction(reaction_formula) | |
| 222 | |
| 223 for met_id in metabolites: | |
| 224 compartment = extract_compartment_from_metabolite(met_id) | |
| 225 | |
| 226 # Aggiungi compartimento se non esiste | |
| 227 if compartment not in compartments_dict: | |
| 228 compartments_dict[compartment] = compartment | |
| 229 | |
| 230 # Aggiungi metabolita se non esiste | |
| 231 if met_id not in metabolites_dict: | |
| 232 metabolites_dict[met_id] = Metabolite( | |
| 233 id=met_id, | |
| 234 compartment=compartment, | |
| 235 name=met_id.replace(f"_{compartment}", "").replace("__", "_") | |
| 236 ) | |
| 237 | |
| 238 # Aggiungi compartimenti al modello | |
| 239 model.compartments = compartments_dict | |
| 240 | |
| 241 # Aggiungi metaboliti al modello | |
| 242 model.add_metabolites(list(metabolites_dict.values())) | |
| 243 | |
| 244 print(f"Aggiunti {len(metabolites_dict)} metaboliti e {len(compartments_dict)} compartimenti") | |
| 245 | |
| 246 # Seconda passata: aggiungi le reazioni | |
| 247 reactions_added = 0 | |
| 248 reactions_skipped = 0 | |
| 249 | |
| 250 for idx, row in df.iterrows(): | |
| 251 try: | |
| 252 reaction_id = str(row['ReactionID']).strip() | |
| 253 if reaction_id == 'EX_thbpt_e': | |
| 254 print('qui') | |
| 255 print(reaction_id) | |
| 256 print(str(row['Reaction']).strip()) | |
| 257 print('qui') | |
| 258 reaction_formula = str(row['Reaction']).strip() | |
| 259 | |
| 260 # Salta reazioni senza formula | |
| 261 if not reaction_formula or reaction_formula == 'nan': | |
| 262 reactions_skipped += 1 | |
| 263 continue | |
| 264 | |
| 265 # Crea la reazione | |
| 266 reaction = Reaction(reaction_id) | |
| 267 reaction.name = reaction_id | |
| 268 | |
| 269 # Imposta bounds | |
| 270 reaction.lower_bound = float(row['lower_bound']) if pd.notna(row['lower_bound']) else -1000.0 | |
| 271 reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0 | |
| 272 | |
| 273 # Aggiungi gene rule se presente | |
| 274 if pd.notna(row['Rule']) and str(row['Rule']).strip(): | |
| 275 reaction.gene_reaction_rule = str(row['Rule']).strip() | |
| 276 | |
| 277 # Parse della formula della reazione | |
| 278 try: | |
| 279 parse_reaction_formula(reaction, reaction_formula, metabolites_dict) | |
| 280 except Exception as e: | |
| 281 print(f"Errore nel parsing della reazione {reaction_id}: {e}") | |
| 282 reactions_skipped += 1 | |
| 283 continue | |
| 284 | |
| 285 # Aggiungi la reazione al modello | |
| 286 model.add_reactions([reaction]) | |
| 287 reactions_added += 1 | |
| 288 | |
| 289 except Exception as e: | |
| 290 print(f"Errore nell'aggiungere la reazione {reaction_id}: {e}") | |
| 291 reactions_skipped += 1 | |
| 292 continue | |
| 293 | |
| 294 print(f"Aggiunte {reactions_added} reazioni, saltate {reactions_skipped} reazioni") | |
| 295 | |
| 296 # Imposta l'obiettivo di biomassa | |
| 297 set_biomass_objective(model) | |
| 298 | |
| 299 # Imposta il medium | |
| 300 set_medium_from_data(model, df) | |
| 301 | |
| 302 print(f"Modello completato: {len(model.reactions)} reazioni, {len(model.metabolites)} metaboliti") | |
| 303 | |
| 304 return model | |
| 305 | |
| 306 | |
| 307 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore) | |
| 308 def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]: | |
| 309 """ | |
| 310 Estrae gli ID dei metaboliti da una formula di reazione. | |
| 311 Pattern robusto: cattura token che terminano con _<compartimento> (es. _c, _m, _e) | |
| 312 e permette che comincino con cifre o underscore. | |
| 313 """ | |
| 314 metabolites = set() | |
| 315 # coefficiente opzionale seguito da un token che termina con _<letters> | |
| 316 pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[a-z]+)' | |
| 317 matches = re.findall(pattern, reaction_formula) | |
| 318 metabolites.update(matches) | |
| 319 return metabolites | |
| 320 | |
| 321 | |
| 322 def extract_compartment_from_metabolite(metabolite_id: str) -> str: | |
| 323 """ | |
| 324 Estrae il compartimento dall'ID del metabolita. | |
| 325 """ | |
| 326 # Il compartimento รจ solitamente l'ultima lettera dopo l'underscore | |
| 327 if '_' in metabolite_id: | |
| 328 return metabolite_id.split('_')[-1] | |
| 329 return 'c' # default cytoplasm | |
| 330 | |
| 331 | |
| 332 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | |
| 333 """ | |
| 334 Parsa una formula di reazione e imposta i metaboliti con i loro coefficienti. | |
| 335 """ | |
| 336 | |
| 337 if reaction.id == 'EX_thbpt_e': | |
| 338 print(reaction.id) | |
| 339 print(formula) | |
| 340 # Dividi in parte sinistra e destra | |
| 341 if '<=>' in formula: | |
| 342 left, right = formula.split('<=>') | |
| 343 reversible = True | |
| 344 elif '<--' in formula: | |
| 345 left, right = formula.split('<--') | |
| 346 reversible = False | |
| 347 left, right = left, right | |
| 348 elif '-->' in formula: | |
| 349 left, right = formula.split('-->') | |
| 350 reversible = False | |
| 351 elif '<-' in formula: | |
| 352 left, right = formula.split('<-') | |
| 353 reversible = False | |
| 354 left, right = left, right | |
| 355 else: | |
| 356 raise ValueError(f"Formato reazione non riconosciuto: {formula}") | |
| 357 | |
| 358 # Parse dei metaboliti e coefficienti | |
| 359 reactants = parse_metabolites_side(left.strip()) | |
| 360 products = parse_metabolites_side(right.strip()) | |
| 361 | |
| 362 # Aggiungi metaboliti alla reazione | |
| 363 metabolites_to_add = {} | |
| 364 | |
| 365 # Reagenti (coefficienti negativi) | |
| 366 for met_id, coeff in reactants.items(): | |
| 367 if met_id in metabolites_dict: | |
| 368 metabolites_to_add[metabolites_dict[met_id]] = -coeff | |
| 369 | |
| 370 # Prodotti (coefficienti positivi) | |
| 371 for met_id, coeff in products.items(): | |
| 372 if met_id in metabolites_dict: | |
| 373 metabolites_to_add[metabolites_dict[met_id]] = coeff | |
| 374 | |
| 375 reaction.add_metabolites(metabolites_to_add) | |
| 376 | |
| 377 | |
| 378 def parse_metabolites_side(side_str: str) -> Dict[str, float]: | |
| 379 """ | |
| 380 Parsa un lato della reazione per estrarre metaboliti e coefficienti. | |
| 381 """ | |
| 382 metabolites = {} | |
| 383 if not side_str or side_str.strip() == '': | |
| 384 return metabolites | |
| 385 | |
| 386 terms = side_str.split('+') | |
| 387 for term in terms: | |
| 388 term = term.strip() | |
| 389 if not term: | |
| 390 continue | |
| 391 | |
| 392 # pattern allineato: coefficiente opzionale + id che termina con _<compartimento> | |
| 393 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | |
| 394 if match: | |
| 395 coeff_str, met_id = match.groups() | |
| 396 coeff = float(coeff_str) if coeff_str else 1.0 | |
| 397 metabolites[met_id] = coeff | |
| 398 | |
| 399 return metabolites | |
| 400 | |
| 401 | |
| 402 | |
| 403 def set_biomass_objective(model: Model): | |
| 404 """ | |
| 405 Imposta la reazione di biomassa come obiettivo. | |
| 406 """ | |
| 407 biomass_reactions = [r for r in model.reactions if 'biomass' in r.id.lower()] | |
| 408 | |
| 409 if biomass_reactions: | |
| 410 model.objective = biomass_reactions[0].id | |
| 411 print(f"Obiettivo impostato su: {biomass_reactions[0].id}") | |
| 412 else: | |
| 413 print("Nessuna reazione di biomassa trovata") | |
| 414 | |
| 415 | |
| 416 def set_medium_from_data(model: Model, df: pd.DataFrame): | |
| 417 """ | |
| 418 Imposta il medium basato sulla colonna InMedium. | |
| 419 """ | |
| 420 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | |
| 421 | |
| 422 medium_dict = {} | |
| 423 for rxn_id in medium_reactions: | |
| 424 if rxn_id in [r.id for r in model.reactions]: | |
| 425 reaction = model.reactions.get_by_id(rxn_id) | |
| 426 if reaction.lower_bound < 0: # Solo reazioni di uptake | |
| 427 medium_dict[rxn_id] = abs(reaction.lower_bound) | |
| 428 | |
| 429 if medium_dict: | |
| 430 model.medium = medium_dict | |
| 431 print(f"Medium impostato con {len(medium_dict)} componenti") | |
| 432 | |
| 433 | |
| 434 def validate_model(model: Model) -> Dict[str, any]: | |
| 435 """ | |
| 436 Valida il modello e fornisce statistiche di base. | |
| 437 """ | |
| 438 validation = { | |
| 439 'num_reactions': len(model.reactions), | |
| 440 'num_metabolites': len(model.metabolites), | |
| 441 'num_genes': len(model.genes), | |
| 442 'num_compartments': len(model.compartments), | |
| 443 'objective': str(model.objective), | |
| 444 'medium_size': len(model.medium), | |
| 445 'reversible_reactions': len([r for r in model.reactions if r.reversibility]), | |
| 446 'exchange_reactions': len([r for r in model.reactions if r.id.startswith('EX_')]), | |
| 447 } | |
| 448 | |
| 449 try: | |
| 450 # Test di crescita | |
| 451 solution = model.optimize() | |
| 452 validation['growth_rate'] = solution.objective_value | |
| 453 validation['status'] = solution.status | |
| 454 except Exception as e: | |
| 455 validation['growth_rate'] = None | |
| 456 validation['status'] = f"Error: {e}" | |
| 457 | |
| 458 return validation | |
| 206 | 459 |
| 207 | 460 |
| 208 ############################# main ########################################### | 461 ############################# main ########################################### |
| 209 def main(args:List[str] = None) -> None: | 462 def main(args:List[str] = None) -> None: |
| 210 """ | 463 """ |
| 255 ras_combined = ras_combined.div(ras_combined.max(axis=0)) | 508 ras_combined = ras_combined.div(ras_combined.max(axis=0)) |
| 256 ras_combined.dropna(axis=1, how='all', inplace=True) | 509 ras_combined.dropna(axis=1, how='all', inplace=True) |
| 257 | 510 |
| 258 | 511 |
| 259 | 512 |
| 260 model_type :utils.Model = ARGS.model_selector | 513 #model_type :utils.Model = ARGS.model_selector |
| 261 if model_type is utils.Model.Custom: | 514 #if model_type is utils.Model.Custom: |
| 262 model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) | 515 # model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) |
| 263 else: | 516 #else: |
| 264 model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) | 517 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) |
| 265 | 518 |
| 266 if(ARGS.medium_selector == "Custom"): | 519 # TODO LOAD MODEL FROM UPLOAD |
| 267 medium = read_dataset(ARGS.medium, "medium dataset") | 520 |
| 268 medium.set_index(medium.columns[0], inplace=True) | 521 model = build_cobra_model_from_csv(ARGS.model_upload) |
| 269 medium = medium.astype(float) | 522 |
| 270 medium = medium[medium.columns[0]].to_dict() | 523 validation = validate_model(model) |
| 271 else: | 524 |
| 272 df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0) | 525 print("\n=== VALIDAZIONE MODELLO ===") |
| 273 ARGS.medium_selector = ARGS.medium_selector.replace("_", " ") | 526 for key, value in validation.items(): |
| 274 medium = df_mediums[[ARGS.medium_selector]] | 527 print(f"{key}: {value}") |
| 275 medium = medium[ARGS.medium_selector].to_dict() | 528 |
| 529 #if(ARGS.medium_selector == "Custom"): | |
| 530 # medium = read_dataset(ARGS.medium, "medium dataset") | |
| 531 # medium.set_index(medium.columns[0], inplace=True) | |
| 532 # medium = medium.astype(float) | |
| 533 # medium = medium[medium.columns[0]].to_dict() | |
| 534 #else: | |
| 535 # df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0) | |
| 536 # ARGS.medium_selector = ARGS.medium_selector.replace("_", " ") | |
| 537 # medium = df_mediums[[ARGS.medium_selector]] | |
| 538 # medium = medium[ARGS.medium_selector].to_dict() | |
| 276 | 539 |
| 277 if(ARGS.ras_selector == True): | 540 if(ARGS.ras_selector == True): |
| 278 generate_bounds(model, medium, ras = ras_combined, output_folder=ARGS.output_path) | 541 generate_bounds(model, ras = ras_combined, output_folder=ARGS.output_path) |
| 279 class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False) | 542 class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False) |
| 280 else: | 543 else: |
| 281 generate_bounds(model, medium, output_folder=ARGS.output_path) | 544 generate_bounds(model, output_folder=ARGS.output_path) |
| 282 | 545 |
| 283 pass | 546 pass |
| 284 | 547 |
| 285 ############################################################################## | 548 ############################################################################## |
| 286 if __name__ == "__main__": | 549 if __name__ == "__main__": |
