Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_to_bounds_beta.py @ 408:f413b78d61bf draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 08 Sep 2025 17:12:35 +0000 |
| parents | 6619f237aebe |
| children | 6b015d3184ab |
comparison
equal
deleted
inserted
replaced
| 407:6619f237aebe | 408:f413b78d61bf |
|---|---|
| 183 else: | 183 else: |
| 184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) | 184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) |
| 185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) | 185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) |
| 186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) | 186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) |
| 187 pass | 187 pass |
| 188 | |
| 189 # TODO: VALUTARE QUALI DI QUESTE FUNZIONI METTERE IN UTILS.PY | |
| 190 def build_cobra_model_from_csv(csv_path: str, model_id: str = "ENGRO2_custom") -> cobra.Model: | |
| 191 """ | |
| 192 Costruisce un modello COBRApy a partire da un file CSV con i dati delle reazioni. | |
| 193 | |
| 194 Args: | |
| 195 csv_path: Path al file CSV (separato da tab) | |
| 196 model_id: ID del modello da creare | |
| 197 | |
| 198 Returns: | |
| 199 cobra.Model: Il modello COBRApy costruito | |
| 200 """ | |
| 201 | |
| 202 # Leggi i dati dal CSV | |
| 203 df = pd.read_csv(csv_path, sep='\t') | |
| 204 | |
| 205 # Crea il modello vuoto | |
| 206 model = Model(model_id) | |
| 207 | |
| 208 # Dict per tenere traccia di metaboliti e compartimenti | |
| 209 metabolites_dict = {} | |
| 210 compartments_dict = {} | |
| 211 | |
| 212 print(f"Costruendo modello da {len(df)} reazioni...") | |
| 213 | |
| 214 # Prima passata: estrai metaboliti e compartimenti dalle formule delle reazioni | |
| 215 for idx, row in df.iterrows(): | |
| 216 reaction_formula = str(row['Reaction']).strip() | |
| 217 if not reaction_formula or reaction_formula == 'nan': | |
| 218 continue | |
| 219 | |
| 220 # Estrai metaboliti dalla formula della reazione | |
| 221 metabolites = extract_metabolites_from_reaction(reaction_formula) | |
| 222 | |
| 223 for met_id in metabolites: | |
| 224 compartment = extract_compartment_from_metabolite(met_id) | |
| 225 | |
| 226 # Aggiungi compartimento se non esiste | |
| 227 if compartment not in compartments_dict: | |
| 228 compartments_dict[compartment] = compartment | |
| 229 | |
| 230 # Aggiungi metabolita se non esiste | |
| 231 if met_id not in metabolites_dict: | |
| 232 metabolites_dict[met_id] = Metabolite( | |
| 233 id=met_id, | |
| 234 compartment=compartment, | |
| 235 name=met_id.replace(f"_{compartment}", "").replace("__", "_") | |
| 236 ) | |
| 237 | |
| 238 # Aggiungi compartimenti al modello | |
| 239 model.compartments = compartments_dict | |
| 240 | |
| 241 # Aggiungi metaboliti al modello | |
| 242 model.add_metabolites(list(metabolites_dict.values())) | |
| 243 | |
| 244 print(f"Aggiunti {len(metabolites_dict)} metaboliti e {len(compartments_dict)} compartimenti") | |
| 245 | |
| 246 # Seconda passata: aggiungi le reazioni | |
| 247 reactions_added = 0 | |
| 248 reactions_skipped = 0 | |
| 249 | |
| 250 for idx, row in df.iterrows(): | |
| 251 try: | |
| 252 reaction_id = str(row['ReactionID']).strip() | |
| 253 if reaction_id == 'EX_thbpt_e': | |
| 254 print('qui') | |
| 255 print(reaction_id) | |
| 256 print(str(row['Reaction']).strip()) | |
| 257 print('qui') | |
| 258 reaction_formula = str(row['Reaction']).strip() | |
| 259 | |
| 260 # Salta reazioni senza formula | |
| 261 if not reaction_formula or reaction_formula == 'nan': | |
| 262 reactions_skipped += 1 | |
| 263 continue | |
| 264 | |
| 265 # Crea la reazione | |
| 266 reaction = Reaction(reaction_id) | |
| 267 reaction.name = reaction_id | |
| 268 | |
| 269 # Imposta bounds | |
| 270 reaction.lower_bound = float(row['lower_bound']) if pd.notna(row['lower_bound']) else -1000.0 | |
| 271 reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0 | |
| 272 | |
| 273 # Aggiungi gene rule se presente | |
| 274 if pd.notna(row['Rule']) and str(row['Rule']).strip(): | |
| 275 reaction.gene_reaction_rule = str(row['Rule']).strip() | |
| 276 | |
| 277 # Parse della formula della reazione | |
| 278 try: | |
| 279 parse_reaction_formula(reaction, reaction_formula, metabolites_dict) | |
| 280 except Exception as e: | |
| 281 print(f"Errore nel parsing della reazione {reaction_id}: {e}") | |
| 282 reactions_skipped += 1 | |
| 283 continue | |
| 284 | |
| 285 # Aggiungi la reazione al modello | |
| 286 model.add_reactions([reaction]) | |
| 287 reactions_added += 1 | |
| 288 | |
| 289 except Exception as e: | |
| 290 print(f"Errore nell'aggiungere la reazione {reaction_id}: {e}") | |
| 291 reactions_skipped += 1 | |
| 292 continue | |
| 293 | |
| 294 print(f"Aggiunte {reactions_added} reazioni, saltate {reactions_skipped} reazioni") | |
| 295 | |
| 296 # Imposta l'obiettivo di biomassa | |
| 297 set_biomass_objective(model) | |
| 298 | |
| 299 # Imposta il medium | |
| 300 set_medium_from_data(model, df) | |
| 301 | |
| 302 print(f"Modello completato: {len(model.reactions)} reazioni, {len(model.metabolites)} metaboliti") | |
| 303 | |
| 304 return model | |
| 305 | |
| 306 | |
| 307 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore) | |
| 308 def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]: | |
| 309 """ | |
| 310 Estrae gli ID dei metaboliti da una formula di reazione. | |
| 311 Pattern robusto: cattura token che terminano con _<compartimento> (es. _c, _m, _e) | |
| 312 e permette che comincino con cifre o underscore. | |
| 313 """ | |
| 314 metabolites = set() | |
| 315 # coefficiente opzionale seguito da un token che termina con _<letters> | |
| 316 pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[a-z]+)' | |
| 317 matches = re.findall(pattern, reaction_formula) | |
| 318 metabolites.update(matches) | |
| 319 return metabolites | |
| 320 | |
| 321 | |
| 322 def extract_compartment_from_metabolite(metabolite_id: str) -> str: | |
| 323 """ | |
| 324 Estrae il compartimento dall'ID del metabolita. | |
| 325 """ | |
| 326 # Il compartimento รจ solitamente l'ultima lettera dopo l'underscore | |
| 327 if '_' in metabolite_id: | |
| 328 return metabolite_id.split('_')[-1] | |
| 329 return 'c' # default cytoplasm | |
| 330 | |
| 331 | |
| 332 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | |
| 333 """ | |
| 334 Parsa una formula di reazione e imposta i metaboliti con i loro coefficienti. | |
| 335 """ | |
| 336 | |
| 337 if reaction.id == 'EX_thbpt_e': | |
| 338 print(reaction.id) | |
| 339 print(formula) | |
| 340 # Dividi in parte sinistra e destra | |
| 341 if '<=>' in formula: | |
| 342 left, right = formula.split('<=>') | |
| 343 reversible = True | |
| 344 elif '<--' in formula: | |
| 345 left, right = formula.split('<--') | |
| 346 reversible = False | |
| 347 left, right = left, right | |
| 348 elif '-->' in formula: | |
| 349 left, right = formula.split('-->') | |
| 350 reversible = False | |
| 351 elif '<-' in formula: | |
| 352 left, right = formula.split('<-') | |
| 353 reversible = False | |
| 354 left, right = left, right | |
| 355 else: | |
| 356 raise ValueError(f"Formato reazione non riconosciuto: {formula}") | |
| 357 | |
| 358 # Parse dei metaboliti e coefficienti | |
| 359 reactants = parse_metabolites_side(left.strip()) | |
| 360 products = parse_metabolites_side(right.strip()) | |
| 361 | |
| 362 # Aggiungi metaboliti alla reazione | |
| 363 metabolites_to_add = {} | |
| 364 | |
| 365 # Reagenti (coefficienti negativi) | |
| 366 for met_id, coeff in reactants.items(): | |
| 367 if met_id in metabolites_dict: | |
| 368 metabolites_to_add[metabolites_dict[met_id]] = -coeff | |
| 369 | |
| 370 # Prodotti (coefficienti positivi) | |
| 371 for met_id, coeff in products.items(): | |
| 372 if met_id in metabolites_dict: | |
| 373 metabolites_to_add[metabolites_dict[met_id]] = coeff | |
| 374 | |
| 375 reaction.add_metabolites(metabolites_to_add) | |
| 376 | |
| 377 | |
| 378 def parse_metabolites_side(side_str: str) -> Dict[str, float]: | |
| 379 """ | |
| 380 Parsa un lato della reazione per estrarre metaboliti e coefficienti. | |
| 381 """ | |
| 382 metabolites = {} | |
| 383 if not side_str or side_str.strip() == '': | |
| 384 return metabolites | |
| 385 | |
| 386 terms = side_str.split('+') | |
| 387 for term in terms: | |
| 388 term = term.strip() | |
| 389 if not term: | |
| 390 continue | |
| 391 | |
| 392 # pattern allineato: coefficiente opzionale + id che termina con _<compartimento> | |
| 393 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | |
| 394 if match: | |
| 395 coeff_str, met_id = match.groups() | |
| 396 coeff = float(coeff_str) if coeff_str else 1.0 | |
| 397 metabolites[met_id] = coeff | |
| 398 | |
| 399 return metabolites | |
| 400 | |
| 401 | |
| 402 | |
| 403 def set_biomass_objective(model: Model): | |
| 404 """ | |
| 405 Imposta la reazione di biomassa come obiettivo. | |
| 406 """ | |
| 407 biomass_reactions = [r for r in model.reactions if 'biomass' in r.id.lower()] | |
| 408 | |
| 409 if biomass_reactions: | |
| 410 model.objective = biomass_reactions[0].id | |
| 411 print(f"Obiettivo impostato su: {biomass_reactions[0].id}") | |
| 412 else: | |
| 413 print("Nessuna reazione di biomassa trovata") | |
| 414 | |
| 415 | |
| 416 def set_medium_from_data(model: Model, df: pd.DataFrame): | |
| 417 """ | |
| 418 Imposta il medium basato sulla colonna InMedium. | |
| 419 """ | |
| 420 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | |
| 421 | |
| 422 medium_dict = {} | |
| 423 for rxn_id in medium_reactions: | |
| 424 if rxn_id in [r.id for r in model.reactions]: | |
| 425 reaction = model.reactions.get_by_id(rxn_id) | |
| 426 if reaction.lower_bound < 0: # Solo reazioni di uptake | |
| 427 medium_dict[rxn_id] = abs(reaction.lower_bound) | |
| 428 | |
| 429 if medium_dict: | |
| 430 model.medium = medium_dict | |
| 431 print(f"Medium impostato con {len(medium_dict)} componenti") | |
| 432 | |
| 433 | |
| 434 def validate_model(model: Model) -> Dict[str, any]: | |
| 435 """ | |
| 436 Valida il modello e fornisce statistiche di base. | |
| 437 """ | |
| 438 validation = { | |
| 439 'num_reactions': len(model.reactions), | |
| 440 'num_metabolites': len(model.metabolites), | |
| 441 'num_genes': len(model.genes), | |
| 442 'num_compartments': len(model.compartments), | |
| 443 'objective': str(model.objective), | |
| 444 'medium_size': len(model.medium), | |
| 445 'reversible_reactions': len([r for r in model.reactions if r.reversibility]), | |
| 446 'exchange_reactions': len([r for r in model.reactions if r.id.startswith('EX_')]), | |
| 447 } | |
| 448 | |
| 449 try: | |
| 450 # Test di crescita | |
| 451 solution = model.optimize() | |
| 452 validation['growth_rate'] = solution.objective_value | |
| 453 validation['status'] = solution.status | |
| 454 except Exception as e: | |
| 455 validation['growth_rate'] = None | |
| 456 validation['status'] = f"Error: {e}" | |
| 457 | |
| 458 return validation | |
| 459 | |
| 460 | 188 |
| 461 ############################# main ########################################### | 189 ############################# main ########################################### |
| 462 def main(args:List[str] = None) -> None: | 190 def main(args:List[str] = None) -> None: |
| 463 """ | 191 """ |
| 464 Initializes everything and sets the program in motion based on the fronted input arguments. | 192 Initializes everything and sets the program in motion based on the fronted input arguments. |
| 516 #else: | 244 #else: |
| 517 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) | 245 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) |
| 518 | 246 |
| 519 # TODO LOAD MODEL FROM UPLOAD | 247 # TODO LOAD MODEL FROM UPLOAD |
| 520 | 248 |
| 521 model = build_cobra_model_from_csv(ARGS.model_upload) | 249 model = utils.build_cobra_model_from_csv(ARGS.model_upload) |
| 522 | 250 |
| 523 validation = validate_model(model) | 251 validation = utils.validate_model(model) |
| 524 | 252 |
| 525 print("\n=== VALIDAZIONE MODELLO ===") | 253 print("\n=== VALIDAZIONE MODELLO ===") |
| 526 for key, value in validation.items(): | 254 for key, value in validation.items(): |
| 527 print(f"{key}: {value}") | 255 print(f"{key}: {value}") |
| 528 | 256 |
