comparison COBRAxy/ras_to_bounds_beta.py @ 408:f413b78d61bf draft

Uploaded
author francesco_lapi
date Mon, 08 Sep 2025 17:12:35 +0000
parents 6619f237aebe
children 6b015d3184ab
comparison
equal deleted inserted replaced
407:6619f237aebe 408:f413b78d61bf
183 else: 183 else:
184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) 184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) 185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids))
186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) 186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True)
187 pass 187 pass
188
189 # TODO: VALUTARE QUALI DI QUESTE FUNZIONI METTERE IN UTILS.PY
190 def build_cobra_model_from_csv(csv_path: str, model_id: str = "ENGRO2_custom") -> cobra.Model:
191 """
192 Costruisce un modello COBRApy a partire da un file CSV con i dati delle reazioni.
193
194 Args:
195 csv_path: Path al file CSV (separato da tab)
196 model_id: ID del modello da creare
197
198 Returns:
199 cobra.Model: Il modello COBRApy costruito
200 """
201
202 # Leggi i dati dal CSV
203 df = pd.read_csv(csv_path, sep='\t')
204
205 # Crea il modello vuoto
206 model = Model(model_id)
207
208 # Dict per tenere traccia di metaboliti e compartimenti
209 metabolites_dict = {}
210 compartments_dict = {}
211
212 print(f"Costruendo modello da {len(df)} reazioni...")
213
214 # Prima passata: estrai metaboliti e compartimenti dalle formule delle reazioni
215 for idx, row in df.iterrows():
216 reaction_formula = str(row['Reaction']).strip()
217 if not reaction_formula or reaction_formula == 'nan':
218 continue
219
220 # Estrai metaboliti dalla formula della reazione
221 metabolites = extract_metabolites_from_reaction(reaction_formula)
222
223 for met_id in metabolites:
224 compartment = extract_compartment_from_metabolite(met_id)
225
226 # Aggiungi compartimento se non esiste
227 if compartment not in compartments_dict:
228 compartments_dict[compartment] = compartment
229
230 # Aggiungi metabolita se non esiste
231 if met_id not in metabolites_dict:
232 metabolites_dict[met_id] = Metabolite(
233 id=met_id,
234 compartment=compartment,
235 name=met_id.replace(f"_{compartment}", "").replace("__", "_")
236 )
237
238 # Aggiungi compartimenti al modello
239 model.compartments = compartments_dict
240
241 # Aggiungi metaboliti al modello
242 model.add_metabolites(list(metabolites_dict.values()))
243
244 print(f"Aggiunti {len(metabolites_dict)} metaboliti e {len(compartments_dict)} compartimenti")
245
246 # Seconda passata: aggiungi le reazioni
247 reactions_added = 0
248 reactions_skipped = 0
249
250 for idx, row in df.iterrows():
251 try:
252 reaction_id = str(row['ReactionID']).strip()
253 if reaction_id == 'EX_thbpt_e':
254 print('qui')
255 print(reaction_id)
256 print(str(row['Reaction']).strip())
257 print('qui')
258 reaction_formula = str(row['Reaction']).strip()
259
260 # Salta reazioni senza formula
261 if not reaction_formula or reaction_formula == 'nan':
262 reactions_skipped += 1
263 continue
264
265 # Crea la reazione
266 reaction = Reaction(reaction_id)
267 reaction.name = reaction_id
268
269 # Imposta bounds
270 reaction.lower_bound = float(row['lower_bound']) if pd.notna(row['lower_bound']) else -1000.0
271 reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0
272
273 # Aggiungi gene rule se presente
274 if pd.notna(row['Rule']) and str(row['Rule']).strip():
275 reaction.gene_reaction_rule = str(row['Rule']).strip()
276
277 # Parse della formula della reazione
278 try:
279 parse_reaction_formula(reaction, reaction_formula, metabolites_dict)
280 except Exception as e:
281 print(f"Errore nel parsing della reazione {reaction_id}: {e}")
282 reactions_skipped += 1
283 continue
284
285 # Aggiungi la reazione al modello
286 model.add_reactions([reaction])
287 reactions_added += 1
288
289 except Exception as e:
290 print(f"Errore nell'aggiungere la reazione {reaction_id}: {e}")
291 reactions_skipped += 1
292 continue
293
294 print(f"Aggiunte {reactions_added} reazioni, saltate {reactions_skipped} reazioni")
295
296 # Imposta l'obiettivo di biomassa
297 set_biomass_objective(model)
298
299 # Imposta il medium
300 set_medium_from_data(model, df)
301
302 print(f"Modello completato: {len(model.reactions)} reazioni, {len(model.metabolites)} metaboliti")
303
304 return model
305
306
307 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore)
308 def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]:
309 """
310 Estrae gli ID dei metaboliti da una formula di reazione.
311 Pattern robusto: cattura token che terminano con _<compartimento> (es. _c, _m, _e)
312 e permette che comincino con cifre o underscore.
313 """
314 metabolites = set()
315 # coefficiente opzionale seguito da un token che termina con _<letters>
316 pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[a-z]+)'
317 matches = re.findall(pattern, reaction_formula)
318 metabolites.update(matches)
319 return metabolites
320
321
322 def extract_compartment_from_metabolite(metabolite_id: str) -> str:
323 """
324 Estrae il compartimento dall'ID del metabolita.
325 """
326 # Il compartimento รจ solitamente l'ultima lettera dopo l'underscore
327 if '_' in metabolite_id:
328 return metabolite_id.split('_')[-1]
329 return 'c' # default cytoplasm
330
331
332 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]):
333 """
334 Parsa una formula di reazione e imposta i metaboliti con i loro coefficienti.
335 """
336
337 if reaction.id == 'EX_thbpt_e':
338 print(reaction.id)
339 print(formula)
340 # Dividi in parte sinistra e destra
341 if '<=>' in formula:
342 left, right = formula.split('<=>')
343 reversible = True
344 elif '<--' in formula:
345 left, right = formula.split('<--')
346 reversible = False
347 left, right = left, right
348 elif '-->' in formula:
349 left, right = formula.split('-->')
350 reversible = False
351 elif '<-' in formula:
352 left, right = formula.split('<-')
353 reversible = False
354 left, right = left, right
355 else:
356 raise ValueError(f"Formato reazione non riconosciuto: {formula}")
357
358 # Parse dei metaboliti e coefficienti
359 reactants = parse_metabolites_side(left.strip())
360 products = parse_metabolites_side(right.strip())
361
362 # Aggiungi metaboliti alla reazione
363 metabolites_to_add = {}
364
365 # Reagenti (coefficienti negativi)
366 for met_id, coeff in reactants.items():
367 if met_id in metabolites_dict:
368 metabolites_to_add[metabolites_dict[met_id]] = -coeff
369
370 # Prodotti (coefficienti positivi)
371 for met_id, coeff in products.items():
372 if met_id in metabolites_dict:
373 metabolites_to_add[metabolites_dict[met_id]] = coeff
374
375 reaction.add_metabolites(metabolites_to_add)
376
377
378 def parse_metabolites_side(side_str: str) -> Dict[str, float]:
379 """
380 Parsa un lato della reazione per estrarre metaboliti e coefficienti.
381 """
382 metabolites = {}
383 if not side_str or side_str.strip() == '':
384 return metabolites
385
386 terms = side_str.split('+')
387 for term in terms:
388 term = term.strip()
389 if not term:
390 continue
391
392 # pattern allineato: coefficiente opzionale + id che termina con _<compartimento>
393 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term)
394 if match:
395 coeff_str, met_id = match.groups()
396 coeff = float(coeff_str) if coeff_str else 1.0
397 metabolites[met_id] = coeff
398
399 return metabolites
400
401
402
403 def set_biomass_objective(model: Model):
404 """
405 Imposta la reazione di biomassa come obiettivo.
406 """
407 biomass_reactions = [r for r in model.reactions if 'biomass' in r.id.lower()]
408
409 if biomass_reactions:
410 model.objective = biomass_reactions[0].id
411 print(f"Obiettivo impostato su: {biomass_reactions[0].id}")
412 else:
413 print("Nessuna reazione di biomassa trovata")
414
415
416 def set_medium_from_data(model: Model, df: pd.DataFrame):
417 """
418 Imposta il medium basato sulla colonna InMedium.
419 """
420 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist()
421
422 medium_dict = {}
423 for rxn_id in medium_reactions:
424 if rxn_id in [r.id for r in model.reactions]:
425 reaction = model.reactions.get_by_id(rxn_id)
426 if reaction.lower_bound < 0: # Solo reazioni di uptake
427 medium_dict[rxn_id] = abs(reaction.lower_bound)
428
429 if medium_dict:
430 model.medium = medium_dict
431 print(f"Medium impostato con {len(medium_dict)} componenti")
432
433
434 def validate_model(model: Model) -> Dict[str, any]:
435 """
436 Valida il modello e fornisce statistiche di base.
437 """
438 validation = {
439 'num_reactions': len(model.reactions),
440 'num_metabolites': len(model.metabolites),
441 'num_genes': len(model.genes),
442 'num_compartments': len(model.compartments),
443 'objective': str(model.objective),
444 'medium_size': len(model.medium),
445 'reversible_reactions': len([r for r in model.reactions if r.reversibility]),
446 'exchange_reactions': len([r for r in model.reactions if r.id.startswith('EX_')]),
447 }
448
449 try:
450 # Test di crescita
451 solution = model.optimize()
452 validation['growth_rate'] = solution.objective_value
453 validation['status'] = solution.status
454 except Exception as e:
455 validation['growth_rate'] = None
456 validation['status'] = f"Error: {e}"
457
458 return validation
459
460 188
461 ############################# main ########################################### 189 ############################# main ###########################################
462 def main(args:List[str] = None) -> None: 190 def main(args:List[str] = None) -> None:
463 """ 191 """
464 Initializes everything and sets the program in motion based on the fronted input arguments. 192 Initializes everything and sets the program in motion based on the fronted input arguments.
516 #else: 244 #else:
517 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) 245 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
518 246
519 # TODO LOAD MODEL FROM UPLOAD 247 # TODO LOAD MODEL FROM UPLOAD
520 248
521 model = build_cobra_model_from_csv(ARGS.model_upload) 249 model = utils.build_cobra_model_from_csv(ARGS.model_upload)
522 250
523 validation = validate_model(model) 251 validation = utils.validate_model(model)
524 252
525 print("\n=== VALIDAZIONE MODELLO ===") 253 print("\n=== VALIDAZIONE MODELLO ===")
526 for key, value in validation.items(): 254 for key, value in validation.items():
527 print(f"{key}: {value}") 255 print(f"{key}: {value}")
528 256