Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_to_bounds_beta.py @ 408:f413b78d61bf draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 08 Sep 2025 17:12:35 +0000 |
parents | 6619f237aebe |
children | 6b015d3184ab |
comparison
equal
deleted
inserted
replaced
407:6619f237aebe | 408:f413b78d61bf |
---|---|
183 else: | 183 else: |
184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) | 184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) |
185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) | 185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) |
186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) | 186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) |
187 pass | 187 pass |
188 | |
189 # TODO: VALUTARE QUALI DI QUESTE FUNZIONI METTERE IN UTILS.PY | |
190 def build_cobra_model_from_csv(csv_path: str, model_id: str = "ENGRO2_custom") -> cobra.Model: | |
191 """ | |
192 Costruisce un modello COBRApy a partire da un file CSV con i dati delle reazioni. | |
193 | |
194 Args: | |
195 csv_path: Path al file CSV (separato da tab) | |
196 model_id: ID del modello da creare | |
197 | |
198 Returns: | |
199 cobra.Model: Il modello COBRApy costruito | |
200 """ | |
201 | |
202 # Leggi i dati dal CSV | |
203 df = pd.read_csv(csv_path, sep='\t') | |
204 | |
205 # Crea il modello vuoto | |
206 model = Model(model_id) | |
207 | |
208 # Dict per tenere traccia di metaboliti e compartimenti | |
209 metabolites_dict = {} | |
210 compartments_dict = {} | |
211 | |
212 print(f"Costruendo modello da {len(df)} reazioni...") | |
213 | |
214 # Prima passata: estrai metaboliti e compartimenti dalle formule delle reazioni | |
215 for idx, row in df.iterrows(): | |
216 reaction_formula = str(row['Reaction']).strip() | |
217 if not reaction_formula or reaction_formula == 'nan': | |
218 continue | |
219 | |
220 # Estrai metaboliti dalla formula della reazione | |
221 metabolites = extract_metabolites_from_reaction(reaction_formula) | |
222 | |
223 for met_id in metabolites: | |
224 compartment = extract_compartment_from_metabolite(met_id) | |
225 | |
226 # Aggiungi compartimento se non esiste | |
227 if compartment not in compartments_dict: | |
228 compartments_dict[compartment] = compartment | |
229 | |
230 # Aggiungi metabolita se non esiste | |
231 if met_id not in metabolites_dict: | |
232 metabolites_dict[met_id] = Metabolite( | |
233 id=met_id, | |
234 compartment=compartment, | |
235 name=met_id.replace(f"_{compartment}", "").replace("__", "_") | |
236 ) | |
237 | |
238 # Aggiungi compartimenti al modello | |
239 model.compartments = compartments_dict | |
240 | |
241 # Aggiungi metaboliti al modello | |
242 model.add_metabolites(list(metabolites_dict.values())) | |
243 | |
244 print(f"Aggiunti {len(metabolites_dict)} metaboliti e {len(compartments_dict)} compartimenti") | |
245 | |
246 # Seconda passata: aggiungi le reazioni | |
247 reactions_added = 0 | |
248 reactions_skipped = 0 | |
249 | |
250 for idx, row in df.iterrows(): | |
251 try: | |
252 reaction_id = str(row['ReactionID']).strip() | |
253 if reaction_id == 'EX_thbpt_e': | |
254 print('qui') | |
255 print(reaction_id) | |
256 print(str(row['Reaction']).strip()) | |
257 print('qui') | |
258 reaction_formula = str(row['Reaction']).strip() | |
259 | |
260 # Salta reazioni senza formula | |
261 if not reaction_formula or reaction_formula == 'nan': | |
262 reactions_skipped += 1 | |
263 continue | |
264 | |
265 # Crea la reazione | |
266 reaction = Reaction(reaction_id) | |
267 reaction.name = reaction_id | |
268 | |
269 # Imposta bounds | |
270 reaction.lower_bound = float(row['lower_bound']) if pd.notna(row['lower_bound']) else -1000.0 | |
271 reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0 | |
272 | |
273 # Aggiungi gene rule se presente | |
274 if pd.notna(row['Rule']) and str(row['Rule']).strip(): | |
275 reaction.gene_reaction_rule = str(row['Rule']).strip() | |
276 | |
277 # Parse della formula della reazione | |
278 try: | |
279 parse_reaction_formula(reaction, reaction_formula, metabolites_dict) | |
280 except Exception as e: | |
281 print(f"Errore nel parsing della reazione {reaction_id}: {e}") | |
282 reactions_skipped += 1 | |
283 continue | |
284 | |
285 # Aggiungi la reazione al modello | |
286 model.add_reactions([reaction]) | |
287 reactions_added += 1 | |
288 | |
289 except Exception as e: | |
290 print(f"Errore nell'aggiungere la reazione {reaction_id}: {e}") | |
291 reactions_skipped += 1 | |
292 continue | |
293 | |
294 print(f"Aggiunte {reactions_added} reazioni, saltate {reactions_skipped} reazioni") | |
295 | |
296 # Imposta l'obiettivo di biomassa | |
297 set_biomass_objective(model) | |
298 | |
299 # Imposta il medium | |
300 set_medium_from_data(model, df) | |
301 | |
302 print(f"Modello completato: {len(model.reactions)} reazioni, {len(model.metabolites)} metaboliti") | |
303 | |
304 return model | |
305 | |
306 | |
307 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore) | |
308 def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]: | |
309 """ | |
310 Estrae gli ID dei metaboliti da una formula di reazione. | |
311 Pattern robusto: cattura token che terminano con _<compartimento> (es. _c, _m, _e) | |
312 e permette che comincino con cifre o underscore. | |
313 """ | |
314 metabolites = set() | |
315 # coefficiente opzionale seguito da un token che termina con _<letters> | |
316 pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[a-z]+)' | |
317 matches = re.findall(pattern, reaction_formula) | |
318 metabolites.update(matches) | |
319 return metabolites | |
320 | |
321 | |
322 def extract_compartment_from_metabolite(metabolite_id: str) -> str: | |
323 """ | |
324 Estrae il compartimento dall'ID del metabolita. | |
325 """ | |
326 # Il compartimento รจ solitamente l'ultima lettera dopo l'underscore | |
327 if '_' in metabolite_id: | |
328 return metabolite_id.split('_')[-1] | |
329 return 'c' # default cytoplasm | |
330 | |
331 | |
332 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | |
333 """ | |
334 Parsa una formula di reazione e imposta i metaboliti con i loro coefficienti. | |
335 """ | |
336 | |
337 if reaction.id == 'EX_thbpt_e': | |
338 print(reaction.id) | |
339 print(formula) | |
340 # Dividi in parte sinistra e destra | |
341 if '<=>' in formula: | |
342 left, right = formula.split('<=>') | |
343 reversible = True | |
344 elif '<--' in formula: | |
345 left, right = formula.split('<--') | |
346 reversible = False | |
347 left, right = left, right | |
348 elif '-->' in formula: | |
349 left, right = formula.split('-->') | |
350 reversible = False | |
351 elif '<-' in formula: | |
352 left, right = formula.split('<-') | |
353 reversible = False | |
354 left, right = left, right | |
355 else: | |
356 raise ValueError(f"Formato reazione non riconosciuto: {formula}") | |
357 | |
358 # Parse dei metaboliti e coefficienti | |
359 reactants = parse_metabolites_side(left.strip()) | |
360 products = parse_metabolites_side(right.strip()) | |
361 | |
362 # Aggiungi metaboliti alla reazione | |
363 metabolites_to_add = {} | |
364 | |
365 # Reagenti (coefficienti negativi) | |
366 for met_id, coeff in reactants.items(): | |
367 if met_id in metabolites_dict: | |
368 metabolites_to_add[metabolites_dict[met_id]] = -coeff | |
369 | |
370 # Prodotti (coefficienti positivi) | |
371 for met_id, coeff in products.items(): | |
372 if met_id in metabolites_dict: | |
373 metabolites_to_add[metabolites_dict[met_id]] = coeff | |
374 | |
375 reaction.add_metabolites(metabolites_to_add) | |
376 | |
377 | |
378 def parse_metabolites_side(side_str: str) -> Dict[str, float]: | |
379 """ | |
380 Parsa un lato della reazione per estrarre metaboliti e coefficienti. | |
381 """ | |
382 metabolites = {} | |
383 if not side_str or side_str.strip() == '': | |
384 return metabolites | |
385 | |
386 terms = side_str.split('+') | |
387 for term in terms: | |
388 term = term.strip() | |
389 if not term: | |
390 continue | |
391 | |
392 # pattern allineato: coefficiente opzionale + id che termina con _<compartimento> | |
393 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | |
394 if match: | |
395 coeff_str, met_id = match.groups() | |
396 coeff = float(coeff_str) if coeff_str else 1.0 | |
397 metabolites[met_id] = coeff | |
398 | |
399 return metabolites | |
400 | |
401 | |
402 | |
403 def set_biomass_objective(model: Model): | |
404 """ | |
405 Imposta la reazione di biomassa come obiettivo. | |
406 """ | |
407 biomass_reactions = [r for r in model.reactions if 'biomass' in r.id.lower()] | |
408 | |
409 if biomass_reactions: | |
410 model.objective = biomass_reactions[0].id | |
411 print(f"Obiettivo impostato su: {biomass_reactions[0].id}") | |
412 else: | |
413 print("Nessuna reazione di biomassa trovata") | |
414 | |
415 | |
416 def set_medium_from_data(model: Model, df: pd.DataFrame): | |
417 """ | |
418 Imposta il medium basato sulla colonna InMedium. | |
419 """ | |
420 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | |
421 | |
422 medium_dict = {} | |
423 for rxn_id in medium_reactions: | |
424 if rxn_id in [r.id for r in model.reactions]: | |
425 reaction = model.reactions.get_by_id(rxn_id) | |
426 if reaction.lower_bound < 0: # Solo reazioni di uptake | |
427 medium_dict[rxn_id] = abs(reaction.lower_bound) | |
428 | |
429 if medium_dict: | |
430 model.medium = medium_dict | |
431 print(f"Medium impostato con {len(medium_dict)} componenti") | |
432 | |
433 | |
434 def validate_model(model: Model) -> Dict[str, any]: | |
435 """ | |
436 Valida il modello e fornisce statistiche di base. | |
437 """ | |
438 validation = { | |
439 'num_reactions': len(model.reactions), | |
440 'num_metabolites': len(model.metabolites), | |
441 'num_genes': len(model.genes), | |
442 'num_compartments': len(model.compartments), | |
443 'objective': str(model.objective), | |
444 'medium_size': len(model.medium), | |
445 'reversible_reactions': len([r for r in model.reactions if r.reversibility]), | |
446 'exchange_reactions': len([r for r in model.reactions if r.id.startswith('EX_')]), | |
447 } | |
448 | |
449 try: | |
450 # Test di crescita | |
451 solution = model.optimize() | |
452 validation['growth_rate'] = solution.objective_value | |
453 validation['status'] = solution.status | |
454 except Exception as e: | |
455 validation['growth_rate'] = None | |
456 validation['status'] = f"Error: {e}" | |
457 | |
458 return validation | |
459 | |
460 | 188 |
461 ############################# main ########################################### | 189 ############################# main ########################################### |
462 def main(args:List[str] = None) -> None: | 190 def main(args:List[str] = None) -> None: |
463 """ | 191 """ |
464 Initializes everything and sets the program in motion based on the fronted input arguments. | 192 Initializes everything and sets the program in motion based on the fronted input arguments. |
516 #else: | 244 #else: |
517 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) | 245 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) |
518 | 246 |
519 # TODO LOAD MODEL FROM UPLOAD | 247 # TODO LOAD MODEL FROM UPLOAD |
520 | 248 |
521 model = build_cobra_model_from_csv(ARGS.model_upload) | 249 model = utils.build_cobra_model_from_csv(ARGS.model_upload) |
522 | 250 |
523 validation = validate_model(model) | 251 validation = utils.validate_model(model) |
524 | 252 |
525 print("\n=== VALIDAZIONE MODELLO ===") | 253 print("\n=== VALIDAZIONE MODELLO ===") |
526 for key, value in validation.items(): | 254 for key, value in validation.items(): |
527 print(f"{key}: {value}") | 255 print(f"{key}: {value}") |
528 | 256 |