Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/general_utils.py @ 419:ed2c1f9e20ba draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Tue, 09 Sep 2025 09:08:17 +0000 |
| parents | 4a248b45273c |
| children | 4a385fdb9e58 |
comparison
equal
deleted
inserted
replaced
| 418:919b5b71a61c | 419:ed2c1f9e20ba |
|---|---|
| 702 | 702 |
| 703 | 703 |
| 704 def __str__(self) -> str: return self.value | 704 def __str__(self) -> str: return self.value |
| 705 | 705 |
| 706 | 706 |
| 707 def convert_genes(model,annotation): | |
| 708 from cobra.manipulation import rename_genes | |
| 709 model2=model.copy() | |
| 710 try: | |
| 711 dict_genes={gene.id:gene.notes[annotation] for gene in model2.genes} | |
| 712 except: | |
| 713 print("No annotation in gene dict!") | |
| 714 return -1 | |
| 715 rename_genes(model2,dict_genes) | |
| 716 | |
| 717 return model2 | |
| 718 | |
| 719 | |
| 720 def build_cobra_model_from_csv(csv_path: str, model_id: str = "new_model") -> cobra.Model: | |
| 721 """ | |
| 722 Costruisce un modello COBRApy a partire da un file CSV con i dati delle reazioni. | |
| 723 | |
| 724 Args: | |
| 725 csv_path: Path al file CSV (separato da tab) | |
| 726 model_id: ID del modello da creare | |
| 727 | |
| 728 Returns: | |
| 729 cobra.Model: Il modello COBRApy costruito | |
| 730 """ | |
| 731 | |
| 732 # Leggi i dati dal CSV | |
| 733 df = pd.read_csv(csv_path, sep='\t') | |
| 734 | |
| 735 # Crea il modello vuoto | |
| 736 model = cobraModel(model_id) | |
| 737 | |
| 738 # Dict per tenere traccia di metaboliti e compartimenti | |
| 739 metabolites_dict = {} | |
| 740 compartments_dict = {} | |
| 741 | |
| 742 print(f"Costruendo modello da {len(df)} reazioni...") | |
| 743 | |
| 744 # Prima passata: estrai metaboliti e compartimenti dalle formule delle reazioni | |
| 745 for idx, row in df.iterrows(): | |
| 746 reaction_formula = str(row['Reaction']).strip() | |
| 747 if not reaction_formula or reaction_formula == 'nan': | |
| 748 continue | |
| 749 | |
| 750 # Estrai metaboliti dalla formula della reazione | |
| 751 metabolites = extract_metabolites_from_reaction(reaction_formula) | |
| 752 | |
| 753 for met_id in metabolites: | |
| 754 compartment = extract_compartment_from_metabolite(met_id) | |
| 755 | |
| 756 # Aggiungi compartimento se non esiste | |
| 757 if compartment not in compartments_dict: | |
| 758 compartments_dict[compartment] = compartment | |
| 759 | |
| 760 # Aggiungi metabolita se non esiste | |
| 761 if met_id not in metabolites_dict: | |
| 762 metabolites_dict[met_id] = Metabolite( | |
| 763 id=met_id, | |
| 764 compartment=compartment, | |
| 765 name=met_id.replace(f"_{compartment}", "").replace("__", "_") | |
| 766 ) | |
| 767 | |
| 768 # Aggiungi compartimenti al modello | |
| 769 model.compartments = compartments_dict | |
| 770 | |
| 771 # Aggiungi metaboliti al modello | |
| 772 model.add_metabolites(list(metabolites_dict.values())) | |
| 773 | |
| 774 print(f"Aggiunti {len(metabolites_dict)} metaboliti e {len(compartments_dict)} compartimenti") | |
| 775 | |
| 776 # Seconda passata: aggiungi le reazioni | |
| 777 reactions_added = 0 | |
| 778 reactions_skipped = 0 | |
| 779 | |
| 780 for idx, row in df.iterrows(): | |
| 781 | |
| 782 reaction_id = str(row['ReactionID']).strip() | |
| 783 reaction_formula = str(row['Reaction']).strip() | |
| 784 | |
| 785 # Salta reazioni senza formula | |
| 786 if not reaction_formula or reaction_formula == 'nan': | |
| 787 raise ValueError(f"Formula della reazione mancante {reaction_id}") | |
| 788 | |
| 789 # Crea la reazione | |
| 790 reaction = Reaction(reaction_id) | |
| 791 reaction.name = reaction_id | |
| 792 | |
| 793 # Imposta bounds | |
| 794 reaction.lower_bound = float(row['lower_bound']) if pd.notna(row['lower_bound']) else -1000.0 | |
| 795 reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0 | |
| 796 | |
| 797 # Aggiungi gene rule se presente | |
| 798 if pd.notna(row['Rule']) and str(row['Rule']).strip(): | |
| 799 reaction.gene_reaction_rule = str(row['Rule']).strip() | |
| 800 | |
| 801 # Parse della formula della reazione | |
| 802 try: | |
| 803 parse_reaction_formula(reaction, reaction_formula, metabolites_dict) | |
| 804 except Exception as e: | |
| 805 print(f"Errore nel parsing della reazione {reaction_id}: {e}") | |
| 806 reactions_skipped += 1 | |
| 807 continue | |
| 808 | |
| 809 # Aggiungi la reazione al modello | |
| 810 model.add_reactions([reaction]) | |
| 811 reactions_added += 1 | |
| 812 | |
| 813 | |
| 814 print(f"Aggiunte {reactions_added} reazioni, saltate {reactions_skipped} reazioni") | |
| 815 | |
| 816 # Imposta l'obiettivo di biomassa | |
| 817 set_biomass_objective(model) | |
| 818 | |
| 819 # Imposta il medium | |
| 820 set_medium_from_data(model, df) | |
| 821 | |
| 822 print(f"Modello completato: {len(model.reactions)} reazioni, {len(model.metabolites)} metaboliti") | |
| 823 | |
| 824 return model | |
| 825 | |
| 826 | |
| 827 # Estrae tutti gli ID metaboliti nella formula (gestisce prefissi numerici + underscore) | |
| 828 def extract_metabolites_from_reaction(reaction_formula: str) -> Set[str]: | |
| 829 """ | |
| 830 Estrae gli ID dei metaboliti da una formula di reazione. | |
| 831 Pattern robusto: cattura token che terminano con _<compartimento> (es. _c, _m, _e) | |
| 832 e permette che comincino con cifre o underscore. | |
| 833 """ | |
| 834 metabolites = set() | |
| 835 # coefficiente opzionale seguito da un token che termina con _<letters> | |
| 836 pattern = r'(?:\d+(?:\.\d+)?\s+)?([A-Za-z0-9_]+_[a-z]+)' | |
| 837 matches = re.findall(pattern, reaction_formula) | |
| 838 metabolites.update(matches) | |
| 839 return metabolites | |
| 840 | |
| 841 | |
| 842 def extract_compartment_from_metabolite(metabolite_id: str) -> str: | |
| 843 """ | |
| 844 Estrae il compartimento dall'ID del metabolita. | |
| 845 """ | |
| 846 # Il compartimento รจ solitamente l'ultima lettera dopo l'underscore | |
| 847 if '_' in metabolite_id: | |
| 848 return metabolite_id.split('_')[-1] | |
| 849 return 'c' # default cytoplasm | |
| 850 | |
| 851 | |
| 852 def parse_reaction_formula(reaction: Reaction, formula: str, metabolites_dict: Dict[str, Metabolite]): | |
| 853 """ | |
| 854 Parsa una formula di reazione e imposta i metaboliti con i loro coefficienti. | |
| 855 """ | |
| 856 | |
| 857 if reaction.id == 'EX_thbpt_e': | |
| 858 print(reaction.id) | |
| 859 print(formula) | |
| 860 # Dividi in parte sinistra e destra | |
| 861 if '<=>' in formula: | |
| 862 left, right = formula.split('<=>') | |
| 863 reversible = True | |
| 864 elif '<--' in formula: | |
| 865 left, right = formula.split('<--') | |
| 866 reversible = False | |
| 867 left, right = left, right | |
| 868 elif '-->' in formula: | |
| 869 left, right = formula.split('-->') | |
| 870 reversible = False | |
| 871 elif '<-' in formula: | |
| 872 left, right = formula.split('<-') | |
| 873 reversible = False | |
| 874 left, right = left, right | |
| 875 else: | |
| 876 raise ValueError(f"Formato reazione non riconosciuto: {formula}") | |
| 877 | |
| 878 # Parse dei metaboliti e coefficienti | |
| 879 reactants = parse_metabolites_side(left.strip()) | |
| 880 products = parse_metabolites_side(right.strip()) | |
| 881 | |
| 882 # Aggiungi metaboliti alla reazione | |
| 883 metabolites_to_add = {} | |
| 884 | |
| 885 # Reagenti (coefficienti negativi) | |
| 886 for met_id, coeff in reactants.items(): | |
| 887 if met_id in metabolites_dict: | |
| 888 metabolites_to_add[metabolites_dict[met_id]] = -coeff | |
| 889 | |
| 890 # Prodotti (coefficienti positivi) | |
| 891 for met_id, coeff in products.items(): | |
| 892 if met_id in metabolites_dict: | |
| 893 metabolites_to_add[metabolites_dict[met_id]] = coeff | |
| 894 | |
| 895 reaction.add_metabolites(metabolites_to_add) | |
| 896 | |
| 897 | |
| 898 def parse_metabolites_side(side_str: str) -> Dict[str, float]: | |
| 899 """ | |
| 900 Parsa un lato della reazione per estrarre metaboliti e coefficienti. | |
| 901 """ | |
| 902 metabolites = {} | |
| 903 if not side_str or side_str.strip() == '': | |
| 904 return metabolites | |
| 905 | |
| 906 terms = side_str.split('+') | |
| 907 for term in terms: | |
| 908 term = term.strip() | |
| 909 if not term: | |
| 910 continue | |
| 911 | |
| 912 # pattern allineato: coefficiente opzionale + id che termina con _<compartimento> | |
| 913 match = re.match(r'(?:(\d+\.?\d*)\s+)?([A-Za-z0-9_]+_[a-z]+)', term) | |
| 914 if match: | |
| 915 coeff_str, met_id = match.groups() | |
| 916 coeff = float(coeff_str) if coeff_str else 1.0 | |
| 917 metabolites[met_id] = coeff | |
| 918 | |
| 919 return metabolites | |
| 920 | |
| 921 | |
| 922 | |
| 923 def set_biomass_objective(model: Model): | |
| 924 """ | |
| 925 Imposta la reazione di biomassa come obiettivo. | |
| 926 """ | |
| 927 biomass_reactions = [r for r in model.reactions if 'biomass' in r.id.lower()] | |
| 928 | |
| 929 if biomass_reactions: | |
| 930 model.objective = biomass_reactions[0].id | |
| 931 print(f"Obiettivo impostato su: {biomass_reactions[0].id}") | |
| 932 else: | |
| 933 print("Nessuna reazione di biomassa trovata") | |
| 934 | |
| 935 | |
| 936 def set_medium_from_data(model: Model, df: pd.DataFrame): | |
| 937 """ | |
| 938 Imposta il medium basato sulla colonna InMedium. | |
| 939 """ | |
| 940 medium_reactions = df[df['InMedium'] == True]['ReactionID'].tolist() | |
| 941 | |
| 942 medium_dict = {} | |
| 943 for rxn_id in medium_reactions: | |
| 944 if rxn_id in [r.id for r in model.reactions]: | |
| 945 reaction = model.reactions.get_by_id(rxn_id) | |
| 946 if reaction.lower_bound < 0: # Solo reazioni di uptake | |
| 947 medium_dict[rxn_id] = abs(reaction.lower_bound) | |
| 948 | |
| 949 if medium_dict: | |
| 950 model.medium = medium_dict | |
| 951 print(f"Medium impostato con {len(medium_dict)} componenti") | |
| 952 | |
| 953 | |
| 954 def validate_model(model: Model) -> Dict[str, any]: | |
| 955 """ | |
| 956 Valida il modello e fornisce statistiche di base. | |
| 957 """ | |
| 958 validation = { | |
| 959 'num_reactions': len(model.reactions), | |
| 960 'num_metabolites': len(model.metabolites), | |
| 961 'num_genes': len(model.genes), | |
| 962 'num_compartments': len(model.compartments), | |
| 963 'objective': str(model.objective), | |
| 964 'medium_size': len(model.medium), | |
| 965 'reversible_reactions': len([r for r in model.reactions if r.reversibility]), | |
| 966 'exchange_reactions': len([r for r in model.reactions if r.id.startswith('EX_')]), | |
| 967 } | |
| 968 | |
| 969 try: | |
| 970 # Test di crescita | |
| 971 solution = model.optimize() | |
| 972 validation['growth_rate'] = solution.objective_value | |
| 973 validation['status'] = solution.status | |
| 974 except Exception as e: | |
| 975 validation['growth_rate'] = None | |
| 976 validation['status'] = f"Error: {e}" | |
| 977 | |
| 978 return validation |
