Mercurial > repos > bimib > cobraxy
diff COBRAxy/utils/model_utils.py @ 418:919b5b71a61c draft
Uploaded
| author | francesco_lapi | 
|---|---|
| date | Tue, 09 Sep 2025 07:36:30 +0000 | 
| parents | |
| children | ed2c1f9e20ba | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COBRAxy/utils/model_utils.py Tue Sep 09 07:36:30 2025 +0000 @@ -0,0 +1,129 @@ +import os +import csv +import cobra +import pickle +import argparse +import pandas as pd +from typing import Optional, Tuple, Union, List, Dict +import utils.general_utils as utils +import utils.rule_parsing as rulesUtils + +################################- DATA GENERATION -################################ +ReactionId = str +def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]: + """ + Generates a dictionary mapping reaction ids to rules from the model. + + Args: + model : the model to derive data from. + asParsed : if True parses the rules to an optimized runtime format, otherwise leaves them as strings. + + Returns: + Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules. + Dict[ReactionId, str] : the generated dictionary of raw rules. + """ + # Is the below approach convoluted? yes + # Ok but is it inefficient? probably + # Ok but at least I don't have to repeat the check at every rule (I'm clinically insane) + _ruleGetter = lambda reaction : reaction.gene_reaction_rule + ruleExtractor = (lambda reaction : + rulesUtils.parseRuleToNestedList(_ruleGetter(reaction))) if asParsed else _ruleGetter + + return { + reaction.id : ruleExtractor(reaction) + for reaction in model.reactions + if reaction.gene_reaction_rule } + +def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]: + """ + Generates a dictionary mapping reaction ids to reaction formulas from the model. + + Args: + model : the model to derive data from. + asParsed : if True parses the reactions to an optimized runtime format, otherwise leaves them as they are. + + Returns: + Dict[ReactionId, str] : the generated dictionary. + """ + + unparsedReactions = { + reaction.id : reaction.reaction + for reaction in model.reactions + if reaction.reaction + } + + if not asParsed: return unparsedReactions + + return reactionUtils.create_reaction_dict(unparsedReactions) + +def get_medium(model:cobra.Model) -> pd.DataFrame: + trueMedium=[] + for r in model.reactions: + positiveCoeff=0 + for m in r.metabolites: + if r.get_coefficient(m.id)>0: + positiveCoeff=1; + if (positiveCoeff==0 and r.lower_bound<0): + trueMedium.append(r.id) + + df_medium = pd.DataFrame() + df_medium["reaction"] = trueMedium + return df_medium + +def generate_bounds(model:cobra.Model) -> pd.DataFrame: + + rxns = [] + for reaction in model.reactions: + rxns.append(reaction.id) + + bounds = pd.DataFrame(columns = ["lower_bound", "upper_bound"], index=rxns) + + for reaction in model.reactions: + bounds.loc[reaction.id] = [reaction.lower_bound, reaction.upper_bound] + return bounds + + + +def generate_compartments(model: cobra.Model) -> pd.DataFrame: + """ + Generates a DataFrame containing compartment information for each reaction. + Creates columns for each compartment position (Compartment_1, Compartment_2, etc.) + + Args: + model: the COBRA model to extract compartment data from. + + Returns: + pd.DataFrame: DataFrame with ReactionID and compartment columns + """ + pathway_data = [] + + # First pass: determine the maximum number of pathways any reaction has + max_pathways = 0 + reaction_pathways = {} + + for reaction in model.reactions: + # Get unique pathways from all metabolites in the reaction + if type(reaction.annotation['pathways']) == list: + reaction_pathways[reaction.id] = reaction.annotation['pathways'] + max_pathways = max(max_pathways, len(reaction.annotation['pathways'])) + else: + reaction_pathways[reaction.id] = [reaction.annotation['pathways']] + + # Create column names for pathways + pathway_columns = [f"Pathway_{i+1}" for i in range(max_pathways)] + + # Second pass: create the data + for reaction_id, pathways in reaction_pathways.items(): + row = {"ReactionID": reaction_id} + + # Fill pathway columns + for i in range(max_pathways): + col_name = pathway_columns[i] + if i < len(pathways): + row[col_name] = pathways[i] + else: + row[col_name] = None # or "" if you prefer empty strings + + pathway_data.append(row) + + return pd.DataFrame(pathway_data) \ No newline at end of file
