annotate COBRAxy/src/rps_generator.py @ 542:fcdbc81feb45 draft

Uploaded
author francesco_lapi
date Sun, 26 Oct 2025 19:27:41 +0000
parents 2fb97466e404
children 5d5583dc6082
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
539
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
1 """
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
2 Compute Reaction Propensity Scores (RPS) from metabolite abundances and reaction stoichiometry.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
3
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
4 Given a tabular dataset (metabolites x samples) and a reaction set, this script
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
5 maps metabolite names via synonyms, fills missing metabolites, and computes RPS
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
6 per reaction for each sample using a log-normalized formula.
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
7 """
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
8 import math
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
9 import argparse
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
10
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
11 import numpy as np
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
12 import pickle as pk
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
13 import pandas as pd
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
14
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
15 from typing import Optional, List, Dict
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
16
542
fcdbc81feb45 Uploaded
francesco_lapi
parents: 539
diff changeset
# Prefer the package-relative helpers; fall back to top-level imports when this
# file is executed as a plain script (relative imports raise ImportError there).
try:
    from .utils import general_utils as utils
    from .utils import reaction_parsing as reactionUtils
except ImportError:
    # Only an import failure should trigger the fallback; anything else
    # (KeyboardInterrupt, SystemExit, bugs inside the helpers) must propagate.
    import utils.general_utils as utils
    import utils.reaction_parsing as reactionUtils
539
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
23
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
24 ########################## argparse ##########################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
# Parsed command-line arguments. Declared here (annotation only, no value);
# main() assigns it and rps_for_cell_lines() reads ARGS.rps_output.
ARGS :argparse.Namespace
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def process_args(args: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Processes command-line arguments.

    Args:
        args (list): List of command-line arguments. When None, argparse
            falls back to the process arguments (sys.argv[1:]).

    Returns:
        Namespace: An object containing parsed arguments.
    """
    # Imported locally: the module-level imports do not bring `os` into scope,
    # yet the --tool_dir default below needs it (it used to raise NameError).
    import os

    parser = argparse.ArgumentParser(
        usage='%(prog)s [options]',
        description='Process abundances and reactions to create RPS scores.'
    )

    parser.add_argument("-rl", "--model_upload", type = str,
        help = "path to input file containing the reactions")

    parser.add_argument('-td', '--tool_dir',
                        type = str,
                        default = os.path.dirname(os.path.abspath(__file__)),
                        help = 'your tool directory (default: auto-detected package location)')

    parser.add_argument('-ol', '--out_log',
                        help = "Output log")
    parser.add_argument('-id', '--input',
                        type = str,
                        required = True,
                        help = 'input dataset')
    parser.add_argument('-rp', '--rps_output',
                        type = str,
                        required = True,
                        help = 'rps output')

    return parser.parse_args(args)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
62
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
63 ############################ dataset name #####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def name_dataset(name_data: str, count: int) -> str:
    """
    Build a unique display name for a dataset.

    "Dataset" is the default name given to any dataset; when the provided name
    is still that default, "_{count}" is appended to tell datasets apart.
    Any other name is returned as-is (converted to str).

    Args:
        name_data: Name associated with the dataset (from frontend input params).
        count: Counter starting at 1 to make names unique when default.

    Returns:
        str : the name made unique
    """
    label = str(name_data)
    if label != 'Dataset':
        return label
    return '_'.join((label, str(count)))
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
79
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
80 ############################ get_abund_data ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def get_abund_data(dataset: pd.DataFrame, cell_line_index: int) -> Optional[pd.Series]:
    """
    Extracts abundance data and turns it into a series for a specific cell line from the dataset, whose rows are
    metabolites and columns are cell lines.

    Args:
        dataset (pandas.DataFrame): The DataFrame containing abundance data for all cell lines and metabolites.
        cell_line_index (int): The positional index of the cell line (column) of interest in the dataset.

    Returns:
        pd.Series or None: A series containing abundance values for the specified cell line,
                           with the first row of the column dropped.
                           The name of the series is the name of the cell line.
                           Returns None if the cell index is invalid.
    """
    # The index addresses a COLUMN, so it must be validated against the number
    # of columns (it used to be compared with the number of rows).
    if cell_line_index < 0 or cell_line_index >= len(dataset.columns):
        print(f"Error: cell line index '{cell_line_index}' is not valid.")
        return None

    # Positional selection: everything but the first row of the chosen column.
    return dataset.iloc[1:, cell_line_index]
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
103
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
104 ############################ clean_metabolite_name ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def clean_metabolite_name(name: str) -> str:
    """
    Normalize a metabolite name so it can be matched against the dictionary of synonyms.

    Punctuation, brackets, underscores, hyphens, apostrophes and spaces are
    dropped, and the remaining characters are lowercased.

    Args:
        name : the metabolite's name, as given in the dataset.

    Returns:
        str : a new string with the cleaned name.
    """
    dropped_chars = ",;-_'([{ }])"
    kept = [symbol for symbol in name if symbol not in dropped_chars]
    return "".join(kept).lower()
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
117
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
118 ############################ get_metabolite_id ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def get_metabolite_id(name: str, syn_dict: Dict[str, List[str]]) -> str:
    """
    Find the identifier whose synonym list contains the given metabolite name.

    The name is cleaned with clean_metabolite_name before the lookup; the
    dictionary is scanned in iteration order and the first hit wins.

    Args:
        name : the metabolite's name, as given in the dataset.
        syn_dict : the dictionary of synonyms, using unique identifiers as keys and lists of clean synonyms as values.

    Returns:
        str : the unique identifier (dictionary key) of that metabolite.
        An empty string is returned if a match isn't found.
    """
    cleaned = clean_metabolite_name(name)
    hits = (metab_id for metab_id, synonyms in syn_dict.items() if cleaned in synonyms)
    return next(hits, "")
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
137
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
138 ############################ check_missing_metab ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt: int) -> List[str]:
    """
    Find metabolites used by the reactions but absent from the abundances, and give them a default abundance.

    Parameters:
        reactions (dict): maps reaction names to dictionaries of metabolite name -> stoichiometric coefficient.
        dataset_by_rows (dict): maps metabolite names to their list of abundances, one per cell line.
        cell_lines_amt : amount of cell lines, i.e. the length of the list created for each missing metabolite.

    Returns:
        list[str] : names of the metabolites that were missing, in the order they were first encountered.
        Each appears once, since it is added to dataset_by_rows as soon as it is found.

    Side effects:
        dataset_by_rows: mutated to include every missing metabolite with a list of 1s as abundances.
    """
    newly_added = []
    for substrates in reactions.values():
        for metab_name in substrates:
            if metab_name in dataset_by_rows:
                continue
            newly_added.append(metab_name)
            dataset_by_rows[metab_name] = [1] * cell_lines_amt

    return newly_added
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
163
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
164 ############################ calculate_rps ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def calculate_rps(reactions: Dict[str, Dict[str, int]], abundances: Dict[str, float], black_list: List[str], missing_list: List[str], substrateFreqTable: Dict[str, int]) -> Dict[str, float]:
    """
    Compute the Reaction Propensity Score (RPS) of every reaction for one sample.

    For each substrate the abundance (NaN is replaced by 1, and machine epsilon is added) is divided by
    the substrate's frequency in the model, log-transformed and multiplied by its stoichiometric coefficient;
    the per-substrate terms are summed. This is the log of the product of normalized concentrations, each raised
    to its stoichiometric coefficient.

    Parameters:
        reactions (dict): maps reaction names to dictionaries of metabolite name -> stoichiometric coefficient.
        abundances (dict): maps metabolite names to their abundance in the sample.
        black_list (list): metabolite names that do not count as significant for a reaction.
        missing_list (list): metabolite names that were missing from the original abundances (set to 1), also not significant.
        substrateFreqTable (dict): maps each metabolite name to how many times it shows up in the model's reactions.

    Returns:
        dict: maps reaction names to their RPS. A reaction with no significant substrate
        (all black-listed or missing, or no substrates at all) gets NaN.
    """
    eps = np.finfo(float).eps  # keeps log() away from zero abundances
    scores = {}

    for rxn_name, substrates in reactions.items():
        log_sum = 0
        has_measured_substrate = False
        for metab, coefficient in substrates.items():
            raw_value = abundances[metab]
            level = 1 if math.isnan(raw_value) else raw_value
            if not (metab in black_list or metab in missing_list):
                has_measured_substrate = True

            # Every substrate contributes to the sum, significant or not.
            log_sum += math.log((level + eps) / substrateFreqTable[metab]) * coefficient

        scores[rxn_name] = log_sum if has_measured_substrate else math.nan

    return scores
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
197
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
198 ############################ rps_for_cell_lines ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def rps_for_cell_lines(dataset: List[List[str]], reactions: Dict[str, Dict[str, int]], black_list: List[str], syn_dict: Dict[str, List[str]], substrateFreqTable: Dict[str, int]) -> None:
    """
    Compute the Reaction Propensity Scores (RPS) of every cell line in the dataset and write them to the output file.

    The first dataset row is the header (cell line names from the second column on); every other row holds a
    metabolite name followed by its abundances. Rows whose name has no match in syn_dict are ignored.
    The result is written as a tab-separated table (reactions x cell lines) to ARGS.rps_output, with
    missing scores rendered as 'None'.

    Parameters:
        dataset : the dataset's data, by rows
        reactions (dict): maps reaction names to dictionaries of metabolite name -> stoichiometric coefficient.
        black_list (list): metabolite names that should be excluded from the RPS calculation.
        syn_dict (dict): maps metabolite identifiers to lists of possible synonyms.
        substrateFreqTable (dict): maps each metabolite name to how many times it shows up in the model's reactions.

    Returns:
        None
    """
    sample_names = dataset[0][1:]

    # metabolite id -> abundances, one per cell line
    abundances_by_metab = {}
    for row in dataset[1:]:
        metab_id = get_metabolite_id(row[0], syn_dict)
        if not metab_id:
            continue
        abundances_by_metab[metab_id] = list(map(utils.Float(), row[1:]))

    missing_list = check_missing_metab(reactions, abundances_by_metab, len(sample_names))

    scores_by_sample: Dict[str, Dict[str, float]] = {}
    for column, sample in enumerate(sample_names):
        sample_abundances = {metab: values[column] for metab, values in abundances_by_metab.items()}
        scores_by_sample[sample] = calculate_rps(reactions, sample_abundances, black_list, missing_list, substrateFreqTable)

    # Rows follow the order of the reactions dictionary.
    table = pd.DataFrame.from_dict(scores_by_sample)
    table = table.loc[list(reactions.keys()), :]
    table.index.name = 'Reactions'
    table.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
236
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
237 ############################ main ####################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
def main(args: Optional[List[str]] = None) -> None:
    """
    Initializes everything and sets the program in motion based on the frontend input arguments.

    Steps: parse the arguments, load the black list and the synonyms from the tool directory,
    read the input dataset, parse the uploaded reactions, build the substrate frequency table,
    extend the synonyms with the model's own metabolite names and finally compute and write the RPS table.

    Args:
        args: command-line arguments forwarded to process_args (None lets argparse read the process arguments).

    Returns:
        None
    """
    global ARGS
    ARGS = process_args(args)

    # Load support data (black list and synonyms).
    # NOTE(review): pickle must only be used on trusted files; these are read from the tool directory.
    with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl:
        black_list = pk.load(bl)

    with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
        syn_dict = pk.load(sd)

    dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader=False)

    # Parse custom reactions from uploaded file
    reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload)

    # Strip a trailing "_<one character>" suffix from metabolite names, in place
    # (presumably a compartment tag - confirm against the model naming scheme).
    for substrates in reactions.values():
        # Iterate over a snapshot of the keys: the dictionary is modified in the loop.
        for metab in list(substrates.keys()):
            # The length guard avoids an IndexError on names shorter than two characters.
            if len(metab) >= 2 and metab[-2] == '_':
                substrates[metab[:-2]] = substrates.pop(metab)

    # How many reactions each substrate appears in.
    substrateFreqTable = {}
    for substrates in reactions.values():
        for substrateName in substrates:
            substrateFreqTable[substrateName] = substrateFreqTable.get(substrateName, 0) + 1

    # Debug prints (can be enabled during troubleshooting)
    # print(f"Reactions: {reactions}")
    # print(f"Substrate Frequencies: {substrateFreqTable}")
    # print(f"Synonyms: {syn_dict}")

    # Register each model metabolite name as a synonym of the entry whose key it matches once cleaned.
    # This deliberately mutates the lists held by syn_dict, which is what the lookup below relies on.
    for metabName in substrateFreqTable:
        cleaned_name = clean_metabolite_name(metabName)
        for syn_key, syn_list in syn_dict.items():
            if cleaned_name in syn_list:
                continue  # already a known synonym, nothing to add
            if cleaned_name == clean_metabolite_name(syn_key):
                # print(f"Mapping {cleaned_name} to {syn_key}")
                syn_list.append(cleaned_name)

    rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
    print('Execution succeeded')
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
286
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
287 ##############################################################################
2fb97466e404 Uploaded
francesco_lapi
parents:
diff changeset
# Script entry point: run main() only when the file is executed directly, not when it is imported.
if __name__ == "__main__": main()