comparison COBRAxy/rps_generator.py @ 381:0a3ca20848f3 draft

Uploaded
author francesco_lapi
date Fri, 05 Sep 2025 09:18:26 +0000
parents 3dccdf56cb24
children ccccb731c953
comparison
equal deleted inserted replaced
380:03a7ba63813f 381:0a3ca20848f3
119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. 119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use.
120 An empty string is returned if a match isn't found. 120 An empty string is returned if a match isn't found.
121 """ 121 """
122 name = clean_metabolite_name(name) 122 name = clean_metabolite_name(name)
123 for id, synonyms in syn_dict.items(): 123 for id, synonyms in syn_dict.items():
124 if name in synonyms: return id 124 if name in synonyms:
125 return id
125 126
126 return "" 127 return ""
127 128
128 ############################ check_missing_metab #################################### 129 ############################ check_missing_metab ####################################
129 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: 130 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
130 """ 131 """
131 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. 132 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly.
132 133
133 Parameters: 134 Parameters:
134 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. 135 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and
136 stoichiometric coefficients as values.
135 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. 137 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
136 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. 138 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.
137 139
138 Returns: 140 Returns:
139 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their abundances were set to 1. 141 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their abundances were set to 1.
197 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). 199 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).
198 200
199 Returns: 201 Returns:
200 None 202 None
201 """ 203 """
204
202 cell_lines = dataset[0][1:] 205 cell_lines = dataset[0][1:]
203 abundances_dict = {} 206 abundances_dict = {}
204 207
205 translationIsApplied = ARGS.reaction_choice == "default" 208 translationIsApplied = ARGS.reaction_choice == "default"
206 for row in dataset[1:]: 209 for row in dataset[1:]:
207 id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0] 210 id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0]
208 if id: abundances_dict[id] = list(map(utils.Float(), row[1:])) 211 if id:
209 212 abundances_dict[id] = list(map(utils.Float(), row[1:]))
213
210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) 214 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
211 215
212 rps_scores :Dict[Dict[str, float]] = {} 216 rps_scores :Dict[Dict[str, float]] = {}
213 for pos, cell_line_name in enumerate(cell_lines): 217 for pos, cell_line_name in enumerate(cell_lines):
214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } 218 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() }
219
215 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) 220 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
216 221
217 df = pd.DataFrame.from_dict(rps_scores) 222 df = pd.DataFrame.from_dict(rps_scores)
218 223 df = df.loc[list(reactions.keys()),:]
224 print(df.head(10))
219 df.index.name = 'Reactions' 225 df.index.name = 'Reactions'
220 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) 226 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
221 227
222 ############################ main #################################### 228 ############################ main ####################################
223 def main(args:List[str] = None) -> None: 229 def main(args:List[str] = None) -> None:
236 242
237 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: 243 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
238 syn_dict = pk.load(sd) 244 syn_dict = pk.load(sd)
239 245
240 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) 246 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
241 247 tmp_dict = None
242 if ARGS.reaction_choice == 'default': 248 if ARGS.reaction_choice == 'default':
243 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) 249 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
244 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) 250 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
245 251
246 elif ARGS.reaction_choice == 'custom': 252 elif ARGS.reaction_choice == 'custom':
247 reactions = reactionUtils.parse_custom_reactions(ARGS.custom) 253 reactions = reactionUtils.parse_custom_reactions(ARGS.custom)
254 for r, s in reactions.items():
255 tmp_list = list(s.keys())
256 for k in tmp_list:
257 if k[-2] == '_':
258 s[k[:-2]] = s.pop(k)
248 substrateFreqTable = {} 259 substrateFreqTable = {}
249 for _, substrates in reactions.items(): 260 for _, substrates in reactions.items():
250 for substrateName, _ in substrates.items(): 261 for substrateName, _ in substrates.items():
251 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 262 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
252 substrateFreqTable[substrateName] += 1 263 substrateFreqTable[substrateName] += 1
253 264
265 print(f"Reactions: {reactions}")
266 print(f"Substrate Frequencies: {substrateFreqTable}")
267 print(f"Synonyms: {syn_dict}")
268 tmp_dict = {}
269 for metabName, freq in substrateFreqTable.items():
270 tmp_metabName = clean_metabolite_name(metabName)
271 for syn_key, syn_list in syn_dict.items():
272 if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
273 print(f"Mapping {tmp_metabName} to {syn_key}")
274 tmp_dict[syn_key] = syn_list
275 tmp_dict[syn_key].append(tmp_metabName)
276
254 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) 277 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
255 print('Execution succeded') 278 print('Execution succeded')
256 279
257 ############################################################################## 280 ##############################################################################
258 if __name__ == "__main__": main() 281 if __name__ == "__main__": main()