Mercurial > repos > bimib > cobraxy
comparison COBRAxy/rps_generator.py @ 381:0a3ca20848f3 draft
Uploaded
author | francesco_lapi |
---|---|
date | Fri, 05 Sep 2025 09:18:26 +0000 |
parents | 3dccdf56cb24 |
children | ccccb731c953 |
comparison
equal
deleted
inserted
replaced
380:03a7ba63813f | 381:0a3ca20848f3 |
---|---|
119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. | 119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. |
120 An empty string is returned if a match isn't found. | 120 An empty string is returned if a match isn't found. |
121 """ | 121 """ |
122 name = clean_metabolite_name(name) | 122 name = clean_metabolite_name(name) |
123 for id, synonyms in syn_dict.items(): | 123 for id, synonyms in syn_dict.items(): |
124 if name in synonyms: return id | 124 if name in synonyms: |
125 return id | |
125 | 126 |
126 return "" | 127 return "" |
127 | 128 |
128 ############################ check_missing_metab #################################### | 129 ############################ check_missing_metab #################################### |
129 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: | 130 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: |
130 """ | 131 """ |
131 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. | 132 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. |
132 | 133 |
133 Parameters: | 134 Parameters: |
134 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. | 135 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and |
136 stoichiometric coefficients as values. | |
135 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. | 137 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. |
136 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. | 138 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. |
137 | 139 |
138 Returns: | 140 Returns: |
139 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their abundances were set to 1. | 141 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their abundances were set to 1. |
197 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). | 199 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). |
198 | 200 |
199 Returns: | 201 Returns: |
200 None | 202 None |
201 """ | 203 """ |
204 | |
202 cell_lines = dataset[0][1:] | 205 cell_lines = dataset[0][1:] |
203 abundances_dict = {} | 206 abundances_dict = {} |
204 | 207 |
205 translationIsApplied = ARGS.reaction_choice == "default" | 208 translationIsApplied = ARGS.reaction_choice == "default" |
206 for row in dataset[1:]: | 209 for row in dataset[1:]: |
207 id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0] | 210 id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0] |
208 if id: abundances_dict[id] = list(map(utils.Float(), row[1:])) | 211 if id: |
209 | 212 abundances_dict[id] = list(map(utils.Float(), row[1:])) |
213 | |
210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) | 214 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) |
211 | 215 |
212 rps_scores :Dict[Dict[str, float]] = {} | 216 rps_scores :Dict[Dict[str, float]] = {} |
213 for pos, cell_line_name in enumerate(cell_lines): | 217 for pos, cell_line_name in enumerate(cell_lines): |
214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } | 218 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } |
219 | |
215 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) | 220 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) |
216 | 221 |
217 df = pd.DataFrame.from_dict(rps_scores) | 222 df = pd.DataFrame.from_dict(rps_scores) |
218 | 223 df = df.loc[list(reactions.keys()),:] |
224 print(df.head(10)) | |
219 df.index.name = 'Reactions' | 225 df.index.name = 'Reactions' |
220 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) | 226 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) |
221 | 227 |
222 ############################ main #################################### | 228 ############################ main #################################### |
223 def main(args:List[str] = None) -> None: | 229 def main(args:List[str] = None) -> None: |
236 | 242 |
237 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: | 243 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: |
238 syn_dict = pk.load(sd) | 244 syn_dict = pk.load(sd) |
239 | 245 |
240 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) | 246 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) |
241 | 247 tmp_dict = None |
242 if ARGS.reaction_choice == 'default': | 248 if ARGS.reaction_choice == 'default': |
243 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) | 249 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) |
244 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) | 250 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) |
245 | 251 |
246 elif ARGS.reaction_choice == 'custom': | 252 elif ARGS.reaction_choice == 'custom': |
247 reactions = reactionUtils.parse_custom_reactions(ARGS.custom) | 253 reactions = reactionUtils.parse_custom_reactions(ARGS.custom) |
254 for r, s in reactions.items(): | |
255 tmp_list = list(s.keys()) | |
256 for k in tmp_list: | |
257 if k[-2] == '_': | |
258 s[k[:-2]] = s.pop(k) | |
248 substrateFreqTable = {} | 259 substrateFreqTable = {} |
249 for _, substrates in reactions.items(): | 260 for _, substrates in reactions.items(): |
250 for substrateName, _ in substrates.items(): | 261 for substrateName, _ in substrates.items(): |
251 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 | 262 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 |
252 substrateFreqTable[substrateName] += 1 | 263 substrateFreqTable[substrateName] += 1 |
253 | 264 |
265 print(f"Reactions: {reactions}") | |
266 print(f"Substrate Frequencies: {substrateFreqTable}") | |
267 print(f"Synonyms: {syn_dict}") | |
268 tmp_dict = {} | |
269 for metabName, freq in substrateFreqTable.items(): | |
270 tmp_metabName = clean_metabolite_name(metabName) | |
271 for syn_key, syn_list in syn_dict.items(): | |
272 if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key): | |
273 print(f"Mapping {tmp_metabName} to {syn_key}") | |
274 tmp_dict[syn_key] = syn_list | |
275 tmp_dict[syn_key].append(tmp_metabName) | |
276 | |
254 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) | 277 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) |
255 print('Execution succeded') | 278 print('Execution succeded') |
256 | 279 |
257 ############################################################################## | 280 ############################################################################## |
258 if __name__ == "__main__": main() | 281 if __name__ == "__main__": main() |