comparison COBRAxy/rps_generator.py @ 406:187cee1a00e2 draft

Uploaded
author francesco_lapi
date Mon, 08 Sep 2025 14:44:15 +0000
parents ccccb731c953
children 97eea560a10f
comparison
equal deleted inserted replaced
405:716b1a638fb5 406:187cee1a00e2
23 Namespace: An object containing parsed arguments. 23 Namespace: An object containing parsed arguments.
24 """ 24 """
25 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', 25 parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
26 description = 'process some value\'s'+ 26 description = 'process some value\'s'+
27 ' abundances and reactions to create RPS scores.') 27 ' abundances and reactions to create RPS scores.')
28 28 parser.add_argument('-rc', '--reaction_choice',
29 parser.add_argument("-rl", "--model_upload", type = str, 29 type = str,
30 help = "path to input file containing the reactions") 30 default = 'default',
31 31 choices = ['default','custom'],
32 # model_upload custom 32 help = 'chose which type of reaction dataset you want use')
33 parser.add_argument('-cm', '--custom',
34 type = str,
35 help='your dataset if you want custom reactions')
33 parser.add_argument('-td', '--tool_dir', 36 parser.add_argument('-td', '--tool_dir',
34 type = str, 37 type = str,
35 required = True, 38 required = True,
36 help = 'your tool directory') 39 help = 'your tool directory')
37 parser.add_argument('-ol', '--out_log', 40 parser.add_argument('-ol', '--out_log',
116 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. 119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use.
117 An empty string is returned if a match isn't found. 120 An empty string is returned if a match isn't found.
118 """ 121 """
119 name = clean_metabolite_name(name) 122 name = clean_metabolite_name(name)
120 for id, synonyms in syn_dict.items(): 123 for id, synonyms in syn_dict.items():
121 if name in synonyms: 124 if name in synonyms: return id
122 return id
123 125
124 return "" 126 return ""
125 127
126 ############################ check_missing_metab #################################### 128 ############################ check_missing_metab ####################################
127 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: 129 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
128 """ 130 """
129 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. 131 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly.
130 132
131 Parameters: 133 Parameters:
132 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and 134 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
133 stoichiometric coefficients as values.
134 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. 135 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
135 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. 136 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.
136 137
137 Returns: 138 Returns:
138 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. 139 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1.
196 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). 197 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).
197 198
198 Returns: 199 Returns:
199 None 200 None
200 """ 201 """
201
202 cell_lines = dataset[0][1:] 202 cell_lines = dataset[0][1:]
203 abundances_dict = {} 203 abundances_dict = {}
204 204
205 translationIsApplied = ARGS.reaction_choice == "default"
205 for row in dataset[1:]: 206 for row in dataset[1:]:
206 id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0] 207 id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0]
207 if id: 208 if id: abundances_dict[id] = list(map(utils.Float(), row[1:]))
208 abundances_dict[id] = list(map(utils.Float(), row[1:])) 209
209
210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) 210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
211 211
212 rps_scores :Dict[Dict[str, float]] = {} 212 rps_scores :Dict[Dict[str, float]] = {}
213 for pos, cell_line_name in enumerate(cell_lines): 213 for pos, cell_line_name in enumerate(cell_lines):
214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } 214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() }
215
216 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) 215 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
217 216
218 df = pd.DataFrame.from_dict(rps_scores) 217 df = pd.DataFrame.from_dict(rps_scores)
219 df = df.loc[list(reactions.keys()),:] 218
220 print(df.head(10))
221 df.index.name = 'Reactions' 219 df.index.name = 'Reactions'
222 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) 220 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
223 221
224 ############################ main #################################### 222 ############################ main ####################################
225 def main(args:List[str] = None) -> None: 223 def main(args:List[str] = None) -> None:
238 236
239 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: 237 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
240 syn_dict = pk.load(sd) 238 syn_dict = pk.load(sd)
241 239
242 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) 240 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
243 tmp_dict = None 241
244 #if ARGS.reaction_choice == 'default': 242 if ARGS.reaction_choice == 'default':
245 # reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) 243 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
246 # substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) 244 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
247 245
248 #elif ARGS.reaction_choice == 'custom': 246 elif ARGS.reaction_choice == 'custom':
249 reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload) 247 reactions = reactionUtils.parse_custom_reactions(ARGS.custom)
250 for r, s in reactions.items(): 248 substrateFreqTable = {}
251 tmp_list = list(s.keys()) 249 for _, substrates in reactions.items():
252 for k in tmp_list: 250 for substrateName, _ in substrates.items():
253 if k[-2] == '_': 251 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
254 s[k[:-2]] = s.pop(k) 252 substrateFreqTable[substrateName] += 1
255 substrateFreqTable = {}
256 for _, substrates in reactions.items():
257 for substrateName, _ in substrates.items():
258 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
259 substrateFreqTable[substrateName] += 1
260
261 print(f"Reactions: {reactions}")
262 print(f"Substrate Frequencies: {substrateFreqTable}")
263 print(f"Synonyms: {syn_dict}")
264 tmp_dict = {}
265 for metabName, freq in substrateFreqTable.items():
266 tmp_metabName = clean_metabolite_name(metabName)
267 for syn_key, syn_list in syn_dict.items():
268 if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
269 print(f"Mapping {tmp_metabName} to {syn_key}")
270 tmp_dict[syn_key] = syn_list
271 tmp_dict[syn_key].append(tmp_metabName)
272 253
273 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) 254 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
274 print('Execution succeded') 255 print('Execution succeded')
275 256
276 ############################################################################## 257 ##############################################################################