Mercurial > repos > bimib > cobraxy
comparison COBRAxy/rps_generator.py @ 406:187cee1a00e2 draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 08 Sep 2025 14:44:15 +0000 |
parents | ccccb731c953 |
children | 97eea560a10f |
comparison
equal
deleted
inserted
replaced
405:716b1a638fb5 | 406:187cee1a00e2 |
---|---|
23 Namespace: An object containing parsed arguments. | 23 Namespace: An object containing parsed arguments. |
24 """ | 24 """ |
25 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', | 25 parser = argparse.ArgumentParser(usage = '%(prog)s [options]', |
26 description = 'process some value\'s'+ | 26 description = 'process some value\'s'+ |
27 ' abundances and reactions to create RPS scores.') | 27 ' abundances and reactions to create RPS scores.') |
28 | 28 parser.add_argument('-rc', '--reaction_choice', |
29 parser.add_argument("-rl", "--model_upload", type = str, | 29 type = str, |
30 help = "path to input file containing the reactions") | 30 default = 'default', |
31 | 31 choices = ['default','custom'], |
32 # model_upload custom | 32 help = 'chose which type of reaction dataset you want use') |
33 parser.add_argument('-cm', '--custom', | |
34 type = str, | |
35 help='your dataset if you want custom reactions') | |
33 parser.add_argument('-td', '--tool_dir', | 36 parser.add_argument('-td', '--tool_dir', |
34 type = str, | 37 type = str, |
35 required = True, | 38 required = True, |
36 help = 'your tool directory') | 39 help = 'your tool directory') |
37 parser.add_argument('-ol', '--out_log', | 40 parser.add_argument('-ol', '--out_log', |
116 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. | 119 str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. |
117 An empty string is returned if a match isn't found. | 120 An empty string is returned if a match isn't found. |
118 """ | 121 """ |
119 name = clean_metabolite_name(name) | 122 name = clean_metabolite_name(name) |
120 for id, synonyms in syn_dict.items(): | 123 for id, synonyms in syn_dict.items(): |
121 if name in synonyms: | 124 if name in synonyms: return id |
122 return id | |
123 | 125 |
124 return "" | 126 return "" |
125 | 127 |
126 ############################ check_missing_metab #################################### | 128 ############################ check_missing_metab #################################### |
127 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: | 129 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: |
128 """ | 130 """ |
129 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. | 131 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. |
130 | 132 |
131 Parameters: | 133 Parameters: |
132 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and | 134 reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. |
133 stoichiometric coefficients as values. | |
134 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. | 135 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. |
135 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. | 136 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. |
136 | 137 |
137 Returns: | 138 Returns: |
138 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. | 139 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. |
196 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). | 197 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). |
197 | 198 |
198 Returns: | 199 Returns: |
199 None | 200 None |
200 """ | 201 """ |
201 | |
202 cell_lines = dataset[0][1:] | 202 cell_lines = dataset[0][1:] |
203 abundances_dict = {} | 203 abundances_dict = {} |
204 | 204 |
205 translationIsApplied = ARGS.reaction_choice == "default" | |
205 for row in dataset[1:]: | 206 for row in dataset[1:]: |
206 id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0] | 207 id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0] |
207 if id: | 208 if id: abundances_dict[id] = list(map(utils.Float(), row[1:])) |
208 abundances_dict[id] = list(map(utils.Float(), row[1:])) | 209 |
209 | |
210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) | 210 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) |
211 | 211 |
212 rps_scores :Dict[Dict[str, float]] = {} | 212 rps_scores :Dict[Dict[str, float]] = {} |
213 for pos, cell_line_name in enumerate(cell_lines): | 213 for pos, cell_line_name in enumerate(cell_lines): |
214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } | 214 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } |
215 | |
216 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) | 215 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) |
217 | 216 |
218 df = pd.DataFrame.from_dict(rps_scores) | 217 df = pd.DataFrame.from_dict(rps_scores) |
219 df = df.loc[list(reactions.keys()),:] | 218 |
220 print(df.head(10)) | |
221 df.index.name = 'Reactions' | 219 df.index.name = 'Reactions' |
222 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) | 220 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) |
223 | 221 |
224 ############################ main #################################### | 222 ############################ main #################################### |
225 def main(args:List[str] = None) -> None: | 223 def main(args:List[str] = None) -> None: |
238 | 236 |
239 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: | 237 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: |
240 syn_dict = pk.load(sd) | 238 syn_dict = pk.load(sd) |
241 | 239 |
242 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) | 240 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) |
243 tmp_dict = None | 241 |
244 #if ARGS.reaction_choice == 'default': | 242 if ARGS.reaction_choice == 'default': |
245 # reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) | 243 reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) |
246 # substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) | 244 substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) |
247 | 245 |
248 #elif ARGS.reaction_choice == 'custom': | 246 elif ARGS.reaction_choice == 'custom': |
249 reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload) | 247 reactions = reactionUtils.parse_custom_reactions(ARGS.custom) |
250 for r, s in reactions.items(): | 248 substrateFreqTable = {} |
251 tmp_list = list(s.keys()) | 249 for _, substrates in reactions.items(): |
252 for k in tmp_list: | 250 for substrateName, _ in substrates.items(): |
253 if k[-2] == '_': | 251 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 |
254 s[k[:-2]] = s.pop(k) | 252 substrateFreqTable[substrateName] += 1 |
255 substrateFreqTable = {} | |
256 for _, substrates in reactions.items(): | |
257 for substrateName, _ in substrates.items(): | |
258 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 | |
259 substrateFreqTable[substrateName] += 1 | |
260 | |
261 print(f"Reactions: {reactions}") | |
262 print(f"Substrate Frequencies: {substrateFreqTable}") | |
263 print(f"Synonyms: {syn_dict}") | |
264 tmp_dict = {} | |
265 for metabName, freq in substrateFreqTable.items(): | |
266 tmp_metabName = clean_metabolite_name(metabName) | |
267 for syn_key, syn_list in syn_dict.items(): | |
268 if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key): | |
269 print(f"Mapping {tmp_metabName} to {syn_key}") | |
270 tmp_dict[syn_key] = syn_list | |
271 tmp_dict[syn_key].append(tmp_metabName) | |
272 | 253 |
273 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) | 254 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) |
274 print('Execution succeded') | 255 print('Execution succeded') |
275 | 256 |
276 ############################################################################## | 257 ############################################################################## |