diff COBRAxy/rps_generator.py @ 381:0a3ca20848f3 draft

Uploaded
author francesco_lapi
date Fri, 05 Sep 2025 09:18:26 +0000
parents 3dccdf56cb24
children
line wrap: on
line diff
--- a/COBRAxy/rps_generator.py	Fri Sep 05 08:27:04 2025 +0000
+++ b/COBRAxy/rps_generator.py	Fri Sep 05 09:18:26 2025 +0000
@@ -121,7 +121,8 @@
     """
     name = clean_metabolite_name(name)
     for id, synonyms in syn_dict.items():
-        if name in synonyms: return id
+        if name in synonyms:
+            return id
     
     return ""
 
@@ -131,7 +132,8 @@
     Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly.
 
     Parameters:
-        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
+        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and 
+                          stoichiometric coefficients as values.
         dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
         cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.
 
@@ -199,23 +201,27 @@
     Returns:
         None
     """
+
     cell_lines = dataset[0][1:]
     abundances_dict = {}
 
     translationIsApplied = ARGS.reaction_choice == "default"
     for row in dataset[1:]:
-        id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0]
-        if id: abundances_dict[id] = list(map(utils.Float(), row[1:]))
-    
+        id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0]
+        if id: 
+            abundances_dict[id] = list(map(utils.Float(), row[1:]))
+
     missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
-    
+
     rps_scores :Dict[Dict[str, float]] = {}
     for pos, cell_line_name in enumerate(cell_lines):
         abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() }
+
         rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
     
     df = pd.DataFrame.from_dict(rps_scores)
-    
+    df = df.loc[list(reactions.keys()),:]
+    print(df.head(10))
     df.index.name = 'Reactions'
     df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
 
@@ -238,19 +244,36 @@
         syn_dict = pk.load(sd)
 
     dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
-
+    tmp_dict = None
     if ARGS.reaction_choice == 'default':
         reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
         substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
     
     elif ARGS.reaction_choice == 'custom':
         reactions = reactionUtils.parse_custom_reactions(ARGS.custom)
+        for r, s in reactions.items():
+            tmp_list = list(s.keys())
+            for k in tmp_list:
+                if k[-2] == '_':
+                    s[k[:-2]] = s.pop(k)
         substrateFreqTable = {}
         for _, substrates in reactions.items():
             for substrateName, _ in substrates.items():
                 if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
                 substrateFreqTable[substrateName] += 1
 
+        print(f"Reactions: {reactions}")
+        print(f"Substrate Frequencies: {substrateFreqTable}")
+        print(f"Synonyms: {syn_dict}")
+        tmp_dict = {}
+        for metabName, freq in substrateFreqTable.items():
+            tmp_metabName = clean_metabolite_name(metabName)
+            for syn_key, syn_list in syn_dict.items():
+                if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
+                    print(f"Mapping {tmp_metabName} to {syn_key}")
+                    tmp_dict[syn_key] = syn_list
+                    tmp_dict[syn_key].append(tmp_metabName)
+
     rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
     print('Execution succeded')