cobraxy: COBRAxy/rps_generator.py comparison

comparison COBRAxy/rps_generator.py @ 406:187cee1a00e2 draft

Uploaded

author	francesco_lapi
date	Mon, 08 Sep 2025 14:44:15 +0000
parents	ccccb731c953
children	97eea560a10f

comparison

equal deleted inserted replaced

-:716b1a638fb5
+:187cee1a00e2
 Namespace: An object containing parsed arguments.
 """
 parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
 description = 'process some value\'s'+
 ' abundances and reactions to create RPS scores.')
+parser.add_argument('-rc', '--reaction_choice',
-parser.add_argument("-rl", "--model_upload", type = str,
+type = str,
-help = "path to input file containing the reactions")
+default = 'default',
+choices = ['default','custom'],
-# model_upload custom
+help = 'chose which type of reaction dataset you want use')
+parser.add_argument('-cm', '--custom',
+type = str,
+help='your dataset if you want custom reactions')
 parser.add_argument('-td', '--tool_dir',
 type = str,
 required = True,
 help = 'your tool directory')
 parser.add_argument('-ol', '--out_log',
 str : the internal unique identifier (a string) of that metabolite, used in all other parts of the model in use.
 An empty string is returned if a match isn't found.
 """
 name = clean_metabolite_name(name)
 for id, synonyms in syn_dict.items():
-if name in synonyms:
+if name in synonyms: return id
-return id
 return ""
 ############################ check_missing_metab ####################################
 def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
 """
 Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly.
 Parameters:
-reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and
+reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
-stoichiometric coefficients as values.
 dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
 cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites.
 Returns:
 list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their abundances were set to 1.
 substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).
 Returns:
 None
 """
 cell_lines = dataset[0][1:]
 abundances_dict = {}
+translationIsApplied = ARGS.reaction_choice == "default"
 for row in dataset[1:]:
-id = get_metabolite_id(row[0], syn_dict) #if translationIsApplied else row[0]
+id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0]
-if id:
+if id: abundances_dict[id] = list(map(utils.Float(), row[1:]))
-abundances_dict[id] = list(map(utils.Float(), row[1:]))
 missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines)))
 rps_scores :Dict[Dict[str, float]] = {}
 for pos, cell_line_name in enumerate(cell_lines):
 abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() }
 rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable)
 df = pd.DataFrame.from_dict(rps_scores)
-df = df.loc[list(reactions.keys()),:]
-print(df.head(10))
 df.index.name = 'Reactions'
 df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
 ############################ main ####################################
 def main(args:List[str] = None) -> None:
 with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd:
 syn_dict = pk.load(sd)
 dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)
-tmp_dict = None
-#if ARGS.reaction_choice == 'default':
+if ARGS.reaction_choice == 'default':
-#    reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
+reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb'))
-#    substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
+substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb'))
-#elif ARGS.reaction_choice == 'custom':
+elif ARGS.reaction_choice == 'custom':
-reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload)
+reactions = reactionUtils.parse_custom_reactions(ARGS.custom)
-for r, s in reactions.items():
+substrateFreqTable = {}
-tmp_list = list(s.keys())
+for _, substrates in reactions.items():
-for k in tmp_list:
+for substrateName, _ in substrates.items():
-if k[-2] == '_':
+if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
-s[k[:-2]] = s.pop(k)
+substrateFreqTable[substrateName] += 1
-substrateFreqTable = {}
-for _, substrates in reactions.items():
-for substrateName, _ in substrates.items():
-if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0
-substrateFreqTable[substrateName] += 1
-print(f"Reactions: {reactions}")
-print(f"Substrate Frequencies: {substrateFreqTable}")
-print(f"Synonyms: {syn_dict}")
-tmp_dict = {}
-for metabName, freq in substrateFreqTable.items():
-tmp_metabName = clean_metabolite_name(metabName)
-for syn_key, syn_list in syn_dict.items():
-if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key):
-print(f"Mapping {tmp_metabName} to {syn_key}")
-tmp_dict[syn_key] = syn_list
-tmp_dict[syn_key].append(tmp_metabName)
 rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
 print('Execution succeded')
 ##############################################################################

Mercurial > repos > bimib > cobraxy

comparison COBRAxy/rps_generator.py @ 406:187cee1a00e2 draft