cobraxy: COBRAxy/ras_generator.py comparison

comparison COBRAxy/ras_generator.py @ 406:187cee1a00e2 draft

Uploaded

author	francesco_lapi
date	Mon, 08 Sep 2025 14:44:15 +0000
parents	ccccb731c953
children	97eea560a10f

comparison

Legend: equal | deleted | inserted | replaced

-:716b1a638fb5
+:187cee1a00e2
 """
 parser = argparse.ArgumentParser(
 usage = '%(prog)s [options]',
 description = "process some value's genes to create a comparison's map.")
-parser.add_argument("-rl", "--model_upload", type = str,
+parser.add_argument(
-help = "path to input file containing the rules")
+'-rs', '--rules_selector',
+type = utils.Model, default = utils.Model.ENGRO2, choices = list(utils.Model),
-parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name")
+help = 'chose which type of dataset you want use')
+parser.add_argument("-rl", "--rule_list", type = str,
+help = "path to input file with custom rules, if provided")
+parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name")
 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in
 parser.add_argument(
 '-n', '--none',
 type = utils.Bool("none"), default = True,
 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary
 where each key corresponds to a reaction ID and each value is its computed RAS score.
 """
 ras_values_by_cell_line = {}
 dataset.set_index(dataset.columns[0], inplace=True)
+# Considera tutte le colonne tranne la prima in cui ci sono gli hugo quindi va scartata
-for cell_line_name in dataset.columns: #[1:]:
+for cell_line_name in dataset.columns[1:]:
 cell_line = dataset[cell_line_name].to_dict()
 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line)
 return ras_values_by_cell_line
 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]:
 performed, significantly impacting the runtime.
 Returns:
 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.
 """
-datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat
+datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat
-#try: filenamePath = utils.FilePath.fromStrPath(ARGS.model_upload_name) # file's name in input, to determine its original ext
+try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext
-#except utils.PathErr as err:
+except utils.PathErr as err:
-#    utils.logWarning(f"Cannot determine file extension from filename '{ARGS.model_upload_name}'. Assuming tabular format.", ARGS.out_log)
+raise utils.PathErr(filenamePath, f"Please make sure your file's name is a valid file path, {err.msg}")
-#    filenamePath = None
-#if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)
+if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)
-dict_rule = {}
-try:
-# Proviamo prima con delimitatore tab
-for line in utils.readCsv(datFilePath, delimiter = "\t"):
-if len(line) < 3:  # Controlliamo che ci siano almeno 3 colonne
-utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
-continue
-if line[2] == "":
-dict_rule[line[0]] = ruleUtils.OpList([""])
-else:
-dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
-except Exception as e:
-# Se fallisce con tab, proviamo con virgola
-try:
-dict_rule = {}
-for line in utils.readCsv(datFilePath, delimiter = ","):
-if len(line) < 3:
-utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
-continue
-if line[2] == "":
-dict_rule[line[0]] = ruleUtils.OpList([""])
-else:
-dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
-except Exception as e2:
-raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}")
-if not dict_rule:
-raise ValueError("No valid rules found in the uploaded file. Please check the file format.")
 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed.
-return dict_rule
+return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) }
 def main(args:List[str] = None) -> None:
 """
 Initializes everything and sets the program in motion based on the fronted input arguments.
 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
 # remove versioning from gene names
 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]
-rules = load_custom_rules()
-reactions = list(rules.keys())
-save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
-if ERRORS: utils.logWarning(
-f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
-ARGS.out_log)
-############
 # handle custom models
-#model :utils.Model = ARGS.rules_selector
+model :utils.Model = ARGS.rules_selector
-#if model is utils.Model.Custom:
+if model is utils.Model.Custom:
-#    rules = load_custom_rules()
+rules = load_custom_rules()
-#    reactions = list(rules.keys())
+reactions = list(rules.keys())
-#    save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
+save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
-#    if ERRORS: utils.logWarning(
+if ERRORS: utils.logWarning(
-#        f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
+f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
-#        ARGS.out_log)
+ARGS.out_log)
-#    return
+return
 # This is the standard flow of the ras_generator program, for non-custom models.
-#name = "RAS Dataset"
+name = "RAS Dataset"
-#type_gene = gene_type(dataset.iloc[0, 0], name)
+type_gene = gene_type(dataset.iloc[0, 0], name)
-#rules      = model.getRules(ARGS.tool_dir)
+rules      = model.getRules(ARGS.tool_dir)
-#genes      = data_gene(dataset, type_gene, name, None)
+genes      = data_gene(dataset, type_gene, name, None)
-#ids, rules = load_id_rules(rules.get(type_gene))
+ids, rules = load_id_rules(rules.get(type_gene))
-#resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
+resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
-#create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
+create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
-#if err: utils.logWarning(
+if err: utils.logWarning(
-#    f"Warning: gene(s) {err} not found in class \"{name}\", " +
+f"Warning: gene(s) {err} not found in class \"{name}\", " +
-#    "the expression level for this gene will be considered NaN",
+"the expression level for this gene will be considered NaN",
-#    ARGS.out_log)
+ARGS.out_log)
 print("Execution succeded")
 ###############################################################################
 if __name__ == "__main__":

Mercurial > repos > bimib > cobraxy

comparison COBRAxy/ras_generator.py @ 406:187cee1a00e2 draft