comparison COBRAxy/ras_generator.py @ 406:187cee1a00e2 draft

Uploaded
author francesco_lapi
date Mon, 08 Sep 2025 14:44:15 +0000
parents ccccb731c953
children 97eea560a10f
comparison
equal deleted inserted replaced
405:716b1a638fb5 406:187cee1a00e2
25 """ 25 """
26 parser = argparse.ArgumentParser( 26 parser = argparse.ArgumentParser(
27 usage = '%(prog)s [options]', 27 usage = '%(prog)s [options]',
28 description = "process some value's genes to create a comparison's map.") 28 description = "process some value's genes to create a comparison's map.")
29 29
30 parser.add_argument("-rl", "--model_upload", type = str, 30 parser.add_argument(
31 help = "path to input file containing the rules") 31 '-rs', '--rules_selector',
32 32 type = utils.Model, default = utils.Model.ENGRO2, choices = list(utils.Model),
33 parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name") 33 help = 'chose which type of dataset you want use')
34
35 parser.add_argument("-rl", "--rule_list", type = str,
36 help = "path to input file with custom rules, if provided")
37
38 parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name")
34 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in 39 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in
35 40
36 parser.add_argument( 41 parser.add_argument(
37 '-n', '--none', 42 '-n', '--none',
38 type = utils.Bool("none"), default = True, 43 type = utils.Bool("none"), default = True,
516 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary 521 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary
517 where each key corresponds to a reaction ID and each value is its computed RAS score. 522 where each key corresponds to a reaction ID and each value is its computed RAS score.
518 """ 523 """
519 ras_values_by_cell_line = {} 524 ras_values_by_cell_line = {}
520 dataset.set_index(dataset.columns[0], inplace=True) 525 dataset.set_index(dataset.columns[0], inplace=True)
521 526 # Consider every column except the first, which holds the HUGO gene IDs and is therefore discarded
522 for cell_line_name in dataset.columns: #[1:]: 527 for cell_line_name in dataset.columns[1:]:
523 cell_line = dataset[cell_line_name].to_dict() 528 cell_line = dataset[cell_line_name].to_dict()
524 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) 529 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line)
525 return ras_values_by_cell_line 530 return ras_values_by_cell_line
526 531
527 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]: 532 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]:
635 performed, significantly impacting the runtime. 640 performed, significantly impacting the runtime.
636 641
637 Returns: 642 Returns:
638 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. 643 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules.
639 """ 644 """
640 datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat 645 datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat
641 646
642 #try: filenamePath = utils.FilePath.fromStrPath(ARGS.model_upload_name) # file's name in input, to determine its original ext 647 try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext
643 #except utils.PathErr as err: 648 except utils.PathErr as err:
644 # utils.logWarning(f"Cannot determine file extension from filename '{ARGS.model_upload_name}'. Assuming tabular format.", ARGS.out_log) 649 raise utils.PathErr(filenamePath, f"Please make sure your file's name is a valid file path, {err.msg}")
645 # filenamePath = None
646 650
647 #if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath) 651 if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath)
648 652
649 dict_rule = {}
650
651 try:
652 # Proviamo prima con delimitatore tab
653 for line in utils.readCsv(datFilePath, delimiter = "\t"):
654 if len(line) < 3: # Controlliamo che ci siano almeno 3 colonne
655 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
656 continue
657
658 if line[2] == "":
659 dict_rule[line[0]] = ruleUtils.OpList([""])
660 else:
661 dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
662
663 except Exception as e:
664 # Se fallisce con tab, proviamo con virgola
665 try:
666 dict_rule = {}
667 for line in utils.readCsv(datFilePath, delimiter = ","):
668 if len(line) < 3:
669 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
670 continue
671
672 if line[2] == "":
673 dict_rule[line[0]] = ruleUtils.OpList([""])
674 else:
675 dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
676 except Exception as e2:
677 raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}")
678
679 if not dict_rule:
680 raise ValueError("No valid rules found in the uploaded file. Please check the file format.")
681 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. 653 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed.
682 return dict_rule 654 return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) }
683
684 655
685 def main(args:List[str] = None) -> None: 656 def main(args:List[str] = None) -> None:
686 """ 657 """
687 Initializes everything and sets the program in motion based on the frontend input arguments. 658 Initializes everything and sets the program in motion based on the frontend input arguments.
688 659
698 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) 669 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
699 670
700 # remove versioning from gene names 671 # remove versioning from gene names
701 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] 672 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0]
702 673
703 rules = load_custom_rules()
704 reactions = list(rules.keys())
705
706 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
707 if ERRORS: utils.logWarning(
708 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
709 ARGS.out_log)
710
711
712 ############
713
714 # handle custom models 674 # handle custom models
715 #model :utils.Model = ARGS.rules_selector 675 model :utils.Model = ARGS.rules_selector
716 676
717 #if model is utils.Model.Custom: 677 if model is utils.Model.Custom:
718 # rules = load_custom_rules() 678 rules = load_custom_rules()
719 # reactions = list(rules.keys()) 679 reactions = list(rules.keys())
720 680
721 # save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) 681 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions)
722 # if ERRORS: utils.logWarning( 682 if ERRORS: utils.logWarning(
723 # f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", 683 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}",
724 # ARGS.out_log) 684 ARGS.out_log)
725 685
726 # return 686 return
727 687
728 # This is the standard flow of the ras_generator program, for non-custom models. 688 # This is the standard flow of the ras_generator program, for non-custom models.
729 #name = "RAS Dataset" 689 name = "RAS Dataset"
730 #type_gene = gene_type(dataset.iloc[0, 0], name) 690 type_gene = gene_type(dataset.iloc[0, 0], name)
731 691
732 #rules = model.getRules(ARGS.tool_dir) 692 rules = model.getRules(ARGS.tool_dir)
733 #genes = data_gene(dataset, type_gene, name, None) 693 genes = data_gene(dataset, type_gene, name, None)
734 #ids, rules = load_id_rules(rules.get(type_gene)) 694 ids, rules = load_id_rules(rules.get(type_gene))
735 695
736 #resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name) 696 resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name)
737 #create_ras(resolve_rules, name, rules, ids, ARGS.ras_output) 697 create_ras(resolve_rules, name, rules, ids, ARGS.ras_output)
738 698
739 #if err: utils.logWarning( 699 if err: utils.logWarning(
740 # f"Warning: gene(s) {err} not found in class \"{name}\", " + 700 f"Warning: gene(s) {err} not found in class \"{name}\", " +
741 # "the expression level for this gene will be considered NaN", 701 "the expression level for this gene will be considered NaN",
742 # ARGS.out_log) 702 ARGS.out_log)
743 703
744 print("Execution succeded") 704 print("Execution succeded")
745 705
746 ############################################################################### 706 ###############################################################################
747 if __name__ == "__main__": 707 if __name__ == "__main__":