Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_generator.py @ 406:187cee1a00e2 draft
Uploaded
author: francesco_lapi
date: Mon, 08 Sep 2025 14:44:15 +0000
parents: ccccb731c953
children: 97eea560a10f
comparison
equal
deleted
inserted
replaced
405:716b1a638fb5 replaced by 406:187cee1a00e2
25 """ | 25 """ |
26 parser = argparse.ArgumentParser( | 26 parser = argparse.ArgumentParser( |
27 usage = '%(prog)s [options]', | 27 usage = '%(prog)s [options]', |
28 description = "process some value's genes to create a comparison's map.") | 28 description = "process some value's genes to create a comparison's map.") |
29 | 29 |
30 parser.add_argument("-rl", "--model_upload", type = str, | 30 parser.add_argument( |
31 help = "path to input file containing the rules") | 31 '-rs', '--rules_selector', |
32 | 32 type = utils.Model, default = utils.Model.ENGRO2, choices = list(utils.Model), |
33 parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name") | 33 help = 'chose which type of dataset you want use') |
34 | |
35 parser.add_argument("-rl", "--rule_list", type = str, | |
36 help = "path to input file with custom rules, if provided") | |
37 | |
38 parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name") | |
34 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in | 39 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in |
35 | 40 |
36 parser.add_argument( | 41 parser.add_argument( |
37 '-n', '--none', | 42 '-n', '--none', |
38 type = utils.Bool("none"), default = True, | 43 type = utils.Bool("none"), default = True, |
516 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary | 521 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary |
517 where each key corresponds to a reaction ID and each value is its computed RAS score. | 522 where each key corresponds to a reaction ID and each value is its computed RAS score. |
518 """ | 523 """ |
519 ras_values_by_cell_line = {} | 524 ras_values_by_cell_line = {} |
520 dataset.set_index(dataset.columns[0], inplace=True) | 525 dataset.set_index(dataset.columns[0], inplace=True) |
521 | 526 # Considera tutte le colonne tranne la prima in cui ci sono gli hugo quindi va scartata |
522 for cell_line_name in dataset.columns: #[1:]: | 527 for cell_line_name in dataset.columns[1:]: |
523 cell_line = dataset[cell_line_name].to_dict() | 528 cell_line = dataset[cell_line_name].to_dict() |
524 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) | 529 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) |
525 return ras_values_by_cell_line | 530 return ras_values_by_cell_line |
526 | 531 |
527 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]: | 532 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]: |
635 performed, significantly impacting the runtime. | 640 performed, significantly impacting the runtime. |
636 | 641 |
637 Returns: | 642 Returns: |
638 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. | 643 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. |
639 """ | 644 """ |
640 datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat | 645 datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat |
641 | 646 |
642 #try: filenamePath = utils.FilePath.fromStrPath(ARGS.model_upload_name) # file's name in input, to determine its original ext | 647 try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext |
643 #except utils.PathErr as err: | 648 except utils.PathErr as err: |
644 # utils.logWarning(f"Cannot determine file extension from filename '{ARGS.model_upload_name}'. Assuming tabular format.", ARGS.out_log) | 649 raise utils.PathErr(filenamePath, f"Please make sure your file's name is a valid file path, {err.msg}") |
645 # filenamePath = None | |
646 | 650 |
647 #if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath) | 651 if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath) |
648 | 652 |
649 dict_rule = {} | |
650 | |
651 try: | |
652 # Proviamo prima con delimitatore tab | |
653 for line in utils.readCsv(datFilePath, delimiter = "\t"): | |
654 if len(line) < 3: # Controlliamo che ci siano almeno 3 colonne | |
655 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) | |
656 continue | |
657 | |
658 if line[2] == "": | |
659 dict_rule[line[0]] = ruleUtils.OpList([""]) | |
660 else: | |
661 dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) | |
662 | |
663 except Exception as e: | |
664 # Se fallisce con tab, proviamo con virgola | |
665 try: | |
666 dict_rule = {} | |
667 for line in utils.readCsv(datFilePath, delimiter = ","): | |
668 if len(line) < 3: | |
669 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) | |
670 continue | |
671 | |
672 if line[2] == "": | |
673 dict_rule[line[0]] = ruleUtils.OpList([""]) | |
674 else: | |
675 dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) | |
676 except Exception as e2: | |
677 raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}") | |
678 | |
679 if not dict_rule: | |
680 raise ValueError("No valid rules found in the uploaded file. Please check the file format.") | |
681 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. | 653 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. |
682 return dict_rule | 654 return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) } |
683 | |
684 | 655 |
685 def main(args:List[str] = None) -> None: | 656 def main(args:List[str] = None) -> None: |
686 """ | 657 """ |
687 Initializes everything and sets the program in motion based on the fronted input arguments. | 658 Initializes everything and sets the program in motion based on the fronted input arguments. |
688 | 659 |
698 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 669 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) |
699 | 670 |
700 # remove versioning from gene names | 671 # remove versioning from gene names |
701 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] | 672 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] |
702 | 673 |
703 rules = load_custom_rules() | |
704 reactions = list(rules.keys()) | |
705 | |
706 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) | |
707 if ERRORS: utils.logWarning( | |
708 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", | |
709 ARGS.out_log) | |
710 | |
711 | |
712 ############ | |
713 | |
714 # handle custom models | 674 # handle custom models |
715 #model :utils.Model = ARGS.rules_selector | 675 model :utils.Model = ARGS.rules_selector |
716 | 676 |
717 #if model is utils.Model.Custom: | 677 if model is utils.Model.Custom: |
718 # rules = load_custom_rules() | 678 rules = load_custom_rules() |
719 # reactions = list(rules.keys()) | 679 reactions = list(rules.keys()) |
720 | 680 |
721 # save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) | 681 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) |
722 # if ERRORS: utils.logWarning( | 682 if ERRORS: utils.logWarning( |
723 # f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", | 683 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", |
724 # ARGS.out_log) | 684 ARGS.out_log) |
725 | 685 |
726 # return | 686 return |
727 | 687 |
728 # This is the standard flow of the ras_generator program, for non-custom models. | 688 # This is the standard flow of the ras_generator program, for non-custom models. |
729 #name = "RAS Dataset" | 689 name = "RAS Dataset" |
730 #type_gene = gene_type(dataset.iloc[0, 0], name) | 690 type_gene = gene_type(dataset.iloc[0, 0], name) |
731 | 691 |
732 #rules = model.getRules(ARGS.tool_dir) | 692 rules = model.getRules(ARGS.tool_dir) |
733 #genes = data_gene(dataset, type_gene, name, None) | 693 genes = data_gene(dataset, type_gene, name, None) |
734 #ids, rules = load_id_rules(rules.get(type_gene)) | 694 ids, rules = load_id_rules(rules.get(type_gene)) |
735 | 695 |
736 #resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name) | 696 resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name) |
737 #create_ras(resolve_rules, name, rules, ids, ARGS.ras_output) | 697 create_ras(resolve_rules, name, rules, ids, ARGS.ras_output) |
738 | 698 |
739 #if err: utils.logWarning( | 699 if err: utils.logWarning( |
740 # f"Warning: gene(s) {err} not found in class \"{name}\", " + | 700 f"Warning: gene(s) {err} not found in class \"{name}\", " + |
741 # "the expression level for this gene will be considered NaN", | 701 "the expression level for this gene will be considered NaN", |
742 # ARGS.out_log) | 702 ARGS.out_log) |
743 | 703 |
744 print("Execution succeded") | 704 print("Execution succeded") |
745 | 705 |
746 ############################################################################### | 706 ############################################################################### |
747 if __name__ == "__main__": | 707 if __name__ == "__main__": |