Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_generator.py @ 406:187cee1a00e2 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 08 Sep 2025 14:44:15 +0000 |
| parents | ccccb731c953 |
| children | 97eea560a10f |
comparison
equal
deleted
inserted
replaced
| 405:716b1a638fb5 | 406:187cee1a00e2 |
|---|---|
| 25 """ | 25 """ |
| 26 parser = argparse.ArgumentParser( | 26 parser = argparse.ArgumentParser( |
| 27 usage = '%(prog)s [options]', | 27 usage = '%(prog)s [options]', |
| 28 description = "process some value's genes to create a comparison's map.") | 28 description = "process some value's genes to create a comparison's map.") |
| 29 | 29 |
| 30 parser.add_argument("-rl", "--model_upload", type = str, | 30 parser.add_argument( |
| 31 help = "path to input file containing the rules") | 31 '-rs', '--rules_selector', |
| 32 | 32 type = utils.Model, default = utils.Model.ENGRO2, choices = list(utils.Model), |
| 33 parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name") | 33 help = 'chose which type of dataset you want use') |
| 34 | |
| 35 parser.add_argument("-rl", "--rule_list", type = str, | |
| 36 help = "path to input file with custom rules, if provided") | |
| 37 | |
| 38 parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name") | |
| 34 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in | 39 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in |
| 35 | 40 |
| 36 parser.add_argument( | 41 parser.add_argument( |
| 37 '-n', '--none', | 42 '-n', '--none', |
| 38 type = utils.Bool("none"), default = True, | 43 type = utils.Bool("none"), default = True, |
| 516 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary | 521 dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary |
| 517 where each key corresponds to a reaction ID and each value is its computed RAS score. | 522 where each key corresponds to a reaction ID and each value is its computed RAS score. |
| 518 """ | 523 """ |
| 519 ras_values_by_cell_line = {} | 524 ras_values_by_cell_line = {} |
| 520 dataset.set_index(dataset.columns[0], inplace=True) | 525 dataset.set_index(dataset.columns[0], inplace=True) |
| 521 | 526 # Considera tutte le colonne tranne la prima in cui ci sono gli hugo quindi va scartata |
| 522 for cell_line_name in dataset.columns: #[1:]: | 527 for cell_line_name in dataset.columns[1:]: |
| 523 cell_line = dataset[cell_line_name].to_dict() | 528 cell_line = dataset[cell_line_name].to_dict() |
| 524 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) | 529 ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) |
| 525 return ras_values_by_cell_line | 530 return ras_values_by_cell_line |
| 526 | 531 |
| 527 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]: | 532 def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]: |
| 635 performed, significantly impacting the runtime. | 640 performed, significantly impacting the runtime. |
| 636 | 641 |
| 637 Returns: | 642 Returns: |
| 638 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. | 643 Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. |
| 639 """ | 644 """ |
| 640 datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in galaxy as a .dat | 645 datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat |
| 641 | 646 |
| 642 #try: filenamePath = utils.FilePath.fromStrPath(ARGS.model_upload_name) # file's name in input, to determine its original ext | 647 try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext |
| 643 #except utils.PathErr as err: | 648 except utils.PathErr as err: |
| 644 # utils.logWarning(f"Cannot determine file extension from filename '{ARGS.model_upload_name}'. Assuming tabular format.", ARGS.out_log) | 649 raise utils.PathErr(filenamePath, f"Please make sure your file's name is a valid file path, {err.msg}") |
| 645 # filenamePath = None | |
| 646 | 650 |
| 647 #if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath) | 651 if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath) |
| 648 | 652 |
| 649 dict_rule = {} | |
| 650 | |
| 651 try: | |
| 652 # Proviamo prima con delimitatore tab | |
| 653 for line in utils.readCsv(datFilePath, delimiter = "\t"): | |
| 654 if len(line) < 3: # Controlliamo che ci siano almeno 3 colonne | |
| 655 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) | |
| 656 continue | |
| 657 | |
| 658 if line[2] == "": | |
| 659 dict_rule[line[0]] = ruleUtils.OpList([""]) | |
| 660 else: | |
| 661 dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) | |
| 662 | |
| 663 except Exception as e: | |
| 664 # Se fallisce con tab, proviamo con virgola | |
| 665 try: | |
| 666 dict_rule = {} | |
| 667 for line in utils.readCsv(datFilePath, delimiter = ","): | |
| 668 if len(line) < 3: | |
| 669 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) | |
| 670 continue | |
| 671 | |
| 672 if line[2] == "": | |
| 673 dict_rule[line[0]] = ruleUtils.OpList([""]) | |
| 674 else: | |
| 675 dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) | |
| 676 except Exception as e2: | |
| 677 raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}") | |
| 678 | |
| 679 if not dict_rule: | |
| 680 raise ValueError("No valid rules found in the uploaded file. Please check the file format.") | |
| 681 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. | 653 # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. |
| 682 return dict_rule | 654 return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) } |
| 683 | |
| 684 | 655 |
| 685 def main(args:List[str] = None) -> None: | 656 def main(args:List[str] = None) -> None: |
| 686 """ | 657 """ |
| 687 Initializes everything and sets the program in motion based on the fronted input arguments. | 658 Initializes everything and sets the program in motion based on the fronted input arguments. |
| 688 | 659 |
| 698 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) | 669 dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) |
| 699 | 670 |
| 700 # remove versioning from gene names | 671 # remove versioning from gene names |
| 701 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] | 672 dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] |
| 702 | 673 |
| 703 rules = load_custom_rules() | |
| 704 reactions = list(rules.keys()) | |
| 705 | |
| 706 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) | |
| 707 if ERRORS: utils.logWarning( | |
| 708 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", | |
| 709 ARGS.out_log) | |
| 710 | |
| 711 | |
| 712 ############ | |
| 713 | |
| 714 # handle custom models | 674 # handle custom models |
| 715 #model :utils.Model = ARGS.rules_selector | 675 model :utils.Model = ARGS.rules_selector |
| 716 | 676 |
| 717 #if model is utils.Model.Custom: | 677 if model is utils.Model.Custom: |
| 718 # rules = load_custom_rules() | 678 rules = load_custom_rules() |
| 719 # reactions = list(rules.keys()) | 679 reactions = list(rules.keys()) |
| 720 | 680 |
| 721 # save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) | 681 save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) |
| 722 # if ERRORS: utils.logWarning( | 682 if ERRORS: utils.logWarning( |
| 723 # f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", | 683 f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", |
| 724 # ARGS.out_log) | 684 ARGS.out_log) |
| 725 | 685 |
| 726 # return | 686 return |
| 727 | 687 |
| 728 # This is the standard flow of the ras_generator program, for non-custom models. | 688 # This is the standard flow of the ras_generator program, for non-custom models. |
| 729 #name = "RAS Dataset" | 689 name = "RAS Dataset" |
| 730 #type_gene = gene_type(dataset.iloc[0, 0], name) | 690 type_gene = gene_type(dataset.iloc[0, 0], name) |
| 731 | 691 |
| 732 #rules = model.getRules(ARGS.tool_dir) | 692 rules = model.getRules(ARGS.tool_dir) |
| 733 #genes = data_gene(dataset, type_gene, name, None) | 693 genes = data_gene(dataset, type_gene, name, None) |
| 734 #ids, rules = load_id_rules(rules.get(type_gene)) | 694 ids, rules = load_id_rules(rules.get(type_gene)) |
| 735 | 695 |
| 736 #resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name) | 696 resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name) |
| 737 #create_ras(resolve_rules, name, rules, ids, ARGS.ras_output) | 697 create_ras(resolve_rules, name, rules, ids, ARGS.ras_output) |
| 738 | 698 |
| 739 #if err: utils.logWarning( | 699 if err: utils.logWarning( |
| 740 # f"Warning: gene(s) {err} not found in class \"{name}\", " + | 700 f"Warning: gene(s) {err} not found in class \"{name}\", " + |
| 741 # "the expression level for this gene will be considered NaN", | 701 "the expression level for this gene will be considered NaN", |
| 742 # ARGS.out_log) | 702 ARGS.out_log) |
| 743 | 703 |
| 744 print("Execution succeded") | 704 print("Execution succeded") |
| 745 | 705 |
| 746 ############################################################################### | 706 ############################################################################### |
| 747 if __name__ == "__main__": | 707 if __name__ == "__main__": |
