Mercurial > repos > bimib > marea
comparison Marea/marea.py @ 35:7b1971251c63 draft
Uploaded
author | bimib |
---|---|
date | Mon, 25 Nov 2019 05:40:30 -0500 |
parents | 1a97d1537623 |
children | 3af9d394367c |
comparison
Legend: equal, deleted, inserted, replaced
34:1a97d1537623 | 35:7b1971251c63 |
---|---|
3 import pandas as pd | 3 import pandas as pd |
4 import itertools as it | 4 import itertools as it |
5 import scipy.stats as st | 5 import scipy.stats as st |
6 import collections | 6 import collections |
7 import lxml.etree as ET | 7 import lxml.etree as ET |
8 import shutil | |
9 import pickle as pk | 8 import pickle as pk |
10 import math | 9 import math |
11 import os | 10 import os |
12 import argparse | 11 import argparse |
13 from svglib.svglib import svg2rlg | 12 from svglib.svglib import svg2rlg |
198 tmp.append(value) | 197 tmp.append(value) |
199 if value == None: | 198 if value == None: |
200 err.append(l[0]) | 199 err.append(l[0]) |
201 l = l[1:] | 200 l = l[1:] |
202 return (tmp, err) | 201 return (tmp, err) |
202 | |
203 | 203 |
204 def replace_gene(l, d): | 204 def replace_gene(l, d): |
205 if l =='and' or l == 'or': | 205 if l =='and' or l == 'or': |
206 return l | 206 return l |
207 else: | 207 else: |
575 return (ids, split_rules, gene_in_rule) | 575 return (ids, split_rules, gene_in_rule) |
576 | 576 |
577 ############################ gene ############################################# | 577 ############################ gene ############################################# |
578 | 578 |
579 def data_gene(gene, type_gene, name, gene_custom): | 579 def data_gene(gene, type_gene, name, gene_custom): |
580 args = process_args(sys.argv) | 580 args = process_args(sys.argv) |
581 for i in range(len(gene)): | 581 for i in range(len(gene)): |
582 tmp = gene.iloc[i, 0] | 582 tmp = gene.iloc[i, 0] |
583 if tmp.startswith(' ') or tmp.endswith(' '): | 583 if tmp.startswith(' ') or tmp.endswith(' '): |
584 gene.iloc[i, 0] = (tmp.lstrip()).rstrip() | 584 gene.iloc[i, 0] = (tmp.lstrip()).rstrip() |
585 gene_dup = [item for item, count in | 585 gene_dup = [item for item, count in |
586 collections.Counter(gene[gene.columns[0]]).items() if count > 1] | 586 collections.Counter(gene[gene.columns[0]]).items() if count > 1] |
587 pat_dup = [item for item, count in | 587 pat_dup = [item for item, count in |
588 collections.Counter(list(gene.columns)).items() if count > 1] | 588 collections.Counter(list(gene.columns)).items() if count > 1] |
589 | |
589 if gene_dup: | 590 if gene_dup: |
590 if gene_custom == None: | 591 if gene_custom == None: |
591 if args.rules_selector == 'HMRcore': | 592 if args.rules_selector == 'HMRcore': |
592 gene_in_rule = pk.load(open(args.tool_dir + | 593 gene_in_rule = pk.load(open(args.tool_dir + |
593 '/local/HMRcore_genes.p', 'rb')) | 594 '/local/HMRcore_genes.p', 'rb')) |
605 sys.exit('Execution aborted because gene ID ' | 606 sys.exit('Execution aborted because gene ID ' |
606 +str(tmp)+' in '+name+' is duplicated\n') | 607 +str(tmp)+' in '+name+' is duplicated\n') |
607 if pat_dup: | 608 if pat_dup: |
608 warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + | 609 warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + |
609 '\n') | 610 '\n') |
611 | |
610 return (gene.set_index(gene.columns[0])).to_dict() | 612 return (gene.set_index(gene.columns[0])).to_dict() |
611 | 613 |
612 ############################ resolve ########################################## | 614 ############################ resolve ########################################## |
613 | 615 |
614 def resolve(genes, rules, ids, resolve_none, name): | 616 def resolve(genes, rules, ids, resolve_none, name): |
615 resolve_rules = {} | 617 resolve_rules = {} |
618 names_array = [] | |
616 not_found = [] | 619 not_found = [] |
617 flag = False | 620 flag = False |
618 for key, value in genes.items(): | 621 for key, value in genes.items(): |
619 tmp_resolve = [] | 622 tmp_resolve = [] |
620 for i in range(len(rules)): | 623 for i in range(len(rules)): |
628 tmp_resolve.append(None) | 631 tmp_resolve.append(None) |
629 else: | 632 else: |
630 tmp_resolve.append(ris) | 633 tmp_resolve.append(ris) |
631 flag = True | 634 flag = True |
632 else: | 635 else: |
633 tmp_resolve.append(None) | 636 tmp_resolve.append(None) |
634 resolve_rules[key] = tmp_resolve | 637 resolve_rules[key] = tmp_resolve |
635 if flag is False: | 638 if flag is False: |
636 warning('Warning: no computable score (due to missing gene values)' + | 639 warning('Warning: no computable score (due to missing gene values)' + |
637 'for class ' + name + ', the class has been disregarded\n') | 640 'for class ' + name + ', the class has been disregarded\n') |
638 return (None, None) | 641 return (None, None) |
660 ', the class has been disregarded\n') | 663 ', the class has been disregarded\n') |
661 return class_pat | 664 return class_pat |
662 | 665 |
663 ############################ create_ras ####################################### | 666 ############################ create_ras ####################################### |
664 | 667 |
665 def create_ras (resolve_rules, dataset_name, single_ras): | 668 def create_ras (resolve_rules, dataset_name, single_ras, rules, ids): |
666 | 669 |
667 if resolve_rules == None: | 670 if resolve_rules == None: |
668 warning("Couldn't generate RAS for current dataset: " + dataset_name) | 671 warning("Couldn't generate RAS for current dataset: " + dataset_name) |
669 | 672 |
670 for geni in resolve_rules.values(): | 673 for geni in resolve_rules.values(): |
671 for i, valori in enumerate(geni): | 674 for i, valori in enumerate(geni): |
672 if valori == None: | 675 if valori == None: |
673 geni[i] = 'None' | 676 geni[i] = 'None' |
674 | 677 |
675 output_ras = pd.DataFrame.from_dict(resolve_rules) | 678 output_ras = pd.DataFrame.from_dict(resolve_rules) |
679 | |
680 output_ras.insert(0, 'Reactions', ids) | |
676 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) | 681 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) |
677 | 682 |
678 if (single_ras): | 683 if (single_ras): |
679 args = process_args(sys.argv) | 684 args = process_args(sys.argv) |
680 text_file = open(args.single_ras_file, "w") | 685 text_file = open(args.single_ras_file, "w") |
739 args = process_args(sys.argv) | 744 args = process_args(sys.argv) |
740 | 745 |
741 create_svg = check_bool(args.generate_svg) | 746 create_svg = check_bool(args.generate_svg) |
742 create_pdf = check_bool(args.generate_pdf) | 747 create_pdf = check_bool(args.generate_pdf) |
743 generate_ras = check_bool(args.generate_ras) | 748 generate_ras = check_bool(args.generate_ras) |
744 | 749 |
745 os.makedirs('result') | 750 os.makedirs('result') |
746 | 751 |
747 if generate_ras: | 752 if generate_ras: |
748 os.makedirs('ras') | 753 os.makedirs('ras') |
749 | 754 |
769 if args.rules_selector != 'Custom': | 774 if args.rules_selector != 'Custom': |
770 genes = data_gene(dataset, type_gene, name, None) | 775 genes = data_gene(dataset, type_gene, name, None) |
771 ids, rules = load_id_rules(recon.get(type_gene)) | 776 ids, rules = load_id_rules(recon.get(type_gene)) |
772 elif args.rules_selector == 'Custom': | 777 elif args.rules_selector == 'Custom': |
773 genes = data_gene(dataset, type_gene, name, gene_in_rule) | 778 genes = data_gene(dataset, type_gene, name, gene_in_rule) |
774 | 779 |
775 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 780 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) |
776 | 781 |
777 create_ras(resolve_rules, name, True) | 782 create_ras(resolve_rules, name, True, rules, ids) |
778 | 783 |
779 if err != None and err: | 784 if err != None and err: |
780 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 785 warning('Warning: gene\n' + str(err) + '\nnot found in class ' |
781 + name + ', the expression level for this gene ' + | 786 + name + ', the expression level for this gene ' + |
782 'will be considered NaN\n') | 787 'will be considered NaN\n') |
800 genes = data_gene(dataset, type_gene, name, None) | 805 genes = data_gene(dataset, type_gene, name, None) |
801 ids, rules = load_id_rules(recon.get(type_gene)) | 806 ids, rules = load_id_rules(recon.get(type_gene)) |
802 elif args.rules_selector == 'Custom': | 807 elif args.rules_selector == 'Custom': |
803 genes = data_gene(dataset, type_gene, name, gene_in_rule) | 808 genes = data_gene(dataset, type_gene, name, gene_in_rule) |
804 | 809 |
810 | |
805 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) | 811 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) |
806 | 812 |
807 if generate_ras: | 813 if generate_ras: |
808 create_ras(resolve_rules, name, False) | 814 create_ras(resolve_rules, name, False, rules, ids) |
809 | 815 |
810 if err != None and err: | 816 if err != None and err: |
811 warning('Warning: gene\n' + str(err) + '\nnot found in class ' | 817 warning('Warning: gene\n' + str(err) + '\nnot found in class ' |
812 + name + ', the expression level for this gene ' + | 818 + name + ', the expression level for this gene ' + |
813 'will be considered NaN\n') | 819 'will be considered NaN\n') |
835 + name + ', the expression level for this gene ' + | 841 + name + ', the expression level for this gene ' + |
836 'will be considered NaN\n') | 842 'will be considered NaN\n') |
837 if resolve_rules != None: | 843 if resolve_rules != None: |
838 class_pat = split_class(classes, resolve_rules) | 844 class_pat = split_class(classes, resolve_rules) |
839 if generate_ras: | 845 if generate_ras: |
840 create_ras(resolve_rules, name, False) | 846 create_ras(resolve_rules, name, False, rules, ids) |
841 | 847 |
842 | 848 |
843 if args.rules_selector == 'Custom': | 849 if args.rules_selector == 'Custom': |
844 if args.yes_no == 'yes': | 850 if args.yes_no == 'yes': |
845 try: | 851 try: |