comparison Marea/marea.py @ 35:7b1971251c63 draft

Uploaded
author bimib
date Mon, 25 Nov 2019 05:40:30 -0500
parents 1a97d1537623
children 3af9d394367c
comparison
equal deleted inserted replaced
34:1a97d1537623 35:7b1971251c63
3 import pandas as pd 3 import pandas as pd
4 import itertools as it 4 import itertools as it
5 import scipy.stats as st 5 import scipy.stats as st
6 import collections 6 import collections
7 import lxml.etree as ET 7 import lxml.etree as ET
8 import shutil
9 import pickle as pk 8 import pickle as pk
10 import math 9 import math
11 import os 10 import os
12 import argparse 11 import argparse
13 from svglib.svglib import svg2rlg 12 from svglib.svglib import svg2rlg
198 tmp.append(value) 197 tmp.append(value)
199 if value == None: 198 if value == None:
200 err.append(l[0]) 199 err.append(l[0])
201 l = l[1:] 200 l = l[1:]
202 return (tmp, err) 201 return (tmp, err)
202
203 203
204 def replace_gene(l, d): 204 def replace_gene(l, d):
205 if l =='and' or l == 'or': 205 if l =='and' or l == 'or':
206 return l 206 return l
207 else: 207 else:
575 return (ids, split_rules, gene_in_rule) 575 return (ids, split_rules, gene_in_rule)
576 576
577 ############################ gene ############################################# 577 ############################ gene #############################################
578 578
579 def data_gene(gene, type_gene, name, gene_custom): 579 def data_gene(gene, type_gene, name, gene_custom):
580 args = process_args(sys.argv) 580 args = process_args(sys.argv)
581 for i in range(len(gene)): 581 for i in range(len(gene)):
582 tmp = gene.iloc[i, 0] 582 tmp = gene.iloc[i, 0]
583 if tmp.startswith(' ') or tmp.endswith(' '): 583 if tmp.startswith(' ') or tmp.endswith(' '):
584 gene.iloc[i, 0] = (tmp.lstrip()).rstrip() 584 gene.iloc[i, 0] = (tmp.lstrip()).rstrip()
585 gene_dup = [item for item, count in 585 gene_dup = [item for item, count in
586 collections.Counter(gene[gene.columns[0]]).items() if count > 1] 586 collections.Counter(gene[gene.columns[0]]).items() if count > 1]
587 pat_dup = [item for item, count in 587 pat_dup = [item for item, count in
588 collections.Counter(list(gene.columns)).items() if count > 1] 588 collections.Counter(list(gene.columns)).items() if count > 1]
589
589 if gene_dup: 590 if gene_dup:
590 if gene_custom == None: 591 if gene_custom == None:
591 if args.rules_selector == 'HMRcore': 592 if args.rules_selector == 'HMRcore':
592 gene_in_rule = pk.load(open(args.tool_dir + 593 gene_in_rule = pk.load(open(args.tool_dir +
593 '/local/HMRcore_genes.p', 'rb')) 594 '/local/HMRcore_genes.p', 'rb'))
605 sys.exit('Execution aborted because gene ID ' 606 sys.exit('Execution aborted because gene ID '
606 +str(tmp)+' in '+name+' is duplicated\n') 607 +str(tmp)+' in '+name+' is duplicated\n')
607 if pat_dup: 608 if pat_dup:
608 warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name + 609 warning('Warning: duplicated label\n' + str(pat_dup) + 'in ' + name +
609 '\n') 610 '\n')
611
610 return (gene.set_index(gene.columns[0])).to_dict() 612 return (gene.set_index(gene.columns[0])).to_dict()
611 613
612 ############################ resolve ########################################## 614 ############################ resolve ##########################################
613 615
614 def resolve(genes, rules, ids, resolve_none, name): 616 def resolve(genes, rules, ids, resolve_none, name):
615 resolve_rules = {} 617 resolve_rules = {}
618 names_array = []
616 not_found = [] 619 not_found = []
617 flag = False 620 flag = False
618 for key, value in genes.items(): 621 for key, value in genes.items():
619 tmp_resolve = [] 622 tmp_resolve = []
620 for i in range(len(rules)): 623 for i in range(len(rules)):
628 tmp_resolve.append(None) 631 tmp_resolve.append(None)
629 else: 632 else:
630 tmp_resolve.append(ris) 633 tmp_resolve.append(ris)
631 flag = True 634 flag = True
632 else: 635 else:
633 tmp_resolve.append(None) 636 tmp_resolve.append(None)
634 resolve_rules[key] = tmp_resolve 637 resolve_rules[key] = tmp_resolve
635 if flag is False: 638 if flag is False:
636 warning('Warning: no computable score (due to missing gene values)' + 639 warning('Warning: no computable score (due to missing gene values)' +
637 'for class ' + name + ', the class has been disregarded\n') 640 'for class ' + name + ', the class has been disregarded\n')
638 return (None, None) 641 return (None, None)
660 ', the class has been disregarded\n') 663 ', the class has been disregarded\n')
661 return class_pat 664 return class_pat
662 665
663 ############################ create_ras ####################################### 666 ############################ create_ras #######################################
664 667
665 def create_ras (resolve_rules, dataset_name, single_ras): 668 def create_ras (resolve_rules, dataset_name, single_ras, rules, ids):
666 669
667 if resolve_rules == None: 670 if resolve_rules == None:
668 warning("Couldn't generate RAS for current dataset: " + dataset_name) 671 warning("Couldn't generate RAS for current dataset: " + dataset_name)
669 672
670 for geni in resolve_rules.values(): 673 for geni in resolve_rules.values():
671 for i, valori in enumerate(geni): 674 for i, valori in enumerate(geni):
672 if valori == None: 675 if valori == None:
673 geni[i] = 'None' 676 geni[i] = 'None'
674 677
675 output_ras = pd.DataFrame.from_dict(resolve_rules) 678 output_ras = pd.DataFrame.from_dict(resolve_rules)
679
680 output_ras.insert(0, 'Reactions', ids)
676 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) 681 output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False)
677 682
678 if (single_ras): 683 if (single_ras):
679 args = process_args(sys.argv) 684 args = process_args(sys.argv)
680 text_file = open(args.single_ras_file, "w") 685 text_file = open(args.single_ras_file, "w")
739 args = process_args(sys.argv) 744 args = process_args(sys.argv)
740 745
741 create_svg = check_bool(args.generate_svg) 746 create_svg = check_bool(args.generate_svg)
742 create_pdf = check_bool(args.generate_pdf) 747 create_pdf = check_bool(args.generate_pdf)
743 generate_ras = check_bool(args.generate_ras) 748 generate_ras = check_bool(args.generate_ras)
744 749
745 os.makedirs('result') 750 os.makedirs('result')
746 751
747 if generate_ras: 752 if generate_ras:
748 os.makedirs('ras') 753 os.makedirs('ras')
749 754
769 if args.rules_selector != 'Custom': 774 if args.rules_selector != 'Custom':
770 genes = data_gene(dataset, type_gene, name, None) 775 genes = data_gene(dataset, type_gene, name, None)
771 ids, rules = load_id_rules(recon.get(type_gene)) 776 ids, rules = load_id_rules(recon.get(type_gene))
772 elif args.rules_selector == 'Custom': 777 elif args.rules_selector == 'Custom':
773 genes = data_gene(dataset, type_gene, name, gene_in_rule) 778 genes = data_gene(dataset, type_gene, name, gene_in_rule)
774 779
775 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) 780 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
776 781
777 create_ras(resolve_rules, name, True) 782 create_ras(resolve_rules, name, True, rules, ids)
778 783
779 if err != None and err: 784 if err != None and err:
780 warning('Warning: gene\n' + str(err) + '\nnot found in class ' 785 warning('Warning: gene\n' + str(err) + '\nnot found in class '
781 + name + ', the expression level for this gene ' + 786 + name + ', the expression level for this gene ' +
782 'will be considered NaN\n') 787 'will be considered NaN\n')
800 genes = data_gene(dataset, type_gene, name, None) 805 genes = data_gene(dataset, type_gene, name, None)
801 ids, rules = load_id_rules(recon.get(type_gene)) 806 ids, rules = load_id_rules(recon.get(type_gene))
802 elif args.rules_selector == 'Custom': 807 elif args.rules_selector == 'Custom':
803 genes = data_gene(dataset, type_gene, name, gene_in_rule) 808 genes = data_gene(dataset, type_gene, name, gene_in_rule)
804 809
810
805 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name) 811 resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)
806 812
807 if generate_ras: 813 if generate_ras:
808 create_ras(resolve_rules, name, False) 814 create_ras(resolve_rules, name, False, rules, ids)
809 815
810 if err != None and err: 816 if err != None and err:
811 warning('Warning: gene\n' + str(err) + '\nnot found in class ' 817 warning('Warning: gene\n' + str(err) + '\nnot found in class '
812 + name + ', the expression level for this gene ' + 818 + name + ', the expression level for this gene ' +
813 'will be considered NaN\n') 819 'will be considered NaN\n')
835 + name + ', the expression level for this gene ' + 841 + name + ', the expression level for this gene ' +
836 'will be considered NaN\n') 842 'will be considered NaN\n')
837 if resolve_rules != None: 843 if resolve_rules != None:
838 class_pat = split_class(classes, resolve_rules) 844 class_pat = split_class(classes, resolve_rules)
839 if generate_ras: 845 if generate_ras:
840 create_ras(resolve_rules, name, False) 846 create_ras(resolve_rules, name, False, rules, ids)
841 847
842 848
843 if args.rules_selector == 'Custom': 849 if args.rules_selector == 'Custom':
844 if args.yes_no == 'yes': 850 if args.yes_no == 'yes':
845 try: 851 try: