Mercurial > repos > bimib > marea
comparison Marea/marea.py @ 65:a61733753aec draft
Uploaded
author | bimib |
---|---|
date | Mon, 16 Mar 2020 08:21:29 -0400 |
parents | 53b4fca5bdf9 |
children | 19d704e24977 |
comparison
equal
deleted
inserted
replaced
64:53b4fca5bdf9 | 65:a61733753aec |
---|---|
535 split_rules.append([]) | 535 split_rules.append([]) |
536 if err_rules: | 536 if err_rules: |
537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') | 537 warning('Warning: wrong format rule in ' + str(err_rules) + '\n') |
538 return (split_rules, list(set(tmp_gene_in_rule))) | 538 return (split_rules, list(set(tmp_gene_in_rule))) |
539 | 539 |
540 def make_recon(data): | |
541 try: | |
542 import cobra as cb | |
543 import warnings | |
544 with warnings.catch_warnings(): | |
545 warnings.simplefilter('ignore') | |
546 recon = cb.io.read_sbml_model(data) | |
547 react = recon.reactions | |
548 rules = [react[i].gene_reaction_rule for i in range(len(react))] | |
549 ids = [react[i].id for i in range(len(react))] | |
550 except cb.io.sbml.CobraSBMLError: | |
551 try: | |
552 data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('') | |
553 if len(data.columns) < 2: | |
554 sys.exit('Execution aborted: wrong format of '+ | |
555 'custom datarules\n') | |
556 if not len(data.columns) == 2: | |
557 warning('Warning: more than 2 columns in custom datarules.\n' + | |
558 'Extra columns have been disregarded\n') | |
559 ids = list(data.iloc[:, 0]) | |
560 rules = list(data.iloc[:, 1]) | |
561 except pd.errors.EmptyDataError: | |
562 sys.exit('Execution aborted: wrong format of custom datarules\n') | |
563 except pd.errors.ParserError: | |
564 sys.exit('Execution aborted: wrong format of custom datarules\n') | |
565 split_rules, tmp_genes = do_rules(rules) | |
566 gene_in_rule = {} | |
567 for i in tmp_genes: | |
568 gene_in_rule[i] = 'ok' | |
569 return (ids, split_rules, gene_in_rule) | |
570 | 540 |
571 ############################ gene ############################################# | 541 ############################ gene ############################################# |
572 | 542 |
573 def data_gene(gene, type_gene, name, gene_custom): | 543 def data_gene(gene, type_gene, name, gene_custom): |
574 args = process_args(sys.argv) | 544 args = process_args(sys.argv) |
663 if (not class_pat) or (len(class_pat.keys()) < 2): | 633 if (not class_pat) or (len(class_pat.keys()) < 2): |
664 sys.exit('Execution aborted: classes provided for comparisons are ' + | 634 sys.exit('Execution aborted: classes provided for comparisons are ' + |
665 'less than two\n') | 635 'less than two\n') |
666 | 636 |
667 if comparison == "manyvsmany": | 637 if comparison == "manyvsmany": |
668 for i, j in it.combinations(class_pat.keys(), 2): | |
669 | |
670 tmp = {} | |
671 count = 0 | |
672 max_F_C = 0 | |
673 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | |
674 try: | |
675 stat_D, p_value = st.ks_2samp(l1, l2) | |
676 #sum(l1) da errore secondo me perchè ha null | |
677 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
678 if not isinstance(avg, str): | |
679 if max_F_C < abs(avg): | |
680 max_F_C = abs(avg) | |
681 tmp[ids[count]] = [float(p_value), avg] | |
682 count += 1 | |
683 except (TypeError, ZeroDivisionError): | |
684 count += 1 | |
685 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | |
686 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
687 tmp_csv = tmp_csv.reset_index() | |
688 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
689 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
690 | |
691 if create_svg or create_pdf: | |
692 if args.custom_rules == 'false' or (args.custom_rules == 'true' | |
693 and args.custom_map != ''): | |
694 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
695 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | |
696 with open(file_svg, 'wb') as new_map: | |
697 new_map.write(ET.tostring(core_map)) | |
698 | |
699 | |
700 if create_pdf: | |
701 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
702 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
703 | |
704 if not create_svg: | |
705 #Ho utilizzato il file svg per generare il pdf, | |
706 #ma l'utente non ne ha richiesto il ritorno, quindi | |
707 #lo elimino | |
708 | |
709 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | |
710 elif comparison == "onevsrest": | |
711 for single_cluster in class_pat.keys(): | |
712 t = [] | |
713 for k in class_pat.keys(): | |
714 if k != single_cluster: | |
715 t.append(class_pat.get(k)) | |
716 rest = [] | |
717 for i in t: | |
718 rest = rest + i | |
719 | |
720 tmp = {} | |
721 count = 0 | |
722 max_F_C = 0 | |
723 | |
724 for l1, l2 in zip(rest, class_pat.get(single_cluster)): | |
725 try: | |
726 stat_D, p_value = st.ks_2samp(l1, l2) | |
727 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
728 if not isinstance(avg, str): | |
729 if max_F_C < abs(avg): | |
730 max_F_C = abs(avg) | |
731 tmp[ids[count]] = [float(p_value), avg] | |
732 count += 1 | |
733 except (TypeError, ZeroDivisionError): | |
734 count += 1 | |
735 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv' | |
736 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
737 tmp_csv = tmp_csv.reset_index() | |
738 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
739 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
740 | |
741 if create_svg or create_pdf: | |
742 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | |
743 and args.yes_no == 'yes'): | |
744 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
745 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg' | |
746 with open(file_svg, 'wb') as new_map: | |
747 new_map.write(ET.tostring(core_map)) | |
748 | |
749 | |
750 if create_pdf: | |
751 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf' | |
752 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
753 | |
754 if not create_svg: | |
755 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg') | |
756 | |
757 elif comparison == "onevsmany": | |
758 for i, j in it.combinations(class_pat.keys(), 2): | 638 for i, j in it.combinations(class_pat.keys(), 2): |
759 tmp = {} | 639 tmp = {} |
760 count = 0 | 640 count = 0 |
761 max_F_C = 0 | 641 max_F_C = 0 |
762 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | 642 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): |
776 tmp_csv = tmp_csv.reset_index() | 656 tmp_csv = tmp_csv.reset_index() |
777 header = ['ids', 'P_Value', 'Log2(fold change)'] | 657 header = ['ids', 'P_Value', 'Log2(fold change)'] |
778 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | 658 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) |
779 | 659 |
780 if create_svg or create_pdf: | 660 if create_svg or create_pdf: |
781 if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom' | 661 if args.custom_rules == 'false' or (args.custom_rules == 'true' |
782 and args.yes_no == 'yes'): | 662 and args.custom_map != ''): |
783 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | 663 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) |
784 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | 664 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' |
785 with open(file_svg, 'wb') as new_map: | 665 with open(file_svg, 'wb') as new_map: |
786 new_map.write(ET.tostring(core_map)) | 666 new_map.write(ET.tostring(core_map)) |
787 | 667 |
789 if create_pdf: | 669 if create_pdf: |
790 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | 670 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' |
791 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | 671 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) |
792 | 672 |
793 if not create_svg: | 673 if not create_svg: |
794 #Ho utilizzato il file svg per generare il pdf, | 674 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') |
795 #ma l'utente non ne ha richiesto il ritorno, quindi | 675 elif comparison == "onevsrest": |
796 #lo elimino | 676 for single_cluster in class_pat.keys(): |
677 t = [] | |
678 for k in class_pat.keys(): | |
679 if k != single_cluster: | |
680 t.append(class_pat.get(k)) | |
681 rest = [] | |
682 for i in t: | |
683 rest = rest + i | |
684 | |
685 tmp = {} | |
686 count = 0 | |
687 max_F_C = 0 | |
688 | |
689 for l1, l2 in zip(rest, class_pat.get(single_cluster)): | |
690 try: | |
691 stat_D, p_value = st.ks_2samp(l1, l2) | |
692 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
693 if not isinstance(avg, str): | |
694 if max_F_C < abs(avg): | |
695 max_F_C = abs(avg) | |
696 tmp[ids[count]] = [float(p_value), avg] | |
697 count += 1 | |
698 except (TypeError, ZeroDivisionError): | |
699 count += 1 | |
700 tab = 'result/' + single_cluster + '_vs_rest (Tabular Result).tsv' | |
701 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
702 tmp_csv = tmp_csv.reset_index() | |
703 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
704 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
705 | |
706 if create_svg or create_pdf: | |
707 if args.custom_rules == 'false' or (args.custom_rules == 'true' | |
708 and args.custom_map != ''): | |
709 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
710 file_svg = 'result/' + single_cluster + '_vs_ rest (SVG Map).svg' | |
711 with open(file_svg, 'wb') as new_map: | |
712 new_map.write(ET.tostring(core_map)) | |
797 | 713 |
714 | |
715 if create_pdf: | |
716 file_pdf = 'result/' + single_cluster + '_vs_ rest (PDF Map).pdf' | |
717 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
718 | |
719 if not create_svg: | |
720 os.remove('result/' + single_cluster + '_vs_ rest (SVG Map).svg') | |
721 | |
722 elif comparison == "onevsmany": | |
723 for i, j in it.combinations(class_pat.keys(), 2): | |
724 if i != control and j != control: | |
725 continue | |
726 if i == control and j == control: | |
727 continue | |
728 tmp = {} | |
729 count = 0 | |
730 max_F_C = 0 | |
731 for l1, l2 in zip(class_pat.get(i), class_pat.get(j)): | |
732 try: | |
733 stat_D, p_value = st.ks_2samp(l1, l2) | |
734 #sum(l1) da errore secondo me perchè ha null | |
735 avg = fold_change(sum(l1) / len(l1), sum(l2) / len(l2)) | |
736 if not isinstance(avg, str): | |
737 if max_F_C < abs(avg): | |
738 max_F_C = abs(avg) | |
739 tmp[ids[count]] = [float(p_value), avg] | |
740 count += 1 | |
741 except (TypeError, ZeroDivisionError): | |
742 count += 1 | |
743 tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' | |
744 tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") | |
745 tmp_csv = tmp_csv.reset_index() | |
746 header = ['ids', 'P_Value', 'Log2(fold change)'] | |
747 tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) | |
748 | |
749 if create_svg or create_pdf: | |
750 if args.custom_rules == 'false' or (args.custom_rules == 'true' | |
751 and args.custom_map != ''): | |
752 fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C) | |
753 file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg' | |
754 with open(file_svg, 'wb') as new_map: | |
755 new_map.write(ET.tostring(core_map)) | |
756 | |
757 | |
758 if create_pdf: | |
759 file_pdf = 'result/' + i + '_vs_' + j + ' (PDF Map).pdf' | |
760 renderPDF.drawToFile(svg2rlg(file_svg), file_pdf) | |
761 | |
762 if not create_svg: | |
798 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') | 763 os.remove('result/' + i + '_vs_' + j + ' (SVG Map).svg') |
799 | 764 |
800 | 765 |
801 | 766 |
802 | 767 |
810 create_svg = check_bool(args.generate_svg) | 775 create_svg = check_bool(args.generate_svg) |
811 create_pdf = check_bool(args.generate_pdf) | 776 create_pdf = check_bool(args.generate_pdf) |
812 | 777 |
813 if os.path.isdir('result') == False: | 778 if os.path.isdir('result') == False: |
814 os.makedirs('result') | 779 os.makedirs('result') |
815 | |
816 if args.custom_rules == 'true': | |
817 ids, rules, gene_in_rule = make_recon(args.custom_rule) | |
818 | 780 |
819 class_pat = {} | 781 class_pat = {} |
820 | 782 |
821 if args.option == 'datasets': | 783 if args.option == 'datasets': |
822 num = 1 | 784 num = 1 |
877 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): | 839 except (ET.XMLSyntaxError, ET.XMLSchemaParseError): |
878 sys.exit('Execution aborted: custom map in wrong format') | 840 sys.exit('Execution aborted: custom map in wrong format') |
879 else: | 841 else: |
880 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') | 842 core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg') |
881 | 843 |
882 maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control) | 844 class_pat_trim = {} |
845 | |
846 for key in class_pat.keys(): | |
847 class_pat_trim[key.strip()] = class_pat[key] | |
848 | |
849 | |
850 maps(core_map, class_pat_trim, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control) | |
883 | 851 |
884 print('Execution succeded') | 852 print('Execution succeded') |
885 | 853 |
886 return None | 854 return None |
887 | 855 |