Mercurial > repos > petr-novak > profrep
comparison visualization.py @ 0:a5f1638b73be draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 26 Jun 2019 08:01:42 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a5f1638b73be |
---|---|
1 #!/usr/bin/env python3 | |
2 """ visualization module """ | |
3 | |
4 import numpy as np | |
5 import configuration | |
6 import matplotlib.pyplot as plt | |
7 import matplotlib.lines as mlines | |
8 | |
9 | |
def _register_sequence(seq_id, seq_ids_all, seq_lengths_all, th_length, CN,
                       graphs_dict, seqs_long):
    ''' make sure seq_id is either plottable or recorded as too long

    Looks up the sequence length through the parallel lists seq_ids_all /
    seq_lengths_all. A sequence longer than th_length is appended to
    seqs_long (once); otherwise a figure is created for it in graphs_dict
    unless one already exists. Returns True when profiles of this sequence
    should be plotted, False when it is too long.
    '''
    seq_len = seq_lengths_all[seq_ids_all.index(seq_id)]
    if seq_id in graphs_dict:
        return True
    if seq_len > th_length:
        if seq_id not in seqs_long:
            seqs_long.append(seq_id)
        return False
    fig, ax = plot_figure(seq_id, seq_len, CN)
    graphs_dict[seq_id] = [fig, ax]
    return True


def vis_profrep(seq_ids_all, files_dict, seq_lengths_all, CN, HTML_DATA,
                seqs_all_part):
    ''' visualization of repetitive profiles

    For every repeat in files_dict ("ALL" first, the rest sorted) the file
    files_dict[repeat][0] is read. Lines containing "chrom" start a new
    sequence (its id follows "chrom="); all other lines are tab separated
    "position<TAB>value" records of the current sequence.

    seq_ids_all, seq_lengths_all -- parallel lists of sequence ids/lengths
    files_dict -- repeat name -> sequence whose first item is a file path
    CN -- truthy to label the y axis as copy numbers instead of hits
    HTML_DATA -- not used here, kept for interface compatibility
    seqs_all_part -- ids to process; reading of a file stops at the first
                     sequence id that is not in this collection

    Returns (graphs_dict, seqs_long): seq id -> [fig, ax] for the plotted
    sequences, and the ids skipped because they are longer than
    configuration.SEQ_LEN_VIZ.
    '''
    graphs_dict = {}
    seqs_long = []
    th_length = configuration.SEQ_LEN_VIZ
    sorted_keys = sorted(set(files_dict.keys()).difference({"ALL"}))
    sorted_keys.insert(0, "ALL")
    # every repeat gets its own palette index, whether or not it is plotted
    for plot_num, repeat in enumerate(sorted_keys):
        with open(files_dict[repeat][0], "r") as repeat_f:
            positions_all = []
            hits_all = []
            first_line = repeat_f.readline()
            seq_id_repeat = first_line.rstrip().split("chrom=")[1]
            include = _register_sequence(seq_id_repeat, seq_ids_all,
                                         seq_lengths_all, th_length, CN,
                                         graphs_dict, seqs_long)
            for line in repeat_f:
                if "chrom" in line:
                    # new sequence header: flush the profile collected so far
                    if include:
                        graphs_dict = plot_profile(
                            graphs_dict, seq_id_repeat, positions_all,
                            hits_all, repeat, plot_num)
                    positions_all = []
                    hits_all = []
                    seq_id_repeat = line.rstrip().split("chrom=")[1]
                    # re-evaluated per sequence so that one over-long
                    # sequence does not suppress the short ones after it
                    include = _register_sequence(seq_id_repeat, seq_ids_all,
                                                 seq_lengths_all, th_length,
                                                 CN, graphs_dict, seqs_long)
                    if seq_id_repeat not in seqs_all_part:
                        break
                elif include:
                    fields = line.rstrip().split("\t")
                    # numeric values; strings would be plotted as categories
                    positions_all.append(float(fields[0]))
                    hits_all.append(float(fields[1]))
        # flush the last sequence of the file
        if include:
            graphs_dict = plot_profile(graphs_dict, seq_id_repeat,
                                       positions_all, hits_all, repeat,
                                       plot_num)
    return graphs_dict, seqs_long
77 | |
78 | |
def plot_figure(seq_id, seq_length, CN):
    ''' create an empty 18x8 figure for one sequence

    The single subplot is titled with seq_id, its x axis spans
    0..seq_length and its y axis is labelled 'copy numbers' when CN is
    truthy, 'hits' otherwise. Returns (fig, ax).
    '''
    fig = plt.figure(figsize=(18, 8))
    ax = fig.add_subplot(111)
    y_label = 'copy numbers' if CN else 'hits'
    ax.set_xlabel('sequence bp')
    ax.set_ylabel(y_label)
    ax.set_title(seq_id)
    # ax is the current axes here, so this matches a pyplot-level xlim call
    ax.set_xlim([0, seq_length])
    return fig, ax
90 | |
91 | |
def plot_profile(graphs_dict, seq_id_repeat, positions_all, hits_all, repeat,
                 plot_num):
    ''' draw one repeat profile into the axes stored for seq_id_repeat

    The line is labelled with the last two "|"-separated parts of the
    repeat name (the whole name when it contains no "|") and coloured with
    configuration.COLORS_HEX[plot_num]. Returns graphs_dict.
    '''
    if "|" in repeat:
        legend_label = "|".join(repeat.split("|")[-2:])
    else:
        legend_label = repeat
    axes = graphs_dict[seq_id_repeat][1]
    axes.plot(positions_all,
              hits_all,
              label=legend_label,
              color=configuration.COLORS_HEX[plot_num])
    return graphs_dict
107 | |
108 | |
def vis_domains(fig, ax, seq_id, xminimal, xmaximal, domains):
    ''' visualization of protein domains

    Draws one horizontal segment per domain (from xminimal[i] to
    xmaximal[i]) 10 % above the current upper y limit of ax, and puts a
    legend with one entry per distinct domain on a twin axis.

    fig, ax -- figure and axes to draw into, returned unchanged as a pair
    seq_id -- not used here
    xminimal, xmaximal -- start and end coordinates of the segments
    domains -- domain name for every segment

    Colours come from configuration.COLORS_HEX read from its end; distinct
    domains are numbered in order of first appearance so that the same
    input always yields the same colours and legend order.
    '''
    y_upper_lim = ax.get_ylim()[1]
    # first-appearance order is stable between runs, unlike set() ordering
    dom_uniq = list(dict.fromkeys(domains))
    palette = list(reversed(configuration.COLORS_HEX))
    dom_color = {domain: palette[idx] for idx, domain in enumerate(dom_uniq)}
    colors_dom = [dom_color[domain] for domain in domains]
    ax.hlines([y_upper_lim + y_upper_lim / 10] * len(xminimal),
              xminimal,
              xmaximal,
              color=colors_dom,
              lw=2,
              label=dom_uniq)
    ax2 = ax.twinx()  # add second axis for domains
    lines_legend = [
        mlines.Line2D([], [],
                      color=dom_color[domain],
                      markersize=15,
                      label=domain)
        for domain in dom_uniq
    ]
    ax2.legend(lines_legend, [line.get_label() for line in lines_legend],
               bbox_to_anchor=(1.05, 1),
               loc='upper left',
               borderaxespad=0.)
    ax2.yaxis.set_visible(False)
    return fig, ax
139 | |
140 | |
def main():
    ''' script entry point; does nothing '''
    return None


if __name__ == "__main__":
    main()