0
|
1 #!/usr/bin/env python3
|
|
2 """ visualization module """
|
|
3
|
|
4 import numpy as np
|
|
5 import configuration
|
|
6 import matplotlib.pyplot as plt
|
|
7 import matplotlib.lines as mlines
|
|
8
|
|
9
|
|
def _parse_seq_id(header_line):
    """Return the text that follows ``chrom=`` on a profile header line."""
    return header_line.rstrip().split("chrom=")[1]


def _ensure_figure(seq_id, seq_ids_all, seq_lengths_all, th_length, CN,
                   graphs_dict, seqs_long):
    """Prepare ``seq_id`` for plotting and report whether it can be plotted.

    A sequence longer than ``th_length`` is recorded once in ``seqs_long``
    and gets no figure; any other sequence gets a ``[fig, ax]`` entry in
    ``graphs_dict`` the first time it is seen.

    Returns:
        True when ``graphs_dict`` holds a figure for ``seq_id``.

    Raises:
        ValueError: if ``seq_id`` is not present in ``seq_ids_all``.
    """
    seq_len = seq_lengths_all[seq_ids_all.index(seq_id)]
    if seq_id not in graphs_dict:
        if seq_len > th_length:
            if seq_id not in seqs_long:
                seqs_long.append(seq_id)
        else:
            fig, ax = plot_figure(seq_id, seq_len, CN)
            graphs_dict[seq_id] = [fig, ax]
    return seq_id in graphs_dict


def vis_profrep(seq_ids_all, files_dict, seq_lengths_all, CN, HTML_DATA,
                seqs_all_part):
    """Plot the repeat profiles of every sequence that is short enough.

    Each profile file is expected to start with a header line containing
    ``chrom=<sequence id>``; every later line containing ``chrom`` starts the
    next sequence, and all other lines are tab-separated ``position``/``hits``
    pairs.

    Args:
        seq_ids_all: list of sequence ids, parallel to ``seq_lengths_all``.
        files_dict: maps a repeat name to a sequence whose first element is
            the path of that repeat's profile file; must contain ``"ALL"``,
            which is always plotted first.
        seq_lengths_all: sequence lengths, parallel to ``seq_ids_all``.
        CN: forwarded to ``plot_figure`` to select the y-axis label.
        HTML_DATA: not used by this function; kept for the callers.
        seqs_all_part: sequence ids to visualize; reading of a file stops at
            the first later header whose id is not in this collection.

    Returns:
        ``(graphs_dict, seqs_long)`` where ``graphs_dict`` maps a sequence id
        to ``[fig, ax]`` and ``seqs_long`` lists the ids whose length exceeds
        ``configuration.SEQ_LEN_VIZ`` (these are not plotted).
    """
    graphs_dict = {}
    seqs_long = []
    th_length = configuration.SEQ_LEN_VIZ
    sorted_keys = sorted(set(files_dict) - {"ALL"})
    sorted_keys.insert(0, "ALL")
    # plot_num selects the colour, so each repeat keeps one colour everywhere.
    for plot_num, repeat in enumerate(sorted_keys):
        positions_all = []
        hits_all = []
        with open(files_dict[repeat][0], "r") as repeat_f:
            seq_id_repeat = _parse_seq_id(repeat_f.readline())
            # Re-evaluated for every sequence so that one over-long sequence
            # does not suppress the sequences that follow it in the file.
            include = _ensure_figure(seq_id_repeat, seq_ids_all,
                                     seq_lengths_all, th_length, CN,
                                     graphs_dict, seqs_long)
            for line in repeat_f:
                if "chrom" in line:
                    # A new header closes the profile of the previous sequence.
                    if include:
                        graphs_dict = plot_profile(
                            graphs_dict, seq_id_repeat, positions_all,
                            hits_all, repeat, plot_num)
                        positions_all = []
                        hits_all = []
                    seq_id_repeat = _parse_seq_id(line)
                    include = _ensure_figure(seq_id_repeat, seq_ids_all,
                                             seq_lengths_all, th_length, CN,
                                             graphs_dict, seqs_long)
                    # NOTE(review): the figure of this sequence has already
                    # been set up when we stop here, and an empty profile is
                    # drawn for it below; kept as in the original code.
                    if seq_id_repeat not in seqs_all_part:
                        break
                elif include:
                    fields = line.rstrip().split("\t")
                    # Convert to numbers: matplotlib treats string data as
                    # categorical values instead of coordinates.
                    positions_all.append(float(fields[0]))
                    hits_all.append(float(fields[1]))
        # Flush the profile of the last sequence read from this file.
        if include:
            graphs_dict = plot_profile(graphs_dict, seq_id_repeat,
                                       positions_all, hits_all, repeat,
                                       plot_num)
    return graphs_dict, seqs_long
|
|
77
|
|
78
|
|
def plot_figure(seq_id, seq_length, CN):
    """Create an empty, labelled figure for the profile of one sequence.

    Args:
        seq_id: used as the plot title.
        seq_length: upper bound of the x axis (the lower bound is 0).
        CN: when truthy the y axis is labelled 'copy numbers', else 'hits'.

    Returns:
        The ``(figure, axes)`` pair of the new 18x8 inch plot.
    """
    figure = plt.figure(figsize=(18, 8))
    axes = figure.add_subplot(111)
    axes.set_title(seq_id)
    axes.set_xlabel('sequence bp')
    axes.set_ylabel('copy numbers' if CN else 'hits')
    # pyplot acts on the current axes, which is the subplot added above.
    plt.xlim([0, seq_length])
    return figure, axes
|
|
90
|
|
91
|
|
def plot_profile(graphs_dict, seq_id_repeat, positions_all, hits_all, repeat,
                 plot_num):
    """Draw the profile of one repeat onto the axes of one sequence.

    Args:
        graphs_dict: maps a sequence id to ``[fig, ax]``.
        seq_id_repeat: id of the sequence whose axes receive the line.
        positions_all: x values of the profile.
        hits_all: y values of the profile.
        repeat: repeat name; when it contains "|" only its last two
            "|"-separated parts are used as the legend label.
        plot_num: index into ``configuration.COLORS_HEX`` for the line colour.

    Returns:
        The same ``graphs_dict`` that was passed in.
    """
    if "|" in repeat:
        legend_label = "|".join(repeat.split("|")[-2:])
    else:
        legend_label = repeat
    target_axes = graphs_dict[seq_id_repeat][1]
    target_axes.plot(positions_all,
                     hits_all,
                     label=legend_label,
                     color=configuration.COLORS_HEX[plot_num])
    return graphs_dict
|
|
107
|
|
108
|
|
def vis_domains(fig, ax, seq_id, xminimal, xmaximal, domains):
    """Draw protein domains as horizontal bars above a profile plot.

    Args:
        fig: figure that owns ``ax``; returned unchanged.
        ax: axes to draw the domain bars on.
        seq_id: not used by this function; kept for the callers.
        xminimal: start coordinate of each domain bar.
        xmaximal: end coordinate of each domain bar.
        domains: domain name of each bar, parallel to ``xminimal`` and
            ``xmaximal``.

    Returns:
        The ``(fig, ax)`` pair that was passed in.

    Raises:
        IndexError: if there are more distinct domains than entries in
            ``configuration.COLORS_HEX``.
    """
    y_upper_lim = ax.get_ylim()[1]
    # Number the distinct domains in order of first appearance so that the
    # colour of a domain and the legend order are the same on every run.
    dom_index = {}
    dom_uniq = []
    for domain in domains:
        if domain not in dom_index:
            dom_index[domain] = len(dom_uniq)
            dom_uniq.append(domain)
    # Domains take colours from the end of the palette.
    palette = list(reversed(configuration.COLORS_HEX))
    colors_dom = [palette[dom_index[domain]] for domain in domains]
    colors_legend = palette[0:len(dom_uniq)]
    # All bars sit on one level, 10 % above the current upper y limit.
    ax.hlines([y_upper_lim + y_upper_lim / 10] * len(xminimal),
              xminimal,
              xmaximal,
              color=colors_dom,
              lw=2,
              label=dom_uniq)
    ax2 = ax.twinx()  # add second axis for domains
    lines_legend = [
        mlines.Line2D([], [], color=color, markersize=15, label=domain)
        for color, domain in zip(colors_legend, dom_uniq)
    ]
    ax2.legend(lines_legend, [line.get_label() for line in lines_legend],
               bbox_to_anchor=(1.05, 1),
               loc='upper left',
               borderaxespad=0.)
    ax2.yaxis.set_visible(False)
    return fig, ax
|
|
139
|
|
140
|
|
def main():
    """Placeholder entry point; the module is meant to be imported."""
    return None


# Call main() only when the file is executed directly as a script.
if __name__ == "__main__":
    main()
|