annotate visualization.py @ 6:1c25246f6e68 draft default tip

Uploaded
author petr-novak
date Thu, 27 Jun 2019 09:51:41 -0400
parents a5f1638b73be
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python3
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
2 """ visualization module """
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
3
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
4 import numpy as np
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
5 import configuration
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
6 import matplotlib.pyplot as plt
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
7 import matplotlib.lines as mlines
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
8
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
9
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
def _register_sequence(seq_id, seq_ids_all, seq_lengths_all, th_length, CN,
                       graphs_dict, seqs_long):
    """Prepare plotting state for one sequence and tell whether to plot it.

    A sequence that already has an entry in graphs_dict is plotted. Otherwise
    its length is looked up (position of seq_id in seq_ids_all, same position
    in seq_lengths_all): if it exceeds th_length the id is recorded once in
    seqs_long and the sequence is skipped, else a new figure is created with
    plot_figure() and stored in graphs_dict as [fig, ax].

    Returns:
        True when profile data for seq_id should be collected and drawn.

    Raises:
        ValueError: seq_id is not present in seq_ids_all.
    """
    if seq_id in graphs_dict:
        return True
    seq_len = seq_lengths_all[seq_ids_all.index(seq_id)]
    if seq_len > th_length:
        if seq_id not in seqs_long:
            seqs_long.append(seq_id)
        return False
    fig, ax = plot_figure(seq_id, seq_len, CN)
    graphs_dict[seq_id] = [fig, ax]
    return True


def vis_profrep(seq_ids_all, files_dict, seq_lengths_all, CN, HTML_DATA,
                seqs_all_part):
    """Plot the repetitive profiles of every repeat onto per-sequence figures.

    For each key of files_dict ("ALL" first, the remaining keys in sorted
    order) the file at files_dict[key][0] is read. A line containing "chrom"
    starts a new sequence whose id is the text after "chrom="; every other
    line is split on tabs into a position (field 0) and a value (field 1).
    The collected values of each sequence are drawn with plot_profile() onto
    the figure of that sequence.

    Args:
        seq_ids_all: List of sequence ids, parallel to seq_lengths_all.
        files_dict: Maps a repeat name to a sequence whose first item is the
            path of its profile file. The key "ALL" is always read, so it
            must be present.
        seq_lengths_all: Lengths of the sequences in seq_ids_all.
        CN: Passed on to plot_figure() to choose the y-axis label.
        HTML_DATA: Not used by this function; kept for interface
            compatibility.
        seqs_all_part: Container of sequence ids; reading of a file stops at
            the first header whose id is not in it.

    Returns:
        (graphs_dict, seqs_long): graphs_dict maps a sequence id to its
        [fig, ax] pair, seqs_long lists the ids whose length exceeds
        configuration.SEQ_LEN_VIZ and were therefore not plotted.

    Raises:
        IndexError: a file does not start with a "chrom=" header, or a data
            line has fewer than two tab-separated fields.
        ValueError: a sequence id is missing from seq_ids_all, or a data
            field is not numeric.
    """
    graphs_dict = {}
    seqs_long = []
    th_length = configuration.SEQ_LEN_VIZ
    sorted_keys = sorted(set(files_dict).difference({"ALL"}))
    sorted_keys.insert(0, "ALL")
    # NOTE(review): plot_num (the colour index) advances once per repeat file
    # whether or not anything was drawn for it, so a repeat keeps one colour
    # across all figures - confirm this matches the intended nesting.
    for plot_num, repeat in enumerate(sorted_keys):
        positions_all = []
        hits_all = []
        with open(files_dict[repeat][0], "r") as repeat_f:
            first_line = repeat_f.readline()
            seq_id_repeat = first_line.rstrip().split("chrom=")[1]
            # 'include' is re-evaluated for every sequence so that a short
            # sequence following an over-long one still gets its data drawn.
            include = _register_sequence(seq_id_repeat, seq_ids_all,
                                         seq_lengths_all, th_length, CN,
                                         graphs_dict, seqs_long)
            for line in repeat_f:
                if "chrom" in line:
                    # Header of the next sequence: flush what was collected
                    # for the previous one before switching.
                    if include:
                        graphs_dict = plot_profile(
                            graphs_dict, seq_id_repeat, positions_all,
                            hits_all, repeat, plot_num)
                    positions_all = []
                    hits_all = []
                    seq_id_repeat = line.rstrip().split("chrom=")[1]
                    include = _register_sequence(seq_id_repeat, seq_ids_all,
                                                 seq_lengths_all, th_length,
                                                 CN, graphs_dict, seqs_long)
                    if seq_id_repeat not in seqs_all_part:
                        break
                elif include:
                    fields = line.rstrip().split("\t")
                    # Convert to numbers: matplotlib treats lists of strings
                    # as categorical data rather than as coordinates.
                    positions_all.append(float(fields[0]))
                    hits_all.append(float(fields[1]))
        # Flush the last sequence of the file.
        if include:
            graphs_dict = plot_profile(graphs_dict, seq_id_repeat,
                                       positions_all, hits_all, repeat,
                                       plot_num)
    return graphs_dict, seqs_long
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
77
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
78
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
def plot_figure(seq_id, seq_length, CN):
    """Create an empty profile figure for one sequence.

    Args:
        seq_id: Sequence identifier, used as the plot title.
        seq_length: Upper limit of the x axis; the x range is 0..seq_length.
        CN: When truthy the y axis is labelled 'copy numbers', otherwise
            'hits'.

    Returns:
        (fig, ax): a new 18x8 inch figure and its single subplot.
    """
    fig = plt.figure(figsize=(18, 8))
    ax = fig.add_subplot(111)
    ax.set_xlabel('sequence bp')
    ax.set_ylabel('copy numbers' if CN else 'hits')
    ax.set_title(seq_id)
    # Set the limits on this axes object directly instead of relying on
    # pyplot's implicit "current axes" state.
    ax.set_xlim(0, seq_length)
    return fig, ax
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
90
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
91
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
def plot_profile(graphs_dict, seq_id_repeat, positions_all, hits_all, repeat,
                 plot_num):
    """Draw one repeat's profile onto the axes stored for a sequence.

    Args:
        graphs_dict: Maps a sequence id to a [fig, ax] pair; the axes (item
            1) of the entry for seq_id_repeat is drawn on.
        seq_id_repeat: Key of the target entry in graphs_dict.
        positions_all: x values of the profile.
        hits_all: y values of the profile.
        repeat: Repeat name used as the line label. Only the last two
            "|"-separated fields are kept; a name without "|" is used as is.
        plot_num: Index into configuration.COLORS_HEX selecting the line
            colour; it wraps around when it exceeds the palette size.

    Returns:
        The same graphs_dict object that was passed in.

    Raises:
        KeyError: seq_id_repeat has no entry in graphs_dict.
    """
    # For a name without "|" the split yields a single field, so the join
    # returns the name unchanged - no separate branch is needed.
    label = "|".join(repeat.split("|")[-2:])
    palette = configuration.COLORS_HEX
    # Cycle through the palette rather than failing with IndexError when
    # there are more repeats than colours.
    color = palette[plot_num % len(palette)]
    graphs_dict[seq_id_repeat][1].plot(positions_all,
                                       hits_all,
                                       label=label,
                                       color=color)
    return graphs_dict
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
107
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
108
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
def vis_domains(fig, ax, seq_id, xminimal, xmaximal, domains):
    """Draw protein domains as horizontal bars above a profile plot.

    One bar per domain is drawn at 110 % of the current upper y limit of ax,
    spanning xminimal[i]..xmaximal[i]. A legend with one entry per distinct
    domain is attached to a twin axes placed to the right of the plot.

    Args:
        fig: Figure the axes belongs to; returned unchanged.
        ax: Axes to draw the bars on.
        seq_id: Not used by this function; kept for interface compatibility.
        xminimal: Start coordinates of the bars.
        xmaximal: End coordinates of the bars.
        domains: Domain name of each bar; presumably parallel to xminimal
            and xmaximal - verify against the caller.

    Returns:
        (fig, ax) as passed in.
    """
    y_upper_lim = ax.get_ylim()[1]
    # Distinct domains in order of first appearance: unlike iterating a set
    # this gives every domain the same colour on every run.
    dom_uniq = list(dict.fromkeys(domains))
    # Domains take colours from the end of the palette.
    palette = list(reversed(configuration.COLORS_HEX))
    # Wrap around instead of raising IndexError when there are more distinct
    # domains than colours.
    color_of = {
        domain: palette[idx % len(palette)]
        for idx, domain in enumerate(dom_uniq)
    }
    colors_dom = [color_of[domain] for domain in domains]
    ax.hlines([y_upper_lim + y_upper_lim / 10] * len(xminimal),
              xminimal,
              xmaximal,
              color=colors_dom,
              lw=2,
              label=dom_uniq)
    ax2 = ax.twinx()  # add second axis for domains
    # Proxy lines carry the legend entries, one per distinct domain.
    lines_legend = [
        mlines.Line2D([], [],
                      color=color_of[domain],
                      markersize=15,
                      label=domain) for domain in dom_uniq
    ]
    ax2.legend(lines_legend, [line.get_label() for line in lines_legend],
               bbox_to_anchor=(1.05, 1),
               loc='upper left',
               borderaxespad=0.)
    ax2.yaxis.set_visible(False)
    return fig, ax
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
139
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
140
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
def main():
    """Do nothing; this module is meant to be imported, not run."""
    pass
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
143
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
144
a5f1638b73be Uploaded
petr-novak
parents:
diff changeset
# Run main() only when the file is executed as a script.
if __name__ == "__main__":
    main()