Mercurial > repos > petr-novak > profrep
comparison visualization.py @ 0:a5f1638b73be draft
Uploaded
| author | petr-novak |
|---|---|
| date | Wed, 26 Jun 2019 08:01:42 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a5f1638b73be |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 """ visualization module """ | |
| 3 | |
| 4 import numpy as np | |
| 5 import configuration | |
| 6 import matplotlib.pyplot as plt | |
| 7 import matplotlib.lines as mlines | |
| 8 | |
| 9 | |
def vis_profrep(seq_ids_all, files_dict, seq_lengths_all, CN, HTML_DATA,
                seqs_all_part):
    """Plot the repeat profiles of every sequence short enough to draw.

    One profile file is read per key of ``files_dict``: the ``"ALL"`` key
    first, then the remaining keys in sorted order.  Inside a file, a line
    containing ``chrom`` starts a new sequence (its ID is the text following
    ``chrom=``); every other line is read as a tab-separated
    ``position<TAB>value`` record of the current sequence.

    Args:
        seq_ids_all: list of sequence IDs, parallel to ``seq_lengths_all``.
        files_dict: mapping of repeat name to a sequence whose first item is
            the path of that repeat's profile file.  ``files_dict["ALL"]``
            is always accessed, so the key must be present.
        seq_lengths_all: list of sequence lengths, parallel to
            ``seq_ids_all``.
        CN: passed to ``plot_figure``; truthy selects the 'copy numbers'
            y-axis label instead of 'hits'.
        HTML_DATA: not used by this function; kept so existing callers keep
            working.
        seqs_all_part: container of sequence IDs.  Reading of a file stops
            at the first header whose sequence ID is not in it.

    Returns:
        tuple ``(graphs_dict, seqs_long)``: ``graphs_dict`` maps a sequence
        ID to ``[fig, ax]``; ``seqs_long`` lists, in order of first
        appearance, the IDs of sequences longer than
        ``configuration.SEQ_LEN_VIZ`` (these get no figure).

    Raises:
        ValueError: a sequence ID from a file is missing in ``seq_ids_all``
            or a record field is not numeric.
        IndexError: a header line lacks ``chrom=`` (this includes an empty
            file) or a record line has no tab-separated second field.
    """
    th_length = configuration.SEQ_LEN_VIZ
    graphs_dict = {}
    seqs_long = []
    sorted_keys = ["ALL"] + sorted(set(files_dict).difference({"ALL"}))
    # NOTE(review): the colour index advances once per repeat file, whether
    # or not anything was drawn for it -- confirm against the upstream file,
    # whose indentation is not visible in this view.
    for plot_num, repeat in enumerate(sorted_keys):
        with open(files_dict[repeat][0], "r") as repeat_f:
            positions_all = []
            hits_all = []
            seq_id_repeat, include = _start_sequence(
                repeat_f.readline(), seq_ids_all, seq_lengths_all, th_length,
                CN, graphs_dict, seqs_long)
            for line in repeat_f:
                if "chrom" in line:
                    # A new header closes the previous sequence: draw it.
                    if include:
                        graphs_dict = plot_profile(
                            graphs_dict, seq_id_repeat, positions_all,
                            hits_all, repeat, plot_num)
                    positions_all = []
                    hits_all = []
                    # `include` is recomputed for every sequence.  It used to
                    # stay False after the first over-long sequence, which
                    # left all following drawable sequences of the same file
                    # without a profile.
                    seq_id_repeat, include = _start_sequence(
                        line, seq_ids_all, seq_lengths_all, th_length, CN,
                        graphs_dict, seqs_long)
                    if seq_id_repeat not in seqs_all_part:
                        break
                elif include:
                    fields = line.rstrip().split("\t")
                    # Convert to numbers: matplotlib treats lists of strings
                    # as categorical data instead of numeric coordinates.
                    positions_all.append(float(fields[0]))
                    hits_all.append(float(fields[1]))
        # Draw the last sequence of the file (no header follows it).
        if include:
            graphs_dict = plot_profile(graphs_dict, seq_id_repeat,
                                       positions_all, hits_all, repeat,
                                       plot_num)
    return graphs_dict, seqs_long


def _start_sequence(header_line, seq_ids_all, seq_lengths_all, th_length, CN,
                    graphs_dict, seqs_long):
    """Register the sequence named in a header line; return (seq_id, include).

    Sequences longer than ``th_length`` are recorded once in ``seqs_long``
    and reported with ``include=False``.  Any other sequence gets a figure in
    ``graphs_dict`` on first sight and is reported with ``include=True``.
    """
    seq_id = header_line.rstrip().split("chrom=")[1]
    seq_length = seq_lengths_all[seq_ids_all.index(seq_id)]
    if seq_length > th_length:
        if seq_id not in seqs_long:
            seqs_long.append(seq_id)
        return seq_id, False
    if seq_id not in graphs_dict:
        fig, ax = plot_figure(seq_id, seq_length, CN)
        graphs_dict[seq_id] = [fig, ax]
    return seq_id, True
| 77 | |
| 78 | |
def plot_figure(seq_id, seq_length, CN):
    """Create an empty 18x8 figure with one labelled axes for a sequence.

    The x axis is labelled 'sequence bp' and limited to ``[0, seq_length]``;
    the y axis is labelled 'copy numbers' when ``CN`` is truthy and 'hits'
    otherwise; ``seq_id`` becomes the title.  Returns ``(fig, ax)``.
    """
    fig = plt.figure(figsize=(18, 8))
    ax = fig.add_subplot(111)
    ax.set_xlabel('sequence bp')
    ax.set_ylabel('copy numbers' if CN else 'hits')
    ax.set_title(seq_id)
    # `ax` is the current axes of the figure just created, so this is the
    # same call pyplot's xlim() would forward to.
    ax.set_xlim([0, seq_length])
    return fig, ax
| 90 | |
| 91 | |
def plot_profile(graphs_dict, seq_id_repeat, positions_all, hits_all, repeat,
                 plot_num):
    """Add one repeat's profile line to the axes of a sequence.

    The line is drawn on ``graphs_dict[seq_id_repeat][1]`` in colour
    ``configuration.COLORS_HEX[plot_num]``.  Its legend label is the last
    two ``|``-separated parts of ``repeat``.  The same ``graphs_dict`` is
    returned.
    """
    # For a name without "|" the split yields a single piece, so the join
    # hands the name back unchanged -- no separate branch is needed.
    legend_label = "|".join(repeat.split("|")[-2:])
    axes = graphs_dict[seq_id_repeat][1]
    line_color = configuration.COLORS_HEX[plot_num]
    axes.plot(positions_all, hits_all, label=legend_label, color=line_color)
    return graphs_dict
| 107 | |
| 108 | |
def vis_domains(fig, ax, seq_id, xminimal, xmaximal, domains):
    """Draw protein domains as coloured horizontal bars above the profiles.

    Every domain occurrence ``i`` becomes a bar from ``xminimal[i]`` to
    ``xmaximal[i]`` placed 10 % above the current upper y limit of ``ax``.
    Colours are taken from ``configuration.COLORS_HEX`` in reverse order, one
    per distinct domain name.  A legend of the distinct domains is attached
    to a twin axes and anchored outside the plot area on the right.

    Args:
        fig: figure owning ``ax``; returned unchanged.
        ax: axes to draw the bars on.
        seq_id: not used by this function; kept so existing callers keep
            working.
        xminimal: bar start coordinates, one per domain occurrence.
        xmaximal: bar end coordinates, parallel to ``xminimal``.
        domains: domain names, parallel to ``xminimal``.

    Returns:
        tuple ``(fig, ax)``.

    Raises:
        IndexError: there are more distinct domains than colours in
            ``configuration.COLORS_HEX``.
    """
    y_upper_lim = ax.get_ylim()[1]
    # First-appearance order (dicts keep insertion order) makes the
    # domain -> colour mapping reproducible; the order of a set of strings
    # changes from run to run.
    dom_uniq = list(dict.fromkeys(domains))
    # Reverse the palette once instead of once per domain occurrence.
    palette = list(reversed(configuration.COLORS_HEX))
    color_of = {domain: palette[idx] for idx, domain in enumerate(dom_uniq)}
    colors_dom = [color_of[domain] for domain in domains]
    ax.hlines([y_upper_lim + y_upper_lim / 10] * len(xminimal),
              xminimal,
              xmaximal,
              color=colors_dom,
              lw=2,
              label=dom_uniq)
    ax2 = ax.twinx()  # add second axis for domains
    # Proxy artists: one legend entry per distinct domain.
    lines_legend = [
        mlines.Line2D([], [],
                      color=color_of[domain],
                      markersize=15,
                      label=domain)
        for domain in dom_uniq
    ]
    ax2.legend(lines_legend, [line.get_label() for line in lines_legend],
               bbox_to_anchor=(1.05, 1),
               loc='upper left',
               borderaxespad=0.)
    ax2.yaxis.set_visible(False)
    return fig, ax
| 139 | |
| 140 | |
def main():
    """No-op entry point: running the module as a script does nothing."""
    return None


if __name__ == "__main__":
    main()
