Mercurial > repos > petr-novak > profrep
view visualization.py @ 0:a5f1638b73be draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 26 Jun 2019 08:01:42 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python3 """ visualization module """ import numpy as np import configuration import matplotlib.pyplot as plt import matplotlib.lines as mlines def vis_profrep(seq_ids_all, files_dict, seq_lengths_all, CN, HTML_DATA, seqs_all_part): ''' visualization of repetitive profiles''' graphs_dict = {} seq_id_repeats = [] th_length = configuration.SEQ_LEN_VIZ exclude = set(['ALL']) sorted_keys = sorted(set(files_dict.keys()).difference(exclude)) sorted_keys.insert(0, "ALL") plot_num = 0 seqs_long = [] seqs_count = 1 seqs_max_limit = [] for repeat in sorted_keys: with open(files_dict[repeat][0], "r") as repeat_f: positions_all = [] hits_all = [] include = True first_line = repeat_f.readline() seq_id_repeat = first_line.rstrip().split("chrom=")[1] seq_len_repeat = seq_lengths_all[seq_ids_all.index(seq_id_repeat)] if seq_id_repeat not in graphs_dict.keys(): if seq_len_repeat > th_length: if seq_id_repeat not in seqs_long: seqs_long.append(seq_id_repeat) include = False else: [fig, ax] = plot_figure(seq_id_repeat, seq_len_repeat, CN) graphs_dict[seq_id_repeat] = [fig, ax] seq_id_repeats.append(seq_id_repeat) for line in repeat_f: if "chrom" in line: seqs_count += 1 if include: graphs_dict = plot_profile( graphs_dict, seq_id_repeats[-1], positions_all, hits_all, repeat, plot_num) positions_all = [] hits_all = [] seq_id_repeat = line.rstrip().split("chrom=")[1] seq_len_repeat = seq_lengths_all[seq_ids_all.index( seq_id_repeat)] if seq_id_repeat not in graphs_dict.keys(): if seq_len_repeat > th_length: if seq_id_repeat not in seqs_long: seqs_long.append(seq_id_repeat) include = False else: [fig, ax] = plot_figure(seq_id_repeat, seq_len_repeat, CN) graphs_dict[seq_id_repeat] = [fig, ax] seq_id_repeats.append(seq_id_repeat) if seq_id_repeat not in seqs_all_part: break else: if include: positions_all.append(line.rstrip().split("\t")[0]) hits_all.append(line.rstrip().split("\t")[1]) if include: graphs_dict = plot_profile(graphs_dict, seq_id_repeats[-1], positions_all, hits_all, repeat, plot_num) seq_id_repeats.append(seq_id_repeat) positions_all = [] hits_all = [] plot_num += 1 return graphs_dict, seqs_long def plot_figure(seq_id, seq_length, CN): fig = plt.figure(figsize=(18, 8)) ax = fig.add_subplot(111) ax.set_xlabel('sequence bp') if CN: ax.set_ylabel('copy numbers') else: ax.set_ylabel('hits') ax.set_title(seq_id) plt.xlim([0, seq_length]) return fig, ax def plot_profile(graphs_dict, seq_id_repeat, positions_all, hits_all, repeat, plot_num): if "|" in repeat: graphs_dict[seq_id_repeat][1].plot( positions_all, hits_all, label="|".join(repeat.split("|")[-2:]), color=configuration.COLORS_HEX[plot_num]) else: graphs_dict[seq_id_repeat][1].plot( positions_all, hits_all, label=repeat, color=configuration.COLORS_HEX[plot_num]) return graphs_dict def vis_domains(fig, ax, seq_id, xminimal, xmaximal, domains): ''' visualization of protein domains''' y_upper_lim = ax.get_ylim()[1] dom_uniq = list(set(domains)) colors = [configuration.COLORS_HEX[dom_uniq.index(domain)] for domain in domains] colors_dom = [ list(reversed(configuration.COLORS_HEX))[dom_uniq.index(domain)] for domain in domains ] colors_legend = list(reversed(configuration.COLORS_HEX))[0:len(dom_uniq)] ax.hlines([y_upper_lim + y_upper_lim / 10] * len(xminimal), xminimal, xmaximal, color=colors_dom, lw=2, label=dom_uniq) lines_legend = [] ax2 = ax.twinx() # add second axis for domains for count_uniq in list(range(len(dom_uniq))): lines_legend.append(mlines.Line2D([], [], color=colors_legend[count_uniq], markersize=15, label=dom_uniq[count_uniq])) ax2.legend(lines_legend, [line.get_label() for line in lines_legend], bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.) ax2.yaxis.set_visible(False) return fig, ax def main(): pass if __name__ == "__main__": main()