diff visualization.py @ 0:a5f1638b73be draft

Uploaded
author petr-novak
date Wed, 26 Jun 2019 08:01:42 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/visualization.py	Wed Jun 26 08:01:42 2019 -0400
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+""" visualization module """
+
+import numpy as np
+import configuration
+import matplotlib.pyplot as plt
+import matplotlib.lines as mlines
+
+
+def vis_profrep(seq_ids_all, files_dict, seq_lengths_all, CN, HTML_DATA,
+                seqs_all_part):
+    ''' visualization of repetitive profiles'''
+    graphs_dict = {}
+    seq_id_repeats = []
+    th_length = configuration.SEQ_LEN_VIZ
+    exclude = set(['ALL'])
+    sorted_keys = sorted(set(files_dict.keys()).difference(exclude))
+    sorted_keys.insert(0, "ALL")
+    plot_num = 0
+    seqs_long = []
+    seqs_count = 1
+    seqs_max_limit = []
+    for repeat in sorted_keys:
+        with open(files_dict[repeat][0], "r") as repeat_f:
+            positions_all = []
+            hits_all = []
+            include = True
+            first_line = repeat_f.readline()
+            seq_id_repeat = first_line.rstrip().split("chrom=")[1]
+            seq_len_repeat = seq_lengths_all[seq_ids_all.index(seq_id_repeat)]
+            if seq_id_repeat not in graphs_dict.keys():
+                if seq_len_repeat > th_length:
+                    if seq_id_repeat not in seqs_long:
+                        seqs_long.append(seq_id_repeat)
+                    include = False
+                else:
+                    [fig, ax] = plot_figure(seq_id_repeat, seq_len_repeat, CN)
+                    graphs_dict[seq_id_repeat] = [fig, ax]
+            seq_id_repeats.append(seq_id_repeat)
+            for line in repeat_f:
+                if "chrom" in line:
+                    seqs_count += 1
+                    if include:
+                        graphs_dict = plot_profile(
+                            graphs_dict, seq_id_repeats[-1], positions_all,
+                            hits_all, repeat, plot_num)
+                        positions_all = []
+                        hits_all = []
+                    seq_id_repeat = line.rstrip().split("chrom=")[1]
+                    seq_len_repeat = seq_lengths_all[seq_ids_all.index(
+                        seq_id_repeat)]
+                    if seq_id_repeat not in graphs_dict.keys():
+                        if seq_len_repeat > th_length:
+                            if seq_id_repeat not in seqs_long:
+                                seqs_long.append(seq_id_repeat)
+                            include = False
+                        else:
+                            [fig, ax] = plot_figure(seq_id_repeat,
+                                                    seq_len_repeat, CN)
+                            graphs_dict[seq_id_repeat] = [fig, ax]
+                    seq_id_repeats.append(seq_id_repeat)
+                    if seq_id_repeat not in seqs_all_part:
+                        break
+                else:
+                    if include:
+                        positions_all.append(line.rstrip().split("\t")[0])
+                        hits_all.append(line.rstrip().split("\t")[1])
+        if include:
+            graphs_dict = plot_profile(graphs_dict, seq_id_repeats[-1],
+                                       positions_all, hits_all, repeat,
+                                       plot_num)
+            seq_id_repeats.append(seq_id_repeat)
+            positions_all = []
+            hits_all = []
+        plot_num += 1
+    return graphs_dict, seqs_long
+
+
+def plot_figure(seq_id, seq_length, CN):
+    fig = plt.figure(figsize=(18, 8))
+    ax = fig.add_subplot(111)
+    ax.set_xlabel('sequence bp')
+    if CN:
+        ax.set_ylabel('copy numbers')
+    else:
+        ax.set_ylabel('hits')
+    ax.set_title(seq_id)
+    plt.xlim([0, seq_length])
+    return fig, ax
+
+
+def plot_profile(graphs_dict, seq_id_repeat, positions_all, hits_all, repeat,
+                 plot_num):
+    if "|" in repeat:
+        graphs_dict[seq_id_repeat][1].plot(
+            positions_all,
+            hits_all,
+            label="|".join(repeat.split("|")[-2:]),
+            color=configuration.COLORS_HEX[plot_num])
+    else:
+        graphs_dict[seq_id_repeat][1].plot(
+            positions_all,
+            hits_all,
+            label=repeat,
+            color=configuration.COLORS_HEX[plot_num])
+    return graphs_dict
+
+
+def vis_domains(fig, ax, seq_id, xminimal, xmaximal, domains):
+    ''' visualization of protein domains'''
+    y_upper_lim = ax.get_ylim()[1]
+    dom_uniq = list(set(domains))
+    colors = [configuration.COLORS_HEX[dom_uniq.index(domain)]
+              for domain in domains]
+    colors_dom = [
+        list(reversed(configuration.COLORS_HEX))[dom_uniq.index(domain)]
+        for domain in domains
+    ]
+    colors_legend = list(reversed(configuration.COLORS_HEX))[0:len(dom_uniq)]
+    ax.hlines([y_upper_lim + y_upper_lim / 10] * len(xminimal),
+              xminimal,
+              xmaximal,
+              color=colors_dom,
+              lw=2,
+              label=dom_uniq)
+    lines_legend = []
+    ax2 = ax.twinx()  # add second axis for domains
+    for count_uniq in list(range(len(dom_uniq))):
+        lines_legend.append(mlines.Line2D([], [],
+                                          color=colors_legend[count_uniq],
+                                          markersize=15,
+                                          label=dom_uniq[count_uniq]))
+    ax2.legend(lines_legend, [line.get_label() for line in lines_legend],
+               bbox_to_anchor=(1.05, 1),
+               loc='upper left',
+               borderaxespad=0.)
+    ax2.yaxis.set_visible(False)
+    return fig, ax
+
+
+def main():
+    pass
+
+
+if __name__ == "__main__":
+    main()  # direct execution only calls main(), which currently does nothing