Mercurial > repos > miller-lab > genome_diversity
view dpmix_plot.py @ 32:03c22b722882
remove BeautifulSoup dependency
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:54:23 -0400 |
parents | a631c2f6d913 |
children |
line wrap: on
line source
#!/usr/bin/env python import os import sys import math import matplotlib as mpl mpl.use('PDF') from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pyplot as plt from matplotlib.path import Path import matplotlib.patches as patches ################################################################################ def build_chrom_len_dict(dbkey, galaxy_data_index_dir): chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom') chrom_len_file = '{0}.len'.format(dbkey) chrom_len_path = os.path.join(chrom_len_root, chrom_len_file) chrom_len = {} try: with open(chrom_len_path) as fh: for line in fh: line = line.rstrip('\r\n') elems = line.split() if len(elems) == 2: chrom = elems[0] length = int(elems[1]) chrom_len[chrom] = length except: pass return chrom_len def parse_input_file(input_file): chroms = [] individuals = [] data = {} chrom_len = {} used_states = [] with open(input_file) as fh: for line in fh: line = line.strip() if line: elems = line.split() chrom = elems[0] p1, p2, state = map(int, elems[1:4]) id = elems[4] if state not in used_states: used_states.append(state) if chrom not in chroms: chroms.append(chrom) if id not in individuals: individuals.append(id) data.setdefault(chrom, {}) data[chrom].setdefault(id, []) data[chrom][id].append((p1, p2, state)) if p2 > chrom_len.setdefault(chrom, 0): chrom_len[chrom] = p2 return chroms, individuals, data, chrom_len, used_states def check_chroms(chroms, chrom_len, dbkey): error = 0 for chrom in chroms: if chrom not in chrom_len: print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom) error = 1 if error: sys.exit(1) def check_data(data, chrom_len, dbkey): error = 0 for chrom in data: chrom_beg = 0 chrom_end = chrom_len[chrom] for individual in data[chrom]: for p1, p2, state in data[chrom][individual]: if p1 >= p2: print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual) error = 1 if p1 < chrom_beg or p2 > chrom_end: print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual) error = 1 if error: sys.exit(1) def make_rectangle(p1, p2, color, bottom=0.0, top=1.0): verts = [ (p1, bottom), # left, bottom (p1, top), # left, top (p2, top), # right, top (p2, bottom), # right, bottom (0.0, 0.0) # ignored ] codes = [ Path.MOVETO, Path.LINETO, Path.LINETO, Path.LINETO, Path.CLOSEPOLY ] path = Path(verts, codes) return patches.PathPatch(path, facecolor=color, lw=0) def make_split_rectangle(p1, p2, top_color, bottom_color): patch1 = make_rectangle(p1, p2, bottom_color, top=0.5) patch2 = make_rectangle(p1, p2, top_color, bottom=0.5) return [patch1, patch2] def make_state_rectangle_2pop(p1, p2, state, chrom, individual): p1_color = 'r' p2_color = 'g' heterochromatin_color = '#c7c7c7' if state == 0: return [ make_rectangle(p1, p2, heterochromatin_color) ] elif state == 1: return [ make_rectangle(p1, p2, p1_color) ] elif state == 2: return [ make_rectangle(p1, p2, p2_color) ] elif state == 3: return make_split_rectangle(p1, p2, p1_color, p2_color) else: print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual) sys.exit(1) def make_state_rectangle_3pop(p1, p2, state, chrom, individual): p1_color = 'r' p2_color = 'g' p3_color = 'b' heterochromatin_color = '#c7c7c7' if state == 0: return [ make_rectangle(p1, p2, heterochromatin_color) ] if state == 1: return [ make_rectangle(p1, p2, p1_color) ] if state == 2: return [ make_rectangle(p1, p2, p2_color) ] if state == 3: return [ make_rectangle(p1, p2, p3_color) ] if state == 4: return make_split_rectangle(p1, p2, p1_color, p2_color) if state == 5: return make_split_rectangle(p1, p2, p1_color, p3_color) if state == 6: return make_split_rectangle(p1, p2, p2_color, p3_color) else: print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual) sys.exit(1) def nicenum(num, round=False): if num == 0: return 0.0 exp = int(math.floor(math.log10(num))) f = num / math.pow(10, exp) if round: if f < 1.5: nf = 1.0 elif f < 3.0: nf = 2.0 elif f < 7.0: nf = 5.0 else: nf = 10.0 else: if f <= 1.0: nf = 1.0 elif f <= 2.0: nf = 2.0 elif f <= 5.0: nf = 5.0 else: nf = 10.0 return nf * pow(10, exp) def tick_foo(beg, end, loose=False): ntick = 10 range = nicenum(end - beg, round=False) d = nicenum(range/(ntick - 1), round=True) digits = int(math.floor(math.log10(d))) if loose: graph_min = math.floor(beg/d) * d graph_max = math.ceil(end/d) * d else: graph_min = beg graph_max = end nfrac = max([-1 * digits, 0]) vals = [] stop = graph_max if loose: stop = graph_max + (0.5 * d) x = graph_min while x <= stop: vals.append(int(x)) x += d vals = vals[1:] # if not loose: # if vals[-1] < graph_max: # vals.append(int(graph_max)) labels = [] for val in vals: labels.append('{0}'.format(int(val/math.pow(10, digits)))) # labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits))) return vals, labels ################################################################################ ################################################################################ ################################################################################ ################################################################################ def space_for_legend(plot_params): space = 0.0 legend_states = plot_params['legend_states'] if legend_states: ind_space = plot_params['ind_space'] ind_height = plot_params['ind_height'] space += len(legend_states) * (ind_space + ind_height) - ind_space return space ################################################################################ def space_for_chroms(plot_params, chroms, individuals, data): space_dict = {} chrom_height = plot_params['chrom_height'] ind_space = plot_params['ind_space'] ind_height = plot_params['ind_height'] for chrom in chroms: space_dict[chrom] = chrom_height individual_count = 0 for individual in individuals: if individual in data[chrom]: individual_count += 1 space_dict[chrom] += individual_count * (ind_space + ind_height) return space_dict ################################################################################ def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir, state2name=None, populations=3): fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir) chroms, individuals, data, chrom_len, used_states = parse_input_file(input_file) ## populate chrom_len for chrom in chrom_len.keys(): if chrom in fs_chrom_len: chrom_len[chrom] = fs_chrom_len[chrom] #check_chroms(chroms, chrom_len, input_dbkey) check_data(data, chrom_len, input_dbkey) ## plot parameters plot_params = { 'plot_dpi': 300, 'page_width': 8.50, 'page_height': 11.00, 'top_margin': 0.10, 'bottom_margin': 0.10, 'chrom_space': 0.25, 'chrom_height': 0.25, 'ind_space': 0.10, 'ind_height': 0.25, 'legend_space': 0.10 } ## in the legend, only print out states that are ## 1) in the data ## - AND - ## 2) in the state2name map legend_states = [] if state2name is not None: for state in used_states: if state in state2name: legend_states.append(state) plot_params['legend_states'] = legend_states ## choose the correct make_state_rectangle method if populations == 3: plot_params['rectangle_method'] = make_state_rectangle_3pop elif populations == 2: plot_params['rectangle_method'] = make_state_rectangle_2pop pdf_pages = PdfPages(output_file) ## generate a list of chroms for each page needed_for_legend = space_for_legend(plot_params) needed_for_chroms = space_for_chroms(plot_params, chroms, individuals, data) chrom_space_per_page = plot_params['page_height'] chrom_space_per_page -= plot_params['top_margin'] + plot_params['bottom_margin'] chrom_space_per_page -= needed_for_legend + plot_params['legend_space'] chrom_space_per_page -= plot_params['chrom_space'] chroms_left = chroms[:] pages = [] space_left = chrom_space_per_page chrom_list = [] while chroms_left: chrom = chroms_left.pop(0) space_needed = needed_for_chroms[chrom] + plot_params['chrom_space'] if (space_needed > chrom_space_per_page): print >> sys.stderr, 'Multipage chroms not yet supported' sys.exit(1) ## sometimes 1.9 - 1.9 < 0 (-4.4408920985e-16) ## so, we make sure it's not more than a millimeter over if space_left - space_needed > -0.04: chrom_list.append(chrom) space_left -= space_needed else: pages.append(chrom_list[:]) chrom_list = [] chroms_left.insert(0, chrom) space_left = chrom_space_per_page ############################################################################ plot_dpi = plot_params['plot_dpi'] page_width = plot_params['page_width'] page_height = plot_params['page_height'] top_margin = plot_params['top_margin'] ind_space = plot_params['ind_space'] ind_height = plot_params['ind_height'] make_state_rectangle = plot_params['rectangle_method'] legend_space = plot_params['legend_space'] chrom_space = plot_params['chrom_space'] chrom_height = plot_params['chrom_height'] for page in pages: fig = plt.figure(figsize=(page_width, page_height), dpi=plot_dpi) bottom = 1.0 - (top_margin/page_height) # print legend if legend_states: top = True for state in sorted(legend_states): if top: bottom -= ind_height/page_height top = False else: bottom -= (ind_space + ind_height)/page_height ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/page_height]) plt.axis('off') ax1.set_xlim(0, 1) ax1.set_ylim(0, 1) for patch in make_state_rectangle(0, 1, state, 'legend', state2name[state]): ax1.add_patch(patch) ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/page_height], frame_on=False) plt.axis('off') plt.text(0.0, 0.5, state2name[state], fontsize=10, ha='left', va='center') bottom -= legend_space/page_height # print chroms top = True for chrom in page: length = chrom_len[chrom] vals, labels = tick_foo(0, length) if top: bottom -= chrom_height/page_height top = False else: bottom -= (chrom_space + chrom_height)/page_height ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/page_height]) plt.axis('off') plt.text(0.5, 0.5, chrom, fontsize=14, ha='center') individual_count = 0 for individual in individuals: if individual in data[chrom]: individual_count += 1 i = 0 for individual in individuals: if individual in data[chrom]: i += 1 bottom -= (ind_space + ind_height)/page_height ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/page_height]) plt.axis('off') plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center') ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/page_height], frame_on=False) ax2.set_xlim(0, length) ax2.set_ylim(0, 1) if i != individual_count: plt.axis('off') else: ax2.tick_params(top=False, left=False, right=False, labelleft=False) ax2.set_xticks(vals) ax2.set_xticklabels(labels) for p1, p2, state in sorted(data[chrom][individual]): for patch in make_state_rectangle(p1, p2, state, chrom, individual): ax2.add_patch(patch) # extend last state to end of chrom if p2 < length: for patch in make_state_rectangle(p2, length, state, chrom, individual): ax2.add_patch(patch) pdf_pages.savefig(fig) plt.close(fig) pdf_pages.close() ################################################################################ if __name__ == '__main__': make_dpmix_plot('loxAfr3', 'output.dat', 'output2_files/picture.pdf', '/scratch/galaxy/home/oocyte/galaxy_oocyte/tool-data', state2name={0: 'heterochromatin', 1: 'reference', 2: 'asian'}, populations=2) # input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5] # make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir) sys.exit(0) ## notes # 1) pass in a state to name mapping # 2) only print out names for states which exist in the data, and are in the state to name mapping