Mercurial > repos > thondeboer > neat_genreads
annotate utilities/plotMutModel.py @ 2:8a739c944dbf draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author | thondeboer |
---|---|
date | Tue, 15 May 2018 16:22:08 -0400 |
parents | 6e75a84e9338 |
children |
rev | line source |
---|---|
0
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
2 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
3 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
4 # a quick script for comparing mutation models |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
5 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
6 # python plotMutModel.py -i model1.p [model2.p] [model3.p]... -l legend_label1 [legend_label2] [legend_label3]... -o path/to/pdf_plot_prefix |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
7 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
8 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
9 import sys |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
10 import pickle |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
11 import bisect |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
12 import numpy as np |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
13 import matplotlib.pyplot as mpl |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
14 import matplotlib.colors as colors |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
15 import matplotlib.cm as cmx |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
16 import argparse |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
17 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
18 #mpl.rc('text',usetex=True) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
19 #mpl.rcParams['text.latex.preamble']=[r"\usepackage{amsmath}"] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
20 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
21 parser = argparse.ArgumentParser(description='Plot and compare mutation models from genMutModel.py Usage: python plotMutModel.py -i model1.p [model2.p] [model3.p]... -l legend_label1 [legend_label2] [legend_label3]... -o path/to/pdf_plot_prefix') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
22 parser.add_argument('-i', type=str, required=True, metavar='<str>', nargs='+', help="* mutation_model_1.p [mutation_model_2.p] [mutation_model_3] ...") |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
23 parser.add_argument('-l', type=str, required=True, metavar='<str>', nargs='+', help="* legend labels: model1_name [model2_name] [model3_name]...") |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
24 parser.add_argument('-o', type=str, required=True, metavar='<str>', help="* output pdf prefix") |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
25 args = parser.parse_args() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
26 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
27 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
28 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
29 def getColor(i,N,colormap='jet'): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
30 cm = mpl.get_cmap(colormap) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
31 cNorm = colors.Normalize(vmin=0, vmax=N+1) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
32 scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
33 colorVal = scalarMap.to_rgba(i) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
34 return colorVal |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
35 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
36 def isInBed(track,ind): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
37 if ind in track: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
38 return True |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
39 elif bisect.bisect(track,ind)%1 == 1: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
40 return True |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
41 else: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
42 return False |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
43 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
44 def getBedOverlap(track,ind_s,ind_e): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
45 if ind_s in track: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
46 myInd = track.index(ind_s) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
47 return min([track[myInd+1]-ind_s+1,ind_e-ind_s+1]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
48 else: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
49 myInd = bisect.bisect(track,ind_s) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
50 if myInd%1 and myInd < len(track)-1: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
51 return min([track[myInd+1]-ind_s+1,ind_e-ind_s+1]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
52 return 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
53 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
54 # a waaaaaaay slower version of the above function ^^ |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
55 #def getTrackOverlap(track1,track2): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
56 # otrack = [0 for n in xrange(max(track1+track2)+1)] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
57 # for i in xrange(0,len(track1),2): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
58 # for j in xrange(track1[i],track1[i+1]+1): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
59 # otrack[j] = 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
60 # ocount = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
61 # for i in xrange(0,len(track2),2): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
62 # for j in xrange(track2[i],track2[i+1]+1): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
63 # if otrack[j]: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
64 # ocount += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
65 # return ocount |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
66 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
67 OUP = args.o |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
68 LAB = args.l |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
69 #print LAB |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
70 INP = args.i |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
71 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
72 N_FILES = len(INP) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
73 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
74 mpl.rcParams.update({'font.size': 13, 'font.weight':'bold', 'lines.linewidth': 3}) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
75 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
76 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
77 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
78 # BASIC STATS |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
79 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
80 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
81 mpl.figure(0,figsize=(12,10)) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
82 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
83 mpl.subplot(2,2,1) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
84 colorInd = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
85 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
86 myCol = getColor(colorInd,N_FILES) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
87 colorInd += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
88 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
89 [AVG_MUT_RATE, SNP_FREQ, INDEL_FREQ] = [DATA_DICT['AVG_MUT_RATE'], DATA_DICT['SNP_FREQ'], DATA_DICT['INDEL_FREQ']] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
90 mpl.bar([colorInd-1],[AVG_MUT_RATE],1.,color=myCol) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
91 mpl.xlim([-1,N_FILES+1]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
92 mpl.grid() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
93 mpl.xticks([],[]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
94 mpl.ylabel('Frequency') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
95 mpl.title('Overall mutation rate (1/bp)') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
96 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
97 mpl.subplot(2,2,2) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
98 colorInd = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
99 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
100 myCol = getColor(colorInd,N_FILES) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
101 colorInd += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
102 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
103 [AVG_MUT_RATE, SNP_FREQ, INDEL_FREQ] = [DATA_DICT['AVG_MUT_RATE'], DATA_DICT['SNP_FREQ'], DATA_DICT['INDEL_FREQ']] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
104 mpl.bar([colorInd-1],[SNP_FREQ],1.,color=myCol) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
105 mpl.bar([colorInd-1],[1.-SNP_FREQ],1.,color=myCol,bottom=[SNP_FREQ],hatch='/') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
106 mpl.axis([-1,N_FILES+1,0,1.2]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
107 mpl.grid() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
108 mpl.xticks([],[]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
109 mpl.yticks([0,.2,.4,.6,.8,1.],[0,0.2,0.4,0.6,0.8,1.0]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
110 mpl.ylabel('Frequency') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
111 mpl.title('SNP freq [ ] & indel freq [//]') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
112 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
113 mpl.subplot(2,1,2) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
114 colorInd = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
115 legText = LAB |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
116 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
117 myCol = getColor(colorInd,N_FILES) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
118 colorInd += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
119 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
120 [AVG_MUT_RATE, SNP_FREQ, INDEL_FREQ] = [DATA_DICT['AVG_MUT_RATE'], DATA_DICT['SNP_FREQ'], DATA_DICT['INDEL_FREQ']] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
121 x = sorted(INDEL_FREQ.keys()) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
122 y = [INDEL_FREQ[n] for n in x] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
123 mpl.plot(x,y,color=myCol) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
124 #legText.append(fn) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
125 mpl.grid() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
126 mpl.xlabel('Indel size (bp)', fontweight='bold') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
127 mpl.ylabel('Frequency') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
128 mpl.title('Indel frequency by size (- deletion, + insertion)') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
129 mpl.legend(legText) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
130 #mpl.show() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
131 mpl.savefig(OUP+'_plot1_mutRates.pdf') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
132 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
133 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
134 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
135 # TRINUC PRIOR PROB |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
136 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
137 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
138 mpl.figure(1,figsize=(14,6)) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
139 colorInd = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
140 legText = LAB |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
141 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
142 myCol = getColor(colorInd,N_FILES) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
143 colorInd += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
144 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
145 TRINUC_MUT_PROB = DATA_DICT['TRINUC_MUT_PROB'] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
146 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
147 x = range(colorInd-1,len(TRINUC_MUT_PROB)*N_FILES,N_FILES) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
148 xt = sorted(TRINUC_MUT_PROB.keys()) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
149 y = [TRINUC_MUT_PROB[k] for k in xt] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
150 markerline, stemlines, baseline = mpl.stem(x,y,'-.') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
151 mpl.setp(markerline, 'markerfacecolor', myCol) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
152 mpl.setp(markerline, 'markeredgecolor', myCol) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
153 mpl.setp(baseline, 'color', myCol, 'linewidth', 0) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
154 mpl.setp(stemlines, 'color', myCol, 'linewidth', 3) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
155 if colorInd == 1: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
156 mpl.xticks(x,xt,rotation=90) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
157 #legText.append(fn) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
158 mpl.grid() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
159 mpl.ylabel('p(trinucleotide mutates)') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
160 mpl.legend(legText) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
161 #mpl.show() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
162 mpl.savefig(OUP+'_plot2_trinucPriors.pdf') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
163 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
164 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
165 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
166 # TRINUC TRANS PROB |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
167 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
168 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
169 plotNum = 3 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
170 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
171 fig = mpl.figure(plotNum,figsize=(12,10)) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
172 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
173 TRINUC_TRANS_PROBS = DATA_DICT['TRINUC_TRANS_PROBS'] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
174 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
175 xt2 = [m[3] for m in sorted([(n[0],n[2],n[1],n) for n in xt])] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
176 reverse_dict = {xt2[i]:i for i in xrange(len(xt2))} |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
177 Z = np.zeros((64,64)) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
178 L = [['' for n in xrange(64)] for m in xrange(64)] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
179 for k in TRINUC_TRANS_PROBS: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
180 i = reverse_dict[k[0]] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
181 j = reverse_dict[k[1]] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
182 Z[i][j] = TRINUC_TRANS_PROBS[k] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
183 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
184 HARDCODED_LABEL = ['A_A','A_C','A_G','A_T', |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
185 'C_A','C_C','C_G','C_T', |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
186 'G_A','G_C','G_G','G_T', |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
187 'T_A','T_C','T_G','T_T'] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
188 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
189 for pi in xrange(16): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
190 mpl.subplot(4,4,pi+1) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
191 Z2 = Z[pi*4:(pi+1)*4,pi*4:(pi+1)*4] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
192 X, Y = np.meshgrid( range(0,len(Z2[0])+1), range(0,len(Z2)+1) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
193 im = mpl.pcolormesh(X,Y,Z2[::-1,:],vmin=0.0,vmax=0.5) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
194 mpl.axis([0,4,0,4]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
195 mpl.xticks([0.5,1.5,2.5,3.5],['A','C','G','T']) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
196 mpl.yticks([0.5,1.5,2.5,3.5],['T','G','C','A']) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
197 mpl.text(1.6, 1.8, HARDCODED_LABEL[pi], color='white') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
198 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
199 # colorbar haxx |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
200 fig.subplots_adjust(right=0.8) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
201 cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
202 cb = fig.colorbar(im,cax=cbar_ax) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
203 cb.set_label(r"p(X$Y_1$Z->X$Y_2$Z | X_Z mutates)") |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
204 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
205 #mpl.tight_layout() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
206 #mpl.figtext(0.24,0.94,'Trinucleotide Mutation Frequency',size=20) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
207 #mpl.show() |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
208 mpl.savefig(OUP+'_plot'+str(plotNum)+'_trinucTrans.pdf') |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
209 plotNum += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
210 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
211 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
212 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
213 # HIGH MUT REGIONS |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
214 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
215 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
216 track_byFile_byChr = [{} for n in INP] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
217 bp_total_byFile = [0 for n in INP] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
218 colorInd = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
219 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
220 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
221 HIGH_MUT_REGIONS = DATA_DICT['HIGH_MUT_REGIONS'] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
222 for region in HIGH_MUT_REGIONS: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
223 if region[0] not in track_byFile_byChr[colorInd]: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
224 track_byFile_byChr[colorInd][region[0]] = [] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
225 track_byFile_byChr[colorInd][region[0]].extend([region[1],region[2]]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
226 bp_total_byFile[colorInd] += region[2]-region[1]+1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
227 colorInd += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
228 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
229 bp_overlap_count = [[0 for m in INP] for n in INP] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
230 for i in xrange(N_FILES): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
231 bp_overlap_count[i][i] = bp_total_byFile[i] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
232 for j in xrange(i+1,N_FILES): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
233 for k in track_byFile_byChr[i].keys(): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
234 if k in track_byFile_byChr[j]: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
235 for ii in xrange(len(track_byFile_byChr[i][k][::2])): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
236 bp_overlap_count[i][j] += getBedOverlap(track_byFile_byChr[j][k],track_byFile_byChr[i][k][ii*2],track_byFile_byChr[i][k][ii*2+1]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
237 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
238 print '' |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
239 print 'HIGH_MUT_REGION OVERLAP BETWEEN '+str(N_FILES)+' MODELS...' |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
240 for i in xrange(N_FILES): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
241 for j in xrange(i,N_FILES): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
242 nDissimilar = (bp_overlap_count[i][i]-bp_overlap_count[i][j]) + (bp_overlap_count[j][j]-bp_overlap_count[i][j]) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
243 if bp_overlap_count[i][j] == 0: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
244 percentageV = 0.0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
245 else: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
246 percentageV = bp_overlap_count[i][j]/float(bp_overlap_count[i][j]+nDissimilar) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
247 print 'overlap['+str(i)+','+str(j)+'] = '+str(bp_overlap_count[i][j])+' bp ({0:.3f}%)'.format(percentageV*100.) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
248 print '' |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
249 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
250 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
251 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
252 # COMMON VARIANTS |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
253 # |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
254 ################################################# |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
255 setofVars = [set([]) for n in INP] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
256 colorInd = 0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
257 for fn in INP: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
258 DATA_DICT = pickle.load( open( fn, "rb" ) ) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
259 COMMON_VARIANTS = DATA_DICT['COMMON_VARIANTS'] |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
260 for n in COMMON_VARIANTS: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
261 setofVars[colorInd].add(n) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
262 colorInd += 1 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
263 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
264 print '' |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
265 print 'COMMON_VARIANTS OVERLAP BETWEEN '+str(N_FILES)+' MODELS...' |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
266 for i in xrange(N_FILES): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
267 for j in xrange(i,N_FILES): |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
268 overlapCount = len(setofVars[i].intersection(setofVars[j])) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
269 nDissimilar = (len(setofVars[i])-overlapCount) + (len(setofVars[j])-overlapCount) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
270 if overlapCount == 0: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
271 percentageV = 0.0 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
272 else: |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
273 percentageV = overlapCount/float(overlapCount+nDissimilar) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
274 print 'overlap['+str(i)+','+str(j)+'] = '+str(overlapCount)+' variants ({0:.3f}%)'.format(percentageV*100.) |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
275 print '' |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
276 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
277 |
6e75a84e9338
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
thondeboer
parents:
diff
changeset
|
278 |