annotate GenotypeTRcorrection.py @ 14:20dc70f85ff7 draft default tip

Uploaded
author arkarachai-fungtammasan
date Sun, 24 Jul 2016 18:15:32 -0400
parents d5ed5c2e25c3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
1 ### import libraries ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
2 import sys
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
3 import collections, math
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
4 import heapq
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
5
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
6
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
7
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
8
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
9
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
10 ### basic function ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
11 def stop_err(msg):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
12 sys.stderr.write(msg)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
13 sys.exit()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
14
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
15 def averagelist(a,b,expectedlevelofminor):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
16 product=[]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
17 for i in range(len(a)):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
18 product.append((1-expectedlevelofminor)*a[i]+expectedlevelofminor*b[i])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
19
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
20 return product
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
21
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
22 def complement_base(read):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
23 collect=''
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
24 for i in read:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
25 if i.upper()=='A':
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
26 collect+='T'
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
27 elif i.upper()=='T':
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
28 collect+='A'
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
29 elif i.upper()=='C':
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
30 collect+='G'
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
31 elif i.upper()=='G':
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
32 collect+='C'
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
33 return collect
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
34 def makeallpossible(read):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
35 collect=[]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
36 for i in range(len(read)):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
37 tmp= read[i:]+read[:i]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
38 collect.append(tmp)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
39 collect.append(complement_base(tmp))
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
40 return collect
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
41
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
42 def motifsimplify(base):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
43 '''str--> str
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
44 '''
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
45 motiflength=len(base)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
46 temp=list(set(ALLMOTIF[motiflength]).intersection(set(makeallpossible(base))))
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
47
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
48 return temp[0]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
49
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
50 def majorallele(seq):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
51 binseq=list(set(seq))
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
52 binseq.sort(reverse=True) # highly mutate mode
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
53 #binseq.sort() # majority mode
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
54 storeform=''
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
55 storevalue=0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
56 for i in binseq:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
57 if seq.count(i)>storevalue:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
58 storeform=i
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
59 storevalue=seq.count(i)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
60
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
61 return int(storeform)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
62
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
63 ### decide global parameter ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
64 COORDINATECOLUMN=1
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
65 ALLELECOLUMN=2
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
66 MOTIFCOLUMN=3
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
67 ##(0.01-0.5)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
68 MINIMUMMUTABLE=1.2*(1.0/(10**8)) #http://www.ncbi.nlm.nih.gov/pubmed/22914163 Kong et al 2012
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
69
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
70
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
71 ## Fixed global variable
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
72 inputname=sys.argv[1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
73 errorprofile=sys.argv[2]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
74 Genotypingcorrected=sys.argv[3]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
75 EXPECTEDLEVELOFMINOR=float(sys.argv[4])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
76 if EXPECTEDLEVELOFMINOR >0.5:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
77 try:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
78 expected_contribution_of_minor_allele=int('expected_contribution_of_minor_allele')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
79 except Exception, eee:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
80 print eee
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
81 stop_err("Expected contribution of minor allele must be at least 0 and not more than 0.5")
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
82 ALLREPEATTYPE=[1,2,3,4]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
83 ALLREPEATTYPENAME=['mono','di','tri','tetra']
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
84 monomotif=['A','C']
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
85 dimotif=['AC','AG','AT','CG']
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
86 trimotif=['AAC','AAG','AAT','ACC','ACG','ACT','AGC','AGG','ATC','CCG']
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
87 tetramotif=['AAAC','AAAG','AAAT','AACC','AACG','AACT','AAGC','AAGG','AAGT','AATC','AATG','AATT',\
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
88 'ACAG','ACAT','ACCC','ACCG','ACCT','ACGC','ACGG','ACGT','ACTC','ACTG','AGAT','AGCC','AGCG','AGCT',\
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
89 'AGGC','AGGG','ATCC','ATCG','ATGC','CCCG','CCGG','AGTC']
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
90 ALLMOTIF={1:monomotif,2:dimotif,3:trimotif,4:tetramotif}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
91 monorange=range(5,60)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
92 dirange=range(6,60)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
93 trirange=range(9,60)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
94 tetrarange=range(12,80)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
95 ALLRANGE={1:monorange,2:dirange,3:trirange,4:tetrarange}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
96
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
97 #########################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
98 ######## Prob calculation sector ########
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
99 #########################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
100 def multinomial_prob(majorallele,STRlength,motif,probdatabase):
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
101 '''int,int,str,dict-->int
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
102 ### get prob for each STRlength to be generated from major allele
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
103 '''
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
104 #print (majorallele,STRlength,motif)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
105 prob=probdatabase[len(motif)][motif][majorallele][STRlength]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
106 return prob
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
107
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
108 ################################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
109 ######## error model database sector ###########
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
110 ################################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
111
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
112 ## structure generator
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
113 errormodeldatabase={1:{},2:{},3:{},4:{}}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
114 sumbymajoralleledatabase={1:{},2:{},3:{},4:{}}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
115 for repeattype in ALLREPEATTYPE:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
116 for motif in ALLMOTIF[repeattype]:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
117 errormodeldatabase[repeattype][motif]={}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
118 sumbymajoralleledatabase[repeattype][motif]={}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
119 for motifsize1 in ALLRANGE[repeattype]:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
120 errormodeldatabase[repeattype][motif][motifsize1]={}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
121 sumbymajoralleledatabase[repeattype][motif][motifsize1]=0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
122 for motifsize2 in ALLRANGE[repeattype]:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
123 errormodeldatabase[repeattype][motif][motifsize1][motifsize2]=MINIMUMMUTABLE
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
124
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
125 #print errormodeldatabase
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
126 ## read database
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
127
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
128
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
129 ## get read count for each major allele
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
130 fd=open(errorprofile)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
131 lines=fd.readlines()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
132 for line in lines:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
133 temp=line.strip().split('\t')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
134 t_major=int(temp[0])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
135 t_count=int(temp[2])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
136 motif=temp[3]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
137 sumbymajoralleledatabase[len(motif)][motif][t_major]+=t_count
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
138 fd.close()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
139 ##print sumbymajoralleledatabase
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
140
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
141 ## get probability
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
142 fd=open(errorprofile)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
143 lines=fd.readlines()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
144 for line in lines:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
145 temp=line.strip().split('\t')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
146 t_major=int(temp[0])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
147 t_read=int(temp[1])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
148 t_count=int(temp[2])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
149 motif=temp[3]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
150 if sumbymajoralleledatabase[len(motif)][motif][t_major]>0:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
151 errormodeldatabase[len(motif)][motif][t_major][t_read]=t_count/(sumbymajoralleledatabase[len(motif)][motif][t_major]*1.0)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
152 #errormodeldatabase[repeattype][motif][t_major][t_read]=math.log(t_count/(sumbymajorallele[t_major]*1.0))
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
153
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
154 #else:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
155 # errormodeldatabase[repeattype][motif][t_major][t_read]=0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
156 fd.close()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
157
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
158 #########################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
159 ######## input reading sector ###########
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
160 #########################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
161 fdout=open(Genotypingcorrected,'w')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
162
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
163 fd = open(inputname)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
164
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
165 lines=fd.xreadlines()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
166 for line in lines:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
167 i_read=[]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
168 i2_read=[]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
169 temp=line.strip().split('\t')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
170 i_coordinate=temp[COORDINATECOLUMN-1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
171 i_motif=motifsimplify(temp[MOTIFCOLUMN-1])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
172 i_read=temp[ALLELECOLUMN-1].split(',')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
173 i_read=map(int,i_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
174 coverage=len(i_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
175
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
176 ### Evaluate 1 major allele ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
177 i_all_allele=list(set(i_read))
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
178 i_major_allele=majorallele(i_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
179 f_majorallele=i_read.count(i_major_allele)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
180 ### Evaluate 2 major allele ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
181 if len(i_all_allele)>1:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
182 i2_read=filter(lambda a: a != i_major_allele, i_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
183 i_major2_allele=majorallele(i2_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
184 f_majorallele2=i_read.count(i_major2_allele)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
185 ### Evaluate 3 major allele ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
186 if len(i_all_allele)>2:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
187 i3_read=filter(lambda a: a != i_major2_allele, i2_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
188 i_major3_allele=majorallele(i3_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
189 f_majorallele3=i_read.count(i_major3_allele)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
190 ### No 3 major allele ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
191 elif len(i_all_allele)==2:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
192 i_major3_allele=i_major2_allele
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
193 ### No 2 major allele ###
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
194 elif len(i_all_allele)==1:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
195 #i_major2_allele=majorallele(i_read)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
196 i_major2_allele=i_major_allele+len(i_motif)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
197 i_major3_allele=i_major2_allele
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
198 #print line.strip()+'\t'+'\t'.join(['homo','only',str(i_major_allele),str(i_major_allele),'NA'])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
199 #continue
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
200 else:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
201 print("no allele is reading")
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
202 sys.exit()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
203
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
204 ## scope filter
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
205
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
206 #########################################
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents: 0
diff changeset
207 ######## prob calculation sector ########
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
208 #########################################
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
209 homozygous_collector=0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
210 heterozygous_collector=0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
211
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
212
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
213 alist=[multinomial_prob(i_major_allele,x,i_motif,errormodeldatabase)for x in i_read]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
214 blist=[multinomial_prob(i_major2_allele,x,i_motif,errormodeldatabase)for x in i_read]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
215 clist=[multinomial_prob(i_major3_allele,x,i_motif,errormodeldatabase)for x in i_read]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
216
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
217 ablist=averagelist(alist,blist,EXPECTEDLEVELOFMINOR)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
218 bclist=averagelist(blist,clist,EXPECTEDLEVELOFMINOR)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
219 aclist=averagelist(alist,clist,EXPECTEDLEVELOFMINOR)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
220
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
221 #print alist,blist,clist
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
222 majora=sum([math.log(i,10) for i in alist])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
223 majorb=sum([math.log(i,10) for i in blist])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
224 majorc=sum([math.log(i,10) for i in clist])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
225 homozygous_collector=max(majora,majorb,majorc)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
226
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
227 homomajor1=max([(majora,i_major_allele),(majorb,i_major2_allele),(majorc,i_major3_allele)])[1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
228 homomajordict={i_major_allele:majora,i_major2_allele:majorb,i_major3_allele:majorc}
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
229
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
230 majorab=sum([math.log(i,10) for i in ablist])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
231 majorbc=sum([math.log(i,10) for i in bclist])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
232 majorac=sum([math.log(i,10) for i in aclist])
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
233 heterozygous_collector=max(majorab,majorbc,majorac)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
234 bothheteromajor=max([(majorab,(i_major_allele,i_major2_allele)),(majorbc,(i_major2_allele,i_major3_allele)),(majorac,(i_major_allele,i_major3_allele))])[1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
235 ##heteromajor1=max(bothheteromajor)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
236 ##heteromajor2=min(bothheteromajor)
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
237 pre_heteromajor1=bothheteromajor[0]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
238 pre_heteromajor2=bothheteromajor[1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
239 heteromajor1=max((homomajordict[pre_heteromajor1],pre_heteromajor1),(homomajordict[pre_heteromajor2],pre_heteromajor2))[1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
240 heteromajor2=min((homomajordict[pre_heteromajor1],pre_heteromajor1),(homomajordict[pre_heteromajor2],pre_heteromajor2))[1]
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
241
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
242 logratio_homo=homozygous_collector-heterozygous_collector
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
243
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
244 if logratio_homo>0:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
245 fdout.writelines(line.strip()+'\t'+'\t'.join(['homo',str(logratio_homo),str(homomajor1),str(heteromajor1),str(heteromajor2)])+'\n')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
246 elif logratio_homo<0:
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
247 fdout.writelines(line.strip()+'\t'+'\t'.join(['hetero',str(logratio_homo),str(homomajor1),str(heteromajor1),str(heteromajor2)])+'\n')
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
248 fd.close()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
249 fdout.close()