0
|
1
|
|
2 class Junction(object):
|
|
3 def __init__(self,init_str):
|
|
4 'init_str looks like PCBP2:12:53873398_GFAP:17:42985517'
|
|
5 part1,part2=init_str.split('_')
|
|
6 info1,info2=part1.split(':'), part2.split(':')
|
|
7 self.gene1=info1[0]
|
|
8 self.end1_chr=info1[1]
|
|
9 self.end1_pos=int(info1[2])
|
|
10 self.gene2=info2[0]
|
|
11 self.end2_chr=info2[1]
|
|
12 self.end2_pos=int(info2[2])
|
|
13 self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos)
|
|
14 def distance(self):
|
|
15 betwn_junc_dist=None
|
|
16 if self.end1_chr==self.end2_chr:
|
|
17 betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos))
|
|
18 return betwn_junc_dist
|
|
19 def junc_category(self):
|
|
20 cat=None
|
|
21 if self.end1_chr==self.end2_chr:
|
|
22 cat='intrachromosome'
|
|
23 else:
|
|
24 cat='interchromosome'
|
|
25 return cat
|
|
26
|
|
27 class JSR(object):
|
|
28 '''Ideally it should extend pysam.AlignedRead class, but run into segment error.
|
|
29 Read is pysam.AlignedRead object'''
|
|
30 def __init__(self,read,junction):
|
|
31 self.read=read
|
|
32 self.junction=junction
|
|
33
|
|
34 class GeneFusion(object):
|
|
35 '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];'''
|
|
36 def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]):
|
|
37 self.gene1=gene1
|
|
38 self.gene2=gene2
|
|
39 self.discordantpairs=discordantpairs
|
|
40 self.fusionreads=junc_reads
|
|
41 def update(self,mm=1):
|
|
42 '''Generate a new PRADA object with the update parameter. Extendable.'''
|
|
43 filtdp,filtfus=[],[] #hold updated elements
|
|
44 #apply mm filter
|
|
45 for rp in self.discordantpairs:
|
|
46 r1,r2=rp
|
|
47 nm1=[x[1] for x in r1.tags if x[0]=='NM'][0]
|
|
48 nm2=[x[1] for x in r2.tags if x[0]=='NM'][0]
|
|
49 if nm1 <= mm and nm2 <= mm:
|
|
50 filtdp.append(rp)
|
|
51 for fp in self.fusionreads:
|
|
52 nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0]
|
|
53 if nm <= mm:
|
|
54 filtfus.append(fp)
|
|
55 newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus)
|
|
56 return newobject
|
|
57 def get_junction_freq(self):
|
|
58 juncs={}
|
|
59 for item in self.fusionreads:
|
|
60 if juncs.has_key(item.junction):
|
|
61 juncs[item.junction]+=1
|
|
62 else:
|
|
63 juncs[item.junction]=1
|
|
64 return juncs.items()
|
|
65 def get_junctions(self):
|
|
66 juncs=set([])
|
|
67 for item in self.fusionreads:
|
|
68 juncs.add(item.junction)
|
|
69 junobjdb=[Junction(x) for x in juncs]
|
|
70 return junobjdb
|
|
71 def get_perfect_JSR(self):
|
|
72 pjsr=[]
|
|
73 for item in self.fusionreads:
|
|
74 r=item.read
|
|
75 nm=[x[1] for x in r.tags if x[0]=='NM'][0]
|
|
76 if nm==0:
|
|
77 pjsr.append(item)
|
|
78 return pjsr
|
|
79 def positioncheck(self):
|
|
80 if len(self.fusionreads)==0: #no junction and junction spanning reads.
|
|
81 return 'NA'
|
|
82 if len(self.discordantpairs)==0:
|
|
83 return 'NA'
|
|
84 junctions=self.get_junctions()
|
|
85 jA=[x.end1_pos for x in junctions]
|
|
86 jA_min,jA_max=min(jA),max(jA)
|
|
87 jB=[x.end2_pos for x in junctions]
|
|
88 jB_min,jB_max=min(jB),max(jB) ##
|
|
89 fwd=[x[0].pos for x in self.discordantpairs]
|
|
90 fwd_min,fwd_max=min(fwd),max(fwd)
|
|
91 rev=[x[1].pos for x in self.discordantpairs]
|
|
92 rev_min,rev_max=min(rev),max(rev)
|
|
93 #print 'junctionA',jA_min,jA_max
|
|
94 #print 'junctionB',jB_min,jB_max
|
|
95 #print 'Fwd Read',fwd_min,fwd_max
|
|
96 #print 'Rev Read',rev_min,rev_max
|
|
97 #####################################
|
|
98 #The following scoring process is translated from M. Berger's perl script.
|
|
99 const_score=0
|
|
100 if not self.discordantpairs[0][0].is_reverse: #gene A on + strand
|
|
101 if jA_min > fwd_max:
|
|
102 const_score=const_score+3
|
|
103 elif jA_max > fwd_min:
|
|
104 const_score=const_score+2
|
|
105 elif self.discordantpairs[0][0].is_reverse: #gene A on - strand
|
|
106 if jA_max < fwd_min:
|
|
107 const_score=const_score+3
|
|
108 elif jA_min < fwd_max:
|
|
109 const_score=const_score+2
|
|
110 if self.discordantpairs[0][1].is_reverse: #gene B on + strand // disc read map to -
|
|
111 if jB_max < rev_min:
|
|
112 const_score=const_score+3
|
|
113 elif jB_min < rev_max:
|
|
114 const_score=const_score+2
|
|
115 elif not self.discordantpairs[0][1].is_reverse: #gene B on - strand
|
|
116 if jB_min > rev_max:
|
|
117 const_score=const_score+3
|
|
118 elif jB_max > rev_min:
|
|
119 const_score=const_score+2
|
|
120 if const_score==6:
|
|
121 tag='YES'
|
|
122 elif const_score>=4:
|
|
123 tag='PARTIALLY'
|
|
124 else:
|
|
125 tag='NO'
|
|
126 return tag
|
|
127
|