Mercurial > repos > siyuan > prada
diff pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyPRADA_1.2/gfclass.py Thu Feb 20 00:44:58 2014 -0500 @@ -0,0 +1,127 @@ + +class Junction(object): + def __init__(self,init_str): + 'init_str looks like PCBP2:12:53873398_GFAP:17:42985517' + part1,part2=init_str.split('_') + info1,info2=part1.split(':'), part2.split(':') + self.gene1=info1[0] + self.end1_chr=info1[1] + self.end1_pos=int(info1[2]) + self.gene2=info2[0] + self.end2_chr=info2[1] + self.end2_pos=int(info2[2]) + self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos) + def distance(self): + betwn_junc_dist=None + if self.end1_chr==self.end2_chr: + betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos)) + return betwn_junc_dist + def junc_category(self): + cat=None + if self.end1_chr==self.end2_chr: + cat='intrachromosome' + else: + cat='interchromosome' + return cat + +class JSR(object): + '''Ideally it should extend pysam.AlignedRead class, but run into segment error. + Read is pysam.AlignedRead object''' + def __init__(self,read,junction): + self.read=read + self.junction=junction + +class GeneFusion(object): + '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];''' + def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]): + self.gene1=gene1 + self.gene2=gene2 + self.discordantpairs=discordantpairs + self.fusionreads=junc_reads + def update(self,mm=1): + '''Generate a new PRADA object with the update parameter. Extendable.''' + filtdp,filtfus=[],[] #hold updated elements + #apply mm filter + for rp in self.discordantpairs: + r1,r2=rp + nm1=[x[1] for x in r1.tags if x[0]=='NM'][0] + nm2=[x[1] for x in r2.tags if x[0]=='NM'][0] + if nm1 <= mm and nm2 <= mm: + filtdp.append(rp) + for fp in self.fusionreads: + nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0] + if nm <= mm: + filtfus.append(fp) + newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus) + return newobject + def get_junction_freq(self): + juncs={} + for item in self.fusionreads: + if juncs.has_key(item.junction): + juncs[item.junction]+=1 + else: + juncs[item.junction]=1 + return juncs.items() + def get_junctions(self): + juncs=set([]) + for item in self.fusionreads: + juncs.add(item.junction) + junobjdb=[Junction(x) for x in juncs] + return junobjdb + def get_perfect_JSR(self): + pjsr=[] + for item in self.fusionreads: + r=item.read + nm=[x[1] for x in r.tags if x[0]=='NM'][0] + if nm==0: + pjsr.append(item) + return pjsr + def positioncheck(self): + if len(self.fusionreads)==0: #no junction and junction spanning reads. + return 'NA' + if len(self.discordantpairs)==0: + return 'NA' + junctions=self.get_junctions() + jA=[x.end1_pos for x in junctions] + jA_min,jA_max=min(jA),max(jA) + jB=[x.end2_pos for x in junctions] + jB_min,jB_max=min(jB),max(jB) ## + fwd=[x[0].pos for x in self.discordantpairs] + fwd_min,fwd_max=min(fwd),max(fwd) + rev=[x[1].pos for x in self.discordantpairs] + rev_min,rev_max=min(rev),max(rev) + #print 'junctionA',jA_min,jA_max + #print 'junctionB',jB_min,jB_max + #print 'Fwd Read',fwd_min,fwd_max + #print 'Rev Read',rev_min,rev_max + ##################################### + #The following scoring process is translated from M. Berger's perl script. + const_score=0 + if not self.discordantpairs[0][0].is_reverse: #gene A on + strand + if jA_min > fwd_max: + const_score=const_score+3 + elif jA_max > fwd_min: + const_score=const_score+2 + elif self.discordantpairs[0][0].is_reverse: #gene A on - strand + if jA_max < fwd_min: + const_score=const_score+3 + elif jA_min < fwd_max: + const_score=const_score+2 + if self.discordantpairs[0][1].is_reverse: #gene B on + strand // disc read map to - + if jB_max < rev_min: + const_score=const_score+3 + elif jB_min < rev_max: + const_score=const_score+2 + elif not self.discordantpairs[0][1].is_reverse: #gene B on - strand + if jB_min > rev_max: + const_score=const_score+3 + elif jB_max > rev_min: + const_score=const_score+2 + if const_score==6: + tag='YES' + elif const_score>=4: + tag='PARTIALLY' + else: + tag='NO' + return tag +