Mercurial > repos > siyuan > prada
view pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
line wrap: on
line source
def _nm_tag(read):
    '''Return the value of the first 'NM' tag found in read.tags.

    read.tags is iterated as a sequence of (tag, value) pairs.
    Raises IndexError when the read carries no NM tag, which is what the
    former inline lookup ``[...][0]`` raised as well.
    '''
    for tag in read.tags:
        if tag[0] == 'NM':
            return tag[1]
    raise IndexError('read has no NM tag')


class Junction(object):
    '''A fusion junction parsed from its string representation.

    Attributes set by the constructor:
      gene1, end1_chr, end1_pos -- gene name, chromosome and integer
                                   position of the first end
      gene2, end2_chr, end2_pos -- same for the second end
      name -- the six fields above joined with '.'
    '''

    def __init__(self, init_str):
        '''init_str looks like PCBP2:12:53873398_GFAP:17:42985517'''
        part1, part2 = init_str.split('_')
        info1, info2 = part1.split(':'), part2.split(':')
        self.gene1 = info1[0]
        self.end1_chr = info1[1]
        self.end1_pos = int(info1[2])
        self.gene2 = info2[0]
        self.end2_chr = info2[1]
        self.end2_pos = int(info2[2])
        self.name = '%s.%s.%s.%s.%s.%s' % (
            self.gene1, self.end1_chr, self.end1_pos,
            self.gene2, self.end2_chr, self.end2_pos)

    def distance(self):
        '''Return the absolute distance between the two junction ends.

        Returns None when the two ends lie on different chromosomes.
        '''
        if self.end1_chr != self.end2_chr:
            return None
        # Positions were converted to int in __init__; no re-cast needed.
        return abs(self.end1_pos - self.end2_pos)

    def junc_category(self):
        '''Return 'intrachromosome' if both ends share a chromosome,
        otherwise 'interchromosome'.'''
        if self.end1_chr == self.end2_chr:
            return 'intrachromosome'
        return 'interchromosome'


class JSR(object):
    '''A junction spanning read: a read paired with its junction string.

    Ideally it should extend the pysam.AlignedRead class, but that ran
    into a segment error. read is a pysam.AlignedRead object; junction is
    the junction string accepted by Junction().
    '''

    def __init__(self, read, junction):
        self.read = read
        self.junction = junction


class GeneFusion(object):
    '''Evidence for a fusion between gene1 and gene2.

    discordantpairs is [(r1,r2),...]; junc_reads is [jsr1,jsr2,...]
    (stored on the instance as self.fusionreads).
    '''

    def __init__(self, gene1, gene2, discordantpairs=None, junc_reads=None):
        self.gene1 = gene1
        self.gene2 = gene2
        # None instead of [] as default: a literal [] default is created
        # once and would be shared by every instance built without args.
        self.discordantpairs = [] if discordantpairs is None else discordantpairs
        self.fusionreads = [] if junc_reads is None else junc_reads

    def update(self, mm=1):
        '''Generate a new GeneFusion object with the update parameter.

        mm is the largest NM tag value allowed. A discordant pair is kept
        only if both of its reads satisfy NM <= mm; a junction spanning
        read is kept if its read satisfies NM <= mm. self is not modified.
        Extendable.
        '''
        filtdp = [rp for rp in self.discordantpairs
                  if _nm_tag(rp[0]) <= mm and _nm_tag(rp[1]) <= mm]
        filtfus = [fp for fp in self.fusionreads
                   if _nm_tag(fp.read) <= mm]
        return GeneFusion(self.gene1, self.gene2, filtdp, filtfus)

    def get_junction_freq(self):
        '''Return a list of (junction string, read count) tuples.'''
        juncs = {}
        for item in self.fusionreads:
            # dict.get replaces has_key(), which Python 3 removed.
            juncs[item.junction] = juncs.get(item.junction, 0) + 1
        # list() keeps the return type a list on Python 3 as well.
        return list(juncs.items())

    def get_junctions(self):
        '''Return one Junction object per distinct junction string.

        The junction strings are sorted so the result order is stable.
        '''
        distinct = set(item.junction for item in self.fusionreads)
        return [Junction(x) for x in sorted(distinct)]

    def get_perfect_JSR(self):
        '''Return the junction spanning reads whose NM tag equals 0.'''
        return [item for item in self.fusionreads
                if _nm_tag(item.read) == 0]

    def positioncheck(self):
        '''Score whether junction positions agree with discordant reads.

        Returns 'NA' when there are no junction spanning reads or no
        discordant pairs; otherwise 'YES' (score 6), 'PARTIALLY'
        (score >= 4) or 'NO'.
        '''
        if len(self.fusionreads) == 0:
            # no junction and junction spanning reads.
            return 'NA'
        if len(self.discordantpairs) == 0:
            return 'NA'

        junctions = self.get_junctions()
        jA = [x.end1_pos for x in junctions]
        jA_min, jA_max = min(jA), max(jA)
        jB = [x.end2_pos for x in junctions]
        jB_min, jB_max = min(jB), max(jB)

        fwd = [x[0].pos for x in self.discordantpairs]
        fwd_min, fwd_max = min(fwd), max(fwd)
        rev = [x[1].pos for x in self.discordantpairs]
        rev_min, rev_max = min(rev), max(rev)

        # The following scoring process is translated from M. Berger's
        # perl script. Strand is taken from the first discordant pair only.
        first_fwd, first_rev = self.discordantpairs[0][0], self.discordantpairs[0][1]
        const_score = 0

        if not first_fwd.is_reverse:
            # gene A on + strand
            if jA_min > fwd_max:
                const_score += 3
            elif jA_max > fwd_min:
                const_score += 2
        else:
            # gene A on - strand
            if jA_max < fwd_min:
                const_score += 3
            elif jA_min < fwd_max:
                const_score += 2

        if first_rev.is_reverse:
            # gene B on + strand // disc read map to -
            if jB_max < rev_min:
                const_score += 3
            elif jB_min < rev_max:
                const_score += 2
        else:
            # gene B on - strand
            if jB_min > rev_max:
                const_score += 3
            elif jB_max > rev_min:
                const_score += 2

        if const_score == 6:
            return 'YES'
        if const_score >= 4:
            return 'PARTIALLY'
        return 'NO'