comparison snpEff_cds_report.py @ 2:15a54fa11ad7

snpEff_cds_report report base before and after stop codon of variant sequence
author Jim Johnson <jj@umn.edu>
date Fri, 19 Apr 2013 11:04:27 -0500
parents 29b286896c50
children 429a6b4df5e5
comparison
equal deleted inserted replaced
1:29b286896c50 2:15a54fa11ad7
285 """ 285 """
286 Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] ) 286 Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )
287 FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414) 287 FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414)
288 """ 288 """
289 class SnpEffect( object ): 289 class SnpEffect( object ):
290 report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance Percent','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','AA Variation'] 290 report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','Stop Region','AA Variation']
291 def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False): 291 def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False):
292 self.chrom = chrom 292 self.chrom = chrom
293 self.pos = int(pos) 293 self.pos = int(pos)
294 self.ref = ref 294 self.ref = ref
295 self.alt = alt 295 self.alt = alt
308 self.coding = None 308 self.coding = None
309 self.transcript = None 309 self.transcript = None
310 self.transcript_ids = None 310 self.transcript_ids = None
311 self.exon = None 311 self.exon = None
312 self.cds_stop_codon = None 312 self.cds_stop_codon = None
313 self.cds_stop_pos = None
313 # retrieve from ensembl 314 # retrieve from ensembl
314 self.strand = None 315 self.strand = None
315 self.cds_pos = None 316 self.cds_pos = None
316 self.cds_ref = None 317 self.cds_ref = None
317 self.cds_alt = None 318 self.cds_alt = None
318 self.cds = None 319 self.cds = None
319 self.aa_pos = None 320 self.aa_pos = None
320 self.aa_len = None 321 self.aa_len = None
322 self.alt_stop_pos = None
321 self.alt_stop_codon = None 323 self.alt_stop_codon = None
324 self.alt_stop_region = None ## includes base before and after alt_stop_codon
322 def chrPos(self): 325 def chrPos(self):
323 return "%s:%s" % (self.chrom,self.pos) 326 return "%s:%s" % (self.chrom,self.pos)
324 def setEffect(self, effect, snpEffVersion = None): 327 def setEffect(self, effect, snpEffVersion = None):
325 if snpEffVersion and snpEffVersion == '2': 328 if snpEffVersion and snpEffVersion == '2':
326 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9] 329 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9]
414 if var_stop < 0: 417 if var_stop < 0:
415 var_aa_end = len(var_aa) 418 var_aa_end = len(var_aa)
416 else: 419 else:
417 var_aa_end = var_stop + 1 # include the stop codon 420 var_aa_end = var_stop + 1 # include the stop codon
418 stop_pos = var_stop * 3 421 stop_pos = var_stop * 3
422 self.alt_stop_pos = stop_pos
419 self.alt_stop_codon = var_dna[stop_pos:stop_pos+3] 423 self.alt_stop_codon = var_dna[stop_pos:stop_pos+3]
424 self.alt_stop_region = (var_dna[stop_pos - 1] + '-' if stop_pos > 0 else '') + \
425 self.alt_stop_codon + \
426 ( '-' + var_dna[stop_pos+3] if len(var_dna) > stop_pos+3 else '')
420 return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end] 427 return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end]
421 def inHomopolymer(self,polyA_limit): 428 def inHomopolymer(self,polyA_limit):
422 if polyA_limit: ## library prep can results in inserting or deleting an A in a poly A region 429 if polyA_limit: ## library prep can results in inserting or deleting an A in a poly A region
423 ## check if base changes at cds_pos involve A or T 430 ## check if base changes at cds_pos involve A or T
424 bdiff = None # the difference between the cds_ref and cds_alt 431 bdiff = None # the difference between the cds_ref and cds_alt
464 depth = "%d" % self.depth if self.depth else '' 471 depth = "%d" % self.depth if self.depth else ''
465 aa_pos = "%d" % (self.aa_pos + 1) if self.aa_pos else '' 472 aa_pos = "%d" % (self.aa_pos + 1) if self.aa_pos else ''
466 aa_len = "%d" % self.aa_len if self.aa_len else '' 473 aa_len = "%d" % self.aa_len if self.aa_len else ''
467 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript 474 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript
468 stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' 475 stop_codon = self.alt_stop_codon if self.alt_stop_codon else ''
476 stop_region = self.alt_stop_region if self.alt_stop_region else ''
469 chrpos = self.chrPos() 477 chrpos = self.chrPos()
470 aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] 478 aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5]
471 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,var_aa if var_aa else ''] 479 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else '']
472 480
473 def __main__(): 481 def __main__():
474 482
475 def printReportTsv(output_file, snpEffects): 483 def printReportTsv(output_file, snpEffects):
476 if output_file: 484 if output_file:
477 print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings) 485 print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings)
478 for snpEffect in snpEffects: 486 for snpEffect in snpEffects:
479 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() 487 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields()
480 if not snpEffect.effect == 'FRAME_SHIFT': 488 if not snpEffect.effect == 'FRAME_SHIFT':
481 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) 489 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False)
482 novel_peptide = "%s_%s_%s" % (preAA,varAA,postAA) 490 novel_peptide = "%s_%s_%s" % (preAA,varAA,postAA)
483 print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide]) 491 print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,novel_peptide])
484 492
485 """ 493 """
486 http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111 494 http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111
487 http://www.ensembl.org/Homo_sapiens/Search/Results?species=Homo_sapiens;idx=;q=ENST00000314970 495 http://www.ensembl.org/Homo_sapiens/Search/Results?species=Homo_sapiens;idx=;q=ENST00000314970
488 http://www.ensembl.org/Homo_sapiens/Transcript/Summary?g=ENSG00000164306;r=4:185570821-185616117;t=ENST00000515774 496 http://www.ensembl.org/Homo_sapiens/Transcript/Summary?g=ENSG00000164306;r=4:185570821-185616117;t=ENST00000515774
489 """ 497 """
490 def printReportHtml(output_file, detail_dir, snpEffects): 498 def printReportHtml(output_file, detail_dir, snpEffects):
491 if output_file: 499 if output_file:
492 print >> output_file, '<HTML><BODY>\n' 500 print >> output_file, '<HTML><BODY>\n'
493 print >> output_file, '<TABLE BORDER="1">\n' 501 print >> output_file, '<TABLE BORDER="1">\n'
494 print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance Percent</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n' 502 print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n'
495 for snpEffect in snpEffects: 503 for snpEffect in snpEffects:
496 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() 504 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields()
497 refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html' 505 refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html'
498 aa_ref = '#'.join([refname,aa_anchor]) 506 aa_ref = '#'.join([refname,aa_anchor])
499 refpath = os.path.join(detail_dir,refname) 507 refpath = os.path.join(detail_dir,refname)
500 try: 508 try:
501 ref_file = open(refpath,'w') 509 ref_file = open(refpath,'w')
502 printEffDetailHtml(ref_file,snpEffect) 510 printEffDetailHtml(ref_file,snpEffect)
503 ref_file.close() 511 ref_file.close()
504 except Exception, e: 512 except Exception, e:
505 sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) ) 513 sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) )
506 if snpEffect.effect == 'FRAME_SHIFT': 514 if snpEffect.effect == 'FRAME_SHIFT':
507 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref,novel_peptide) 515 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref,novel_peptide)
508 else: 516 else:
509 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) 517 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False)
510 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref, preAA,varAA,postAA) 518 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref, preAA,varAA,postAA)
511 print >> output_file, '</TABLE>\n' 519 print >> output_file, '</TABLE>\n'
512 print >> output_file, '</BODY></HTML>\n' 520 print >> output_file, '</BODY></HTML>\n'
513 521
514 def printEffDetailHtml(output_file,snpEffect,wrap_len=60): 522 def printEffDetailHtml(output_file,snpEffect,wrap_len=60):
515 (coding_seq, alt_seq, pos, coding_aa, alt_aa, alt_aa_pos, alt_aa_end) = snpEffect.getSequences() 523 (coding_seq, alt_seq, pos, coding_aa, alt_aa, alt_aa_pos, alt_aa_end) = snpEffect.getSequences()