Mercurial > repos > jjohnson > snpeff_cds_report
comparison snpEff_cds_report.py @ 2:15a54fa11ad7
snpEff_cds_report report base before and after stop codon of variant sequence
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 19 Apr 2013 11:04:27 -0500 |
parents | 29b286896c50 |
children | 429a6b4df5e5 |
comparison
equal
deleted
inserted
replaced
1:29b286896c50 | 2:15a54fa11ad7 |
---|---|
285 """ | 285 """ |
286 Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] ) | 286 Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] ) |
287 FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414) | 287 FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414) |
288 """ | 288 """ |
289 class SnpEffect( object ): | 289 class SnpEffect( object ): |
290 report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance Percent','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','AA Variation'] | 290 report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','Stop Region','AA Variation'] |
291 def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False): | 291 def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False): |
292 self.chrom = chrom | 292 self.chrom = chrom |
293 self.pos = int(pos) | 293 self.pos = int(pos) |
294 self.ref = ref | 294 self.ref = ref |
295 self.alt = alt | 295 self.alt = alt |
308 self.coding = None | 308 self.coding = None |
309 self.transcript = None | 309 self.transcript = None |
310 self.transcript_ids = None | 310 self.transcript_ids = None |
311 self.exon = None | 311 self.exon = None |
312 self.cds_stop_codon = None | 312 self.cds_stop_codon = None |
313 self.cds_stop_pos = None | |
313 # retrieve from ensembl | 314 # retrieve from ensembl |
314 self.strand = None | 315 self.strand = None |
315 self.cds_pos = None | 316 self.cds_pos = None |
316 self.cds_ref = None | 317 self.cds_ref = None |
317 self.cds_alt = None | 318 self.cds_alt = None |
318 self.cds = None | 319 self.cds = None |
319 self.aa_pos = None | 320 self.aa_pos = None |
320 self.aa_len = None | 321 self.aa_len = None |
322 self.alt_stop_pos = None | |
321 self.alt_stop_codon = None | 323 self.alt_stop_codon = None |
324 self.alt_stop_region = None ## includes base before and after alt_stop_codon | |
322 def chrPos(self): | 325 def chrPos(self): |
323 return "%s:%s" % (self.chrom,self.pos) | 326 return "%s:%s" % (self.chrom,self.pos) |
324 def setEffect(self, effect, snpEffVersion = None): | 327 def setEffect(self, effect, snpEffVersion = None): |
325 if snpEffVersion and snpEffVersion == '2': | 328 if snpEffVersion and snpEffVersion == '2': |
326 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9] | 329 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9] |
414 if var_stop < 0: | 417 if var_stop < 0: |
415 var_aa_end = len(var_aa) | 418 var_aa_end = len(var_aa) |
416 else: | 419 else: |
417 var_aa_end = var_stop + 1 # include the stop codon | 420 var_aa_end = var_stop + 1 # include the stop codon |
418 stop_pos = var_stop * 3 | 421 stop_pos = var_stop * 3 |
422 self.alt_stop_pos = stop_pos | |
419 self.alt_stop_codon = var_dna[stop_pos:stop_pos+3] | 423 self.alt_stop_codon = var_dna[stop_pos:stop_pos+3] |
424 self.alt_stop_region = (var_dna[stop_pos - 1] + '-' if stop_pos > 0 else '') + \ | |
425 self.alt_stop_codon + \ | |
426 ( '-' + var_dna[stop_pos+3] if len(var_dna) > stop_pos+3 else '') | |
420 return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end] | 427 return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end] |
421 def inHomopolymer(self,polyA_limit): | 428 def inHomopolymer(self,polyA_limit): |
422 if polyA_limit: ## library prep can results in inserting or deleting an A in a poly A region | 429 if polyA_limit: ## library prep can results in inserting or deleting an A in a poly A region |
423 ## check if base changes at cds_pos involve A or T | 430 ## check if base changes at cds_pos involve A or T |
424 bdiff = None # the difference between the cds_ref and cds_alt | 431 bdiff = None # the difference between the cds_ref and cds_alt |
464 depth = "%d" % self.depth if self.depth else '' | 471 depth = "%d" % self.depth if self.depth else '' |
465 aa_pos = "%d" % (self.aa_pos + 1) if self.aa_pos else '' | 472 aa_pos = "%d" % (self.aa_pos + 1) if self.aa_pos else '' |
466 aa_len = "%d" % self.aa_len if self.aa_len else '' | 473 aa_len = "%d" % self.aa_len if self.aa_len else '' |
467 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript | 474 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript |
468 stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' | 475 stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' |
476 stop_region = self.alt_stop_region if self.alt_stop_region else '' | |
469 chrpos = self.chrPos() | 477 chrpos = self.chrPos() |
470 aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] | 478 aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] |
471 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,var_aa if var_aa else ''] | 479 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else ''] |
472 | 480 |
473 def __main__(): | 481 def __main__(): |
474 | 482 |
475 def printReportTsv(output_file, snpEffects): | 483 def printReportTsv(output_file, snpEffects): |
476 if output_file: | 484 if output_file: |
477 print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings) | 485 print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings) |
478 for snpEffect in snpEffects: | 486 for snpEffect in snpEffects: |
479 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() | 487 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields() |
480 if not snpEffect.effect == 'FRAME_SHIFT': | 488 if not snpEffect.effect == 'FRAME_SHIFT': |
481 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) | 489 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) |
482 novel_peptide = "%s_%s_%s" % (preAA,varAA,postAA) | 490 novel_peptide = "%s_%s_%s" % (preAA,varAA,postAA) |
483 print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide]) | 491 print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,novel_peptide]) |
484 | 492 |
485 """ | 493 """ |
486 http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111 | 494 http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111 |
487 http://www.ensembl.org/Homo_sapiens/Search/Results?species=Homo_sapiens;idx=;q=ENST00000314970 | 495 http://www.ensembl.org/Homo_sapiens/Search/Results?species=Homo_sapiens;idx=;q=ENST00000314970 |
488 http://www.ensembl.org/Homo_sapiens/Transcript/Summary?g=ENSG00000164306;r=4:185570821-185616117;t=ENST00000515774 | 496 http://www.ensembl.org/Homo_sapiens/Transcript/Summary?g=ENSG00000164306;r=4:185570821-185616117;t=ENST00000515774 |
489 """ | 497 """ |
490 def printReportHtml(output_file, detail_dir, snpEffects): | 498 def printReportHtml(output_file, detail_dir, snpEffects): |
491 if output_file: | 499 if output_file: |
492 print >> output_file, '<HTML><BODY>\n' | 500 print >> output_file, '<HTML><BODY>\n' |
493 print >> output_file, '<TABLE BORDER="1">\n' | 501 print >> output_file, '<TABLE BORDER="1">\n' |
494 print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance Percent</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n' | 502 print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n' |
495 for snpEffect in snpEffects: | 503 for snpEffect in snpEffects: |
496 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() | 504 (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields() |
497 refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html' | 505 refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html' |
498 aa_ref = '#'.join([refname,aa_anchor]) | 506 aa_ref = '#'.join([refname,aa_anchor]) |
499 refpath = os.path.join(detail_dir,refname) | 507 refpath = os.path.join(detail_dir,refname) |
500 try: | 508 try: |
501 ref_file = open(refpath,'w') | 509 ref_file = open(refpath,'w') |
502 printEffDetailHtml(ref_file,snpEffect) | 510 printEffDetailHtml(ref_file,snpEffect) |
503 ref_file.close() | 511 ref_file.close() |
504 except Exception, e: | 512 except Exception, e: |
505 sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) ) | 513 sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) ) |
506 if snpEffect.effect == 'FRAME_SHIFT': | 514 if snpEffect.effect == 'FRAME_SHIFT': |
507 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref,novel_peptide) | 515 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref,novel_peptide) |
508 else: | 516 else: |
509 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) | 517 (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) |
510 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref, preAA,varAA,postAA) | 518 print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref, preAA,varAA,postAA) |
511 print >> output_file, '</TABLE>\n' | 519 print >> output_file, '</TABLE>\n' |
512 print >> output_file, '</BODY></HTML>\n' | 520 print >> output_file, '</BODY></HTML>\n' |
513 | 521 |
514 def printEffDetailHtml(output_file,snpEffect,wrap_len=60): | 522 def printEffDetailHtml(output_file,snpEffect,wrap_len=60): |
515 (coding_seq, alt_seq, pos, coding_aa, alt_aa, alt_aa_pos, alt_aa_end) = snpEffect.getSequences() | 523 (coding_seq, alt_seq, pos, coding_aa, alt_aa, alt_aa_pos, alt_aa_end) = snpEffect.getSequences() |