comparison snpEff_cds_report.py @ 5:85b933b7d231

Make the reporting of the SnpEff Amino_Acid_change field an option
author Jim Johnson <jj@umn.edu>
date Fri, 24 May 2013 08:50:03 -0500
parents 429a6b4df5e5
children a64ef0611117
comparison
equal deleted inserted replaced
4:429a6b4df5e5 5:85b933b7d231
1 #!/usr/bin/python 1 #!/usr/bin/python
2 ## version 1.2
2 import sys,os,tempfile,re 3 import sys,os,tempfile,re
3 import httplib, urllib 4 import httplib, urllib
4 import optparse 5 import optparse
5 #import vcfClass 6 #import vcfClass
6 #from vcfClass import * 7 #from vcfClass import *
320 self.aa_pos = None 321 self.aa_pos = None
321 self.aa_len = None 322 self.aa_len = None
322 self.alt_stop_pos = None 323 self.alt_stop_pos = None
323 self.alt_stop_codon = None 324 self.alt_stop_codon = None
324 self.alt_stop_region = None ## includes base before and after alt_stop_codon 325 self.alt_stop_region = None ## includes base before and after alt_stop_codon
326 self.use_eff_aa_change = False
325 def chrPos(self): 327 def chrPos(self):
326 return "%s:%s" % (self.chrom,self.pos) 328 return "%s:%s" % (self.chrom,self.pos)
327 def setEffect(self, effect, snpEffVersion = None): 329 def setEffect(self, effect, snpEffVersion = None):
328 if snpEffVersion and snpEffVersion == '2': 330 if snpEffVersion and snpEffVersion == '2':
329 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9] 331 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9]
387 end_offset = end_count + 1 389 end_offset = end_count + 1
388 end_pos = min(var_aa_pos+end_offset,len(var_aa)) if end_offset and int(end_offset) >= 0 else var_aa_end 390 end_pos = min(var_aa_pos+end_offset,len(var_aa)) if end_offset and int(end_offset) >= 0 else var_aa_end
389 try: 391 try:
390 varAA = var_aa[var_aa_pos] if var_aa_pos < len(var_aa) else '_' 392 varAA = var_aa[var_aa_pos] if var_aa_pos < len(var_aa) else '_'
391 if debug: print >> sys.stdout, "HGVS %s %s pos:\t%d %d %d" % (self.transcript, self.effect, start_pos, var_aa_pos, end_pos) 393 if debug: print >> sys.stdout, "HGVS %s %s pos:\t%d %d %d" % (self.transcript, self.effect, start_pos, var_aa_pos, end_pos)
392 mutation = "p.%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA) 394 mutation = "%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA)
393 preAA = var_aa[start_pos:var_aa_pos] # N-term side 395 preAA = var_aa[start_pos:var_aa_pos] # N-term side
394 postAA = var_aa[var_aa_pos+1:end_pos] if var_aa_pos+1 < len(var_aa) else '' # C-term side 396 postAA = var_aa[var_aa_pos+1:end_pos] if var_aa_pos+1 < len(var_aa) else '' # C-term side
395 novel_peptide = var_aa[start_pos:end_pos] 397 novel_peptide = var_aa[start_pos:end_pos]
396 return [preAA,varAA,postAA,novel_peptide,var_aa_pos - start_pos,mutation] 398 return [preAA,varAA,postAA,novel_peptide,var_aa_pos - start_pos,mutation]
397 except Exception, e: 399 except Exception, e:
473 aa_len = "%d" % self.aa_len if self.aa_len else '' 475 aa_len = "%d" % self.aa_len if self.aa_len else ''
474 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript 476 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript
475 stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' 477 stop_codon = self.alt_stop_codon if self.alt_stop_codon else ''
476 stop_region = self.alt_stop_region if self.alt_stop_region else '' 478 stop_region = self.alt_stop_region if self.alt_stop_region else ''
477 chrpos = self.chrPos() 479 chrpos = self.chrPos()
478 aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] 480 aa_change = self.amino_acid_change if (self.use_eff_aa_change and self.amino_acid_change) else hgvs[5]
479 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else ''] 481 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else '']
480 482
481 def __main__(): 483 def __main__():
482 484
483 def printReportTsv(output_file, snpEffects): 485 def printReportTsv(output_file, snpEffects):
644 parser.add_option("-O", "--ensembl_dataset", 646 parser.add_option("-O", "--ensembl_dataset",
645 action="store", type="string", 647 action="store", type="string",
646 dest="ensembl_dataset", default='hsapiens_gene_ensembl', help='bimoart ensembl dataset default: hsapiens_gene_ensembl') 648 dest="ensembl_dataset", default='hsapiens_gene_ensembl', help='bimoart ensembl dataset default: hsapiens_gene_ensembl')
647 parser.add_option("-p", "--polyA_limit", 649 parser.add_option("-p", "--polyA_limit",
648 action="store", type="int", default=None, help='ignore variants that are in a poly A longer than this value' ) 650 action="store", type="int", default=None, help='ignore variants that are in a poly A longer than this value' )
651 parser.add_option('-S', '--snpeff_aa_change', dest='snpeff_aa_change', action='store_true', default=False, help='Report the SnpEff Amino_Acid_change field' )
649 parser.add_option('-a', '--all_effects', dest='all_effects', action='store_true', default=False, help='Search for all effects' ) 652 parser.add_option('-a', '--all_effects', dest='all_effects', action='store_true', default=False, help='Search for all effects' )
650 parser.add_option('-c', '--with_ccds', dest='with_ccds', action='store_true', default=False, help='Only variants with CCDS entries' ) 653 parser.add_option('-c', '--with_ccds', dest='with_ccds', action='store_true', default=False, help='Only variants with CCDS entries' )
651 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) 654 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' )
652 655
653 (options, args) = parser.parse_args() 656 (options, args) = parser.parse_args()
779 if debug: print >> sys.stdout, "ccds_dict: %s" % (ccds_dict) 782 if debug: print >> sys.stdout, "ccds_dict: %s" % (ccds_dict)
780 for effect in effects: 783 for effect in effects:
781 snpEffect = SnpEffect(chrom,pos,ref,alt,freq,depth) 784 snpEffect = SnpEffect(chrom,pos,ref,alt,freq,depth)
782 snpEffect.transcript_ids = transcript_ids 785 snpEffect.transcript_ids = transcript_ids
783 snpEffect.parseEffect(effect,snpEffVersion=SnpEffVersion) 786 snpEffect.parseEffect(effect,snpEffVersion=SnpEffVersion)
787 if options.snpeff_aa_change:
788 snpEffect.use_eff_aa_change = True
784 if effects_list and not snpEffect.effect in effects_list: 789 if effects_list and not snpEffect.effect in effects_list:
785 continue 790 continue
786 if snpEffect.transcript and (not options.with_ccds or snpEffect.transcript in ccds_dict): 791 if snpEffect.transcript and (not options.with_ccds or snpEffect.transcript in ccds_dict):
787 if snpEffect.fetchEnsemblInfo(ensembl_host,options.ensembl_dataset,options.with_ccds): 792 if snpEffect.fetchEnsemblInfo(ensembl_host,options.ensembl_dataset,options.with_ccds):
788 if snpEffect.cds_pos: 793 if snpEffect.cds_pos: