diff snpEff_cds_report.py @ 5:85b933b7d231

Make the reporting of the SnpEff Amino_Acid_change field an option
author Jim Johnson <jj@umn.edu>
date Fri, 24 May 2013 08:50:03 -0500
parents 429a6b4df5e5
children a64ef0611117
line wrap: on
line diff
--- a/snpEff_cds_report.py	Wed Apr 24 08:00:04 2013 -0500
+++ b/snpEff_cds_report.py	Fri May 24 08:50:03 2013 -0500
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+## version 1.2
 import sys,os,tempfile,re
 import httplib, urllib
 import optparse
@@ -322,6 +323,7 @@
     self.alt_stop_pos = None
     self.alt_stop_codon = None
     self.alt_stop_region = None  ## includes base before and after alt_stop_codon
+    self.use_eff_aa_change = False
   def chrPos(self):
     return "%s:%s" % (self.chrom,self.pos)
   def setEffect(self, effect, snpEffVersion = None):
@@ -389,7 +391,7 @@
       try:
         varAA = var_aa[var_aa_pos] if var_aa_pos < len(var_aa) else '_' 
         if debug: print >> sys.stdout, "HGVS %s %s pos:\t%d %d %d" % (self.transcript, self.effect, start_pos, var_aa_pos, end_pos)
-        mutation = "p.%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA)
+        mutation = "%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA)
         preAA = var_aa[start_pos:var_aa_pos] # N-term side
         postAA = var_aa[var_aa_pos+1:end_pos] if var_aa_pos+1 < len(var_aa) else '' # C-term side
         novel_peptide = var_aa[start_pos:end_pos]
@@ -475,7 +477,7 @@
     stop_codon = self.alt_stop_codon if self.alt_stop_codon else ''
     stop_region = self.alt_stop_region if self.alt_stop_region else ''
     chrpos = self.chrPos()
-    aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5]
+    aa_change = self.amino_acid_change if (self.use_eff_aa_change and self.amino_acid_change) else hgvs[5]
     return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else '']
 
 def __main__():
@@ -646,6 +648,7 @@
                     dest="ensembl_dataset", default='hsapiens_gene_ensembl', help='bimoart ensembl dataset default: hsapiens_gene_ensembl')
   parser.add_option("-p", "--polyA_limit",
                     action="store", type="int", default=None, help='ignore variants that are in a poly A longer than this value' )
+  parser.add_option('-S', '--snpeff_aa_change', dest='snpeff_aa_change', action='store_true', default=False, help='Report the SnpEff Amino_Acid_change field'  )
   parser.add_option('-a', '--all_effects', dest='all_effects', action='store_true', default=False, help='Search for all effects'  )
   parser.add_option('-c', '--with_ccds', dest='with_ccds', action='store_true', default=False, help='Only variants with CCDS entries'  )
   parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout'  )
@@ -781,6 +784,8 @@
                     snpEffect = SnpEffect(chrom,pos,ref,alt,freq,depth)
                     snpEffect.transcript_ids = transcript_ids
                     snpEffect.parseEffect(effect,snpEffVersion=SnpEffVersion)
+                    if options.snpeff_aa_change:
+                      snpEffect.use_eff_aa_change = True
                     if effects_list and not snpEffect.effect in effects_list:
                       continue
                     if snpEffect.transcript and (not options.with_ccds or snpEffect.transcript in ccds_dict):