changeset 5:85b933b7d231

Make the reporting of the SnpEff Amino_Acid_change field an option
author Jim Johnson <jj@umn.edu>
date Fri, 24 May 2013 08:50:03 -0500
parents 429a6b4df5e5
children a64ef0611117
files snpEff_cds_report.py snpEff_cds_report.xml
diffstat 2 files changed, 10 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/snpEff_cds_report.py	Wed Apr 24 08:00:04 2013 -0500
+++ b/snpEff_cds_report.py	Fri May 24 08:50:03 2013 -0500
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+## version 1.2
 import sys,os,tempfile,re
 import httplib, urllib
 import optparse
@@ -322,6 +323,7 @@
     self.alt_stop_pos = None
     self.alt_stop_codon = None
     self.alt_stop_region = None  ## includes base before and after alt_stop_codon
+    self.use_eff_aa_change = False
   def chrPos(self):
     return "%s:%s" % (self.chrom,self.pos)
   def setEffect(self, effect, snpEffVersion = None):
@@ -389,7 +391,7 @@
       try:
         varAA = var_aa[var_aa_pos] if var_aa_pos < len(var_aa) else '_' 
         if debug: print >> sys.stdout, "HGVS %s %s pos:\t%d %d %d" % (self.transcript, self.effect, start_pos, var_aa_pos, end_pos)
-        mutation = "p.%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA)
+        mutation = "%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA)
         preAA = var_aa[start_pos:var_aa_pos] # N-term side
         postAA = var_aa[var_aa_pos+1:end_pos] if var_aa_pos+1 < len(var_aa) else '' # C-term side
         novel_peptide = var_aa[start_pos:end_pos]
@@ -475,7 +477,7 @@
     stop_codon = self.alt_stop_codon if self.alt_stop_codon else ''
     stop_region = self.alt_stop_region if self.alt_stop_region else ''
     chrpos = self.chrPos()
-    aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5]
+    aa_change = self.amino_acid_change if (self.use_eff_aa_change and self.amino_acid_change) else hgvs[5]
     return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else '']
 
 def __main__():
@@ -646,6 +648,7 @@
                     dest="ensembl_dataset", default='hsapiens_gene_ensembl', help='bimoart ensembl dataset default: hsapiens_gene_ensembl')
   parser.add_option("-p", "--polyA_limit",
                     action="store", type="int", default=None, help='ignore variants that are in a poly A longer than this value' )
+  parser.add_option('-S', '--snpeff_aa_change', dest='snpeff_aa_change', action='store_true', default=False, help='Report the SnpEff Amino_Acid_change field'  )
   parser.add_option('-a', '--all_effects', dest='all_effects', action='store_true', default=False, help='Search for all effects'  )
   parser.add_option('-c', '--with_ccds', dest='with_ccds', action='store_true', default=False, help='Only variants with CCDS entries'  )
   parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout'  )
@@ -781,6 +784,8 @@
                     snpEffect = SnpEffect(chrom,pos,ref,alt,freq,depth)
                     snpEffect.transcript_ids = transcript_ids
                     snpEffect.parseEffect(effect,snpEffVersion=SnpEffVersion)
+                    if options.snpeff_aa_change:
+                      snpEffect.use_eff_aa_change = True
                     if effects_list and not snpEffect.effect in effects_list:
                       continue
                     if snpEffect.transcript and (not options.with_ccds or snpEffect.transcript in ccds_dict):
--- a/snpEff_cds_report.xml	Wed Apr 24 08:00:04 2013 -0500
+++ b/snpEff_cds_report.xml	Fri May 24 08:50:03 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="SnpEff-cds-report" name="SnpEff Ensembl CDS" version="1.1">
+<tool id="SnpEff-cds-report" name="SnpEff Ensembl CDS" version="1.2">
   <description>Report Variant coding sequence changes for SnpEffects</description>
   <command interpreter="python">
    snpEff_cds_report.py --in $snp_effect_vcf 
@@ -16,6 +16,7 @@
    #end if
    $all_effects
    $with_ccds
+   $snpeff_aa_change
    #if $report_format.__str__.find('html') >= 0:
      --html_report $html_report
      --html_dir $html_report.extra_files_path
@@ -136,6 +137,7 @@
     </param>
     <param name="all_effects" type="boolean" truevalue="--all_effects" falsevalue="" checked="false" label="Report the variant coding for each Ensembl Transcript at the variant position" help="Default is to report only the first Transcript that has a cDNA change"/>
     <param name="with_ccds" type="boolean" truevalue="--with_ccds" falsevalue="" checked="false" label="Report the variant coding only for Ensembl Transcripts with a CCDS ID" help=""/>
+    <param name="snpeff_aa_change" type="boolean" truevalue="--snpeff_aa_change" falsevalue="" checked="false" label="Report the Amino_Acid_change from SnpEff" help="Otherwise, report the first changed Animo Acid as: AposA (ref_AA pos_from_Nterminus alt_AA)"/>
     <param name="polya" type="integer" value="5" optional="true" label="Ignore variants that are part of a Poly-A of at least this length" help="Leave blank to turn off poly A filtering">
       <validator type="in_range" message="poly A between 2 and 100 bases" min="2" max="100"/>
     </param>