# HG changeset patch # User Jim Johnson # Date 1366387467 18000 # Node ID 15a54fa11ad72affaceb1037532b0ed816a90692 # Parent 29b286896c500afb711c8a6f054cc7636abf6368 snpEff_cds_report report base before and after stop codon of variant sequence diff -r 29b286896c50 -r 15a54fa11ad7 snpEff_cds_report.py --- a/snpEff_cds_report.py Fri Apr 19 10:49:05 2013 -0500 +++ b/snpEff_cds_report.py Fri Apr 19 11:04:27 2013 -0500 @@ -287,7 +287,7 @@ FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414) """ class SnpEffect( object ): - report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance Percent','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','AA Variation'] + report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','Stop Region','AA Variation'] def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False): self.chrom = chrom self.pos = int(pos) @@ -310,6 +310,7 @@ self.transcript_ids = None self.exon = None self.cds_stop_codon = None + self.cds_stop_pos = None # retrieve from ensembl self.strand = None self.cds_pos = None @@ -318,7 +319,9 @@ self.cds = None self.aa_pos = None self.aa_len = None + self.alt_stop_pos = None self.alt_stop_codon = None + self.alt_stop_region = None ## includes base before and after alt_stop_codon def chrPos(self): return "%s:%s" % (self.chrom,self.pos) def setEffect(self, effect, snpEffVersion = None): @@ -416,7 +419,11 @@ else: var_aa_end = var_stop + 1 # include the stop codon stop_pos = var_stop * 3 + self.alt_stop_pos = stop_pos self.alt_stop_codon = var_dna[stop_pos:stop_pos+3] + self.alt_stop_region = (var_dna[stop_pos - 1] + '-' if stop_pos > 0 else '') + \ + self.alt_stop_codon + \ + ( '-' + var_dna[stop_pos+3] if len(var_dna) > stop_pos+3 else '') return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end] def inHomopolymer(self,polyA_limit): if polyA_limit: ## library prep can results in inserting or deleting an A in a poly A region @@ -466,9 +473,10 @@ aa_len = "%d" % self.aa_len if self.aa_len else '' gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' + stop_region = self.alt_stop_region if self.alt_stop_region else '' chrpos = self.chrPos() aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] - return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,var_aa if var_aa else ''] + return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else ''] def __main__(): @@ -476,11 +484,11 @@ if output_file: print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings) for snpEffect in snpEffects: - (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() + (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields() if not snpEffect.effect == 'FRAME_SHIFT': (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) novel_peptide = "%s_%s_%s" % (preAA,varAA,postAA) - print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide]) + print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,novel_peptide]) """ http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111 @@ -491,9 +499,9 @@ if output_file: print >> output_file, '\n' print >> output_file, '\n' - print >> output_file, '\n' + print >> output_file, '\n' for snpEffect in snpEffects: - (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() + (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields() refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html' aa_ref = '#'.join([refname,aa_anchor]) refpath = os.path.join(detail_dir,refname) @@ -504,10 +512,10 @@ except Exception, e: sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) ) if snpEffect.effect == 'FRAME_SHIFT': - print >> output_file, '\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref,novel_peptide) + print >> output_file, '\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref,novel_peptide) else: (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) - print >> output_file, '\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref, preAA,varAA,postAA) + print >> output_file, '\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref, preAA,varAA,postAA) print >> output_file, '
GeneVariant PositionReferenceVariationPenetrance PercentSequencing DepthTranscript DetailsAA PositionAA ChangeAA Length Stop CodonAA Variation
GeneVariant PositionReferenceVariationPenetranceSequencing DepthTranscript DetailsAA PositionAA ChangeAA Length Stop CodonAA Variation
%s%s%s%s%s%s%s%s%s%s%s%s
%s%s%s%s%s%s%s%s%s%s%s%s
%s%s%s%s%s%s%s%s%s%s%s%s%s%s
%s%s%s%s%s%s%s%s%s%s%s%s%s%s
\n' print >> output_file, '\n'