diff ensembl_variant_report.py @ 2:f87fe6bc48f4 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit 6b6e5c13531bf909c4c70b7f8f9e28b4206d9068-dirty
author jjohnson
date Mon, 18 Mar 2019 21:43:34 -0400
parents a67b4de184c2
children 652d35c42bca
line wrap: on
line diff
--- a/ensembl_variant_report.py	Thu Jun 14 18:08:15 2018 -0400
+++ b/ensembl_variant_report.py	Mon Mar 18 21:43:34 2019 -0400
@@ -96,7 +96,8 @@
             dp = int(fields[di])
             dpr = [int(x) for x in fields[fi].split(',')]
             for i,alt in enumerate(alts.split(',')):
-                freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None
+                freq = float(dpr[i+1])/float(dp) if dp and dpr else \
+                    float(dpr[i+1])/float(sum(dpr)) if dpr else None
                 yield (transcript,pos,ref,alt,dp,freq)
 
     def parse_snpeff_vcf():
@@ -115,13 +116,16 @@
                 qual = float(qual)
                 dp = None
                 dpr = None
+                af = None
                 for info_item in info.split(';'):
                     if info_item.find('=') < 0: continue
                     (key, val) = info_item.split('=', 1)
                     if key == 'DP':
                         dp = int(val)
-                    if key == 'DPR':
+                    if key == 'DPR' or key == 'AD':
                         dpr = [int(x) for x in val.split(',')]
+                    if key == 'AF':
+                        af = [float(x) for x in val.split(',')]
                     if key in ['EFF','ANN']:
                         for effect in val.split(','):
                             if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|'))
@@ -131,8 +135,15 @@
                                 (eff, effs) = effect.rstrip(')').split('(')
                                 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11]
                             i = alt_list.index(alt) if alt in alt_list else 0
-                            freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None
-                            yield (transcript,pos,ref,alt,dp,freq)
+                            if af:
+                                freq = af[i]
+                            elif dpr:
+                                freq = float(dpr[i+1])/float(dp) if dp else \
+                                    float(dpr[i+1])/float(sum(dpr))
+                            else: 
+                                freq = None
+                            if freq:
+                                yield (transcript,pos,ref,alt,dp,freq)
 
 
     #Process gene model
@@ -179,6 +190,8 @@
                 alt_seq = alt if tx.gene.strand else reverse_complement(alt)
                 ref_seq = ref if tx.gene.strand else reverse_complement(ref)
                 cds_pos = ens_ref.genome_to_cds_pos(tid, spos)
+                if cds_pos is None:
+                    continue
                 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos))
                 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else '' 
                 offset = 0