changeset 1:bb0ac51f1b02

Handle delection char: *
author Jim Johnson <jj@umn.edu>
date Mon, 18 Feb 2013 19:11:47 -0600
parents 3890f8ba0e4d
children d6de2d1f4af9
files pileup_to_vcf.py test-data/test.pileup
diffstat 2 files changed, 6 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/pileup_to_vcf.py	Mon Feb 18 11:32:50 2013 -0500
+++ b/pileup_to_vcf.py	Mon Feb 18 19:11:47 2013 -0600
@@ -49,7 +49,7 @@
   parser.add_option( '-i', '--input', dest='input', help='The input pileup file (else read from stdin)' )
   parser.add_option( '-o', '--output', dest='output', help='The output vcf file (else write to stdout)' )
   # filters
-  parser.add_option( '-c', '--min_coverage', type='int', default='0', dest='min_coverage', help='The minimum read coverage depth to report a variant [default 0]' )
+  parser.add_option( '-c', '--min_coverage', type='int', default='1', dest='min_coverage', help='The minimum read coverage depth to report a variant [default 1]' )
   parser.add_option( '-r', '--report_depth', type='choice', choices=['all','ref','qual'], default='ref', dest='report_depth', help='Coverage depth as: ref - reads covercovering base, all - reads spanning base, qual - reads with base call above min_base_quality' )
   parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' )
   parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' )
@@ -194,6 +194,8 @@
               deletions[indel] += 1
             else:
               deletions[indel] = 1
+        elif base == '*' : # a deletion from a prior base
+          qi += 1
         elif base == '^' : #start of read (followed by read quality char)
           read_mapping_qual = ord(bases[bi]) - 33 
           bi += 1
@@ -201,6 +203,8 @@
           pass # no associated quality in either bases or quals
       dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else adp
       if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions)
+      if dp < max(1,min_coverage) :
+        continue
       # output any variants that meet the depth coverage requirement
       vcf_pos = pos
       vcf_ref = ref
--- a/test-data/test.pileup	Mon Feb 18 11:32:50 2013 -0500
+++ b/test-data/test.pileup	Mon Feb 18 19:11:47 2013 -0600
@@ -8,6 +8,7 @@
 seq1	279	C	23	A..T,,.,.,...,,,.,.....	;75&<<<<<<<<<=<<<9<<:<<
 seq2	156	A	11	.$......+2AG.+2AG.+2AGGG	<975;:<<<<<
 seq3	200	A	20	,,,,,..,.-4CACC.-4CACC....,.,,.^~.	==<<<<<<<<<<<::<;2<<
+chr1	44520828	C	35	***********************************	!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 chr1	95935183	G	34	.,.,,,+1c,+1c.+1C,+1c,+1c.+1C.+1C.+1C,+1c.+1C,+1c,+1c.+1C,+1c,+1c.+1C,+1c.+1C.+1C,+1c,+1c,+1c.+1C.+1C.+1C.+1C.+1C^~c^~c	IFEGIHIBIIIBAGIDI@IHCIIGHIHHEIIG!!
 chr1	158571283	A	8	<,.c.c^~t^~c	GIC#I#'!
 chr6	125070485	C	216	<><<>>>>>>><<>>>><<<><<<<<>>>>><>>>>>><>>>>>>><><><>><><><>><>><<><<<>>>><<<>>>><<>>>><<<<<<<<<<<><<<<<<<><><<<<<><<<>><<<<<<<<>>>><<<<<<><><>>>>>>>>>>><<<<<<>><<<<>><><>>>>><>...,,,,,,,GGGGGGggggggGGGGggggggggg^~G^~g^~g^~g^~g	IHHH:GFDH<H@IHCEIIIIHIIIDI#EACGGIEHB#HDHEBHGGDI?BHIEIIIGHBHIEDIIHHHIGHFIIDI<IIIIEEIIIIGAHIHEGGIBHBIHE;@CFIBHGGCDBGDGGGIIDC@GEAGDIHIIEEHBIIGIEGHIHHIHHHH2@G#>EBHICD@#GHHHFI9HII#I##AIIIE5@G!!!!!!!!!#!!!!!!!!$$!!$!$!#!!!