# HG changeset patch # User Jim Johnson # Date 1361236307 21600 # Node ID bb0ac51f1b02780d3205f02180559a5d0fde17e2 # Parent 3890f8ba0e4d37393d46c4ab445ca0ef6af7313c Handle delection char: * diff -r 3890f8ba0e4d -r bb0ac51f1b02 pileup_to_vcf.py --- a/pileup_to_vcf.py Mon Feb 18 11:32:50 2013 -0500 +++ b/pileup_to_vcf.py Mon Feb 18 19:11:47 2013 -0600 @@ -49,7 +49,7 @@ parser.add_option( '-i', '--input', dest='input', help='The input pileup file (else read from stdin)' ) parser.add_option( '-o', '--output', dest='output', help='The output vcf file (else write to stdout)' ) # filters - parser.add_option( '-c', '--min_coverage', type='int', default='0', dest='min_coverage', help='The minimum read coverage depth to report a variant [default 0]' ) + parser.add_option( '-c', '--min_coverage', type='int', default='1', dest='min_coverage', help='The minimum read coverage depth to report a variant [default 1]' ) parser.add_option( '-r', '--report_depth', type='choice', choices=['all','ref','qual'], default='ref', dest='report_depth', help='Coverage depth as: ref - reads covercovering base, all - reads spanning base, qual - reads with base call above min_base_quality' ) parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' ) parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) @@ -194,6 +194,8 @@ deletions[indel] += 1 else: deletions[indel] = 1 + elif base == '*' : # a deletion from a prior base + qi += 1 elif base == '^' : #start of read (followed by read quality char) read_mapping_qual = ord(bases[bi]) - 33 bi += 1 @@ -201,6 +203,8 @@ pass # no associated quality in either bases or quals dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else adp if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions) + if dp < max(1,min_coverage) : + continue # output any variants that meet the depth coverage requirement vcf_pos = pos vcf_ref = ref diff -r 3890f8ba0e4d -r bb0ac51f1b02 test-data/test.pileup --- a/test-data/test.pileup Mon Feb 18 11:32:50 2013 -0500 +++ b/test-data/test.pileup Mon Feb 18 19:11:47 2013 -0600 @@ -8,6 +8,7 @@ seq1 279 C 23 A..T,,.,.,...,,,.,..... ;75&<<<<<<<<<=<<<9<<:<< seq2 156 A 11 .$......+2AG.+2AG.+2AGGG <975;:<<<<< seq3 200 A 20 ,,,,,..,.-4CACC.-4CACC....,.,,.^~. ==<<<<<<<<<<<::<;2<< +chr1 44520828 C 35 *********************************** !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! chr1 95935183 G 34 .,.,,,+1c,+1c.+1C,+1c,+1c.+1C.+1C.+1C,+1c.+1C,+1c,+1c.+1C,+1c,+1c.+1C,+1c.+1C.+1C,+1c,+1c,+1c.+1C.+1C.+1C.+1C.+1C^~c^~c IFEGIHIBIIIBAGIDI@IHCIIGHIHHEIIG!! chr1 158571283 A 8 <,.c.c^~t^~c GIC#I#'! chr6 125070485 C 216 <><<>>>>>>><<>>>><<<><<<<<>>>>><>>>>>><>>>>>>><><><>><><><>><>><<><<<>>>><<<>>>><<>>>><<<<<<<<<<<><<<<<<<><><<<<<><<<>><<<<<<<<>>>><<<<<<><><>>>>>>>>>>><<<<<<>><<<<>><><>>>>><>...,,,,,,,GGGGGGggggggGGGGggggggggg^~G^~g^~g^~g^~g IHHH:GFDHEBHICD@#GHHHFI9HII#I##AIIIE5@G!!!!!!!!!#!!!!!!!!$$!!$!$!#!!!