comparison pileup_to_vcf.py @ 1:bb0ac51f1b02

Handle delection char: *
author Jim Johnson <jj@umn.edu>
date Mon, 18 Feb 2013 19:11:47 -0600
parents 3890f8ba0e4d
children d6de2d1f4af9
comparison
equal deleted inserted replaced
0:3890f8ba0e4d 1:bb0ac51f1b02
47 parser = optparse.OptionParser() 47 parser = optparse.OptionParser()
48 # files 48 # files
49 parser.add_option( '-i', '--input', dest='input', help='The input pileup file (else read from stdin)' ) 49 parser.add_option( '-i', '--input', dest='input', help='The input pileup file (else read from stdin)' )
50 parser.add_option( '-o', '--output', dest='output', help='The output vcf file (else write to stdout)' ) 50 parser.add_option( '-o', '--output', dest='output', help='The output vcf file (else write to stdout)' )
51 # filters 51 # filters
52 parser.add_option( '-c', '--min_coverage', type='int', default='0', dest='min_coverage', help='The minimum read coverage depth to report a variant [default 0]' ) 52 parser.add_option( '-c', '--min_coverage', type='int', default='1', dest='min_coverage', help='The minimum read coverage depth to report a variant [default 1]' )
53 parser.add_option( '-r', '--report_depth', type='choice', choices=['all','ref','qual'], default='ref', dest='report_depth', help='Coverage depth as: ref - reads covercovering base, all - reads spanning base, qual - reads with base call above min_base_quality' ) 53 parser.add_option( '-r', '--report_depth', type='choice', choices=['all','ref','qual'], default='ref', dest='report_depth', help='Coverage depth as: ref - reads covercovering base, all - reads spanning base, qual - reads with base call above min_base_quality' )
54 parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' ) 54 parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' )
55 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) 55 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' )
56 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) 56 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' )
57 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) 57 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' )
192 max_deletion = max(indel_len,max_deletion) 192 max_deletion = max(indel_len,max_deletion)
193 if indel in deletions: 193 if indel in deletions:
194 deletions[indel] += 1 194 deletions[indel] += 1
195 else: 195 else:
196 deletions[indel] = 1 196 deletions[indel] = 1
197 elif base == '*' : # a deletion from a prior base
198 qi += 1
197 elif base == '^' : #start of read (followed by read quality char) 199 elif base == '^' : #start of read (followed by read quality char)
198 read_mapping_qual = ord(bases[bi]) - 33 200 read_mapping_qual = ord(bases[bi]) - 33
199 bi += 1 201 bi += 1
200 elif base == '$' : #end of read 202 elif base == '$' : #end of read
201 pass # no associated quality in either bases or quals 203 pass # no associated quality in either bases or quals
202 dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else adp 204 dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else adp
203 if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions) 205 if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions)
206 if dp < max(1,min_coverage) :
207 continue
204 # output any variants that meet the depth coverage requirement 208 # output any variants that meet the depth coverage requirement
205 vcf_pos = pos 209 vcf_pos = pos
206 vcf_ref = ref 210 vcf_ref = ref
207 suffix = '' 211 suffix = ''
208 alts = [] 212 alts = []