# HG changeset patch # User Jim Johnson # Date 1363716280 18000 # Node ID fafa105e5f585cecb025101a2ab8645c5db8fd29 # Parent aa76c8dd97e601c0f210d10685984827cb6978d3 Fix base quality diff -r aa76c8dd97e6 -r fafa105e5f58 pileup_to_vcf.py --- a/pileup_to_vcf.py Tue Mar 19 12:49:45 2013 -0500 +++ b/pileup_to_vcf.py Tue Mar 19 13:04:40 2013 -0500 @@ -155,12 +155,13 @@ mc = 0 # match count while bi < len(bases): base = bases[bi] - if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) + # if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) bi += 1 if base in '<>' : #reference skip (between paired reads) + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) qi += 1 - pass; elif base in '.,' : # match reference on forward/reverse strand + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) mc += 1 adp += 1 qual = ord(quals[qi]) -33 @@ -169,6 +170,7 @@ if qual >= min_base_quality: qdp += 1 elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) adp += 1 qual = ord(quals[qi]) -33 qi += 1 @@ -183,6 +185,7 @@ bi += len(indel_len_str) # increment the index by the len of the len string indel_len = int(indel_len_str) indel = bases[bi:bi+indel_len].upper() # get the indel bases + if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-2,base,indel) bi += indel_len # increment the index by the len of the indel if base == '+': # record insertion variant @@ -198,11 +201,14 @@ else: deletions[indel] = 1 elif base == '*' : # a deletion from a prior base + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) qi += 1 elif base == '^' : #start of read (followed by read quality char) + if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-1,base,bases[bi]) read_mapping_qual = ord(bases[bi]) - 33 bi += 1 elif base == '$' : #end of read + if debug: print >> sys.stderr, "%3d\t%s" % (bi-1,base) pass # no associated quality in either bases or quals dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp if debug: print >> sys.stderr, "match count: %d" % mc diff -r aa76c8dd97e6 -r fafa105e5f58 pileup_to_vcf.xml --- a/pileup_to_vcf.xml Tue Mar 19 12:49:45 2013 -0500 +++ b/pileup_to_vcf.xml Tue Mar 19 13:04:40 2013 -0500 @@ -1,4 +1,4 @@ - + Converts a pileup to VCF with filtering pileup_to_vcf.py -i $input_file -o $output_file #if $min_cvrg.__str__ != '':