Repository 'vcf_filter'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/vcf_filter

Changeset 0:da1a6f33b504 (2014-01-27)
Commit message:
Imported from capsule None
added:
bedClass.py
filter.py
filter.xml
test-data/test.small.vcf
test-data/test_filter_quality_9_DP_2000_lt.vcf
test-data/test_filter_quality_9_NS_360_gt.vcf
tools.py
vcfClass.py
vcfPytools.py
b
diff -r 000000000000 -r da1a6f33b504 bedClass.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bedClass.py Mon Jan 27 09:29:09 2014 -0500
[
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+
+import os.path
+import sys
+
+class bed:
+  def __init__(self):
+    self.numberTargets = 0
+    self.referenceSequences = {}
+    self.referenceSequenceList = []
+
+  def openBed(self, filename):
+    if filename == "stdin": self.filehandle = sys.stdin
+    else:
+      try: self.filehandle = open(filename,"r")
+      except IOError:
+        print >> sys.stderr, "Failed to find file: ",filename
+        exit(1)
+
+# Get a bed record.
+  def getRecord(self):
+    self.record = self.filehandle.readline()
+    if not self.record: return False
+
+    self.numberTargets = self.numberTargets + 1
+    self.ref = ""
+    self.start = 0
+    self.end = 0
+
+# bed file should be 0-based, half-open, so the start coordinate
+# must be that in the bed file plus one.
+    entries = self.record.rstrip("\n").split("\t")
+    self.referenceSequence = entries[0]
+
+# Add the reference sequence to the dictionary.  If it didn't previously
+# exist append the reference sequence to the end of the list as well. 
+# This ensures that the order in which the reference sequences appeared
+# in the header can be preserved.
+    if self.referenceSequence not in self.referenceSequences:
+      self.referenceSequences[self.referenceSequence] = True
+      self.referenceSequenceList.append(self.referenceSequence)
+
+    try: self.start = int(entries[1]) + 1
+    except:
+      text = "start position need is not an integer"
+      self.generalError(text, "start", entries[1])
+
+    try: self.end = int(entries[2])
+    except:
+      text = "end position need is not an integer"
+      self.generalError(text, "end", entries[2])
+
+# Check that the record is a valid interval.
+    if self.end - self.start < 0:
+      print >> sys.stderr, "Invalid target interval:\n\t", self.record
+      exit(1)
+
+    return True
+
+# Parse through the bed file until the correct reference sequence is
+# encountered and the end position is greater than or equal to that requested.
+  def parseBed(self, referenceSequence, position):
+    success = True
+    if self.referenceSequence != referenceSequence:
+      while self.referenceSequence != referenceSequence and success: success = self.getRecord()
+
+    while self.referenceSequence == referenceSequence and self.end < position and success: success = self.getRecord()
+
+    return success
+
+# Close the bed file.
+  def closeBed(self, filename):
+    self.filehandle.close()
+
+# Define error messages for different handled errors.
+  def generalError(self, text, field, fieldValue):
+    print >> sys.stderr, "\nError encountered when attempting to read:"
+    if field != "": print >> sys.stderr, "\t", field, ":             ", fieldValue
+    print >> sys.stderr,  "\n", text
+    exit(1)
b
diff -r 000000000000 -r da1a6f33b504 filter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.py Mon Jan 27 09:29:09 2014 -0500
[
@@ -0,0 +1,162 @@
+#!/usr/bin/python
+
+import os.path
+import sys
+import optparse
+
+import vcfClass
+from vcfClass import *
+
+import tools
+from tools import *
+
+if __name__ == "__main__":
+  main()
+
+def filterFail(text, file):
+  print >> sys.stderr, text
+  if file != None: os.remove(file)
+  exit(1)
+
+def main():
+
+# Parse the command line options
+  usage = "Usage: vcfPytools.py filter [options]"
+  parser = optparse.OptionParser(usage = usage)
+  parser.add_option("-i", "--in",
+                    action="store", type="string",
+                    dest="vcfFile", help="input vcf file")
+  parser.add_option("-o", "--out",
+                    action="store", type="string",
+                    dest="output", help="output vcf file")
+  parser.add_option("-q", "--quality",
+                    action="store", type="int",
+                    dest="quality", help="filter out SNPs with qualities lower than selected value")
+  parser.add_option("-n", "--info",
+                    action="append", type="string", nargs=3,
+                    dest="infoFilters", help="filter based on entries in the info string")
+  parser.add_option("-r", "--remove-genotypes",
+                    action="store_true", default=False,
+                    dest="removeGeno", help="remove the genotype strings from the vcf file")
+  parser.add_option("-m", "--mark-as-pass",
+                    action="store_true", default=False,
+                    dest="markPass", help="Mark all records as having passed filters")
+
+  (options, args) = parser.parse_args()
+
+# Check that a single vcf file is given.
+  if options.vcfFile == None:
+    parser.print_help()
+    print >> sys.stderr, "\nInput vcf file (-i, --input) is required for vcf filtering."
+    exit(1)
+
+# The --mark-as-pass option can only be used if no actual filters
+# have been specified.
+  if options.markPass and options.infoFilters:
+    print >> sys.stderr, "--mark-as-pass cannot be used in conjunction with filters."
+    exit(1)
+
+# Set the output file to stdout if no output file was specified.
+  outputFile, writeOut = setOutput(options.output) # tools.py
+
+  v = vcf() # Define vcf object.
+
+# Open the vcf file.
+  v.openVcf(options.vcfFile)
+
+# Read in the header information.
+  v.parseHeader(options.vcfFile, writeOut)
+  taskDescriptor = "##vcfPytools="
+  if options.infoFilters:
+    taskDescriptor += "filtered using the following filters: "
+    for filter, value, logic in options.infoFilters: taskDescriptor += str(filter) + str(value) + ","
+    taskDescriptor = taskDescriptor.rstrip(",")
+  if options.markPass: taskDescriptor += "marked all records as PASS"
+    
+  writeHeader(outputFile, v, options.removeGeno, taskDescriptor)
+
+# Check that specified filters from the info field are either integers or floats.
+  if options.infoFilters:
+    v.processInfo = True # Process the info string
+    filters = {}
+    filterValues = {}
+    filterLogic = {}
+    for filter, value, logic in options.infoFilters:
+      filterName = str(filter) + str(value)
+      if "-" in filter or "-" in value or "-" in logic:
+        print >> sys.stderr, "\n--info (-n) requires three arguments, for example:"
+        print >> sys.stderr, "\t--info DP 5 lt: filter records with DP less than (lt) 5.\n"
+        print >> sys.stderr, "allowed logic arguments:\n\tgt: greater than\n\tlt: less than."
+        print >> sys.stderr, "\nError in:", filter
+        exit(1)
+      if logic != "gt" and logic != "lt":
+        print >> sys.stderr, "\nfilter logic not recognised."
+        print >> sys.stderr, "allowed logic arguments:\n\tgt: greater than\n\tlt: less than."
+        print >> sys.stderr, "\nError in:", filter
+        exit(1)
+      if v.infoHeaderTags.has_key(filter):
+        if v.infoHeaderTags[filter][1].lower() == "integer":
+          try:
+            filters[filterName] = filter
+            filterValues[filterName] = int(value)
+            filterLogic[filterName] = logic
+            #filterLogic[filterName] = logic
+          except ValueError:
+            text = "Filter " + filter + " requires an integer entry, not " + str(type(value))
+            filterFail(text, options.output)
+
+        if v.infoHeaderTags[filter][1].lower() == "float":
+          try:
+            filters[filterName] = filter
+            filterValues[filterName] = float(value)
+            filterLogic[filterName] = logic
+            #filters[filterName] = float(value)
+            #filterLogic[filterName] = logic
+          except ValueError:
+            text = "Filter " + filter + " requires an float entry, not " + str(type(value))
+            filterFail(text, options.output)
+
+      else:
+        text = "Filter " + filter + " has no explanation in the header.  Unknown type for the entry."
+        filterFail(text, options.output)
+
+# Parse the vcf file and check if any of the filters are failed.  If
+# so, build up a string of failed filters.
+  while v.getRecord():
+    filterString = ""
+
+# Mark the record as "PASS" if --mark-as-pass was applied.
+    if options.markPass: v.filters = "PASS"
+
+# Check for quality filtering.
+    if options.quality != None:
+      if v.quality < options.quality:
+        filterString = filterString + ";" + "Q" + str(options.quality) if filterString != "" else "Q" + str(options.quality)
+
+# Check for filtering on info string filters.
+    if options.infoFilters:
+      for filterName, filter in filters.iteritems():
+        value = filterValues[filterName]
+        logic = filterLogic[filterName]
+        if v.infoTags.has_key(filter):
+          if type(value) == int:
+            if logic == "lt" and int(v.infoTags[filter]) < value:
+              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
+            if logic == "gt" and int(v.infoTags[filter]) > value:
+              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
+          elif type(value) == float:
+            if logic == "lt" and float(v.infoTags[filter]) < value:
+              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
+            if logic == "gt" and float(v.infoTags[filter]) > value:
+              filterString = filterString + ";" + filter + str(value) if filterString != "" else filter + str(value)
+
+    filterString = "PASS" if filterString == "" else filterString
+    v.filters = filterString
+    record = v.buildRecord(options.removeGeno)
+    outputFile.write(record)
+
+# Close the vcf files.
+  v.closeVcf(options.vcfFile)
+
+# Terminate the program.
+  return 0
b
diff -r 000000000000 -r da1a6f33b504 filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.xml Mon Jan 27 09:29:09 2014 -0500
[
@@ -0,0 +1,64 @@
+<tool id="vcf_filter" name="Filter" version="1.0.0">
+  <description>a VCF file</description>
+  <command interpreter="python">
+    vcfPytools.py
+      filter 
+      --in=$input1
+      --out=$output1
+      --quality=$quality
+      #for $i in $info_filter:
+        --info ${i.info}
+      #end for
+      $remove_genotypes
+      $mark_as_pass
+  </command>
+  <inputs>
+    <param name="input1" label="VCF file" type="data" format="vcf" />
+    <param name="quality" label="Filter by quality" type="integer" value='' help="Filter out SNPs with qualities lower than selected value" />
+    <repeat name="info_filter" title="Filter based on entries in the info string">
+      <param name="info" label="Filter" type="text" value='' help='This option takes three values: the info string tag, the cutoff value and whether to filter out those records with less than (lt) or greater than (gt) this value.  For example: DP 10 lt ' />
+    </repeat>
+    <param name="remove_genotypes" label="Remove the genotype strings" type="boolean" truevalue="--remove-genotypes" falsevalue="" checked="False" />
+    <param name="mark_as_pass" label="Mark all records as having passed filters" type="boolean" truevalue="--mark-as-pass" falsevalue="" checked="False" />
+  </inputs>
+  <tests>
+    <test>
+      <param name="input1" value="test.small.vcf" ftype="vcf" />
+      <param name="quality" value="9" />
+      <param name="info" value="NS 360 gt"/>
+      <param name="remove_genotypes" value="" />
+      <param name="mark_as_pass" value="" />
+      <output name="output" file="test_filter_quality_9_NS_360_gt.vcf" lines_diff="6" ftype="vcf" />
+    </test>
+    <test>
+      <param name="input1" value="test.small.vcf" ftype="vcf" />
+      <param name="quality" value="9" />
+      <param name="info" value="DP 2000 lt"/>
+      <param name="remove_genotypes" value="" />
+      <param name="mark_as_pass" value="" />
+      <output name="output" file="test_filter_quality_9_DP_2000_lt.vcf" lines_diff="6" ftype="vcf" />
+    </test>
+  </tests>
+  <outputs>
+    <data format="vcf" name="output1" label="${tool.name} ${on_string}" />
+  </outputs>
+  <help>
+
+**What it does**
+
+This tool uses vcfPytools_' filter command
+
+.. _vcfPytools: https://github.com/AlistairNWard/vcfPytools
+
+Quality option will check the variant quality for each record and if it is below the defined value, the filter field will be populated with the filter entry Q[value].
+
+Any value in the info string can be used for filtering by using the 'Filter by info' option.  This option takes three values: the info string tag, the cutoff value and whether to filter out those records with less than (lt) or greater than (gt) this value.  For example:
+
+  DP 10 lt 
+
+would filter out all varianta with a depth (DP) less than 10 and the filter field would be populated with DP10.
+
+This option can be defined as many times as required.
+
+  </help>
+</tool>
b
diff -r 000000000000 -r da1a6f33b504 test-data/test.small.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.small.vcf Mon Jan 27 09:29:09 2014 -0500
b
b'@@ -0,0 +1,1000 @@\n+##fileformat=VCFv4.0\n+##fileDate=20110215\n+##source=freeBayes version 0.4.1\n+##reference=/d2/data/references/build_37/human_reference_v37.fa\n+##phasing=none\n+##commandline="/share/software/freebayes/bin/freebayes --stdin --no-filters --min-alternate-count 2 --posterior-integration-depth 100 --pvar 0.01 --vcf /d1/data/1000G/20101123/filteringTests/AFR/mosaik/noFilters/Pipeline/none//freebayes/freebayes.20:0-1000000.baq.20110210.vcf --fasta-reference /d2/data/references/build_37/human_reference_v37.fa --region 20:0..1000000"\n+##vcfPytools=merge freebayes.20:0-1000000.baq.20110210.vcf, freebayes.20:1000000-2000000.baq.20110210.vcf, freebayes.20:2000000-3000000.baq.20110210.vcf, freebayes.20:3000000-4000000.baq.20110210.vcf, freebayes.20:4000000-5000000.baq.20110210.vcf, freebayes.20:5000000-6000000.baq.20110210.vcf, freebayes.20:6000000-7000000.baq.20110210.vcf, freebayes.20:7000000-8000000.baq.20110210.vcf, freebayes.20:8000000-9000000.baq.20110210.vcf, freebayes.20:9000000-10000000.baq.20110210.vcf, freebayes.20:10000000-11000000.baq.20110210.vcf, freebayes.20:11000000-12000000.baq.20110210.vcf, freebayes.20:12000000-13000000.baq.20110210.vcf, freebayes.20:13000000-14000000.baq.20110210.vcf, freebayes.20:14000000-15000000.baq.20110210.vcf, freebayes.20:15000000-16000000.baq.20110210.vcf, freebayes.20:16000000-17000000.baq.20110210.vcf, freebayes.20:17000000-18000000.baq.20110210.vcf, freebayes.20:18000000-19000000.baq.20110210.vcf, freebayes.20:19000000-20000000.baq.20110210.vcf, freebayes.20:20000000-21000000.baq.20110210.vcf, freebayes.20:21000000-22000000.baq.20110210.vcf, freebayes.20:22000000-23000000.baq.20110210.vcf, freebayes.20:23000000-24000000.baq.20110210.vcf, freebayes.20:24000000-25000000.baq.20110210.vcf, freebayes.20:25000000-26000000.baq.20110210.vcf, freebayes.20:26000000-27000000.baq.20110210.vcf, freebayes.20:27000000-28000000.baq.20110210.vcf, freebayes.20:28000000-29000000.baq.20110210.vcf, freebayes.20:29000000-30000000.baq.20110210.vcf, freebayes.20:30000000-31000000.baq.20110210.vcf, freebayes.20:31000000-32000000.baq.20110210.vcf, freebayes.20:32000000-33000000.baq.20110210.vcf, freebayes.20:33000000-34000000.baq.20110210.vcf, freebayes.20:34000000-35000000.baq.20110210.vcf, freebayes.20:35000000-36000000.baq.20110210.vcf, freebayes.20:36000000-37000000.baq.20110210.vcf, freebayes.20:37000000-38000000.baq.20110210.vcf, freebayes.20:38000000-39000000.baq.20110210.vcf, freebayes.20:39000000-40000000.baq.20110210.vcf, freebayes.20:40000000-41000000.baq.20110210.vcf, freebayes.20:41000000-42000000.baq.20110210.vcf, freebayes.20:42000000-43000000.baq.20110210.vcf, freebayes.20:43000000-44000000.baq.20110210.vcf, freebayes.20:44000000-45000000.baq.20110210.vcf, freebayes.20:45000000-46000000.baq.20110210.vcf, freebayes.20:46000000-47000000.baq.20110210.vcf, freebayes.20:47000000-48000000.baq.20110210.vcf, freebayes.20:48000000-49000000.baq.20110210.vcf, freebayes.20:49000000-50000000.baq.20110210.vcf, freebayes.20:50000000-51000000.baq.20110210.vcf, freebayes.20:51000000-52000000.baq.20110210.vcf, freebayes.20:52000000-53000000.baq.20110210.vcf, freebayes.20:53000000-54000000.baq.20110210.vcf, freebayes.20:54000000-55000000.baq.20110210.vcf, freebayes.20:55000000-56000000.baq.20110210.vcf, freebayes.20:56000000-57000000.baq.20110210.vcf, freebayes.20:57000000-58000000.baq.20110210.vcf, freebayes.20:58000000-59000000.baq.20110210.vcf, freebayes.20:59000000-60000000.baq.20110210.vcf, freebayes.20:60000000-61000000.baq.20110210.vcf, freebayes.20:61000000-62000000.baq.20110210.vcf, freebayes.20:62000000-63000000.baq.20110210.vcf, freebayes.20:63000000-63025520.baq.20110210.vcf\n+##vcfPytools=\n+##INFO=<ID=AB,Number=1,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">\n+##INFO=<ID=DEL,Number=1,Type=Integer,Description="Len'..b'74;RA=2141;AA=3;SR=1118|1023;SA=1|2;SB=0.33333;AB=0.75;ABR=6;ABA=2;RUN=2;SNP;TV\n+20\t124059\t.\tC\tT\t37.8\tPASS\tNS=358;DP=1984;AC=3;AN=716;AF=0.0041899;RA=1971;AA=7;SR=971|1000;SA=5|2;SB=0.71429;AB=0.44444;ABR=4;ABA=5;RUN=1;SNP;TS;CpG\n+20\t124060\t.\tG\tA\t34.6\tPASS\tNS=359;DP=1991;AC=1;AN=718;AF=0.0013928;RA=1983;AA=3;SR=995|988;SA=2|1;SB=0.66667;AB=0.5;ABR=3;ABA=3;RUN=2;SNP;TS;CpG\n+20\t124137\t.\tC\tA\t0.0701\tPASS\tNS=357;DP=1933;AC=1;AN=714;AF=0.0014006;RA=1922;AA=4;SR=990|932;SA=0|4;SB=0;AB=0.5;ABR=1;ABA=1;RUN=2;SNP;TV\n+20\t124148\t.\tA\tG\t0.344\tPASS\tNS=358;DP=1957;AC=2;AN=716;AF=0.0027933;RA=1947;AA=6;SR=989|958;SA=1|5;SB=0.16667;AB=0.8;ABR=12;ABA=3;RUN=1;SNP;TS\n+20\t124229\t.\tT\tC\t0.672\tPASS\tNS=356;DP=1979;AC=1;AN=712;AF=0.0014045;RA=1972;AA=4;SR=1025|947;SA=1|3;SB=0.25;AB=0.8;ABR=8;ABA=2;RUN=1;SNP;TS\n+20\t124230\t.\tA\tG\t0.0816\tPASS\tNS=356;DP=1970;AC=1;AN=712;AF=0.0014045;RA=1959;AA=4;SR=1026|933;SA=1|3;SB=0.25;AB=0.81818;ABR=9;ABA=2;RUN=1;SNP;TS\n+20\t124249\t.\tA\tG\t99.0\tPASS\tNS=359;DP=1969;AC=41;AN=718;AF=0.057103;RA=1864;AA=101;SR=974|890;SA=55|46;SB=0.54455;AB=0.51977;ABR=92;ABA=85;RUN=1;SNP;TS;CpG\n+20\t124302\t.\tA\tC\t99.0\tPASS\tNS=358;DP=2007;AC=319;AN=716;AF=0.44553;RA=1185;AA=819;SR=577|608;SA=392|427;SB=0.47863;AB=0.51934;ABR=376;ABA=348;RUN=2;SNP;TV\n+20\t124335\t.\tC\tT\t99.0\tPASS\tNS=361;DP=2035;AC=4;AN=722;AF=0.0055402;RA=2013;AA=15;SR=929|1084;SA=10|5;SB=0.66667;AB=0.3913;ABR=9;ABA=14;RUN=1;SNP;TS\n+20\t124412\t.\tC\tT\t99.0\tPASS\tNS=357;DP=2005;AC=307;AN=714;AF=0.42997;RA=1207;AA=795;SR=602|605;SA=400|395;SB=0.50314;AB=0.51286;ABR=379;ABA=360;RUN=1;SNP;TS\n+20\t124511\t.\tT\tA\t0.647\tPASS\tNS=365;DP=2141;AC=1;AN=730;AF=0.0013699;RA=2133;AA=4;SR=1077|1056;SA=2|2;SB=0.5;AB=0.66667;ABR=4;ABA=2;RUN=2;SNP;TV\n+20\t124607\t.\tA\tG\t2.28\tPASS\tNS=357;DP=1872;AC=1;AN=714;AF=0.0014006;RA=1865;AA=2;SR=930|935;SA=1|1;SB=0.5;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TS\n+20\t124737\t.\tA\tC\t99.0\tPASS\tNS=367;DP=2024;AC=4;AN=734;AF=0.0054496;RA=2003;AA=16;SR=1046|957;SA=11|5;SB=0.6875;AB=0.26316;ABR=5;ABA=14;RUN=1;SNP;TV\n+20\t124827\t.\tT\tC\t1.7\tPASS\tNS=367;DP=2059;AC=1;AN=734;AF=0.0013624;RA=2048;AA=4;SR=987|1061;SA=2|2;SB=0.5;AB=0.6;ABR=3;ABA=2;RUN=2;SNP;TS\n+20\t124848\t.\tA\tG\t36.5\tPASS\tNS=364;DP=2077;AC=1;AN=728;AF=0.0013736;RA=2060;AA=6;SR=966|1094;SA=2|4;SB=0.33333;AB=0.57143;ABR=4;ABA=3;RUN=1;SNP;TS\n+20\t124878\t.\tG\tA\t0.22\tPASS\tNS=361;DP=2060;AC=1;AN=722;AF=0.001385;RA=2049;AA=4;SR=974|1075;SA=3|1;SB=0.75;AB=0.77778;ABR=7;ABA=2;RUN=1;SNP;TS\n+20\t124920\t.\tA\tG\t76.0\tPASS\tNS=361;DP=1956;AC=1;AN=722;AF=0.001385;RA=1949;AA=6;SR=984|965;SA=2|4;SB=0.33333;AB=0.14286;ABR=1;ABA=6;RUN=1;SNP;TS;CpG\n+20\t124942\t.\tA\tT\t1.44\tPASS\tNS=362;DP=2015;AC=1;AN=724;AF=0.0013812;RA=2007;AA=4;SR=1107|900;SA=1|3;SB=0.25;AB=0.6;ABR=3;ABA=2;RUN=1;SNP;TV\n+20\t124998\t.\tT\tC\t99.0\tPASS\tNS=366;DP=2204;AC=29;AN=732;AF=0.039617;RA=2131;AA=66;SR=1132|999;SA=36|30;SB=0.54545;AB=0.54902;ABR=56;ABA=46;RUN=1;SNP;TS\n+20\t125096\t.\tC\tG\t4.18\tPASS\tNS=363;DP=2095;AC=1;AN=726;AF=0.0013774;RA=2082;AA=2;SR=1071|1011;SA=2|0;SB=1;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TV\n+20\t125109\t.\tA\tT\t0.0858\tPASS\tNS=365;DP=2091;AC=2;AN=730;AF=0.0027397;RA=2080;AA=7;SR=1103|977;SA=7|0;SB=1;AB=0.66667;ABR=6;ABA=3;RUN=8;SNP;TV\n+20\t125121\t.\tC\tT\t99.0\tPASS\tNS=364;DP=2075;AC=208;AN=728;AF=0.28571;RA=1472;AA=599;SR=826|646;SA=335|264;SB=0.55927;AB=0.49868;ABR=377;ABA=378;RUN=1;SNP;TS\n+20\t125179\t.\tC\tA\t99.0\tPASS\tNS=368;DP=2155;AC=7;AN=736;AF=0.0095109;RA=2127;AA=26;SR=1123|1004;SA=16|10;SB=0.61538;AB=0.36842;ABR=14;ABA=24;RUN=2;SNP;TV\n+20\t125266\t.\tT\tA\t3.47\tPASS\tNS=364;DP=2138;AC=1;AN=728;AF=0.0013736;RA=2123;AA=5;SR=907|1216;SA=1|4;SB=0.2;AB=0.6;ABR=3;ABA=2;RUN=1;SNP;TV\n+20\t125381\t.\tA\tG\t4.69\tPASS\tNS=362;DP=2102;AC=1;AN=724;AF=0.0013812;RA=2094;AA=3;SR=919|1175;SA=1|2;SB=0.33333;AB=0.66667;ABR=4;ABA=2;RUN=1;SNP;TS\n+20\t125450\t.\tG\tA\t0.364\tPASS\tNS=359;DP=1881;AC=1;AN=718;AF=0.0013928;RA=1868;AA=5;SR=862|1006;SA=1|4;SB=0.2;AB=0.71429;ABR=5;ABA=2;RUN=2;SNP;TS\n+20\t125470\t.\tA\tG\t0.602\tPASS\tNS=360;DP=1870;AC=1;AN=720;AF=0.0013889;RA=1855;AA=5;SR=909|946;SA=3|2;SB=0.6;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TS\n'
b
diff -r 000000000000 -r da1a6f33b504 test-data/test_filter_quality_9_DP_2000_lt.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_filter_quality_9_DP_2000_lt.vcf Mon Jan 27 09:29:09 2014 -0500
b
b'@@ -0,0 +1,1001 @@\n+##fileformat=VCFv4.0\n+##fileDate=20110215\n+##source=freeBayes version 0.4.1\n+##reference=/d2/data/references/build_37/human_reference_v37.fa\n+##phasing=none\n+##commandline="/share/software/freebayes/bin/freebayes --stdin --no-filters --min-alternate-count 2 --posterior-integration-depth 100 --pvar 0.01 --vcf /d1/data/1000G/20101123/filteringTests/AFR/mosaik/noFilters/Pipeline/none//freebayes/freebayes.20:0-1000000.baq.20110210.vcf --fasta-reference /d2/data/references/build_37/human_reference_v37.fa --region 20:0..1000000"\n+##vcfPytools=merge freebayes.20:0-1000000.baq.20110210.vcf, freebayes.20:1000000-2000000.baq.20110210.vcf, freebayes.20:2000000-3000000.baq.20110210.vcf, freebayes.20:3000000-4000000.baq.20110210.vcf, freebayes.20:4000000-5000000.baq.20110210.vcf, freebayes.20:5000000-6000000.baq.20110210.vcf, freebayes.20:6000000-7000000.baq.20110210.vcf, freebayes.20:7000000-8000000.baq.20110210.vcf, freebayes.20:8000000-9000000.baq.20110210.vcf, freebayes.20:9000000-10000000.baq.20110210.vcf, freebayes.20:10000000-11000000.baq.20110210.vcf, freebayes.20:11000000-12000000.baq.20110210.vcf, freebayes.20:12000000-13000000.baq.20110210.vcf, freebayes.20:13000000-14000000.baq.20110210.vcf, freebayes.20:14000000-15000000.baq.20110210.vcf, freebayes.20:15000000-16000000.baq.20110210.vcf, freebayes.20:16000000-17000000.baq.20110210.vcf, freebayes.20:17000000-18000000.baq.20110210.vcf, freebayes.20:18000000-19000000.baq.20110210.vcf, freebayes.20:19000000-20000000.baq.20110210.vcf, freebayes.20:20000000-21000000.baq.20110210.vcf, freebayes.20:21000000-22000000.baq.20110210.vcf, freebayes.20:22000000-23000000.baq.20110210.vcf, freebayes.20:23000000-24000000.baq.20110210.vcf, freebayes.20:24000000-25000000.baq.20110210.vcf, freebayes.20:25000000-26000000.baq.20110210.vcf, freebayes.20:26000000-27000000.baq.20110210.vcf, freebayes.20:27000000-28000000.baq.20110210.vcf, freebayes.20:28000000-29000000.baq.20110210.vcf, freebayes.20:29000000-30000000.baq.20110210.vcf, freebayes.20:30000000-31000000.baq.20110210.vcf, freebayes.20:31000000-32000000.baq.20110210.vcf, freebayes.20:32000000-33000000.baq.20110210.vcf, freebayes.20:33000000-34000000.baq.20110210.vcf, freebayes.20:34000000-35000000.baq.20110210.vcf, freebayes.20:35000000-36000000.baq.20110210.vcf, freebayes.20:36000000-37000000.baq.20110210.vcf, freebayes.20:37000000-38000000.baq.20110210.vcf, freebayes.20:38000000-39000000.baq.20110210.vcf, freebayes.20:39000000-40000000.baq.20110210.vcf, freebayes.20:40000000-41000000.baq.20110210.vcf, freebayes.20:41000000-42000000.baq.20110210.vcf, freebayes.20:42000000-43000000.baq.20110210.vcf, freebayes.20:43000000-44000000.baq.20110210.vcf, freebayes.20:44000000-45000000.baq.20110210.vcf, freebayes.20:45000000-46000000.baq.20110210.vcf, freebayes.20:46000000-47000000.baq.20110210.vcf, freebayes.20:47000000-48000000.baq.20110210.vcf, freebayes.20:48000000-49000000.baq.20110210.vcf, freebayes.20:49000000-50000000.baq.20110210.vcf, freebayes.20:50000000-51000000.baq.20110210.vcf, freebayes.20:51000000-52000000.baq.20110210.vcf, freebayes.20:52000000-53000000.baq.20110210.vcf, freebayes.20:53000000-54000000.baq.20110210.vcf, freebayes.20:54000000-55000000.baq.20110210.vcf, freebayes.20:55000000-56000000.baq.20110210.vcf, freebayes.20:56000000-57000000.baq.20110210.vcf, freebayes.20:57000000-58000000.baq.20110210.vcf, freebayes.20:58000000-59000000.baq.20110210.vcf, freebayes.20:59000000-60000000.baq.20110210.vcf, freebayes.20:60000000-61000000.baq.20110210.vcf, freebayes.20:61000000-62000000.baq.20110210.vcf, freebayes.20:62000000-63000000.baq.20110210.vcf, freebayes.20:63000000-63025520.baq.20110210.vcf\n+##vcfPytools=\n+##vcfPytools=filtered using the following filters: DP2000\n+##INFO=<ID=REPEAT,Number=1,Type=String,Description="Description of the local repeat structures flanking the current position">\n+##INFO=<ID=DEL,Number=1,Type=Integer,Description="Length of deletion allele, if present">\n+##INFO=<ID=INS,Number=1,Ty'..b'3;SA=1|2;SB=0.33333;AB=0.75;ABR=6;ABA=2;RUN=2;SNP;TV\n+20\t124059\t.\tC\tT\t37.8\tDP2000\tNS=358;DP=1984;AC=3;AN=716;AF=0.0041899;RA=1971;AA=7;SR=971|1000;SA=5|2;SB=0.71429;AB=0.44444;ABR=4;ABA=5;RUN=1;SNP;TS;CpG\n+20\t124060\t.\tG\tA\t34.6\tDP2000\tNS=359;DP=1991;AC=1;AN=718;AF=0.0013928;RA=1983;AA=3;SR=995|988;SA=2|1;SB=0.66667;AB=0.5;ABR=3;ABA=3;RUN=2;SNP;TS;CpG\n+20\t124137\t.\tC\tA\t0.0701\tQ9;DP2000\tNS=357;DP=1933;AC=1;AN=714;AF=0.0014006;RA=1922;AA=4;SR=990|932;SA=0|4;SB=0;AB=0.5;ABR=1;ABA=1;RUN=2;SNP;TV\n+20\t124148\t.\tA\tG\t0.344\tQ9;DP2000\tNS=358;DP=1957;AC=2;AN=716;AF=0.0027933;RA=1947;AA=6;SR=989|958;SA=1|5;SB=0.16667;AB=0.8;ABR=12;ABA=3;RUN=1;SNP;TS\n+20\t124229\t.\tT\tC\t0.672\tQ9;DP2000\tNS=356;DP=1979;AC=1;AN=712;AF=0.0014045;RA=1972;AA=4;SR=1025|947;SA=1|3;SB=0.25;AB=0.8;ABR=8;ABA=2;RUN=1;SNP;TS\n+20\t124230\t.\tA\tG\t0.0816\tQ9;DP2000\tNS=356;DP=1970;AC=1;AN=712;AF=0.0014045;RA=1959;AA=4;SR=1026|933;SA=1|3;SB=0.25;AB=0.81818;ABR=9;ABA=2;RUN=1;SNP;TS\n+20\t124249\t.\tA\tG\t99.0\tDP2000\tNS=359;DP=1969;AC=41;AN=718;AF=0.057103;RA=1864;AA=101;SR=974|890;SA=55|46;SB=0.54455;AB=0.51977;ABR=92;ABA=85;RUN=1;SNP;TS;CpG\n+20\t124302\t.\tA\tC\t99.0\tPASS\tNS=358;DP=2007;AC=319;AN=716;AF=0.44553;RA=1185;AA=819;SR=577|608;SA=392|427;SB=0.47863;AB=0.51934;ABR=376;ABA=348;RUN=2;SNP;TV\n+20\t124335\t.\tC\tT\t99.0\tPASS\tNS=361;DP=2035;AC=4;AN=722;AF=0.0055402;RA=2013;AA=15;SR=929|1084;SA=10|5;SB=0.66667;AB=0.3913;ABR=9;ABA=14;RUN=1;SNP;TS\n+20\t124412\t.\tC\tT\t99.0\tPASS\tNS=357;DP=2005;AC=307;AN=714;AF=0.42997;RA=1207;AA=795;SR=602|605;SA=400|395;SB=0.50314;AB=0.51286;ABR=379;ABA=360;RUN=1;SNP;TS\n+20\t124511\t.\tT\tA\t0.647\tQ9\tNS=365;DP=2141;AC=1;AN=730;AF=0.0013699;RA=2133;AA=4;SR=1077|1056;SA=2|2;SB=0.5;AB=0.66667;ABR=4;ABA=2;RUN=2;SNP;TV\n+20\t124607\t.\tA\tG\t2.28\tQ9;DP2000\tNS=357;DP=1872;AC=1;AN=714;AF=0.0014006;RA=1865;AA=2;SR=930|935;SA=1|1;SB=0.5;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TS\n+20\t124737\t.\tA\tC\t99.0\tPASS\tNS=367;DP=2024;AC=4;AN=734;AF=0.0054496;RA=2003;AA=16;SR=1046|957;SA=11|5;SB=0.6875;AB=0.26316;ABR=5;ABA=14;RUN=1;SNP;TV\n+20\t124827\t.\tT\tC\t1.7\tQ9\tNS=367;DP=2059;AC=1;AN=734;AF=0.0013624;RA=2048;AA=4;SR=987|1061;SA=2|2;SB=0.5;AB=0.6;ABR=3;ABA=2;RUN=2;SNP;TS\n+20\t124848\t.\tA\tG\t36.5\tPASS\tNS=364;DP=2077;AC=1;AN=728;AF=0.0013736;RA=2060;AA=6;SR=966|1094;SA=2|4;SB=0.33333;AB=0.57143;ABR=4;ABA=3;RUN=1;SNP;TS\n+20\t124878\t.\tG\tA\t0.22\tQ9\tNS=361;DP=2060;AC=1;AN=722;AF=0.001385;RA=2049;AA=4;SR=974|1075;SA=3|1;SB=0.75;AB=0.77778;ABR=7;ABA=2;RUN=1;SNP;TS\n+20\t124920\t.\tA\tG\t76.0\tDP2000\tNS=361;DP=1956;AC=1;AN=722;AF=0.001385;RA=1949;AA=6;SR=984|965;SA=2|4;SB=0.33333;AB=0.14286;ABR=1;ABA=6;RUN=1;SNP;TS;CpG\n+20\t124942\t.\tA\tT\t1.44\tQ9\tNS=362;DP=2015;AC=1;AN=724;AF=0.0013812;RA=2007;AA=4;SR=1107|900;SA=1|3;SB=0.25;AB=0.6;ABR=3;ABA=2;RUN=1;SNP;TV\n+20\t124998\t.\tT\tC\t99.0\tPASS\tNS=366;DP=2204;AC=29;AN=732;AF=0.039617;RA=2131;AA=66;SR=1132|999;SA=36|30;SB=0.54545;AB=0.54902;ABR=56;ABA=46;RUN=1;SNP;TS\n+20\t125096\t.\tC\tG\t4.18\tQ9\tNS=363;DP=2095;AC=1;AN=726;AF=0.0013774;RA=2082;AA=2;SR=1071|1011;SA=2|0;SB=1;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TV\n+20\t125109\t.\tA\tT\t0.0858\tQ9\tNS=365;DP=2091;AC=2;AN=730;AF=0.0027397;RA=2080;AA=7;SR=1103|977;SA=7|0;SB=1;AB=0.66667;ABR=6;ABA=3;RUN=8;SNP;TV\n+20\t125121\t.\tC\tT\t99.0\tPASS\tNS=364;DP=2075;AC=208;AN=728;AF=0.28571;RA=1472;AA=599;SR=826|646;SA=335|264;SB=0.55927;AB=0.49868;ABR=377;ABA=378;RUN=1;SNP;TS\n+20\t125179\t.\tC\tA\t99.0\tPASS\tNS=368;DP=2155;AC=7;AN=736;AF=0.0095109;RA=2127;AA=26;SR=1123|1004;SA=16|10;SB=0.61538;AB=0.36842;ABR=14;ABA=24;RUN=2;SNP;TV\n+20\t125266\t.\tT\tA\t3.47\tQ9\tNS=364;DP=2138;AC=1;AN=728;AF=0.0013736;RA=2123;AA=5;SR=907|1216;SA=1|4;SB=0.2;AB=0.6;ABR=3;ABA=2;RUN=1;SNP;TV\n+20\t125381\t.\tA\tG\t4.69\tQ9\tNS=362;DP=2102;AC=1;AN=724;AF=0.0013812;RA=2094;AA=3;SR=919|1175;SA=1|2;SB=0.33333;AB=0.66667;ABR=4;ABA=2;RUN=1;SNP;TS\n+20\t125450\t.\tG\tA\t0.364\tQ9;DP2000\tNS=359;DP=1881;AC=1;AN=718;AF=0.0013928;RA=1868;AA=5;SR=862|1006;SA=1|4;SB=0.2;AB=0.71429;ABR=5;ABA=2;RUN=2;SNP;TS\n+20\t125470\t.\tA\tG\t0.602\tQ9;DP2000\tNS=360;DP=1870;AC=1;AN=720;AF=0.0013889;RA=1855;AA=5;SR=909|946;SA=3|2;SB=0.6;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TS\n'
b
diff -r 000000000000 -r da1a6f33b504 test-data/test_filter_quality_9_NS_360_gt.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_filter_quality_9_NS_360_gt.vcf Mon Jan 27 09:29:09 2014 -0500
b
b'@@ -0,0 +1,1001 @@\n+##fileformat=VCFv4.0\n+##fileDate=20110215\n+##source=freeBayes version 0.4.1\n+##reference=/d2/data/references/build_37/human_reference_v37.fa\n+##phasing=none\n+##commandline="/share/software/freebayes/bin/freebayes --stdin --no-filters --min-alternate-count 2 --posterior-integration-depth 100 --pvar 0.01 --vcf /d1/data/1000G/20101123/filteringTests/AFR/mosaik/noFilters/Pipeline/none//freebayes/freebayes.20:0-1000000.baq.20110210.vcf --fasta-reference /d2/data/references/build_37/human_reference_v37.fa --region 20:0..1000000"\n+##vcfPytools=merge freebayes.20:0-1000000.baq.20110210.vcf, freebayes.20:1000000-2000000.baq.20110210.vcf, freebayes.20:2000000-3000000.baq.20110210.vcf, freebayes.20:3000000-4000000.baq.20110210.vcf, freebayes.20:4000000-5000000.baq.20110210.vcf, freebayes.20:5000000-6000000.baq.20110210.vcf, freebayes.20:6000000-7000000.baq.20110210.vcf, freebayes.20:7000000-8000000.baq.20110210.vcf, freebayes.20:8000000-9000000.baq.20110210.vcf, freebayes.20:9000000-10000000.baq.20110210.vcf, freebayes.20:10000000-11000000.baq.20110210.vcf, freebayes.20:11000000-12000000.baq.20110210.vcf, freebayes.20:12000000-13000000.baq.20110210.vcf, freebayes.20:13000000-14000000.baq.20110210.vcf, freebayes.20:14000000-15000000.baq.20110210.vcf, freebayes.20:15000000-16000000.baq.20110210.vcf, freebayes.20:16000000-17000000.baq.20110210.vcf, freebayes.20:17000000-18000000.baq.20110210.vcf, freebayes.20:18000000-19000000.baq.20110210.vcf, freebayes.20:19000000-20000000.baq.20110210.vcf, freebayes.20:20000000-21000000.baq.20110210.vcf, freebayes.20:21000000-22000000.baq.20110210.vcf, freebayes.20:22000000-23000000.baq.20110210.vcf, freebayes.20:23000000-24000000.baq.20110210.vcf, freebayes.20:24000000-25000000.baq.20110210.vcf, freebayes.20:25000000-26000000.baq.20110210.vcf, freebayes.20:26000000-27000000.baq.20110210.vcf, freebayes.20:27000000-28000000.baq.20110210.vcf, freebayes.20:28000000-29000000.baq.20110210.vcf, freebayes.20:29000000-30000000.baq.20110210.vcf, freebayes.20:30000000-31000000.baq.20110210.vcf, freebayes.20:31000000-32000000.baq.20110210.vcf, freebayes.20:32000000-33000000.baq.20110210.vcf, freebayes.20:33000000-34000000.baq.20110210.vcf, freebayes.20:34000000-35000000.baq.20110210.vcf, freebayes.20:35000000-36000000.baq.20110210.vcf, freebayes.20:36000000-37000000.baq.20110210.vcf, freebayes.20:37000000-38000000.baq.20110210.vcf, freebayes.20:38000000-39000000.baq.20110210.vcf, freebayes.20:39000000-40000000.baq.20110210.vcf, freebayes.20:40000000-41000000.baq.20110210.vcf, freebayes.20:41000000-42000000.baq.20110210.vcf, freebayes.20:42000000-43000000.baq.20110210.vcf, freebayes.20:43000000-44000000.baq.20110210.vcf, freebayes.20:44000000-45000000.baq.20110210.vcf, freebayes.20:45000000-46000000.baq.20110210.vcf, freebayes.20:46000000-47000000.baq.20110210.vcf, freebayes.20:47000000-48000000.baq.20110210.vcf, freebayes.20:48000000-49000000.baq.20110210.vcf, freebayes.20:49000000-50000000.baq.20110210.vcf, freebayes.20:50000000-51000000.baq.20110210.vcf, freebayes.20:51000000-52000000.baq.20110210.vcf, freebayes.20:52000000-53000000.baq.20110210.vcf, freebayes.20:53000000-54000000.baq.20110210.vcf, freebayes.20:54000000-55000000.baq.20110210.vcf, freebayes.20:55000000-56000000.baq.20110210.vcf, freebayes.20:56000000-57000000.baq.20110210.vcf, freebayes.20:57000000-58000000.baq.20110210.vcf, freebayes.20:58000000-59000000.baq.20110210.vcf, freebayes.20:59000000-60000000.baq.20110210.vcf, freebayes.20:60000000-61000000.baq.20110210.vcf, freebayes.20:61000000-62000000.baq.20110210.vcf, freebayes.20:62000000-63000000.baq.20110210.vcf, freebayes.20:63000000-63025520.baq.20110210.vcf\n+##vcfPytools=\n+##vcfPytools=filtered using the following filters: NS360\n+##INFO=<ID=REPEAT,Number=1,Type=String,Description="Description of the local repeat structures flanking the current position">\n+##INFO=<ID=DEL,Number=1,Type=Integer,Description="Length of deletion allele, if present">\n+##INFO=<ID=INS,Number=1,Typ'..b'023;SA=1|2;SB=0.33333;AB=0.75;ABR=6;ABA=2;RUN=2;SNP;TV\n+20\t124059\t.\tC\tT\t37.8\tPASS\tNS=358;DP=1984;AC=3;AN=716;AF=0.0041899;RA=1971;AA=7;SR=971|1000;SA=5|2;SB=0.71429;AB=0.44444;ABR=4;ABA=5;RUN=1;SNP;TS;CpG\n+20\t124060\t.\tG\tA\t34.6\tPASS\tNS=359;DP=1991;AC=1;AN=718;AF=0.0013928;RA=1983;AA=3;SR=995|988;SA=2|1;SB=0.66667;AB=0.5;ABR=3;ABA=3;RUN=2;SNP;TS;CpG\n+20\t124137\t.\tC\tA\t0.0701\tQ9\tNS=357;DP=1933;AC=1;AN=714;AF=0.0014006;RA=1922;AA=4;SR=990|932;SA=0|4;SB=0;AB=0.5;ABR=1;ABA=1;RUN=2;SNP;TV\n+20\t124148\t.\tA\tG\t0.344\tQ9\tNS=358;DP=1957;AC=2;AN=716;AF=0.0027933;RA=1947;AA=6;SR=989|958;SA=1|5;SB=0.16667;AB=0.8;ABR=12;ABA=3;RUN=1;SNP;TS\n+20\t124229\t.\tT\tC\t0.672\tQ9\tNS=356;DP=1979;AC=1;AN=712;AF=0.0014045;RA=1972;AA=4;SR=1025|947;SA=1|3;SB=0.25;AB=0.8;ABR=8;ABA=2;RUN=1;SNP;TS\n+20\t124230\t.\tA\tG\t0.0816\tQ9\tNS=356;DP=1970;AC=1;AN=712;AF=0.0014045;RA=1959;AA=4;SR=1026|933;SA=1|3;SB=0.25;AB=0.81818;ABR=9;ABA=2;RUN=1;SNP;TS\n+20\t124249\t.\tA\tG\t99.0\tPASS\tNS=359;DP=1969;AC=41;AN=718;AF=0.057103;RA=1864;AA=101;SR=974|890;SA=55|46;SB=0.54455;AB=0.51977;ABR=92;ABA=85;RUN=1;SNP;TS;CpG\n+20\t124302\t.\tA\tC\t99.0\tPASS\tNS=358;DP=2007;AC=319;AN=716;AF=0.44553;RA=1185;AA=819;SR=577|608;SA=392|427;SB=0.47863;AB=0.51934;ABR=376;ABA=348;RUN=2;SNP;TV\n+20\t124335\t.\tC\tT\t99.0\tNS360\tNS=361;DP=2035;AC=4;AN=722;AF=0.0055402;RA=2013;AA=15;SR=929|1084;SA=10|5;SB=0.66667;AB=0.3913;ABR=9;ABA=14;RUN=1;SNP;TS\n+20\t124412\t.\tC\tT\t99.0\tPASS\tNS=357;DP=2005;AC=307;AN=714;AF=0.42997;RA=1207;AA=795;SR=602|605;SA=400|395;SB=0.50314;AB=0.51286;ABR=379;ABA=360;RUN=1;SNP;TS\n+20\t124511\t.\tT\tA\t0.647\tQ9;NS360\tNS=365;DP=2141;AC=1;AN=730;AF=0.0013699;RA=2133;AA=4;SR=1077|1056;SA=2|2;SB=0.5;AB=0.66667;ABR=4;ABA=2;RUN=2;SNP;TV\n+20\t124607\t.\tA\tG\t2.28\tQ9\tNS=357;DP=1872;AC=1;AN=714;AF=0.0014006;RA=1865;AA=2;SR=930|935;SA=1|1;SB=0.5;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TS\n+20\t124737\t.\tA\tC\t99.0\tNS360\tNS=367;DP=2024;AC=4;AN=734;AF=0.0054496;RA=2003;AA=16;SR=1046|957;SA=11|5;SB=0.6875;AB=0.26316;ABR=5;ABA=14;RUN=1;SNP;TV\n+20\t124827\t.\tT\tC\t1.7\tQ9;NS360\tNS=367;DP=2059;AC=1;AN=734;AF=0.0013624;RA=2048;AA=4;SR=987|1061;SA=2|2;SB=0.5;AB=0.6;ABR=3;ABA=2;RUN=2;SNP;TS\n+20\t124848\t.\tA\tG\t36.5\tNS360\tNS=364;DP=2077;AC=1;AN=728;AF=0.0013736;RA=2060;AA=6;SR=966|1094;SA=2|4;SB=0.33333;AB=0.57143;ABR=4;ABA=3;RUN=1;SNP;TS\n+20\t124878\t.\tG\tA\t0.22\tQ9;NS360\tNS=361;DP=2060;AC=1;AN=722;AF=0.001385;RA=2049;AA=4;SR=974|1075;SA=3|1;SB=0.75;AB=0.77778;ABR=7;ABA=2;RUN=1;SNP;TS\n+20\t124920\t.\tA\tG\t76.0\tNS360\tNS=361;DP=1956;AC=1;AN=722;AF=0.001385;RA=1949;AA=6;SR=984|965;SA=2|4;SB=0.33333;AB=0.14286;ABR=1;ABA=6;RUN=1;SNP;TS;CpG\n+20\t124942\t.\tA\tT\t1.44\tQ9;NS360\tNS=362;DP=2015;AC=1;AN=724;AF=0.0013812;RA=2007;AA=4;SR=1107|900;SA=1|3;SB=0.25;AB=0.6;ABR=3;ABA=2;RUN=1;SNP;TV\n+20\t124998\t.\tT\tC\t99.0\tNS360\tNS=366;DP=2204;AC=29;AN=732;AF=0.039617;RA=2131;AA=66;SR=1132|999;SA=36|30;SB=0.54545;AB=0.54902;ABR=56;ABA=46;RUN=1;SNP;TS\n+20\t125096\t.\tC\tG\t4.18\tQ9;NS360\tNS=363;DP=2095;AC=1;AN=726;AF=0.0013774;RA=2082;AA=2;SR=1071|1011;SA=2|0;SB=1;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TV\n+20\t125109\t.\tA\tT\t0.0858\tQ9;NS360\tNS=365;DP=2091;AC=2;AN=730;AF=0.0027397;RA=2080;AA=7;SR=1103|977;SA=7|0;SB=1;AB=0.66667;ABR=6;ABA=3;RUN=8;SNP;TV\n+20\t125121\t.\tC\tT\t99.0\tNS360\tNS=364;DP=2075;AC=208;AN=728;AF=0.28571;RA=1472;AA=599;SR=826|646;SA=335|264;SB=0.55927;AB=0.49868;ABR=377;ABA=378;RUN=1;SNP;TS\n+20\t125179\t.\tC\tA\t99.0\tNS360\tNS=368;DP=2155;AC=7;AN=736;AF=0.0095109;RA=2127;AA=26;SR=1123|1004;SA=16|10;SB=0.61538;AB=0.36842;ABR=14;ABA=24;RUN=2;SNP;TV\n+20\t125266\t.\tT\tA\t3.47\tQ9;NS360\tNS=364;DP=2138;AC=1;AN=728;AF=0.0013736;RA=2123;AA=5;SR=907|1216;SA=1|4;SB=0.2;AB=0.6;ABR=3;ABA=2;RUN=1;SNP;TV\n+20\t125381\t.\tA\tG\t4.69\tQ9;NS360\tNS=362;DP=2102;AC=1;AN=724;AF=0.0013812;RA=2094;AA=3;SR=919|1175;SA=1|2;SB=0.33333;AB=0.66667;ABR=4;ABA=2;RUN=1;SNP;TS\n+20\t125450\t.\tG\tA\t0.364\tQ9\tNS=359;DP=1881;AC=1;AN=718;AF=0.0013928;RA=1868;AA=5;SR=862|1006;SA=1|4;SB=0.2;AB=0.71429;ABR=5;ABA=2;RUN=2;SNP;TS\n+20\t125470\t.\tA\tG\t0.602\tQ9\tNS=360;DP=1870;AC=1;AN=720;AF=0.0013889;RA=1855;AA=5;SR=909|946;SA=3|2;SB=0.6;AB=0.75;ABR=6;ABA=2;RUN=1;SNP;TS\n'
b
diff -r 000000000000 -r da1a6f33b504 tools.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools.py Mon Jan 27 09:29:09 2014 -0500
[
b'@@ -0,0 +1,188 @@\n+#!/usr/bin/python\n+\n+import os.path\n+import sys\n+import vcfPytools\n+from vcfPytools import __version__\n+\n+# Determine whether to output to a file or stdout.\n+def setOutput(output):\n+  if output == None:\n+    outputFile = sys.stdout\n+    writeOut = False\n+  else:\n+    output = os.path.abspath(output)\n+    outputFile = open(output, \'w\')\n+    writeOut = True\n+\n+  return outputFile, writeOut\n+\n+# Determine which file has priority for writing out records.\n+def setVcfPriority(priorityFile, vcfFiles):\n+  if priorityFile == None: priority = 0\n+  elif priorityFile == vcfFiles[0]: priority = 1\n+  elif priorityFile == vcfFiles[1]: priority = 2\n+  elif priorityFile.lower() == "merge": priority = 3\n+  else:\n+    print >> sys.stderr, "vcf file give priority must be one of the two input vcf files or merge."\n+    exit(1)\n+\n+  return priority\n+\n+# If the union or intersection of two vcf files is being performed\n+# and the output vcf file is to contain the information from both\n+# files, the headers need to be merged to ensure that all info and\n+# format entries have an explanation.\n+def mergeHeaders(v1, v2, v3):\n+\n+# If either file does not have a header, terminate the program.\n+# In order to merge the headers, the different fields must be\n+# checked to ensure the files are compatible.\n+  if not v1.hasHeader or not v2.hasHeader:\n+    print >> sys.stderr, "Both vcf files must have a header in order to merge data sets."\n+    exit(1)\n+\n+  v3.infoHeaderTags = v1.infoHeaderTags.copy()\n+  v3.formatHeaderTags = v1.formatHeaderTags.copy()\n+  v3.numberDataSets = v1.numberDataSets\n+  v3.includedDataSets = v1.includedDataSets.copy()\n+  v3.headerText = v1.headerText\n+  v3.headerTitles = v1.headerTitles\n+  v3.infoHeaderString = v1.infoHeaderString.copy()\n+  v3.formatHeaderString = v1.formatHeaderString.copy()\n+\n+# Merge the info field descriptions.\n+  for tag in v2.infoHeaderTags:\n+    if v1.infoHeaderTags.has_key(tag):\n+      if v1.infoHeaderTags[tag][0] != v2.infoHeaderTags[tag][0] or \\\n+         v1.infoHeaderTags[tag][1] != v2.infoHeaderTags[tag][1]:\n+        print v1.infoHeaderTags[tag][0]\n+        print v1.infoHeaderTags[tag][1]\n+        print v1.infoHeaderTags[tag][2]\n+        print >> sys.stderr, "Input vcf files have different definitions for " + tag + " field."\n+        exit(1)\n+    else: v3.infoHeaderTags[tag] = v2.infoHeaderTags[tag]\n+\n+# Merge the format field descriptions.\n+  for tag in v2.formatHeaderTags:\n+    if v1.formatHeaderTags.has_key(tag):\n+      if v1.formatHeaderTags[tag][0] != v2.formatHeaderTags[tag][0] or \\\n+         v1.formatHeaderTags[tag][1] != v2.formatHeaderTags[tag][1]:\n+        print >> sys.stderr, "Input vcf files have different definitions for " + tag + " field."\n+        exit(1)\n+    else: v3.formatHeaderTags[tag] = v2.formatHeaderTags[tag]\n+\n+# Now check to see if the vcf files contain information from multiple\n+# records themselves and create an ordered list in which the data\n+# will appear in the file.  For instance, of the first file has\n+# already got two sets of data and is being intersected with a file\n+# with one set of data, the order of data in the new vcf file will be\n+# the two sets from the first file followed by the second, e.g.\n+# AB=3/2/4, where the 3 and 2 are from the first file and the 4 is the\n+# value of AC from the second vcf.  The header will have a ##FILE for\n+# each of the three files, so the origin if the data can be recovered.\n+  if v1.numberDataSets == 0:\n+    v3.includedDataSets[v3.numberDataSets + 1] = v1.filename\n+    v3.numberDataSets += 1\n+  if v2.numberDataSets == 0:\n+    v3.includedDataSets[v3.numberDataSets + 1] = v2.filename\n+    v3.numberDataSets += 1\n+  else:\n+    for i in range(1, v2.numberDataSets + 1):\n+      v3.includedDataSets[v3.numberDataSets + 1] = v2.includedDataSets[i]\n+      v3.numberDataSets += 1\n+\n+# If either of the input files contain multiple data sets (e.g. multiple\n+# vcf files have undergone intersection or union calculations and all\n+# inform'..b'OR:"\n+    print >> sys.stderr, "input vcf file(s) contain data sets from multiple vcf files."\n+    print >> sys.stderr, "Further intersection or union operations must include --priority-file merge"\n+    print >> sys.stderr, "Other tools may be incompatible with this format."\n+    exit(1)\n+\n+# Write the header to file.\n+def writeHeader (outputFile, v, removeGenotypes, taskDescriptor):\n+  if not v.hasHeader: \n+    v.headerText = "##fileformat=VCFv4.0\\n##source=vcfPytools " + __version__ + "\\n"\n+    v.headerTitles = "#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\n"\n+  outputFile.write(v.headerText) if v.headerText != "" else None\n+  print >> outputFile, taskDescriptor\n+  for tag in v.infoHeaderString: print >> outputFile, v.infoHeaderString[tag]\n+  for tag in v.formatHeaderString: print >> outputFile, v.formatHeaderString[tag]\n+\n+# Write out a list of files indicating which data set belongs to which file.\n+  if v.numberDataSets != 0:\n+    for i in range(1, v.numberDataSets + 1):\n+      print >> outputFile, "##FILE=<ID=" + str(i) + ",\\"" + v.includedDataSets[i] + "\\">"\n+\n+  if removeGenotypes:\n+    line = v.headerTitles.rstrip("\\n").split("\\t")\n+    newHeaderTitles = line[0]\n+    for i in range(1,8):\n+      newHeaderTitles = newHeaderTitles + "\\t" + line[i]\n+    newHeaderTitles = newHeaderTitles + "\\n"\n+    outputFile.write( newHeaderTitles )\n+  else:\n+    outputFile.write( v.headerTitles )\n+\n+# Check that the two reference sequence lists are identical.\n+# If there are a different number or order, the results may\n+# not be as expected.\n+def checkReferenceSequenceLists(list1, list2):\n+  errorMessage = False\n+  if len(list1) != len(list2):\n+    print >> sys.stderr, "WARNING: Input files contain a different number of reference sequences."\n+    errorMessage = True\n+  elif list1 != list2:\n+    print >> sys.stderr, "WARNING: Input files contain different or differently ordered reference sequences."\n+    errorMessage = True\n+  if errorMessage:\n+    print >> sys.stderr, "Results may not be as expected."\n+    print >> sys.stderr, "Ensure that input files have the same reference sequences in the same order."\n+    print >> sys.stderr, "Reference sequence lists observed were:\\n\\t", list1, "\\n\\t", list2\n+\n+# Write out a vcf record to file.  The record written depends on the\n+# value of \'priority\' and could therefore be the record from either\n+# of the vcf files, or a combination of them.\n+\n+def writeVcfRecord(priority, v1, v2, outputFile):\n+  if priority == 0:\n+    if v1.quality >= v2.quality: outputFile.write(v1.record)\n+    else: outputFile.write(v2.record)\n+  elif priority == 1: outputFile.write(v1.record)\n+  elif priority == 2: outputFile.write(v2.record)\n+  elif priority == 3:\n+\n+# Define the missing entry values (depends on the number of data sets\n+# in the file).\n+    info = ""\n+    missingEntry1 = missingEntry2 = "."\n+    for i in range(1, v1.numberDataSets): missingEntry1 += "/."\n+    for i in range(1, v2.numberDataSets): missingEntry2 += "/."\n+    secondList = v2.infoTags.copy()\n+\n+# Build up the info field.\n+    for tag in v1.infoTags:\n+      if secondList.has_key(tag):\n+        if v1.infoHeaderTags[tag][1].lower() != "flag": info += tag + "=" + v1.infoTags[tag] + "/" + v2.infoTags[tag] + ";"\n+        del secondList[tag]\n+      else: \n+        if v1.infoHeaderTags[tag][1].lower() != "flag": info += tag + "=" + v1.infoTags[tag] + "/" + missingEntry2 + ";"\n+\n+# Now include the info tags that are not populated in the first vcf file.\n+    for tag in secondList:\n+      if v2.infoHeaderTags[tag][1].lower() != "flag": info += tag + "=" + missingEntry1 + "/" + v2.infoTags[tag] + ";"\n+\n+# Build the complete record.\n+    info = info.rstrip(";")\n+    record = v1.referenceSequence + "\\t" + str(v1.position) + "\\t" + v1.rsid + "\\t" + v1.ref + "\\t" + \\\n+             v1.alt + "/" + v2.alt + "\\t" + v1.quality + "/" + v2.quality + "\\t.\\t" + info\n+    print >> outputFile, record\n+  else:\n+    print >> sys.sterr, "Unknown file priority."\n+    exit(1)\n'
b
diff -r 000000000000 -r da1a6f33b504 vcfClass.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcfClass.py Mon Jan 27 09:29:09 2014 -0500
[
b'@@ -0,0 +1,440 @@\n+#!/usr/bin/python\n+\n+import os.path\n+import sys\n+import re\n+\n+class vcf:\n+  def __init__(self):\n+\n+# Header info.\n+    self.filename = ""\n+    self.hasHeader = True\n+    self.headerText = ""\n+    self.headerTitles = ""\n+    self.vcfFormat = ""\n+    #self.headerInfoText = ""\n+    #self.headerFormatText = ""\n+\n+# Store the info and format tags as well as the lines that describe\n+# them in a dictionary.\n+    self.numberDataSets = 0\n+    self.includedDataSets = {}\n+    self.infoHeaderTags = {}\n+    self.infoHeaderString = {}\n+    self.formatHeaderTags = {}\n+    self.formatHeaderString = {}\n+\n+# Genotype information.\n+    self.genotypes = False\n+    self.infoField = {}\n+\n+# Reference sequence information.\n+    self.referenceSequences = {}\n+    self.referenceSequenceList = []\n+    self.referenceSequence = ""\n+\n+# Record information.\n+    self.position = -1\n+    self.samplesList = []\n+\n+# Determine which fields to process.\n+    self.processInfo = False\n+    self.processGenotypes = False\n+    self.dbsnpVcf = False\n+    self.hapmapVcf = False\n+\n+# Open a vcf file.\n+  def openVcf(self, filename):\n+    if filename == "stdin":\n+      self.filehandle = sys.stdin\n+      self.filename = "stdin"\n+    else:\n+      try: self.filehandle = open(filename,"r")\n+      except IOError:\n+        print >> sys.stderr, "Failed to find file: ",filename\n+        exit(1)\n+      self.filename = os.path.abspath(filename)\n+\n+# Parse the vcf header.\n+  def parseHeader(self, filename, writeOut):\n+    while self.getHeaderLine(filename, writeOut):\n+      continue\n+\n+# Determine the type of information in the header line.\n+  def getHeaderLine(self, filename, writeOut):\n+    self.headerLine = self.filehandle.readline().rstrip("\\n")\n+    if self.headerLine.startswith("##fileformat"): success = self.getvcfFormat()\n+    if self.headerLine.startswith("##INFO"): success = self.headerInfo(writeOut, "info")\n+    elif self.headerLine.startswith("##FORMAT"): success = self.headerInfo(writeOut, "format")\n+    elif self.headerLine.startswith("##FILE"): success = self.headerFiles(writeOut)\n+    elif self.headerLine.startswith("##"): success = self.headerAdditional()\n+    elif self.headerLine.startswith("#"): success = self.headerTitleString(filename, writeOut)\n+    else: success = self.noHeader(filename, writeOut)\n+\n+    return success\n+\n+# Read VCF format\n+  def getvcfFormat(self):\n+      try:\n+          self.vcfFormat = self.headerLine.split("=",1)[1]\n+          self.vcfFormat = float( self.vcfFormat.split("VCFv",1)[1] )## Extract the version number rather than the whole string\n+      except IndexError:\n+          print >> sys.stderr, "\\nError parsing the fileformat"\n+          print >> sys.stderr, "The following fileformat header is wrongly formatted: ", self.headerLine\n+          exit(1)\n+      return True\n+\n+\n+# Read information on an info field from the header line.\n+  def headerInfo(self, writeOut, lineType):\n+    tag = self.headerLine.split("=",1)\n+    tagID = (tag[1].split("ID=",1))[1].split(",",1)\n+\n+# Check if this info field has already been defined.\n+    if (lineType == "info" and self.infoHeaderTags.has_key(tagID[0])) or (lineType == "format" and self.formatHeaderTags.has_key(tagID[0])):\n+      print >> sys.stderr, "Info tag \\"", tagID[0], "\\" is defined multiple times in the header."\n+      exit(1)\n+\n+# Determine the number of entries, entry type and description.\n+    tagNumber = (tagID[1].split("Number=",1))[1].split(",",1)\n+    tagType = (tagNumber[1].split("Type=",1))[1].split(",",1)\n+    try: tagDescription = ( ( (tagType[1].split("Description=\\"",1))[1] ).split("\\">") )[0]\n+    except IndexError: tagDescription = ""\n+    tagID = tagID[0]; tagNumber = tagNumber[0]; tagType = tagType[0]\n+\n+# Check that the number of fields associated with the tag is either\n+# an integer or a \'.\' to indicate variable number of entries.\n+    if tagNumber == ".": tagNumber = "variable"\n+    else:\n+      if self.vcfFormat<4.1:\n+\n+        try:\n+            tagNum'..b'lues != 0 and infoType == "Flag":\n+          print >> sys.stderr, "ERROR"\n+          exit(1)\n+        else:\n+          fields = self.infoTags[tag].split(",")\n+          if len(fields) != numberValues:\n+            text = "Unexpected number of entries"\n+            self.generalError(text, "information tag", tag)\n+\n+          for i in range(infoNumber):\n+            try: result.append(fields[i])\n+            except IndexError:\n+              text = "Insufficient values. Expected: " + self.infoHeaderTags[tag][0]\n+              self.generalError(text, "tag:", tag)\n+      else: numberValues = 0\n+\n+    else:\n+      text = "information field does not have a definition in the header"\n+      self.generalError(text, "tag", tag)\n+\n+    return numberValues, infoType, result\n+\n+# Get the genotype information.\n+  def getGenotypeInfo(self, sample, tag):\n+    result = []\n+    if self.formatHeaderTags.has_key(tag):\n+      infoNumber = self.formatHeaderTags[tag][0]\n+      infoType = self.formatHeaderTags[tag][1]\n+      numberValues = infoNumber\n+\n+      if self.genotypeFields[sample] == "." and len(self.genotypeFields[sample]) == 1:\n+        numberValues = 0\n+        result = "."\n+      else:\n+        if self.genotypeFields[sample].has_key(tag):\n+          if tag == "GT":\n+            if len(self.genotypeFields[sample][tag]) != 3 and len(self.genotypeFields[sample][tag]) != 1:\n+              text = "Unexected number of characters in genotype (GT) field"\n+              self.generalError(text, "sample", sample)\n+\n+# If a diploid call, check whether or not the genotype is phased.\n+            elif len(self.genotypeFields[sample][tag]) == 3:\n+              self.phased = True if self.genotypeFields[sample][tag][1] == "|" else False\n+              result.append( self.genotypeFields[sample][tag][0] )\n+              result.append( self.genotypeFields[sample][tag][2] )\n+            elif len(self.genotypeFields[sample][tag]) == 3:\n+              result.append( self.genotypeFields[sample][tag][0] )\n+          else:\n+            fields = self.genotypeFields[sample][tag].split(",")\n+            if len(fields) != numberValues:\n+              text = "Unexpected number of characters in " + tag + " field"\n+              self.generalError(text, "sample", sample)\n+\n+            for i in range(infoNumber): result.append(fields[i])\n+    else:\n+      text = "genotype field does not have a definition in the header"\n+      self.generalError(text, "tag", tag)\n+\n+    return numberValues, result\n+\n+# Parse the dbsnp entry.  If the entry conforms to the required variant type,\n+# return the dbsnp rsid value, otherwise ".".\n+  def getDbsnpInfo(self):\n+\n+# First check that the variant class (VC) is listed as SNP.\n+    vc = self.info.split("VC=",1)\n+    if vc[1].find(";") != -1: snp = vc[1].split(";",1)\n+    else:\n+      snp = []\n+      snp.append(vc[1])\n+\n+    if snp[0].lower() == "snp": rsid = self.rsid\n+    else: rsid = "."\n+\n+    return rsid\n+\n+# Build a new vcf record.\n+  def buildRecord(self, removeGenotypes):\n+    record = self.referenceSequence + "\\t" + \\\n+                str(self.position) + "\\t" + \\\n+                self.rsid + "\\t" + \\\n+                self.ref + "\\t" + \\\n+                self.alt + "\\t" + \\\n+                str(self.quality) + "\\t" + \\\n+                self.filters + "\\t" + \\\n+                self.info\n+\n+    if self.hasGenotypes and not removeGenotypes: record += self.genotypeString\n+\n+    record += "\\n"\n+\n+    return record\n+\n+# Close the vcf file.\n+  def closeVcf(self, filename):\n+    self.filehandle.close()\n+\n+# Define error messages for different handled errors.\n+  def generalError(self, text, field, fieldValue):\n+    print >> sys.stderr, "\\nError encountered when attempting to read:"\n+    print >> sys.stderr, "\\treference sequence :\\t", self.referenceSequence\n+    print >> sys.stderr, "\\tposition :\\t\\t", self.position\n+    if field != "": print >> sys.stderr, "\\t", field, ":\\t", fieldValue\n+    print >> sys.stderr,  "\\n", text\n+    exit(1)\n'
b
diff -r 000000000000 -r da1a6f33b504 vcfPytools.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcfPytools.py Mon Jan 27 09:29:09 2014 -0500
[
@@ -0,0 +1,82 @@
+#!/usr/bin/python
+
+import os.path
+import sys
+
+__author__ = "alistair ward"
+__version__ = "version 0.26"
+__date__ = "february 2011"
+
+def main():
+  usage = "Usage: vcfPytools.py [tool] [options]\n\n" + \
+          "Available tools:\n" + \
+          "  annotate:\n\tAnnotate the vcf file with membership in other vcf files.\n" + \
+          "  extract:\n\tExtract vcf records from a region.\n" + \
+          "  filter:\n\tFilter the vcf file.\n" + \
+          "  intersect:\n\tGenerate the intersection of two vcf files.\n" + \
+          "  merge:\n\tMerge a list of vcf files.\n" + \
+          "  multi:\n\tFind the intersections and unique fractions of multiple vcf files.\n" + \
+          "  sort:\n\tSort a vcf file.\n" + \
+          "  stats:\n\tGenerate statistics from a vcf file.\n" + \
+          "  union:\n\tGenerate the union of two vcf files.\n" + \
+          "  unique:\n\tGenerate the unique fraction from two vcf files.\n" + \
+          "  validate:\n\tValidate the input vcf file.\n\n" + \
+          "vcfPytools.py [tool] --help for information on a specific tool."
+
+# Determine the requested tool.
+
+  if len(sys.argv) > 1:
+    tool = sys.argv[1]
+  else:
+    print >> sys.stderr, usage
+    exit(1)
+
+  if tool == "annotate":
+    import annotate
+    success = annotate.main()
+  elif tool == "extract":
+    import extract
+    success = extract.main()
+  elif tool == "filter":
+    import filter
+    success = filter.main()
+  elif tool == "intersect":
+    import intersect
+    success = intersect.main()
+  elif tool == "multi":
+    import multi
+    success = multi.main()
+  elif tool == "merge":
+    import merge
+    success = merge.main()
+  elif tool == "sort":
+    import sort
+    success = sort.main()
+  elif tool == "stats":
+    import stats
+    success = stats.main()
+  elif tool == "union":
+    import union
+    success = union.main()
+  elif tool == "unique":
+    import unique
+    success = unique.main()
+  elif tool == "test":
+    import test
+    success = test.main()
+  elif tool == "validate":
+    import validate
+    success = validate.main()
+  elif tool == "--help" or tool == "-h" or tool == "?":
+    print >> sys.stderr, usage
+  else:
+    print >> sys.stderr, "Unknown tool: ",tool
+    print >> sys.stderr, "\n", usage
+    exit(1)
+
+# If program completed properly, terminate.
+
+  if success == 0: exit(0)
+
+if __name__ == "__main__":
+  main()