Repository 'allele_counts'
hg clone https://toolshed.g2.bx.psu.edu/repos/nick/allele_counts

Changeset 8:411adeff1eec (2016-08-23)
Previous changeset 7:a72277535a2c (2015-12-09) Next changeset 9:6cc488e11544 (2020-03-31)
Commit message:
Handle "." sample columns, update tests to work with BIAS column.
modified:
allele-counts.py
tests/artificial-nofilt.csv.out
tests/artificial-samples.csv.out
tests/artificial.csv.out
tests/real-mit-s.csv.out
tests/real-mit.csv.out
tests/real-nofilt.csv.out
tests/real.csv.out
tests/run-tests.py
diff -r a72277535a2c -r 411adeff1eec allele-counts.py
--- a/allele-counts.py Wed Dec 09 11:37:02 2015 -0500
+++ b/allele-counts.py Tue Aug 23 02:30:56 2016 -0400
@@ -238,7 +238,7 @@
 
   if len(fields) < 9:
     fail("Error in input VCF: wrong number of fields in data line. "
-          +"Failed on line:\n"+line)
+         "Failed on line:\n"+line)
 
   site['chr'] = fields[0]
   site['pos'] = fields[1]
@@ -246,35 +246,38 @@
 
   if len(samples) < len(sample_names):
     fail("Error in input VCF: missing sample fields in data line. "
-          +"Failed on line:\n"+line)
+         "Failed on line:\n"+line)
   elif len(samples) > len(sample_names):
     fail("Error in input VCF: more sample fields in data line than in header. "
-          +"Failed on line:\n"+line)
+         "Failed on line:\n"+line)
 
   sample_counts = {}
   for i in range(len(samples)):
-    
+
     variant_counts = {}
     counts = samples[i].split(':')[-1]
     counts = counts.split(',')
 
     for count in counts:
-      if not count:
+      if not count or count == '.':
         continue
       fields = count.split('=')
       if len(fields) != 2:
         fail("Error in input VCF: Incorrect variant data format (must contain "
-          +"a single '='). Failed on line:\n"+line)
+             "a single '='). Failed on data \"{}\" in line:\n{}"
+             .format(count, line))
       (variant, reads) = fields
       if variant[1:] not in canonical:
         continue
-      if variant[0] != '-' and variant[0] != '+':
-        fail("Error in input VCF: variant data not strand-specific. "
-          +"Failed on line:\n"+line)
+      if not variant.startswith('-') and not variant.startswith('+'):
+        fail("Error in input VCF: variant data not strand-specific. Failed on "
+             "data \"{}\" on line:\n{}".format(variant, line))
       try:
         variant_counts[variant] = int(float(reads))
       except ValueError:
-        fail("Error in input VCF: Variant count not a valid number. Failed on variant count string '"+reads+"'\nIn the following line:\n"+line)
+        fail("Error in input VCF: Variant count not a valid number. Failed on "
+             "variant count string \"{}\"\nIn the following line:\n{}"
+             .format(reads, line))
 
     sample_counts[sample_names[i]] = variant_counts
 
diff -r a72277535a2c -r 411adeff1eec tests/artificial-nofilt.csv.out
--- a/tests/artificial-nofilt.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/artificial-nofilt.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,27 +1,27 @@
-#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC.
-THYROID chr1 0 30 0 0 0 30 1 A . 0.0
-THYROID chr1 10 30 0 2 0 32 2 A G 0.0625
-THYROID chr1 20 31 0 1 0 32 0 A G 0.03125
-THYROID chr1 30 21 0 4 0 25 2 A G 0.16
-THYROID chr1 40 22 0 3 0 25 0 A G 0.12
-THYROID chr1 50 3 0 0 0 3 1 A . 0.0
-THYROID chr1 60 2 0 2 0 4 2 A G 0.5
-THYROID chr1 70 1 0 3 0 4 0 G A 0.25
-THYROID chr1 80 104 0 3 0 107 0 A G 0.02804
-THYROID chr1 90 100 2 11 0 113 3 A G 0.09735
-THYROID chr1 100 100 1 11 0 112 0 A G 0.09821
-THYROID chr1 120 0 0 0 0 0 0 . . 0.0
-THYROID chr1 130 0 0 2 0 2 1 G . 0.0
-THYROID chr1 140 0 0 1 0 1 0 G . 0.0
-THYROID chr1 150 0 0 4 0 4 1 G . 0.0
-THYROID chr1 160 0 0 3 0 3 0 G . 0.0
-THYROID chr1 260 106 0 14 0 120 2 A G 0.11667
-THYROID chr1 300 2 0 2 76 80 3 T G 0.025
-THYROID chr1 310 12 0 12 76 100 3 T G 0.12
-THYROID chr1 320 12 0 12 56 80 3 T A 0.15
-THYROID chr1 330 7 0 7 66 80 3 T G 0.0875
-THYROID chr1 340 1 0 1 98 100 0 T G 0.01
-THYROID chr1 350 11 0 11 78 100 0 T A 0.11
-THYROID chr1 400 32 0 8 0 40 2 A G 0.2
-THYROID chr1 410 1 0 2 97 100 0 T G 0.02
-THYROID chr1 420 104 0 0 0 104 1 A . 0.0
+#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS
+THYROID chr1 0 30 0 0 0 30 1 A . 0.0 .
+THYROID chr1 10 30 0 2 0 32 2 A G 0.0625 0.0
+THYROID chr1 20 31 0 1 0 32 0 A G 0.03125 2.0
+THYROID chr1 30 21 0 4 0 25 2 A G 0.16 0.08013
+THYROID chr1 40 22 0 3 0 25 0 A G 0.12 1.78571
+THYROID chr1 50 3 0 0 0 3 1 A . 0.0 .
+THYROID chr1 60 2 0 2 0 4 2 A G 0.5 0.0
+THYROID chr1 70 1 0 3 0 4 0 G A 0.25 2.0
+THYROID chr1 80 104 0 3 0 107 0 A G 0.02804 1.01905
+THYROID chr1 90 100 2 11 0 113 3 A G 0.09735 0.16381
+THYROID chr1 100 100 1 11 0 112 0 A G 0.09821 0.16381
+THYROID chr1 120 0 0 0 0 0 0 . . 0.0 .
+THYROID chr1 130 0 0 2 0 2 1 G . 0.0 .
+THYROID chr1 140 0 0 1 0 1 0 G . 0.0 .
+THYROID chr1 150 0 0 4 0 4 1 G . 0.0 .
+THYROID chr1 160 0 0 3 0 3 0 G . 0.0 .
+THYROID chr1 260 106 0 14 0 120 2 A G 0.11667 2.4
+THYROID chr1 300 2 0 2 76 80 3 T G 0.025 0.0
+THYROID chr1 310 12 0 12 76 100 3 T G 0.12 0.0
+THYROID chr1 320 12 0 12 56 80 3 T A 0.15 0.64394
+THYROID chr1 330 7 0 7 66 80 3 T G 0.0875 1.06247
+THYROID chr1 340 1 0 1 98 100 0 T G 0.01 5.21053
+THYROID chr1 350 11 0 11 78 100 0 T A 0.11 1.25352
+THYROID chr1 400 32 0 8 0 40 2 A G 0.2 0.0
+THYROID chr1 410 1 0 2 97 100 0 T G 0.02 5.5
+THYROID chr1 420 104 0 0 0 104 1 A . 0.0 .
diff -r a72277535a2c -r 411adeff1eec tests/artificial-samples.csv.out
--- a/tests/artificial-samples.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/artificial-samples.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,13 +1,13 @@
-BRAIN chr1 0 30 0 0 0 30 1 A . 0.0
-ARTERY chr1 0 0 0 30 0 30 1 G . 0.0
-THYROID chr1 0 0 30 0 0 30 1 C . 0.0
-BRAIN chr1 10 30 0 0 0 30 1 A . 0.0
-ARTERY chr1 10 30 0 2 0 32 1 A G 0.0625
-THYROID chr1 10 31 0 1 0 32 1 A G 0.03125
-BRAIN chr1 20 30 0 2 0 32 1 A G 0.0625
-ARTERY chr1 20 34 0 6 0 40 2 A G 0.15
-THYROID chr1 20 30 0 2 0 32 0 A G 0.0625
-BRAIN chr1 30 30 0 0 0 30 1 A . 0.0
-BRAIN chr1 40 0 0 0 30 30 1 T . 0.0
-ARTERY chr1 40 1 0 2 97 100 0 T G 0.02
-THYROID chr1 40 0 69 0 31 100 0 C T 0.31
+BRAIN chr1 0 30 0 0 0 30 1 A . 0.0 .
+ARTERY chr1 0 0 0 30 0 30 1 G . 0.0 .
+THYROID chr1 0 0 30 0 0 30 1 C . 0.0 .
+BRAIN chr1 10 30 0 0 0 30 1 A . 0.0 .
+ARTERY chr1 10 30 0 2 0 32 1 A G 0.0625 0.0
+THYROID chr1 10 31 0 1 0 32 1 A G 0.03125 2.0
+BRAIN chr1 20 30 0 2 0 32 1 A G 0.0625 0.0
+ARTERY chr1 20 34 0 6 0 40 2 A G 0.15 0.0
+THYROID chr1 20 30 0 2 0 32 0 A G 0.0625 1.88235
+BRAIN chr1 30 30 0 0 0 30 1 A . 0.0 .
+BRAIN chr1 40 0 0 0 30 30 1 T . 0.0 .
+ARTERY chr1 40 1 0 2 97 100 0 T G 0.02 5.5
+THYROID chr1 40 0 69 0 31 100 0 C T 0.31 1.00096
diff -r a72277535a2c -r 411adeff1eec tests/artificial.csv.out
--- a/tests/artificial.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/artificial.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,35 +1,35 @@
-THYROID chr1 0 30 0 0 0 30 1 A . 0.0
-THYROID chr1 10 30 0 2 0 32 1 A G 0.0625
-THYROID chr1 20 31 0 1 0 32 1 A G 0.03125
-THYROID chr1 30 21 0 4 0 25 2 A G 0.16
-THYROID chr1 40 22 0 3 0 25 0 A G 0.12
-THYROID chr1 50 30 0 0 0 30 1 A . 0.0
-THYROID chr1 60 31 0 0 0 31 1 A . 0.0
-THYROID chr1 70 21 0 0 0 21 1 A . 0.0
-THYROID chr1 80 22 0 0 0 22 1 A . 0.0
-THYROID chr1 82 30 0 2 0 32 1 A G 0.0625
-THYROID chr1 84 31 0 1 0 32 1 A G 0.03125
-THYROID chr1 86 21 0 4 0 25 2 A G 0.16
-THYROID chr1 88 22 0 3 0 25 0 A G 0.12
-THYROID chr1 90 30 0 0 0 30 1 A . 0.0
-THYROID chr1 100 31 0 0 0 31 1 A . 0.0
-THYROID chr1 110 21 0 0 0 21 1 A . 0.0
-THYROID chr1 120 22 0 0 0 22 1 A . 0.0
-THYROID chr1 210 20 0 0 0 20 1 A . 0.0
-THYROID chr1 220 22 0 0 0 22 1 A . 0.0
-THYROID chr1 230 182 0 18 0 200 1 A G 0.09
-THYROID chr1 240 180 0 20 0 200 2 A G 0.1
-THYROID chr1 250 178 0 22 0 200 2 A G 0.11
-THYROID chr1 260 106 0 14 0 120 0 A G 0.11667
-THYROID chr1 300 2 0 2 76 80 1 T G 0.025
-THYROID chr1 310 12 0 12 76 100 3 T G 0.12
-THYROID chr1 320 12 0 12 56 80 3 T A 0.15
-THYROID chr1 330 7 0 7 66 80 0 T G 0.0875
-THYROID chr1 340 1 0 1 98 100 1 T G 0.01
-THYROID chr1 350 11 0 11 78 100 0 T A 0.11
-THYROID chr1 400 32 0 8 0 40 2 A G 0.2
-THYROID chr1 410 1 0 2 97 100 0 T G 0.02
-THYROID chr1 420 104 0 0 0 104 1 A . 0.0
-THYROID chr1 430 30 0 0 0 30 1 A . 0.0
-THYROID chr1 440 30 0 0 0 30 1 A . 0.0
-THYROID 27 1234567890 29 0 0 0 29 1 A . 0.0
+THYROID chr1 0 30 0 0 0 30 1 A . 0.0 .
+THYROID chr1 10 30 0 2 0 32 1 A G 0.0625 0.0
+THYROID chr1 20 31 0 1 0 32 1 A G 0.03125 2.0
+THYROID chr1 30 21 0 4 0 25 2 A G 0.16 0.08013
+THYROID chr1 40 22 0 3 0 25 0 A G 0.12 1.78571
+THYROID chr1 50 30 0 0 0 30 1 A . 0.0 .
+THYROID chr1 60 31 0 0 0 31 1 A . 0.0 .
+THYROID chr1 70 21 0 0 0 21 1 A . 0.0 .
+THYROID chr1 80 22 0 0 0 22 1 A . 0.0 .
+THYROID chr1 82 30 0 2 0 32 1 A G 0.0625 0.0
+THYROID chr1 84 31 0 1 0 32 1 A G 0.03125 2.0
+THYROID chr1 86 21 0 4 0 25 2 A G 0.16 0.08013
+THYROID chr1 88 22 0 3 0 25 0 A G 0.12 1.78571
+THYROID chr1 90 30 0 0 0 30 1 A . 0.0 .
+THYROID chr1 100 31 0 0 0 31 1 A . 0.0 .
+THYROID chr1 110 21 0 0 0 21 1 A . 0.0 .
+THYROID chr1 120 22 0 0 0 22 1 A . 0.0 .
+THYROID chr1 210 20 0 0 0 20 1 A . 0.0 .
+THYROID chr1 220 22 0 0 0 22 1 A . 0.0 .
+THYROID chr1 230 182 0 18 0 200 1 A G 0.09 0.0
+THYROID chr1 240 180 0 20 0 200 2 A G 0.1 0.0
+THYROID chr1 250 178 0 22 0 200 2 A G 0.11 0.0
+THYROID chr1 260 106 0 14 0 120 0 A G 0.11667 2.4
+THYROID chr1 300 2 0 2 76 80 1 T G 0.025 0.0
+THYROID chr1 310 12 0 12 76 100 3 T G 0.12 0.0
+THYROID chr1 320 12 0 12 56 80 3 T A 0.15 0.64394
+THYROID chr1 330 7 0 7 66 80 0 T G 0.0875 1.06247
+THYROID chr1 340 1 0 1 98 100 1 T G 0.01 5.21053
+THYROID chr1 350 11 0 11 78 100 0 T A 0.11 1.25352
+THYROID chr1 400 32 0 8 0 40 2 A G 0.2 0.0
+THYROID chr1 410 1 0 2 97 100 0 T G 0.02 5.5
+THYROID chr1 420 104 0 0 0 104 1 A . 0.0 .
+THYROID chr1 430 30 0 0 0 30 1 A . 0.0 .
+THYROID chr1 440 30 0 0 0 30 1 A . 0.0 .
+THYROID 27 1234567890 29 0 0 0 29 1 A . 0.0 .
diff -r a72277535a2c -r 411adeff1eec tests/real-mit-s.csv.out
--- a/tests/real-mit-s.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real-mit-s.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,12 +1,12 @@
-#SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC.
-S1 chrM 2000 1 9095 1 0 7 5808 0 1 14913 1 C A 0.00054
-S3 chrM 2000 0 7933 0 4 10 5242 1 2 13192 1 C A 0.00076
-S1 chrM 3000 17399 7 22 8 10567 35 22 4 28064 0 A G 0.00157
-S2 chrM 3000 12535 3 24 2 7937 13 12 2 20528 1 A G 0.00175
-S3 chrM 3000 18981 7 29 6 11286 33 17 4 30363 0 A G 0.00152
-S4 chrM 3000 9254 1 15 2 6240 16 14 1 15543 0 A G 0.00187
-S1 chrM 4000 6134 2 1 3 6124 1 1 1 12267 1 A T 0.00033
-S1 chrM 7000 0 17 1 6216 4 9 2 7529 13778 0 T C 0.00189
-S2 chrM 7000 0 7 2 5104 0 9 4 6288 11414 1 T C 0.0014
-S3 chrM 7000 0 9 0 6446 4 4 10 7506 13979 1 T C 0.00093
-S3 chrM 8000 3 1 5023 1 1 0 5043 2 10074 1 G A 0.0004
+#SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MAF BIAS
+S1 chrM 2000 1 9095 1 0 7 5808 0 1 14913 1 C A 0.00054 2.03879
+S3 chrM 2000 0 7933 0 4 10 5242 1 2 13192 1 C A 0.00076 2.51047
+S1 chrM 3000 17399 7 22 8 10567 35 22 4 28064 0 A G 0.00157 0.51868
+S2 chrM 3000 12535 3 24 2 7937 13 12 2 20528 1 A G 0.00175 0.22864
+S3 chrM 3000 18981 7 29 6 11286 33 17 4 30363 0 A G 0.00152 0.01416
+S4 chrM 3000 9254 1 15 2 6240 16 14 1 15543 0 A G 0.00187 0.33202
+S1 chrM 4000 6134 2 1 3 6124 1 1 1 12267 1 A T 0.00033 0.99804
+S1 chrM 7000 0 17 1 6216 4 9 2 7529 13778 0 T C 0.00189 0.81221
+S2 chrM 7000 0 7 2 5104 0 9 4 6288 11414 1 T C 0.0014 0.04254
+S3 chrM 7000 0 9 0 6446 4 4 10 7506 13979 1 T C 0.00093 0.92561
+S3 chrM 8000 3 1 5023 1 1 0 5043 2 10074 1 G A 0.0004 1.00358
diff -r a72277535a2c -r 411adeff1eec tests/real-mit.csv.out
--- a/tests/real-mit.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real-mit.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,12 +1,12 @@
-#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC.
-S1 chrM 2000 8 14903 1 1 14913 1 C A 0.00054
-S3 chrM 2000 10 13175 1 6 13192 1 C A 0.00076
-S1 chrM 3000 27966 42 44 12 28064 0 A G 0.00157
-S2 chrM 3000 20472 16 36 4 20528 1 A G 0.00175
-S3 chrM 3000 30267 40 46 10 30363 0 A G 0.00152
-S4 chrM 3000 15494 17 29 3 15543 0 A G 0.00187
-S1 chrM 4000 12258 3 2 4 12267 1 A T 0.00033
-S1 chrM 7000 4 26 3 13745 13778 0 T C 0.00189
-S2 chrM 7000 0 16 6 11392 11414 1 T C 0.0014
-S3 chrM 7000 4 13 10 13952 13979 1 T C 0.00093
-S3 chrM 8000 4 1 10066 3 10074 1 G A 0.0004
+#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS
+S1 chrM 2000 8 14903 1 1 14913 1 C A 0.00054 2.03879
+S3 chrM 2000 10 13175 1 6 13192 1 C A 0.00076 2.51047
+S1 chrM 3000 27966 42 44 12 28064 0 A G 0.00157 0.51868
+S2 chrM 3000 20472 16 36 4 20528 1 A G 0.00175 0.22864
+S3 chrM 3000 30267 40 46 10 30363 0 A G 0.00152 0.01416
+S4 chrM 3000 15494 17 29 3 15543 0 A G 0.00187 0.33202
+S1 chrM 4000 12258 3 2 4 12267 1 A T 0.00033 0.99804
+S1 chrM 7000 4 26 3 13745 13778 0 T C 0.00189 0.81221
+S2 chrM 7000 0 16 6 11392 11414 1 T C 0.0014 0.04254
+S3 chrM 7000 4 13 10 13952 13979 1 T C 0.00093 0.92561
+S3 chrM 8000 4 1 10066 3 10074 1 G A 0.0004 1.00358
diff -r a72277535a2c -r 411adeff1eec tests/real-nofilt.csv.out
--- a/tests/real-nofilt.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real-nofilt.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,15 +1,15 @@
-#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC.
-THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0
-THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0
-THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0
-THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0
-THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04
-THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704
-THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0
-THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096
-THYROID chr1 246729215 1 0 1 88 90 0 T G 0.01111
-THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087
-THYROID chr1 246729378 16 7 0 0 23 0 A C 0.30435
-THYROID chr1 246729392 29 0 10 0 39 0 A G 0.25641
-THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0
-THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279
+#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS
+THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 .
+THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 .
+THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 .
+THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 .
+THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 1.36364
+THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 2.14286
+THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 .
+THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 1.22996
+THYROID chr1 246729215 1 0 1 88 90 0 T G 0.01111 11.125
+THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 9.1
+THYROID chr1 246729378 16 7 0 0 23 0 A C 0.30435 .
+THYROID chr1 246729392 29 0 10 0 39 0 A G 0.25641 .
+THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 .
+THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 1.79167
diff -r a72277535a2c -r 411adeff1eec tests/real.csv.out
--- a/tests/real.csv.out Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real.csv.out Tue Aug 23 02:30:56 2016 -0400
@@ -1,11 +1,11 @@
-THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0
-THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0
-THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0
-THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0
-THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04
-THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704
-THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0
-THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096
-THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087
-THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0
-THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279
+THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 .
+THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 .
+THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 .
+THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 .
+THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 1.36364
+THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 2.14286
+THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 .
+THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 1.22996
+THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 9.1
+THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 .
+THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 1.79167
diff -r a72277535a2c -r 411adeff1eec tests/run-tests.py
--- a/tests/run-tests.py Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/run-tests.py Tue Aug 23 02:30:56 2016 -0400
@@ -3,6 +3,7 @@
 import sys
 import subprocess
 
+SCRIPT_NAME = 'allele-counts.py'
 DATASETS = [
   'artificial',
   'artificial-samples',
@@ -16,15 +17,52 @@
 OUT_EXT = '.csv.out'
 ARGS_KEY = '##comment="ARGS='
 
+XML = {
+  'tests_start':'  <tests>',
+  'test_start': '    <test>',
+  'input':      '      <param name="input" value="tests/%s" />',
+  'param':      '      <param name="%s" value="%s" />',
+  'output':     '      <output name="output" file="tests/%s" />',
+  'test_end':   '    </test>',
+  'tests_end':  '  </tests>',
+}
+PARAMS = {
+  '-f':'freq',
+  '-c':'covg',
+  '-H':'header',
+  '-s':'stranded',
+  '-n':'nofilt',
+  '-r':'seed',
+}
+PARAM_ARG = {
+  '-f':True,
+  '-c':True,
+  '-H':False,
+  '-s':False,
+  '-n':False,
+  '-r':True,
+}
+
 def main():
 
-  test_dir = os.path.dirname(os.path.relpath(sys.argv[0]))
-  if test_dir:
-    test_dir += os.sep
+  do_print_xml = False
+  if len(sys.argv) > 1:
+    if sys.argv[1] == '-x':
+      do_print_xml = True
+    else:
+      sys.stderr.write("Error: unrecognized option '"+sys.argv[1]+"'\n")
+      sys.exit(1)
+
+  test_dir = os.path.dirname(os.path.realpath(__file__))
+  script_dir = os.path.relpath(os.path.dirname(test_dir))
+  test_dir = os.path.relpath(test_dir)
+
+  if do_print_xml:
+    print XML.get('tests_start')
 
   for dataset in DATASETS:
-    infile  = test_dir+dataset+IN_EXT
-    outfile = test_dir+dataset+OUT_EXT
+    infile  = os.path.join(test_dir, dataset+IN_EXT)
+    outfile = os.path.join(test_dir, dataset+OUT_EXT)
 
     if not os.path.exists(infile):
       sys.stderr.write("Error: file not found: "+infile+"\n")
@@ -34,11 +72,51 @@
       continue
 
     options = read_options(infile)
-    script_cmd = 'allele-counts.py '+options+' -i '+infile
-    bash_cmd = 'diff '+outfile+' <('+script_cmd+')'
-    # print infile+":"
-    print script_cmd
-    subprocess.call(['bash', '-c', bash_cmd])
+    if do_print_xml:
+      print_xml(infile, outfile, options, XML, PARAMS, PARAM_ARG)
+    else:
+      run_tests(infile, outfile, options, script_dir)
+
+  if do_print_xml:
+    print XML.get('tests_end')
+
+
+def run_tests(infile, outfile, options, script_dir):
+  script_cmd = os.path.join(script_dir, SCRIPT_NAME)+' '+options+' -i '+infile
+  bash_cmd = 'diff '+outfile+' <('+script_cmd+')'
+  print script_cmd
+  subprocess.call(['bash', '-c', bash_cmd])
+
+
+def print_xml(infile, outfile, options_str, xml, params, param_arg):
+  infile = os.path.basename(infile)
+  outfile = os.path.basename(outfile)
+
+  options = options_str.split()  # on whitespace
+
+  print xml.get('test_start')
+  print xml.get('input') % infile
+
+  # read in options one at a time, print <param> line
+  i = 0
+  while i < len(options):
+    opt = options[i]
+    if not params.has_key(opt) or not param_arg.has_key(opt):
+      sys.stderr.write("Error: unknown option '"+opt+"' in ARGS list in file "
+        +infile+"\n")
+      sys.exit(1)
+    # takes argument
+    if param_arg[opt]:
+      i+=1
+      arg = options[i]
+      print xml.get('param') % (params[opt], arg)
+    # no argument (boolean)
+    else:
+      print xml.get('param') % (params[opt], 'true')
+    i+=1
+
+  print xml.get('output') % outfile
+  print xml.get('test_end')
 
 
 def read_options(infile):