changeset 8:411adeff1eec draft

Handle "." sample columns, update tests to work with BIAS column.
author nick
date Tue, 23 Aug 2016 02:30:56 -0400
parents a72277535a2c
children 6cc488e11544
files allele-counts.py tests/artificial-nofilt.csv.out tests/artificial-samples.csv.out tests/artificial.csv.out tests/real-mit-s.csv.out tests/real-mit.csv.out tests/real-nofilt.csv.out tests/real.csv.out tests/run-tests.py
diffstat 9 files changed, 226 insertions(+), 145 deletions(-) [+]
line wrap: on
line diff
--- a/allele-counts.py	Wed Dec 09 11:37:02 2015 -0500
+++ b/allele-counts.py	Tue Aug 23 02:30:56 2016 -0400
@@ -238,7 +238,7 @@
 
   if len(fields) < 9:
     fail("Error in input VCF: wrong number of fields in data line. "
-          +"Failed on line:\n"+line)
+         "Failed on line:\n"+line)
 
   site['chr'] = fields[0]
   site['pos'] = fields[1]
@@ -246,35 +246,38 @@
 
   if len(samples) < len(sample_names):
     fail("Error in input VCF: missing sample fields in data line. "
-          +"Failed on line:\n"+line)
+         "Failed on line:\n"+line)
   elif len(samples) > len(sample_names):
     fail("Error in input VCF: more sample fields in data line than in header. "
-          +"Failed on line:\n"+line)
+         "Failed on line:\n"+line)
 
   sample_counts = {}
   for i in range(len(samples)):
-    
+
     variant_counts = {}
     counts = samples[i].split(':')[-1]
     counts = counts.split(',')
 
     for count in counts:
-      if not count:
+      if not count or count == '.':
         continue
       fields = count.split('=')
       if len(fields) != 2:
         fail("Error in input VCF: Incorrect variant data format (must contain "
-          +"a single '='). Failed on line:\n"+line)
+             "a single '='). Failed on data \"{}\" in line:\n{}"
+             .format(count, line))
       (variant, reads) = fields
       if variant[1:] not in canonical:
         continue
-      if variant[0] != '-' and variant[0] != '+':
-        fail("Error in input VCF: variant data not strand-specific. "
-          +"Failed on line:\n"+line)
+      if not variant.startswith('-') and not variant.startswith('+'):
+        fail("Error in input VCF: variant data not strand-specific. Failed on "
+             "data \"{}\" on line:\n{}".format(variant, line))
       try:
         variant_counts[variant] = int(float(reads))
       except ValueError:
-        fail("Error in input VCF: Variant count not a valid number. Failed on variant count string '"+reads+"'\nIn the following line:\n"+line)
+        fail("Error in input VCF: Variant count not a valid number. Failed on "
+             "variant count string \"{}\"\nIn the following line:\n{}"
+             .format(reads, line))
 
     sample_counts[sample_names[i]] = variant_counts
 
--- a/tests/artificial-nofilt.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/artificial-nofilt.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,27 +1,27 @@
-#SAMPLE	CHR	POS	A	C	G	T	CVRG	ALLELES	MAJOR	MINOR	MINOR.FREQ.PERC.
-THYROID	chr1	0	30	0	0	0	30	1	A	.	0.0
-THYROID	chr1	10	30	0	2	0	32	2	A	G	0.0625
-THYROID	chr1	20	31	0	1	0	32	0	A	G	0.03125
-THYROID	chr1	30	21	0	4	0	25	2	A	G	0.16
-THYROID	chr1	40	22	0	3	0	25	0	A	G	0.12
-THYROID	chr1	50	3	0	0	0	3	1	A	.	0.0
-THYROID	chr1	60	2	0	2	0	4	2	A	G	0.5
-THYROID	chr1	70	1	0	3	0	4	0	G	A	0.25
-THYROID	chr1	80	104	0	3	0	107	0	A	G	0.02804
-THYROID	chr1	90	100	2	11	0	113	3	A	G	0.09735
-THYROID	chr1	100	100	1	11	0	112	0	A	G	0.09821
-THYROID	chr1	120	0	0	0	0	0	0	.	.	0.0
-THYROID	chr1	130	0	0	2	0	2	1	G	.	0.0
-THYROID	chr1	140	0	0	1	0	1	0	G	.	0.0
-THYROID	chr1	150	0	0	4	0	4	1	G	.	0.0
-THYROID	chr1	160	0	0	3	0	3	0	G	.	0.0
-THYROID	chr1	260	106	0	14	0	120	2	A	G	0.11667
-THYROID	chr1	300	2	0	2	76	80	3	T	G	0.025
-THYROID	chr1	310	12	0	12	76	100	3	T	G	0.12
-THYROID	chr1	320	12	0	12	56	80	3	T	A	0.15
-THYROID	chr1	330	7	0	7	66	80	3	T	G	0.0875
-THYROID	chr1	340	1	0	1	98	100	0	T	G	0.01
-THYROID	chr1	350	11	0	11	78	100	0	T	A	0.11
-THYROID	chr1	400	32	0	8	0	40	2	A	G	0.2
-THYROID	chr1	410	1	0	2	97	100	0	T	G	0.02
-THYROID	chr1	420	104	0	0	0	104	1	A	.	0.0
+#SAMPLE	CHR	POS	A	C	G	T	CVRG	ALLELES	MAJOR	MINOR	MAF	BIAS
+THYROID	chr1	0	30	0	0	0	30	1	A	.	0.0	.
+THYROID	chr1	10	30	0	2	0	32	2	A	G	0.0625	0.0
+THYROID	chr1	20	31	0	1	0	32	0	A	G	0.03125	2.0
+THYROID	chr1	30	21	0	4	0	25	2	A	G	0.16	0.08013
+THYROID	chr1	40	22	0	3	0	25	0	A	G	0.12	1.78571
+THYROID	chr1	50	3	0	0	0	3	1	A	.	0.0	.
+THYROID	chr1	60	2	0	2	0	4	2	A	G	0.5	0.0
+THYROID	chr1	70	1	0	3	0	4	0	G	A	0.25	2.0
+THYROID	chr1	80	104	0	3	0	107	0	A	G	0.02804	1.01905
+THYROID	chr1	90	100	2	11	0	113	3	A	G	0.09735	0.16381
+THYROID	chr1	100	100	1	11	0	112	0	A	G	0.09821	0.16381
+THYROID	chr1	120	0	0	0	0	0	0	.	.	0.0	.
+THYROID	chr1	130	0	0	2	0	2	1	G	.	0.0	.
+THYROID	chr1	140	0	0	1	0	1	0	G	.	0.0	.
+THYROID	chr1	150	0	0	4	0	4	1	G	.	0.0	.
+THYROID	chr1	160	0	0	3	0	3	0	G	.	0.0	.
+THYROID	chr1	260	106	0	14	0	120	2	A	G	0.11667	2.4
+THYROID	chr1	300	2	0	2	76	80	3	T	G	0.025	0.0
+THYROID	chr1	310	12	0	12	76	100	3	T	G	0.12	0.0
+THYROID	chr1	320	12	0	12	56	80	3	T	A	0.15	0.64394
+THYROID	chr1	330	7	0	7	66	80	3	T	G	0.0875	1.06247
+THYROID	chr1	340	1	0	1	98	100	0	T	G	0.01	5.21053
+THYROID	chr1	350	11	0	11	78	100	0	T	A	0.11	1.25352
+THYROID	chr1	400	32	0	8	0	40	2	A	G	0.2	0.0
+THYROID	chr1	410	1	0	2	97	100	0	T	G	0.02	5.5
+THYROID	chr1	420	104	0	0	0	104	1	A	.	0.0	.
--- a/tests/artificial-samples.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/artificial-samples.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,13 +1,13 @@
-BRAIN	chr1	0	30	0	0	0	30	1	A	.	0.0
-ARTERY	chr1	0	0	0	30	0	30	1	G	.	0.0
-THYROID	chr1	0	0	30	0	0	30	1	C	.	0.0
-BRAIN	chr1	10	30	0	0	0	30	1	A	.	0.0
-ARTERY	chr1	10	30	0	2	0	32	1	A	G	0.0625
-THYROID	chr1	10	31	0	1	0	32	1	A	G	0.03125
-BRAIN	chr1	20	30	0	2	0	32	1	A	G	0.0625
-ARTERY	chr1	20	34	0	6	0	40	2	A	G	0.15
-THYROID	chr1	20	30	0	2	0	32	0	A	G	0.0625
-BRAIN	chr1	30	30	0	0	0	30	1	A	.	0.0
-BRAIN	chr1	40	0	0	0	30	30	1	T	.	0.0
-ARTERY	chr1	40	1	0	2	97	100	0	T	G	0.02
-THYROID	chr1	40	0	69	0	31	100	0	C	T	0.31
+BRAIN	chr1	0	30	0	0	0	30	1	A	.	0.0	.
+ARTERY	chr1	0	0	0	30	0	30	1	G	.	0.0	.
+THYROID	chr1	0	0	30	0	0	30	1	C	.	0.0	.
+BRAIN	chr1	10	30	0	0	0	30	1	A	.	0.0	.
+ARTERY	chr1	10	30	0	2	0	32	1	A	G	0.0625	0.0
+THYROID	chr1	10	31	0	1	0	32	1	A	G	0.03125	2.0
+BRAIN	chr1	20	30	0	2	0	32	1	A	G	0.0625	0.0
+ARTERY	chr1	20	34	0	6	0	40	2	A	G	0.15	0.0
+THYROID	chr1	20	30	0	2	0	32	0	A	G	0.0625	1.88235
+BRAIN	chr1	30	30	0	0	0	30	1	A	.	0.0	.
+BRAIN	chr1	40	0	0	0	30	30	1	T	.	0.0	.
+ARTERY	chr1	40	1	0	2	97	100	0	T	G	0.02	5.5
+THYROID	chr1	40	0	69	0	31	100	0	C	T	0.31	1.00096
--- a/tests/artificial.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/artificial.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,35 +1,35 @@
-THYROID	chr1	0	30	0	0	0	30	1	A	.	0.0
-THYROID	chr1	10	30	0	2	0	32	1	A	G	0.0625
-THYROID	chr1	20	31	0	1	0	32	1	A	G	0.03125
-THYROID	chr1	30	21	0	4	0	25	2	A	G	0.16
-THYROID	chr1	40	22	0	3	0	25	0	A	G	0.12
-THYROID	chr1	50	30	0	0	0	30	1	A	.	0.0
-THYROID	chr1	60	31	0	0	0	31	1	A	.	0.0
-THYROID	chr1	70	21	0	0	0	21	1	A	.	0.0
-THYROID	chr1	80	22	0	0	0	22	1	A	.	0.0
-THYROID	chr1	82	30	0	2	0	32	1	A	G	0.0625
-THYROID	chr1	84	31	0	1	0	32	1	A	G	0.03125
-THYROID	chr1	86	21	0	4	0	25	2	A	G	0.16
-THYROID	chr1	88	22	0	3	0	25	0	A	G	0.12
-THYROID	chr1	90	30	0	0	0	30	1	A	.	0.0
-THYROID	chr1	100	31	0	0	0	31	1	A	.	0.0
-THYROID	chr1	110	21	0	0	0	21	1	A	.	0.0
-THYROID	chr1	120	22	0	0	0	22	1	A	.	0.0
-THYROID	chr1	210	20	0	0	0	20	1	A	.	0.0
-THYROID	chr1	220	22	0	0	0	22	1	A	.	0.0
-THYROID	chr1	230	182	0	18	0	200	1	A	G	0.09
-THYROID	chr1	240	180	0	20	0	200	2	A	G	0.1
-THYROID	chr1	250	178	0	22	0	200	2	A	G	0.11
-THYROID	chr1	260	106	0	14	0	120	0	A	G	0.11667
-THYROID	chr1	300	2	0	2	76	80	1	T	G	0.025
-THYROID	chr1	310	12	0	12	76	100	3	T	G	0.12
-THYROID	chr1	320	12	0	12	56	80	3	T	A	0.15
-THYROID	chr1	330	7	0	7	66	80	0	T	G	0.0875
-THYROID	chr1	340	1	0	1	98	100	1	T	G	0.01
-THYROID	chr1	350	11	0	11	78	100	0	T	A	0.11
-THYROID	chr1	400	32	0	8	0	40	2	A	G	0.2
-THYROID	chr1	410	1	0	2	97	100	0	T	G	0.02
-THYROID	chr1	420	104	0	0	0	104	1	A	.	0.0
-THYROID	chr1	430	30	0	0	0	30	1	A	.	0.0
-THYROID	chr1	440	30	0	0	0	30	1	A	.	0.0
-THYROID	27	1234567890	29	0	0	0	29	1	A	.	0.0
+THYROID	chr1	0	30	0	0	0	30	1	A	.	0.0	.
+THYROID	chr1	10	30	0	2	0	32	1	A	G	0.0625	0.0
+THYROID	chr1	20	31	0	1	0	32	1	A	G	0.03125	2.0
+THYROID	chr1	30	21	0	4	0	25	2	A	G	0.16	0.08013
+THYROID	chr1	40	22	0	3	0	25	0	A	G	0.12	1.78571
+THYROID	chr1	50	30	0	0	0	30	1	A	.	0.0	.
+THYROID	chr1	60	31	0	0	0	31	1	A	.	0.0	.
+THYROID	chr1	70	21	0	0	0	21	1	A	.	0.0	.
+THYROID	chr1	80	22	0	0	0	22	1	A	.	0.0	.
+THYROID	chr1	82	30	0	2	0	32	1	A	G	0.0625	0.0
+THYROID	chr1	84	31	0	1	0	32	1	A	G	0.03125	2.0
+THYROID	chr1	86	21	0	4	0	25	2	A	G	0.16	0.08013
+THYROID	chr1	88	22	0	3	0	25	0	A	G	0.12	1.78571
+THYROID	chr1	90	30	0	0	0	30	1	A	.	0.0	.
+THYROID	chr1	100	31	0	0	0	31	1	A	.	0.0	.
+THYROID	chr1	110	21	0	0	0	21	1	A	.	0.0	.
+THYROID	chr1	120	22	0	0	0	22	1	A	.	0.0	.
+THYROID	chr1	210	20	0	0	0	20	1	A	.	0.0	.
+THYROID	chr1	220	22	0	0	0	22	1	A	.	0.0	.
+THYROID	chr1	230	182	0	18	0	200	1	A	G	0.09	0.0
+THYROID	chr1	240	180	0	20	0	200	2	A	G	0.1	0.0
+THYROID	chr1	250	178	0	22	0	200	2	A	G	0.11	0.0
+THYROID	chr1	260	106	0	14	0	120	0	A	G	0.11667	2.4
+THYROID	chr1	300	2	0	2	76	80	1	T	G	0.025	0.0
+THYROID	chr1	310	12	0	12	76	100	3	T	G	0.12	0.0
+THYROID	chr1	320	12	0	12	56	80	3	T	A	0.15	0.64394
+THYROID	chr1	330	7	0	7	66	80	0	T	G	0.0875	1.06247
+THYROID	chr1	340	1	0	1	98	100	1	T	G	0.01	5.21053
+THYROID	chr1	350	11	0	11	78	100	0	T	A	0.11	1.25352
+THYROID	chr1	400	32	0	8	0	40	2	A	G	0.2	0.0
+THYROID	chr1	410	1	0	2	97	100	0	T	G	0.02	5.5
+THYROID	chr1	420	104	0	0	0	104	1	A	.	0.0	.
+THYROID	chr1	430	30	0	0	0	30	1	A	.	0.0	.
+THYROID	chr1	440	30	0	0	0	30	1	A	.	0.0	.
+THYROID	27	1234567890	29	0	0	0	29	1	A	.	0.0	.
--- a/tests/real-mit-s.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real-mit-s.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,12 +1,12 @@
-#SAMPLE	CHR	POS	+A	+C	+G	+T	-A	-C	-G	-T	CVRG	ALLELES	MAJOR	MINOR	MINOR.FREQ.PERC.
-S1	chrM	2000	1	9095	1	0	7	5808	0	1	14913	1	C	A	0.00054
-S3	chrM	2000	0	7933	0	4	10	5242	1	2	13192	1	C	A	0.00076
-S1	chrM	3000	17399	7	22	8	10567	35	22	4	28064	0	A	G	0.00157
-S2	chrM	3000	12535	3	24	2	7937	13	12	2	20528	1	A	G	0.00175
-S3	chrM	3000	18981	7	29	6	11286	33	17	4	30363	0	A	G	0.00152
-S4	chrM	3000	9254	1	15	2	6240	16	14	1	15543	0	A	G	0.00187
-S1	chrM	4000	6134	2	1	3	6124	1	1	1	12267	1	A	T	0.00033
-S1	chrM	7000	0	17	1	6216	4	9	2	7529	13778	0	T	C	0.00189
-S2	chrM	7000	0	7	2	5104	0	9	4	6288	11414	1	T	C	0.0014
-S3	chrM	7000	0	9	0	6446	4	4	10	7506	13979	1	T	C	0.00093
-S3	chrM	8000	3	1	5023	1	1	0	5043	2	10074	1	G	A	0.0004
+#SAMPLE	CHR	POS	+A	+C	+G	+T	-A	-C	-G	-T	CVRG	ALLELES	MAJOR	MINOR	MAF	BIAS
+S1	chrM	2000	1	9095	1	0	7	5808	0	1	14913	1	C	A	0.00054	2.03879
+S3	chrM	2000	0	7933	0	4	10	5242	1	2	13192	1	C	A	0.00076	2.51047
+S1	chrM	3000	17399	7	22	8	10567	35	22	4	28064	0	A	G	0.00157	0.51868
+S2	chrM	3000	12535	3	24	2	7937	13	12	2	20528	1	A	G	0.00175	0.22864
+S3	chrM	3000	18981	7	29	6	11286	33	17	4	30363	0	A	G	0.00152	0.01416
+S4	chrM	3000	9254	1	15	2	6240	16	14	1	15543	0	A	G	0.00187	0.33202
+S1	chrM	4000	6134	2	1	3	6124	1	1	1	12267	1	A	T	0.00033	0.99804
+S1	chrM	7000	0	17	1	6216	4	9	2	7529	13778	0	T	C	0.00189	0.81221
+S2	chrM	7000	0	7	2	5104	0	9	4	6288	11414	1	T	C	0.0014	0.04254
+S3	chrM	7000	0	9	0	6446	4	4	10	7506	13979	1	T	C	0.00093	0.92561
+S3	chrM	8000	3	1	5023	1	1	0	5043	2	10074	1	G	A	0.0004	1.00358
--- a/tests/real-mit.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real-mit.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,12 +1,12 @@
-#SAMPLE	CHR	POS	A	C	G	T	CVRG	ALLELES	MAJOR	MINOR	MINOR.FREQ.PERC.
-S1	chrM	2000	8	14903	1	1	14913	1	C	A	0.00054
-S3	chrM	2000	10	13175	1	6	13192	1	C	A	0.00076
-S1	chrM	3000	27966	42	44	12	28064	0	A	G	0.00157
-S2	chrM	3000	20472	16	36	4	20528	1	A	G	0.00175
-S3	chrM	3000	30267	40	46	10	30363	0	A	G	0.00152
-S4	chrM	3000	15494	17	29	3	15543	0	A	G	0.00187
-S1	chrM	4000	12258	3	2	4	12267	1	A	T	0.00033
-S1	chrM	7000	4	26	3	13745	13778	0	T	C	0.00189
-S2	chrM	7000	0	16	6	11392	11414	1	T	C	0.0014
-S3	chrM	7000	4	13	10	13952	13979	1	T	C	0.00093
-S3	chrM	8000	4	1	10066	3	10074	1	G	A	0.0004
+#SAMPLE	CHR	POS	A	C	G	T	CVRG	ALLELES	MAJOR	MINOR	MAF	BIAS
+S1	chrM	2000	8	14903	1	1	14913	1	C	A	0.00054	2.03879
+S3	chrM	2000	10	13175	1	6	13192	1	C	A	0.00076	2.51047
+S1	chrM	3000	27966	42	44	12	28064	0	A	G	0.00157	0.51868
+S2	chrM	3000	20472	16	36	4	20528	1	A	G	0.00175	0.22864
+S3	chrM	3000	30267	40	46	10	30363	0	A	G	0.00152	0.01416
+S4	chrM	3000	15494	17	29	3	15543	0	A	G	0.00187	0.33202
+S1	chrM	4000	12258	3	2	4	12267	1	A	T	0.00033	0.99804
+S1	chrM	7000	4	26	3	13745	13778	0	T	C	0.00189	0.81221
+S2	chrM	7000	0	16	6	11392	11414	1	T	C	0.0014	0.04254
+S3	chrM	7000	4	13	10	13952	13979	1	T	C	0.00093	0.92561
+S3	chrM	8000	4	1	10066	3	10074	1	G	A	0.0004	1.00358
--- a/tests/real-nofilt.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real-nofilt.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,15 +1,15 @@
-#SAMPLE	CHR	POS	A	C	G	T	CVRG	ALLELES	MAJOR	MINOR	MINOR.FREQ.PERC.
-THYROID	chr1	246704250	29	0	0	0	29	1	A	.	0.0
-THYROID	chr1	246704257	0	0	0	71	71	1	T	.	0.0
-THYROID	chr1	246704268	104	0	0	0	104	1	A	.	0.0
-THYROID	chr1	246704269	0	0	0	105	105	1	T	.	0.0
-THYROID	chr1	246704363	0	72	3	0	75	0	C	G	0.04
-THYROID	chr1	246704437	5	130	0	0	135	0	C	A	0.03704
-THYROID	chr1	246707878	0	0	131	0	131	1	G	.	0.0
-THYROID	chr1	246714587	30	0	43	0	73	2	G	A	0.41096
-THYROID	chr1	246729215	1	0	1	88	90	0	T	G	0.01111
-THYROID	chr1	246729216	1	0	1	90	92	0	T	G	0.01087
-THYROID	chr1	246729378	16	7	0	0	23	0	A	C	0.30435
-THYROID	chr1	246729392	29	0	10	0	39	0	A	G	0.25641
-THYROID	chr7	91502881	0	0	0	26	26	1	T	.	0.0
-THYROID	chr7	91502897	7	36	0	0	43	0	C	A	0.16279
+#SAMPLE	CHR	POS	A	C	G	T	CVRG	ALLELES	MAJOR	MINOR	MAF	BIAS
+THYROID	chr1	246704250	29	0	0	0	29	1	A	.	0.0	.
+THYROID	chr1	246704257	0	0	0	71	71	1	T	.	0.0	.
+THYROID	chr1	246704268	104	0	0	0	104	1	A	.	0.0	.
+THYROID	chr1	246704269	0	0	0	105	105	1	T	.	0.0	.
+THYROID	chr1	246704363	0	72	3	0	75	0	C	G	0.04	1.36364
+THYROID	chr1	246704437	5	130	0	0	135	0	C	A	0.03704	2.14286
+THYROID	chr1	246707878	0	0	131	0	131	1	G	.	0.0	.
+THYROID	chr1	246714587	30	0	43	0	73	2	G	A	0.41096	1.22996
+THYROID	chr1	246729215	1	0	1	88	90	0	T	G	0.01111	11.125
+THYROID	chr1	246729216	1	0	1	90	92	0	T	G	0.01087	9.1
+THYROID	chr1	246729378	16	7	0	0	23	0	A	C	0.30435	.
+THYROID	chr1	246729392	29	0	10	0	39	0	A	G	0.25641	.
+THYROID	chr7	91502881	0	0	0	26	26	1	T	.	0.0	.
+THYROID	chr7	91502897	7	36	0	0	43	0	C	A	0.16279	1.79167
--- a/tests/real.csv.out	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/real.csv.out	Tue Aug 23 02:30:56 2016 -0400
@@ -1,11 +1,11 @@
-THYROID	chr1	246704250	29	0	0	0	29	1	A	.	0.0
-THYROID	chr1	246704257	0	0	0	71	71	1	T	.	0.0
-THYROID	chr1	246704268	104	0	0	0	104	1	A	.	0.0
-THYROID	chr1	246704269	0	0	0	105	105	1	T	.	0.0
-THYROID	chr1	246704363	0	72	3	0	75	0	C	G	0.04
-THYROID	chr1	246704437	5	130	0	0	135	0	C	A	0.03704
-THYROID	chr1	246707878	0	0	131	0	131	1	G	.	0.0
-THYROID	chr1	246714587	30	0	43	0	73	2	G	A	0.41096
-THYROID	chr1	246729216	1	0	1	90	92	0	T	G	0.01087
-THYROID	chr7	91502881	0	0	0	26	26	1	T	.	0.0
-THYROID	chr7	91502897	7	36	0	0	43	0	C	A	0.16279
+THYROID	chr1	246704250	29	0	0	0	29	1	A	.	0.0	.
+THYROID	chr1	246704257	0	0	0	71	71	1	T	.	0.0	.
+THYROID	chr1	246704268	104	0	0	0	104	1	A	.	0.0	.
+THYROID	chr1	246704269	0	0	0	105	105	1	T	.	0.0	.
+THYROID	chr1	246704363	0	72	3	0	75	0	C	G	0.04	1.36364
+THYROID	chr1	246704437	5	130	0	0	135	0	C	A	0.03704	2.14286
+THYROID	chr1	246707878	0	0	131	0	131	1	G	.	0.0	.
+THYROID	chr1	246714587	30	0	43	0	73	2	G	A	0.41096	1.22996
+THYROID	chr1	246729216	1	0	1	90	92	0	T	G	0.01087	9.1
+THYROID	chr7	91502881	0	0	0	26	26	1	T	.	0.0	.
+THYROID	chr7	91502897	7	36	0	0	43	0	C	A	0.16279	1.79167
--- a/tests/run-tests.py	Wed Dec 09 11:37:02 2015 -0500
+++ b/tests/run-tests.py	Tue Aug 23 02:30:56 2016 -0400
@@ -3,6 +3,7 @@
 import sys
 import subprocess
 
+SCRIPT_NAME = 'allele-counts.py'
 DATASETS = [
   'artificial',
   'artificial-samples',
@@ -16,15 +17,52 @@
 OUT_EXT = '.csv.out'
 ARGS_KEY = '##comment="ARGS='
 
+# Templates for each line of the XML <tests> section printed when this script
+# is run with -x. The 'input', 'param' and 'output' entries are %-format
+# strings that print_xml() fills in; the rest are printed verbatim.
+XML = {
+  'tests_start':'  <tests>',
+  'test_start': '    <test>',
+  'input':      '      <param name="input" value="tests/%s" />',
+  'param':      '      <param name="%s" value="%s" />',
+  'output':     '      <output name="output" file="tests/%s" />',
+  'test_end':   '    </test>',
+  'tests_end':  '  </tests>',
+}
+# Maps each command-line flag found in a dataset's ARGS list to the name
+# written into its <param> line by print_xml().
+PARAMS = {
+  '-f':'freq',
+  '-c':'covg',
+  '-H':'header',
+  '-s':'stranded',
+  '-n':'nofilt',
+  '-r':'seed',
+}
+# For each flag: True if print_xml() should consume the following token as the
+# flag's value, False if the flag is a bare switch (emitted with value "true").
+PARAM_ARG = {
+  '-f':True,
+  '-c':True,
+  '-H':False,
+  '-s':False,
+  '-n':False,
+  '-r':True,
+}
+
 def main():
 
-  test_dir = os.path.dirname(os.path.relpath(sys.argv[0]))
-  if test_dir:
-    test_dir += os.sep
+  do_print_xml = False
+  if len(sys.argv) > 1:
+    if sys.argv[1] == '-x':
+      do_print_xml = True
+    else:
+      sys.stderr.write("Error: unrecognized option '"+sys.argv[1]+"'\n")
+      sys.exit(1)
+
+  test_dir = os.path.dirname(os.path.realpath(__file__))
+  script_dir = os.path.relpath(os.path.dirname(test_dir))
+  test_dir = os.path.relpath(test_dir)
+
+  if do_print_xml:
+    print XML.get('tests_start')
 
   for dataset in DATASETS:
-    infile  = test_dir+dataset+IN_EXT
-    outfile = test_dir+dataset+OUT_EXT
+    infile  = os.path.join(test_dir, dataset+IN_EXT)
+    outfile = os.path.join(test_dir, dataset+OUT_EXT)
 
     if not os.path.exists(infile):
       sys.stderr.write("Error: file not found: "+infile+"\n")
@@ -34,11 +72,51 @@
       continue
 
     options = read_options(infile)
-    script_cmd = 'allele-counts.py '+options+' -i '+infile
-    bash_cmd = 'diff '+outfile+' <('+script_cmd+')'
-    # print infile+":"
-    print script_cmd
-    subprocess.call(['bash', '-c', bash_cmd])
+    if do_print_xml:
+      print_xml(infile, outfile, options, XML, PARAMS, PARAM_ARG)
+    else:
+      run_tests(infile, outfile, options, script_dir)
+
+  if do_print_xml:
+    print XML.get('tests_end')
+
+
+def run_tests(infile, outfile, options, script_dir):
+  """Run the script on one dataset and diff its stdout against outfile.
+
+  infile: path to the dataset's input file, passed to the script with -i.
+  outfile: path to the file holding the expected output.
+  options: extra command-line options as a single string; it is handed to the
+    shell as-is so that it can carry several arguments.
+  script_dir: directory containing SCRIPT_NAME.
+
+  The script command line is printed before it is run, and any differences
+  reported by diff go to stdout. Returns the exit status of the bash command
+  (for diff: 0 means no differences were found).
+  """
+  # shlex.quote on Python 3; pipes.quote is the same helper on Python 2.
+  try:
+    from shlex import quote
+  except ImportError:
+    from pipes import quote
+  # Quote the path components so directories or file names containing spaces
+  # or shell metacharacters are not split or interpreted by bash.
+  script_path = quote(os.path.join(script_dir, SCRIPT_NAME))
+  script_cmd = script_path+' '+options+' -i '+quote(infile)
+  bash_cmd = 'diff '+quote(outfile)+' <('+script_cmd+')'
+  print script_cmd
+  return subprocess.call(['bash', '-c', bash_cmd])
+
+
+def print_xml(infile, outfile, options_str, xml, params, param_arg):
+  """Print one <test> element describing a dataset to stdout.
+
+  infile, outfile: paths to the dataset's input and expected-output files;
+    only their basenames are written into the XML.
+  options_str: the dataset's command-line options as one whitespace-separated
+    string.
+  xml: dict of templates for each XML line, keyed by 'test_start', 'input',
+    'param', 'output' and 'test_end'.
+  params: dict mapping an option flag to the name used in its <param> line.
+  param_arg: dict mapping an option flag to True if the flag consumes the
+    next token as its value, or False if it is a boolean switch.
+
+  Writes a message to stderr and exits with status 1 if an option is missing
+  from params or param_arg, or if an option that takes a value is the last
+  token and therefore has no value.
+  """
+  infile = os.path.basename(infile)
+  outfile = os.path.basename(outfile)
+
+  options = options_str.split()  # on whitespace
+
+  print xml.get('test_start')
+  print xml.get('input') % infile
+
+  # read in options one at a time, print <param> line
+  i = 0
+  while i < len(options):
+    opt = options[i]
+    if opt not in params or opt not in param_arg:
+      sys.stderr.write("Error: unknown option '"+opt+"' in ARGS list in file "
+        +infile+"\n")
+      sys.exit(1)
+    # takes argument
+    if param_arg[opt]:
+      i+=1
+      # A value-taking option at the very end of the list has nothing to read;
+      # report it instead of failing with an IndexError.
+      if i >= len(options):
+        sys.stderr.write("Error: option '"+opt+"' is missing its argument in "
+          "ARGS list in file "+infile+"\n")
+        sys.exit(1)
+      arg = options[i]
+      print xml.get('param') % (params[opt], arg)
+    # no argument (boolean)
+    else:
+      print xml.get('param') % (params[opt], 'true')
+    i+=1
+
+  print xml.get('output') % outfile
+  print xml.get('test_end')
 
 
 def read_options(infile):