Mercurial > repos > nick > allele_counts
changeset 8:411adeff1eec draft
Handle "." sample columns, update tests to work with BIAS column.
author | nick |
---|---|
date | Tue, 23 Aug 2016 02:30:56 -0400 |
parents | a72277535a2c |
children | 6cc488e11544 |
files | allele-counts.py tests/artificial-nofilt.csv.out tests/artificial-samples.csv.out tests/artificial.csv.out tests/real-mit-s.csv.out tests/real-mit.csv.out tests/real-nofilt.csv.out tests/real.csv.out tests/run-tests.py |
diffstat | 9 files changed, 226 insertions(+), 145 deletions(-) [+] |
line wrap: on
line diff
--- a/allele-counts.py Wed Dec 09 11:37:02 2015 -0500 +++ b/allele-counts.py Tue Aug 23 02:30:56 2016 -0400 @@ -238,7 +238,7 @@ if len(fields) < 9: fail("Error in input VCF: wrong number of fields in data line. " - +"Failed on line:\n"+line) + "Failed on line:\n"+line) site['chr'] = fields[0] site['pos'] = fields[1] @@ -246,35 +246,38 @@ if len(samples) < len(sample_names): fail("Error in input VCF: missing sample fields in data line. " - +"Failed on line:\n"+line) + "Failed on line:\n"+line) elif len(samples) > len(sample_names): fail("Error in input VCF: more sample fields in data line than in header. " - +"Failed on line:\n"+line) + "Failed on line:\n"+line) sample_counts = {} for i in range(len(samples)): - + variant_counts = {} counts = samples[i].split(':')[-1] counts = counts.split(',') for count in counts: - if not count: + if not count or count == '.': continue fields = count.split('=') if len(fields) != 2: fail("Error in input VCF: Incorrect variant data format (must contain " - +"a single '='). Failed on line:\n"+line) + "a single '='). Failed on data \"{}\" in line:\n{}" + .format(count, line)) (variant, reads) = fields if variant[1:] not in canonical: continue - if variant[0] != '-' and variant[0] != '+': - fail("Error in input VCF: variant data not strand-specific. " - +"Failed on line:\n"+line) + if not variant.startswith('-') and not variant.startswith('+'): + fail("Error in input VCF: variant data not strand-specific. Failed on " + "data \"{}\" on line:\n{}".format(variant, line)) try: variant_counts[variant] = int(float(reads)) except ValueError: - fail("Error in input VCF: Variant count not a valid number. Failed on variant count string '"+reads+"'\nIn the following line:\n"+line) + fail("Error in input VCF: Variant count not a valid number. Failed on " + "variant count string \"{}\"\nIn the following line:\n{}" + .format(reads, line)) sample_counts[sample_names[i]] = variant_counts
--- a/tests/artificial-nofilt.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/artificial-nofilt.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,27 +1,27 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -THYROID chr1 0 30 0 0 0 30 1 A . 0.0 -THYROID chr1 10 30 0 2 0 32 2 A G 0.0625 -THYROID chr1 20 31 0 1 0 32 0 A G 0.03125 -THYROID chr1 30 21 0 4 0 25 2 A G 0.16 -THYROID chr1 40 22 0 3 0 25 0 A G 0.12 -THYROID chr1 50 3 0 0 0 3 1 A . 0.0 -THYROID chr1 60 2 0 2 0 4 2 A G 0.5 -THYROID chr1 70 1 0 3 0 4 0 G A 0.25 -THYROID chr1 80 104 0 3 0 107 0 A G 0.02804 -THYROID chr1 90 100 2 11 0 113 3 A G 0.09735 -THYROID chr1 100 100 1 11 0 112 0 A G 0.09821 -THYROID chr1 120 0 0 0 0 0 0 . . 0.0 -THYROID chr1 130 0 0 2 0 2 1 G . 0.0 -THYROID chr1 140 0 0 1 0 1 0 G . 0.0 -THYROID chr1 150 0 0 4 0 4 1 G . 0.0 -THYROID chr1 160 0 0 3 0 3 0 G . 0.0 -THYROID chr1 260 106 0 14 0 120 2 A G 0.11667 -THYROID chr1 300 2 0 2 76 80 3 T G 0.025 -THYROID chr1 310 12 0 12 76 100 3 T G 0.12 -THYROID chr1 320 12 0 12 56 80 3 T A 0.15 -THYROID chr1 330 7 0 7 66 80 3 T G 0.0875 -THYROID chr1 340 1 0 1 98 100 0 T G 0.01 -THYROID chr1 350 11 0 11 78 100 0 T A 0.11 -THYROID chr1 400 32 0 8 0 40 2 A G 0.2 -THYROID chr1 410 1 0 2 97 100 0 T G 0.02 -THYROID chr1 420 104 0 0 0 104 1 A . 0.0 +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +THYROID chr1 0 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 10 30 0 2 0 32 2 A G 0.0625 0.0 +THYROID chr1 20 31 0 1 0 32 0 A G 0.03125 2.0 +THYROID chr1 30 21 0 4 0 25 2 A G 0.16 0.08013 +THYROID chr1 40 22 0 3 0 25 0 A G 0.12 1.78571 +THYROID chr1 50 3 0 0 0 3 1 A . 0.0 . +THYROID chr1 60 2 0 2 0 4 2 A G 0.5 0.0 +THYROID chr1 70 1 0 3 0 4 0 G A 0.25 2.0 +THYROID chr1 80 104 0 3 0 107 0 A G 0.02804 1.01905 +THYROID chr1 90 100 2 11 0 113 3 A G 0.09735 0.16381 +THYROID chr1 100 100 1 11 0 112 0 A G 0.09821 0.16381 +THYROID chr1 120 0 0 0 0 0 0 . . 0.0 . +THYROID chr1 130 0 0 2 0 2 1 G . 0.0 . +THYROID chr1 140 0 0 1 0 1 0 G . 0.0 . +THYROID chr1 150 0 0 4 0 4 1 G . 0.0 . +THYROID chr1 160 0 0 3 0 3 0 G . 0.0 . +THYROID chr1 260 106 0 14 0 120 2 A G 0.11667 2.4 +THYROID chr1 300 2 0 2 76 80 3 T G 0.025 0.0 +THYROID chr1 310 12 0 12 76 100 3 T G 0.12 0.0 +THYROID chr1 320 12 0 12 56 80 3 T A 0.15 0.64394 +THYROID chr1 330 7 0 7 66 80 3 T G 0.0875 1.06247 +THYROID chr1 340 1 0 1 98 100 0 T G 0.01 5.21053 +THYROID chr1 350 11 0 11 78 100 0 T A 0.11 1.25352 +THYROID chr1 400 32 0 8 0 40 2 A G 0.2 0.0 +THYROID chr1 410 1 0 2 97 100 0 T G 0.02 5.5 +THYROID chr1 420 104 0 0 0 104 1 A . 0.0 .
--- a/tests/artificial-samples.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/artificial-samples.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,13 +1,13 @@ -BRAIN chr1 0 30 0 0 0 30 1 A . 0.0 -ARTERY chr1 0 0 0 30 0 30 1 G . 0.0 -THYROID chr1 0 0 30 0 0 30 1 C . 0.0 -BRAIN chr1 10 30 0 0 0 30 1 A . 0.0 -ARTERY chr1 10 30 0 2 0 32 1 A G 0.0625 -THYROID chr1 10 31 0 1 0 32 1 A G 0.03125 -BRAIN chr1 20 30 0 2 0 32 1 A G 0.0625 -ARTERY chr1 20 34 0 6 0 40 2 A G 0.15 -THYROID chr1 20 30 0 2 0 32 0 A G 0.0625 -BRAIN chr1 30 30 0 0 0 30 1 A . 0.0 -BRAIN chr1 40 0 0 0 30 30 1 T . 0.0 -ARTERY chr1 40 1 0 2 97 100 0 T G 0.02 -THYROID chr1 40 0 69 0 31 100 0 C T 0.31 +BRAIN chr1 0 30 0 0 0 30 1 A . 0.0 . +ARTERY chr1 0 0 0 30 0 30 1 G . 0.0 . +THYROID chr1 0 0 30 0 0 30 1 C . 0.0 . +BRAIN chr1 10 30 0 0 0 30 1 A . 0.0 . +ARTERY chr1 10 30 0 2 0 32 1 A G 0.0625 0.0 +THYROID chr1 10 31 0 1 0 32 1 A G 0.03125 2.0 +BRAIN chr1 20 30 0 2 0 32 1 A G 0.0625 0.0 +ARTERY chr1 20 34 0 6 0 40 2 A G 0.15 0.0 +THYROID chr1 20 30 0 2 0 32 0 A G 0.0625 1.88235 +BRAIN chr1 30 30 0 0 0 30 1 A . 0.0 . +BRAIN chr1 40 0 0 0 30 30 1 T . 0.0 . +ARTERY chr1 40 1 0 2 97 100 0 T G 0.02 5.5 +THYROID chr1 40 0 69 0 31 100 0 C T 0.31 1.00096
--- a/tests/artificial.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/artificial.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,35 +1,35 @@ -THYROID chr1 0 30 0 0 0 30 1 A . 0.0 -THYROID chr1 10 30 0 2 0 32 1 A G 0.0625 -THYROID chr1 20 31 0 1 0 32 1 A G 0.03125 -THYROID chr1 30 21 0 4 0 25 2 A G 0.16 -THYROID chr1 40 22 0 3 0 25 0 A G 0.12 -THYROID chr1 50 30 0 0 0 30 1 A . 0.0 -THYROID chr1 60 31 0 0 0 31 1 A . 0.0 -THYROID chr1 70 21 0 0 0 21 1 A . 0.0 -THYROID chr1 80 22 0 0 0 22 1 A . 0.0 -THYROID chr1 82 30 0 2 0 32 1 A G 0.0625 -THYROID chr1 84 31 0 1 0 32 1 A G 0.03125 -THYROID chr1 86 21 0 4 0 25 2 A G 0.16 -THYROID chr1 88 22 0 3 0 25 0 A G 0.12 -THYROID chr1 90 30 0 0 0 30 1 A . 0.0 -THYROID chr1 100 31 0 0 0 31 1 A . 0.0 -THYROID chr1 110 21 0 0 0 21 1 A . 0.0 -THYROID chr1 120 22 0 0 0 22 1 A . 0.0 -THYROID chr1 210 20 0 0 0 20 1 A . 0.0 -THYROID chr1 220 22 0 0 0 22 1 A . 0.0 -THYROID chr1 230 182 0 18 0 200 1 A G 0.09 -THYROID chr1 240 180 0 20 0 200 2 A G 0.1 -THYROID chr1 250 178 0 22 0 200 2 A G 0.11 -THYROID chr1 260 106 0 14 0 120 0 A G 0.11667 -THYROID chr1 300 2 0 2 76 80 1 T G 0.025 -THYROID chr1 310 12 0 12 76 100 3 T G 0.12 -THYROID chr1 320 12 0 12 56 80 3 T A 0.15 -THYROID chr1 330 7 0 7 66 80 0 T G 0.0875 -THYROID chr1 340 1 0 1 98 100 1 T G 0.01 -THYROID chr1 350 11 0 11 78 100 0 T A 0.11 -THYROID chr1 400 32 0 8 0 40 2 A G 0.2 -THYROID chr1 410 1 0 2 97 100 0 T G 0.02 -THYROID chr1 420 104 0 0 0 104 1 A . 0.0 -THYROID chr1 430 30 0 0 0 30 1 A . 0.0 -THYROID chr1 440 30 0 0 0 30 1 A . 0.0 -THYROID 27 1234567890 29 0 0 0 29 1 A . 0.0 +THYROID chr1 0 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 10 30 0 2 0 32 1 A G 0.0625 0.0 +THYROID chr1 20 31 0 1 0 32 1 A G 0.03125 2.0 +THYROID chr1 30 21 0 4 0 25 2 A G 0.16 0.08013 +THYROID chr1 40 22 0 3 0 25 0 A G 0.12 1.78571 +THYROID chr1 50 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 60 31 0 0 0 31 1 A . 0.0 . +THYROID chr1 70 21 0 0 0 21 1 A . 0.0 . +THYROID chr1 80 22 0 0 0 22 1 A . 0.0 . +THYROID chr1 82 30 0 2 0 32 1 A G 0.0625 0.0 +THYROID chr1 84 31 0 1 0 32 1 A G 0.03125 2.0 +THYROID chr1 86 21 0 4 0 25 2 A G 0.16 0.08013 +THYROID chr1 88 22 0 3 0 25 0 A G 0.12 1.78571 +THYROID chr1 90 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 100 31 0 0 0 31 1 A . 0.0 . +THYROID chr1 110 21 0 0 0 21 1 A . 0.0 . +THYROID chr1 120 22 0 0 0 22 1 A . 0.0 . +THYROID chr1 210 20 0 0 0 20 1 A . 0.0 . +THYROID chr1 220 22 0 0 0 22 1 A . 0.0 . +THYROID chr1 230 182 0 18 0 200 1 A G 0.09 0.0 +THYROID chr1 240 180 0 20 0 200 2 A G 0.1 0.0 +THYROID chr1 250 178 0 22 0 200 2 A G 0.11 0.0 +THYROID chr1 260 106 0 14 0 120 0 A G 0.11667 2.4 +THYROID chr1 300 2 0 2 76 80 1 T G 0.025 0.0 +THYROID chr1 310 12 0 12 76 100 3 T G 0.12 0.0 +THYROID chr1 320 12 0 12 56 80 3 T A 0.15 0.64394 +THYROID chr1 330 7 0 7 66 80 0 T G 0.0875 1.06247 +THYROID chr1 340 1 0 1 98 100 1 T G 0.01 5.21053 +THYROID chr1 350 11 0 11 78 100 0 T A 0.11 1.25352 +THYROID chr1 400 32 0 8 0 40 2 A G 0.2 0.0 +THYROID chr1 410 1 0 2 97 100 0 T G 0.02 5.5 +THYROID chr1 420 104 0 0 0 104 1 A . 0.0 . +THYROID chr1 430 30 0 0 0 30 1 A . 0.0 . +THYROID chr1 440 30 0 0 0 30 1 A . 0.0 . +THYROID 27 1234567890 29 0 0 0 29 1 A . 0.0 .
--- a/tests/real-mit-s.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/real-mit-s.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,12 +1,12 @@ -#SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -S1 chrM 2000 1 9095 1 0 7 5808 0 1 14913 1 C A 0.00054 -S3 chrM 2000 0 7933 0 4 10 5242 1 2 13192 1 C A 0.00076 -S1 chrM 3000 17399 7 22 8 10567 35 22 4 28064 0 A G 0.00157 -S2 chrM 3000 12535 3 24 2 7937 13 12 2 20528 1 A G 0.00175 -S3 chrM 3000 18981 7 29 6 11286 33 17 4 30363 0 A G 0.00152 -S4 chrM 3000 9254 1 15 2 6240 16 14 1 15543 0 A G 0.00187 -S1 chrM 4000 6134 2 1 3 6124 1 1 1 12267 1 A T 0.00033 -S1 chrM 7000 0 17 1 6216 4 9 2 7529 13778 0 T C 0.00189 -S2 chrM 7000 0 7 2 5104 0 9 4 6288 11414 1 T C 0.0014 -S3 chrM 7000 0 9 0 6446 4 4 10 7506 13979 1 T C 0.00093 -S3 chrM 8000 3 1 5023 1 1 0 5043 2 10074 1 G A 0.0004 +#SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MAF BIAS +S1 chrM 2000 1 9095 1 0 7 5808 0 1 14913 1 C A 0.00054 2.03879 +S3 chrM 2000 0 7933 0 4 10 5242 1 2 13192 1 C A 0.00076 2.51047 +S1 chrM 3000 17399 7 22 8 10567 35 22 4 28064 0 A G 0.00157 0.51868 +S2 chrM 3000 12535 3 24 2 7937 13 12 2 20528 1 A G 0.00175 0.22864 +S3 chrM 3000 18981 7 29 6 11286 33 17 4 30363 0 A G 0.00152 0.01416 +S4 chrM 3000 9254 1 15 2 6240 16 14 1 15543 0 A G 0.00187 0.33202 +S1 chrM 4000 6134 2 1 3 6124 1 1 1 12267 1 A T 0.00033 0.99804 +S1 chrM 7000 0 17 1 6216 4 9 2 7529 13778 0 T C 0.00189 0.81221 +S2 chrM 7000 0 7 2 5104 0 9 4 6288 11414 1 T C 0.0014 0.04254 +S3 chrM 7000 0 9 0 6446 4 4 10 7506 13979 1 T C 0.00093 0.92561 +S3 chrM 8000 3 1 5023 1 1 0 5043 2 10074 1 G A 0.0004 1.00358
--- a/tests/real-mit.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/real-mit.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,12 +1,12 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -S1 chrM 2000 8 14903 1 1 14913 1 C A 0.00054 -S3 chrM 2000 10 13175 1 6 13192 1 C A 0.00076 -S1 chrM 3000 27966 42 44 12 28064 0 A G 0.00157 -S2 chrM 3000 20472 16 36 4 20528 1 A G 0.00175 -S3 chrM 3000 30267 40 46 10 30363 0 A G 0.00152 -S4 chrM 3000 15494 17 29 3 15543 0 A G 0.00187 -S1 chrM 4000 12258 3 2 4 12267 1 A T 0.00033 -S1 chrM 7000 4 26 3 13745 13778 0 T C 0.00189 -S2 chrM 7000 0 16 6 11392 11414 1 T C 0.0014 -S3 chrM 7000 4 13 10 13952 13979 1 T C 0.00093 -S3 chrM 8000 4 1 10066 3 10074 1 G A 0.0004 +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +S1 chrM 2000 8 14903 1 1 14913 1 C A 0.00054 2.03879 +S3 chrM 2000 10 13175 1 6 13192 1 C A 0.00076 2.51047 +S1 chrM 3000 27966 42 44 12 28064 0 A G 0.00157 0.51868 +S2 chrM 3000 20472 16 36 4 20528 1 A G 0.00175 0.22864 +S3 chrM 3000 30267 40 46 10 30363 0 A G 0.00152 0.01416 +S4 chrM 3000 15494 17 29 3 15543 0 A G 0.00187 0.33202 +S1 chrM 4000 12258 3 2 4 12267 1 A T 0.00033 0.99804 +S1 chrM 7000 4 26 3 13745 13778 0 T C 0.00189 0.81221 +S2 chrM 7000 0 16 6 11392 11414 1 T C 0.0014 0.04254 +S3 chrM 7000 4 13 10 13952 13979 1 T C 0.00093 0.92561 +S3 chrM 8000 4 1 10066 3 10074 1 G A 0.0004 1.00358
--- a/tests/real-nofilt.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/real-nofilt.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,15 +1,15 @@ -#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MINOR.FREQ.PERC. -THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 -THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 -THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 -THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 -THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 -THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 -THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 -THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 -THYROID chr1 246729215 1 0 1 88 90 0 T G 0.01111 -THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 -THYROID chr1 246729378 16 7 0 0 23 0 A C 0.30435 -THYROID chr1 246729392 29 0 10 0 39 0 A G 0.25641 -THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 -THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 . +THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 . +THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 . +THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 . +THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 1.36364 +THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 2.14286 +THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 . +THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 1.22996 +THYROID chr1 246729215 1 0 1 88 90 0 T G 0.01111 11.125 +THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 9.1 +THYROID chr1 246729378 16 7 0 0 23 0 A C 0.30435 . +THYROID chr1 246729392 29 0 10 0 39 0 A G 0.25641 . +THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 . +THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 1.79167
--- a/tests/real.csv.out Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/real.csv.out Tue Aug 23 02:30:56 2016 -0400 @@ -1,11 +1,11 @@ -THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 -THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 -THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 -THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 -THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 -THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 -THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 -THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 -THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 -THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 -THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 +THYROID chr1 246704250 29 0 0 0 29 1 A . 0.0 . +THYROID chr1 246704257 0 0 0 71 71 1 T . 0.0 . +THYROID chr1 246704268 104 0 0 0 104 1 A . 0.0 . +THYROID chr1 246704269 0 0 0 105 105 1 T . 0.0 . +THYROID chr1 246704363 0 72 3 0 75 0 C G 0.04 1.36364 +THYROID chr1 246704437 5 130 0 0 135 0 C A 0.03704 2.14286 +THYROID chr1 246707878 0 0 131 0 131 1 G . 0.0 . +THYROID chr1 246714587 30 0 43 0 73 2 G A 0.41096 1.22996 +THYROID chr1 246729216 1 0 1 90 92 0 T G 0.01087 9.1 +THYROID chr7 91502881 0 0 0 26 26 1 T . 0.0 . +THYROID chr7 91502897 7 36 0 0 43 0 C A 0.16279 1.79167
--- a/tests/run-tests.py Wed Dec 09 11:37:02 2015 -0500 +++ b/tests/run-tests.py Tue Aug 23 02:30:56 2016 -0400 @@ -3,6 +3,7 @@ import sys import subprocess +SCRIPT_NAME = 'allele-counts.py' DATASETS = [ 'artificial', 'artificial-samples', @@ -16,15 +17,52 @@ OUT_EXT = '.csv.out' ARGS_KEY = '##comment="ARGS=' +XML = { + 'tests_start':' <tests>', + 'test_start': ' <test>', + 'input': ' <param name="input" value="tests/%s" />', + 'param': ' <param name="%s" value="%s" />', + 'output': ' <output name="output" file="tests/%s" />', + 'test_end': ' </test>', + 'tests_end': ' </tests>', +} +PARAMS = { + '-f':'freq', + '-c':'covg', + '-H':'header', + '-s':'stranded', + '-n':'nofilt', + '-r':'seed', +} +PARAM_ARG = { + '-f':True, + '-c':True, + '-H':False, + '-s':False, + '-n':False, + '-r':True, +} + def main(): - test_dir = os.path.dirname(os.path.relpath(sys.argv[0])) - if test_dir: - test_dir += os.sep + do_print_xml = False + if len(sys.argv) > 1: + if sys.argv[1] == '-x': + do_print_xml = True + else: + sys.stderr.write("Error: unrecognized option '"+sys.argv[1]+"'\n") + sys.exit(1) + + test_dir = os.path.dirname(os.path.realpath(__file__)) + script_dir = os.path.relpath(os.path.dirname(test_dir)) + test_dir = os.path.relpath(test_dir) + + if do_print_xml: + print XML.get('tests_start') for dataset in DATASETS: - infile = test_dir+dataset+IN_EXT - outfile = test_dir+dataset+OUT_EXT + infile = os.path.join(test_dir, dataset+IN_EXT) + outfile = os.path.join(test_dir, dataset+OUT_EXT) if not os.path.exists(infile): sys.stderr.write("Error: file not found: "+infile+"\n") @@ -34,11 +72,51 @@ continue options = read_options(infile) - script_cmd = 'allele-counts.py '+options+' -i '+infile - bash_cmd = 'diff '+outfile+' <('+script_cmd+')' - # print infile+":" - print script_cmd - subprocess.call(['bash', '-c', bash_cmd]) + if do_print_xml: + print_xml(infile, outfile, options, XML, PARAMS, PARAM_ARG) + else: + run_tests(infile, outfile, options, script_dir) + + if do_print_xml: + print XML.get('tests_end') + + +def run_tests(infile, outfile, options, script_dir): + script_cmd = os.path.join(script_dir, SCRIPT_NAME)+' '+options+' -i '+infile + bash_cmd = 'diff '+outfile+' <('+script_cmd+')' + print script_cmd + subprocess.call(['bash', '-c', bash_cmd]) + + +def print_xml(infile, outfile, options_str, xml, params, param_arg): + infile = os.path.basename(infile) + outfile = os.path.basename(outfile) + + options = options_str.split() # on whitespace + + print xml.get('test_start') + print xml.get('input') % infile + + # read in options one at a time, print <param> line + i = 0 + while i < len(options): + opt = options[i] + if not params.has_key(opt) or not param_arg.has_key(opt): + sys.stderr.write("Error: unknown option '"+opt+"' in ARGS list in file " + +infile+"\n") + sys.exit(1) + # takes argument + if param_arg[opt]: + i+=1 + arg = options[i] + print xml.get('param') % (params[opt], arg) + # no argument (boolean) + else: + print xml.get('param') % (params[opt], 'true') + i+=1 + + print xml.get('output') % outfile + print xml.get('test_end') def read_options(infile):