Repository 'nanopolish_variants'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/nanopolish_variants

Changeset 5:63af3144371a (2020-05-29)
Previous changeset 4:de5b3d8f5b90 (2019-06-23) Next changeset 6:3185b305255a (2021-05-07)
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/nanopolish commit dff183f4eb2d3df42917ec4fed0fbdb2ea11e19a"
modified:
macros.xml
nanopolish_variants.xml
test-data/methylation_calls.tsv
test-data/polished.fa
test-data/t2-polished.fa
test-data/t2-variants.vcf
test-data/t3_polished.fa
test-data/t3_variants.vcf
test-data/t4_polished.fa
test-data/t4_variants.vcf
test-data/variants.vcf
added:
test-data/all_fasta.loc.test
b
diff -r de5b3d8f5b90 -r 63af3144371a macros.xml
--- a/macros.xml Sun Jun 23 06:04:27 2019 -0400
+++ b/macros.xml Fri May 29 13:29:14 2020 -0400
b
@@ -1,7 +1,8 @@
 <macros>
+    <token name="@VERSION@">0.13.2</token>
     <xml name="requirements">
         <requirements>
-        <requirement type="package" version="0.11.1">nanopolish</requirement>
+        <requirement type="package" version="@VERSION@">nanopolish</requirement>
             <yield/>
         </requirements>
     </xml>
b
diff -r de5b3d8f5b90 -r 63af3144371a nanopolish_variants.xml
--- a/nanopolish_variants.xml Sun Jun 23 06:04:27 2019 -0400
+++ b/nanopolish_variants.xml Fri May 29 13:29:14 2020 -0400
[
@@ -1,4 +1,4 @@
-<tool id="nanopolish_variants" name="Nanopolish variants" version="0.11.1">
+<tool id="nanopolish_variants" name="Nanopolish variants" version="@VERSION@+galaxy0">
     <description>- Find SNPs of basecalled merged Nanopore reads and polishes the consensus sequences</description>
     <macros>
         <import>macros.xml</import>
@@ -167,7 +167,18 @@
             <param name="ref_file" value="draft_single_seq.fa" />
             <param name="w" value="tig00000001:200000-202000" />
             <output name="output_polished" file="polished.fa" />
-            <output name="output_variants" file="variants.vcf"/>
+            <output name="output_variants">
+              <assert_contents>
+                <has_text text="TotalReads" />
+                <has_text text="AlleleCount" />
+                <has_text text="SupportFraction" />
+                <has_text text="200061" />
+                <has_text text="200776" />
+                <has_text text="201588" />
+                <has_text text="tig00000001" />
+                <has_n_lines n="27" />
+              </assert_contents>
+            </output>
         </test>
         <test>
             <param name="input_merged" ftype="fasta" value="reads.fasta" />
@@ -177,7 +188,18 @@
             <param name="ref_file" value="draft_single_seq.fa" />
             <param name="w" value="tig00000001:200000-202000" />
             <output name="output_polished" file="t3_polished.fa" />
-            <output name="output_variants" file="t3_variants.vcf"/>
+            <output name="output_variants">
+              <assert_contents>
+                <has_text text="TotalReads" />
+                <has_text text="AlleleCount" />
+                <has_text text="SupportFraction" />
+                <has_text text="200061" />
+                <has_text text="200776" />
+                <has_text text="201588" />
+                <has_text text="tig00000001" />
+                <has_n_lines n="27" />
+              </assert_contents>
+            </output>
         </test>
         <test>
             <param name="input_merged" ftype="fasta" value="reads.fasta" />
@@ -187,7 +209,18 @@
             <param name="ref_file" value="draft_single_seq.fa" />
             <param name="w" value="tig00000001:200000-202000" />
             <output name="output_polished" file="t4_polished.fa" />
-            <output name="output_variants" file="t4_variants.vcf"/>
+            <output name="output_variants">
+              <assert_contents>
+                <has_text text="TotalReads" />
+                <has_text text="AlleleCount" />
+                <has_text text="SupportFraction" />
+                <has_text text="200061" />
+                <has_text text="200776" />
+                <has_text text="201588" />
+                <has_text text="tig00000001" />
+                <has_n_lines n="27" />
+              </assert_contents>
+            </output>
         </test>
         <test>
             <param name="input_merged" ftype="fasta" value="reads.fasta" />
@@ -203,7 +236,16 @@
             <param name="consensus" value="false" /> 
             <param name="min_flanking_sequence" value="10" />
             <output name="output_polished" file="t2-polished.fa" />
-            <output name="output_variants" file="t2-variants.vcf"/>
+            <output name="output_variants">
+              <assert_contents>
+                <has_text text="TotalReads" />
+                <has_text text="AlleleCount" />
+                <has_text text="SupportFraction" />
+                <has_text text="tig00000001" />
+                <has_text text="198000-202000" />
+                <has_n_lines n="15" />
+              </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/all_fasta.loc.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc.test Fri May 29 13:29:14 2020 -0400
b
@@ -0,0 +1,1 @@
+draft draft draft ${__HERE__}/draft.fa
\ No newline at end of file
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/methylation_calls.tsv
--- a/test-data/methylation_calls.tsv Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/methylation_calls.tsv Fri May 29 13:29:14 2020 -0400
b
b'@@ -1,316 +1,4 @@\n chromosome\tstrand\tstart\tend\tread_name\tlog_lik_ratio\tlog_lik_methylated\tlog_lik_unmethylated\tnum_calling_strands\tnum_motifs\tsequence\n-tig00000001\t+\t191153\t191157\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-6.31\t-130.59\t-124.28\t1\t2\tTATTACGACCGCTGA\n-tig00000001\t+\t191181\t191181\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.75\t-101.63\t-100.88\t1\t1\tTTTGGCGTTGA\n-tig00000001\t+\t191196\t191215\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-11.45\t-223.51\t-212.05\t1\t3\tCAGTGCGGCAAACAGCGGATAGAACGGGCT\n-tig00000001\t+\t191229\t191229\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-5.42\t-101.51\t-96.09\t1\t1\tGGAGGCGTGCA\n-tig00000001\t+\t191244\t191244\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-2.28\t-82.87\t-80.59\t1\t1\tAAAGGCGTTGT\n-tig00000001\t+\t191255\t191273\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-9.25\t-167.61\t-158.35\t1\t3\tTCATGCGTTTGTGCGGTACATAACGCTGT\n-tig00000001\t+\t191354\t191354\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-4.06\t-98.18\t-94.12\t1\t1\tGATTGCGTAAC\n-tig00000001\t+\t191369\t191374\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-1.70\t-119.73\t-118.02\t1\t2\tATACCCGGATCGTTCT\n-tig00000001\t+\t191399\t191420\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-14.84\t-212.15\t-197.31\t1\t4\tAACAGCGGCGAACAGTCCGCCATCATCGGAAT\n-tig00000001\t+\t191440\t191440\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-2.39\t-97.48\t-95.10\t1\t1\tATAGCCGACCC\n-tig00000001\t+\t191502\t191524\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-7.13\t-196.90\t-189.78\t1\t5\tCTTGGCGGGCGTTATAAATCGTACCGTCGTAGG\n-tig00000001\t+\t191548\t191562\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t2.03\t-164.90\t-166.93\t1\t3\tCACAGCGAGGCGGAAAGGACGAGCC\n-tig00000001\t+\t191576\t191595\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-16.47\t-198.51\t-182.04\t1\t5\tTTGCCCGCTGCGGTGCGACTTCCGCGATCA\n-tig00000001\t+\t191606\t191606\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.43\t-106.24\t-105.81\t1\t1\tGCTCACGCAGG\n-tig00000001\t+\t191632\t191632\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-5.03\t-101.77\t-96.74\t1\t1\tCAGTGCGCATC\n-tig00000001\t+\t191646\t191646\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.52\t-102.26\t-101.75\t1\t1\tGCCACCGATAA\n-tig00000001\t+\t191659\t191671\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-6.16\t-155.98\t-149.82\t1\t3\tCCATACGGGTTACGTGCCGTTTC\n-tig00000001\t+\t191686\t191686\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-1.12\t-84.86\t-83.73\t1\t1\tTAAACCGGTGT\n-tig00000001\t+\t191710\t191715\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-1.66\t-141.17\t-139.51\t1\t2\tAGCAACGCTCCGTGGT\n-tig00000001\t+\t191741\t191741\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-2.49\t-76.49\t-74.01\t1\t1\tTATTGCGATCA\n-tig00000001\t+\t191764\t191764\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-2.28\t-106.49\t-104.21\t1\t1\tTCACCCGGTGT\n-tig00000001\t+\t191778\t191778\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-8.95\t-111.16\t-102.21\t1\t1\tCAGGGCGTTTA\n-tig00000001\t+\t191820\t191820\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-1.97\t-129.21\t-127.24\t1\t1\tTAAAACGAAGT\n-tig00000001\t+\t191835\t191835\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.83\t-101.24\t-100.41\t1\t1\tTTTATCGGCAT\n-tig00000001\t+\t191854\t191854\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-3.07\t-94.72\t-91.65\t1\t1\tTTTGCCGCATG\n-tig00000001\t+\t191878\t191886\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-11.48\t-176.19\t-164.71\t1\t3\tCATGGCGCGCCTTCGTGAA\n-tig00000001\t+\t191901\t191919\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-5.54\t-179.90\t-174.37\t1\t5\tCAGATCGCCCATCGCTACGTCGGCGTTGC\n-tig00000001\t+\t191931\t191944\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-19.26\t-177.78\t-158.53\t1\t3\tCAAGTCGGCACGGAACAGCGCCTC\n-tig00000001\t+\t191977\t192002\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-8.41\t-244.04\t-235.63\t1\t6\tTTCCCCGCCGGATGGCGACGGAAAATTCGCCGCCCT\n-tig00000001\t+\t192027\t192029\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.80\t-105.66\t-104.86\t1\t2\tTCAAACGCGCTGT\n-tig00000001\t+\t192053\t192064\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-2.40\t-160.39\t-157.99\t1\t3\tATAATCGACCAGTGCGCGGAAG\n-tig00000001\t+\t192079\t192079\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-7.16\t-99.97\t-92.81\t1\t1\tGTGGGCGCAGT\n-tig00000001\t+\t192107\t192107\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.87\t-96.22\t-95.35\t1\t1\tGGCAGCGGTTT\n-tig00000001\t+\t192121\t192121\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-0.94\t-100.94\t-100.00\t1\t1\tACTGGCGACCA\n-tig00000001\t+\t192136\t192146\td57afb7d-903e-46cf-a43d-0e17fb0949d8\t-4.34\t-143.10\t-138.77\t1\t4\tATTCTCGTCGCGATTCGC'..b'7cd320d229a4\t-1.02\t-218.40\t-217.38\t1\t4\tCATCACGCGCATCACCACGCCGGATG\n-tig00000001\t-\t202851\t202854\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.97\t-151.89\t-150.93\t1\t2\tGGCATCGGCGTTTT\n-tig00000001\t-\t202884\t202903\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-3.39\t-239.50\t-236.10\t1\t3\tGTTAACGTCAGCCACCGGGCCTGACGGGCT\n-tig00000001\t-\t202916\t202919\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-5.57\t-158.84\t-153.27\t1\t2\tAAATACGCCGGGAA\n-tig00000001\t-\t202940\t202940\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.01\t-137.40\t-137.40\t1\t1\tGGGGCCGTCTG\n-tig00000001\t-\t202966\t202985\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.70\t-224.43\t-223.73\t1\t4\tCCTGACGGCGATATCACCCGCTACCGTTAT\n-tig00000001\t-\t203017\t203022\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t0.16\t-138.31\t-138.47\t1\t2\tCCCTGCGCAACGGAAG\n-tig00000001\t-\t203035\t203042\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.21\t-179.23\t-179.03\t1\t2\tGCCACCGGCAGCCGGAAA\n-tig00000001\t-\t203055\t203068\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t3.76\t-206.83\t-210.60\t1\t3\tCATGACGTGGAGCCGTTACGGTCA\n-tig00000001\t-\t203098\t203098\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t1.17\t-125.07\t-126.23\t1\t1\tTGTTCCGGTTA\n-tig00000001\t-\t203111\t203111\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.97\t-148.33\t-147.36\t1\t1\tTAACCCGCTAT\n-tig00000001\t-\t203126\t203126\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-7.02\t-103.83\t-96.81\t1\t1\tATGACCGTTTT\n-tig00000001\t-\t203142\t203155\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-6.70\t-214.24\t-207.54\t1\t4\tGGTGACGGCGGTGCACCGCGAGGA\n-tig00000001\t-\t203177\t203192\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.92\t-216.14\t-215.23\t1\t4\tAGTACCGCGCATACGACAGCCGTGGA\n-tig00000001\t-\t203209\t203209\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-2.79\t-123.73\t-120.94\t1\t1\tATTGCCGTGAA\n-tig00000001\t-\t203220\t203220\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-1.36\t-114.22\t-112.86\t1\t1\tAGACACGCAGG\n-tig00000001\t-\t203235\t203237\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-4.35\t-124.77\t-120.42\t1\t2\tTGAAACGCGGTAT\n-tig00000001\t-\t203251\t203257\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-5.88\t-153.96\t-148.09\t1\t3\tTACAACGCCGCCGGTGA\n-tig00000001\t-\t203272\t203272\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-1.72\t-105.43\t-103.71\t1\t1\tACCACCGTCAT\n-tig00000001\t-\t203283\t203287\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t0.27\t-130.94\t-131.20\t1\t2\tTGCCCCGGACGGCAG\n-tig00000001\t-\t203299\t203299\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.22\t-118.31\t-118.09\t1\t1\tAGAAACGGGAC\n-tig00000001\t-\t203311\t203316\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-10.31\t-125.90\t-115.59\t1\t2\tCAGTACGATGCGTGGG\n-tig00000001\t-\t203340\t203357\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-11.70\t-169.31\t-157.61\t1\t4\tTACCACGCAGGGCGGTCTGACGCGCAGT\n-tig00000001\t-\t203371\t203393\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.98\t-246.59\t-245.61\t1\t4\tGAATACGATGCTGCCGGACGGGTCATCCGCCTG\n-tig00000001\t-\t203410\t203410\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t3.62\t-109.90\t-113.52\t1\t1\tGAAAACGGCAG\n-tig00000001\t-\t203429\t203447\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-12.27\t-220.57\t-208.29\t1\t4\tCCTTCCGTTACGATGTACTCGACCGGCTG\n-tig00000001\t-\t203464\t203486\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-2.89\t-240.35\t-237.45\t1\t4\tGAAACCGGCTTTGACGGCCGCACACAGCGTTAT\n-tig00000001\t-\t203497\t203506\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-3.73\t-154.07\t-150.34\t1\t2\tCACCACGACCTGACCGGCAA\n-tig00000001\t-\t203519\t203524\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-7.21\t-149.81\t-142.60\t1\t2\tTTATCCGCAGCGAGGA\n-tig00000001\t-\t203560\t203609\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-7.10\t-430.95\t-423.84\t1\t8\tTATGACGAAGCAGACCGCCTCACGCACCGCACCGTGAATGGCGAAACCGCAGAGCGGTGG\n-tig00000001\t-\t203623\t203629\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-1.64\t-163.12\t-161.49\t1\t3\tTATGACGAACGCGGCTG\n-tig00000001\t-\t203659\t203676\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.40\t-230.02\t-229.62\t1\t3\tATCAGCGAAGGGCACCGGGTGACGGTGC\n-tig00000001\t-\t203705\t203710\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-7.62\t-166.05\t-158.43\t1\t2\tAAGGCCGCCTCGCCAG\n-tig00000001\t-\t203727\t203727\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.50\t-190.77\t-190.27\t1\t1\tCCTGACGGTGC\n-tig00000001\t-\t203739\t203745\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t-0.18\t-205.41\t-205.23\t1\t2\tTCATCCGCAGACGAATG\n-tig00000001\t-\t203781\t203788\t0eb7ac67-e215-4aa1-958a-7cd320d229a4\t0.12\t-163.10\t-163.22\t1\t2\tACATGCGTACAACGCACA\n'
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/polished.fa
--- a/test-data/polished.fa Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/polished.fa Fri May 29 13:29:14 2020 -0400
b
b'@@ -1,2 +1,2 @@\n >tig00000001\n-AGATGCTTTGAAAGAAACGCAGAATAGATCTCTATGTAATGATATGGAATACTCTGGTATTGTCTGTAAAGATACTAATGGAAAATATTTTGCATCTAAGGCAGAAACTGATAATTTAAGAAAGGAGTCATATCCTCTGAAAAGAAAATGTCCCACAGGTACAGATAGAGTTGCTGCTTATCATACTCACGGTGCAGATAGTCATGGCGATTATGTTGATGAATTTTTTCAAGTAGCGATAAAATCTTGTAAGAAGTAAAGATAATAATCTTGAAGCATTTTATCTCGCAACACCTGATGGACGATTTGAGGCGCTTAATAATAAAGGAGAATATATTTTATCAGAAATAGTGTCCCGGGATTGAGTTCAGTATGCATACCGTATCATGATTAATTTTAGTGCTTTTATTAGTGGGGCCTATAGGAGATTCAATGAAATATAGTTCAATATTTTCGATGCTTTCATTTTTATACTATTTGCCTGTAATGAGACAGCTGTTTACGGTTCTGATGAAAACATTATTTTTATGAGGTATGTGGAAAAATTACATTTAGATAAATACTCTGTTAAAAATACGGTAAAACTGAAACAATGGCGATACAATTAGCTGAAATATATGTTAGGTATCGCTATGGCGAACGGATTGCAGAAGAAAAACCATATTTAATTACGGAACTACCAGATAGTTGGGTTGTTGAGGGAGCAAAGTTACCTTATGAAGTTGCGGGTGGTGTATTTATTATAGAAATTAATAAGAAAAATGGATGTGTTTTGAATTTCCTACATAGTAAATAATGCTGGCGCTGATGGATGCGGATGGAAACATTGCGTGGAGCGGGGAGTATGATGAGTGGGGCAACCAGCTGAATGAAGAGAACCCGCATCACCTGCACCAGCCGTACCGGCTGCCGGGGCAGCAGTATGATAAGGAGTCGGGGCTGTACTACAACCGGAACCGGTACTACGATCCGTTGCAGGGCGGTATATCACCAGGACCCGATAGGGCTGAGGGGGATGGAGTCTGTATGCGTATCCGCTGAATCCGGTGAATGGTATTGATCCATTAGGTTAAGTCCCGCAGATGTAGCGCTAATAAGAAGAAAAGATCAACTAAACCATCAAAGAGCATGGGATATATTATCTGATACTTATGAAGATATGAAGAGATTAAATTTAGGTGGGACTGATCAATTTTTCCATTGTATGGCATTTTGTCGAGTGTCTAAATTAAATGACGCTGGTGTTAGCCGATCGGCGAAAGGGCTGGGTTATGAAAAAGAGATTAGAGATTACGGGTTAAATCTGTTCGGTATGTACGGCAGAAAAGTAAAGCTATCCCATTCTGAAATGATTGAAGATAATAAAAAGACTTGGCTGTAAATGACCATGGGTTGACATGTCCATCAACAACAGATTGCTCAGATAGATGTAGTGATTATATTAATCCAGCATAAAAAACGATAAAGGCTTTACAAGATGCTGGCTATCTCAAGTAATCTATCAAAGATGATAATATTTATTTTTGCTATTATAATCATTGTTGTTTTATGCGTAATTACTTATCTTTATTTATACAAAGATGAATCTCTTGTAAGTAAACATTACATAAACTATATGGCAATACCAGAAAATGATGGAGTTTTTACATGGCTCCCAGATTTTTTCCGCACGTAGCGGTGGATATATCAATATACACAAATGTAGAAGATGATTATTTTTTCTTATTTTCCCTAACAAATGATGATGGGGTAGGTTTAAGAAAACATTGACAGTGAGGGCCAGGGAACAAGTGGCGAAAATCGTATCAAAGAATGATCCAGATACAAAAAAGTGTGGTGTAAATATGGTAAGATACCAGGGCAAGGGATGGTGTAAACCTTTTTTGTTGGTGAAATTAATGTTACGCATTATTTTATAACAAATATTGGAGCTGGATTGCCTGATGCTTGTGCAGAGTAATTGCTTGAATTAAGAGTCTATCCCATATCGAAGTCGTCAACTTCGTAGTGAGGAAAAGTAAAATTCCTGACTGAGAAAAGACATGTCGGCTATTGTGTAAAGCCATATAGCTCAGACGATGAATATCTACTCGTATTCAGTTGTTTATTGAGGGTGAGTTCCGACCCTGAAACAACAAATAAAATGAACAGTCAGAGAGTTTACATAGAATTGCACTGGTCTTTTACGATATCTGACATTGTGTAATACATATTCAGCCATGCATTAATTAAAATGTTACGTGTTTAATGTGAGTCCCTATCTGAAAATAAATAATCCTTCCGGATTAAAATAAATTCTTGCCGGGAAAGAAAGAGGAAATAAACCATTAGCGGAAAACCAGCGGCACGCCAGGGTGACATGCCCCAGTACGGCGGCCCGATAGTCCAGGGTTCAGCGGGGTACTGATAGTCGCGCCGACTGCATCGAGAACTCCATACAACTTTTACGGAACGTCACCTGATAAAATTTGTTCAGTATCGTCTTTGAGATTCTCAAGGTTATGGAGTAACCAGACAGAACGGACACCACTGTCGGAGATAAAACCCCTGTTTACGCAGCTCGTTGCTGGCCCGGTGCTGACCATGTGTCGGGAAAGCGACGGCGTAATCAACAACAGCCTGTTCAGTTGCCTCATCGGTACGGTTCTTAAGGTTAGGAGCGCGGCGACTACGATTAATCTGCGCATCCACACCGCCTTCAGCGACCAGTTCGCGGTAACGATAAAACGTATCACGCGAAACGCCCATGATTTTACAGGCTTTTGATACGTTGCTGAGTTCTTCAGCCAGATTGAGCAAACCGGCTTTGTGTTTGATGACGGGATTGGCAGTATGAAGCATGAGAGTTACCTCTTGTTTTGGATAAGGATTCGACACTCATATCAAAACCGGTAACTCTCAACCTTTCAAGGCCATGTGTCAGATCAAGTCGCGACTAATACAAATACGTCCCTCATTACCGCGCCTTAACCCATTCCGCCACTTCCGCCCACTCACCGCGAAAGACAACTTTTTCCGCTTTTTCTCAAGCTGATAGCGATACATCGGGTCGTAATATTCTTCAAGTAACGGCACCAGCCAGGCCAGATGACCGTCGGTGCTGCCGGTGGTGAGTTGCGTTGTCAGTGCTGCATCCAGCCTTGCAGCCAGTTCGTTATAGCGCTGTAGCCCCAGCCGACGCTTAATCGCCGAAAGTCCGTGATGCAGGTATTCGCAATACTCCTGCCAGCCCTGTTCGTCGCCGTACGCGTGGGTAAAATCATGATGCATACGCAAGAAATACTCTTCGTTCAGGCGCTCAAGACGGATCTCAAACGGATCTTCTACCACCGCAATCGCCGCCTGAGTCATTCGCTCGCGCAGGCATTCCGGCAGGTGATTCGAACCGATCATCCGGCTTTCGTCTTCCAGCACCCACAGGCGCAAATTCTGACGGGCGTCGGTTTTAGCATTTCGGCAGCCAGCAGGTTTTCAAAACTCGCCTGGCTAAGTTGTGGTTGTAACGTGCGACCAAACGCCGAACCGCGATGACGCGCCAACCCTTCCAGATCAACACCGTTCGGCTGTTGCTGCACTAACAGCGTTTTACCGCTGCCGGTACAACCGCCAATCAGCACTATCGGTTTTGTGCCAGTTCAATAGTCGCCTGAATCGCGGTCTGGCGCAGTGCCTTATAACCGCCTTCCACCAGCGGATAATCAATCCCCGCTGCATGCAACCAGCTTTGCACAATATGTGAGCGCTGACCGCCACGGGCGCAGCAGAGAATACCTTGCGGATTTTGCAGGCACGCTGCCCGCCAGGCGTCCATGCGCTGCTGACGAATTTCACCCGCCACCAGTTTATGTCCCAGCGCCAGCGCTGCGTCTGAGCCTTGCTGTTTATAGCAGGTGCCAACGGCGGCGCGTTCATCGTTATTCATTAACGGCAGATTGATAGCGGCGGGCATTGCGCCGTGCTCAAACTCG'..b'GGTAAAGCCTTTGTGAATATTGATGGTGAGCACCTTGAACGAAAATTGTTGTGTTTGATCGGGCATAATTTTCCTGTCTTTGCCTCTTATCTCATTGAAATAGTGTAGTCGGCGTCACAAAAGGTGCGGTCTTACGGAATTTTCCGTAAAGTTCGGTACTCTGAGTAAGTAGAGATAAATTCTTCAGGAGAGAAGCCATGAAGTGGCAACAACGTGTTCGTGTCGCAACGGGTCTAAGTTGCTGGCAGATTATGTTGCATTTACTGGTAGTGGCGCTGCTGGTGGTGGGCTGGATGAGTAAGACTCTGGTTCACGTCGGCGTGGGATTATGCGCACTGTATTGTGTCACGGTAGTGATGATGCTGGTGTTTCAGCGCCACCCCGAGCAACGCTGGCGTGAGGTGGCAGACGTGCTGGAAGAGCTGACCACGACCTGGTATTTTGGCGCAGCGCTGATTGTGCTGTGGCTGTTGTCCCGCGTTCTGGAAAACAACTTTTGCTGGCAATTGCAGGGCTGGCAATCCTTGCCGGCCCGGCGGTAGTGTCTTTGCTGGCGAAAGATAAGAAGTTACATCACCTTACGTCTAAACATCGCGTACGCCGCTGACCCTGTCGTGGCCGTTATCACCAGTAGCGGCCACAAACTATTCCACACAATATCCAGACTCGCATCCTTCAAATAAATCTGCTTGGTAATGTCCGTAAAGTGGCGAATAGGGTTAATCCACGTCAGGTTTTGCAGCCATACCGGCATGTTTTCCACCGGAGAAACGTAACCGGAAAGGAGAATGGCGGGCATCATAAAGACAAACACGCCGATAAACGCCTGCTGTTGTGTTGAACAGAGTGATGAAATCAACAGACCGAATCCCACCAGCGATAAACCATAAATCACCATCGTAAAGTAGAACAGCGCCAGCGATCCGGCGAAGGGATTTGATACGCCCAGATACCAATCGCCAGCACAATGGTGGCCTGGAAGGTGGCGACAATTAACGCCGGTACGGCTTTGCCGATGAAGATCTGCCAGGTGGTGAGCGGCGAAACCAGTAGCTGATCGAGCGTACCTTGTTCACGTTCGCGGGCGACGGAAAGTGAAGTGACGATCATTACGCCGATAGTGGTGATCATGGCGATCAGTGACGGCACCACAAACCATTTGTAGTCGAGATTCGGGTTATACCAGTTGCGTACCACCAGCTCGCTGTTGTTAGGTTTCGGTTTTCCTTCCAGCAGCTCCTGCTGATAATTTTGACGATCTGTTGCAGGTAGTTGGCGGCAATTTGCGCACTGTTGGAGTTACGCCCGTCGAGGATCAACTGCAAAGGCGCGGTCTGGAAGGTATCCAGTTTGCGCGAGAAGTCAGCCGGGAAACGCACCAGTAGTAACGCCTTTGTGTGTCGATGGTTGGGCGGATCTCCTGTGGGCTTTTCAGCAGCAGCACATGAGTAAAGGCGCTGGCGCGGGCAAAACGTTGGGTCAGCTCCACCGAATGCTCGCCGTTATCTTCATCGTAGATGGCGATGGTGGCGTTAGTCACTTCCAGCGTGGCGGCGAACGGGAACAGGATCACCTGAATTAGCACGGGTAAAATCAGAATCGCGCGGGTTTGCGGTTCGCGCAGCAACGACTGCAACTCTTTGCGGATTAACGTCCATAAGCGATGAAACATGCTCTTCTCCTAATCCAGCCGACGTTTGGTTTTCAGCCACGTCAGGCCGATAAACATCACCGCCGAAGCGATCAAAAACAGCACGTTTACCACCAGCACCACTGGAATATTCCCGGCGAGGAACAGGCTTTGCAGGGTGCTGACGAAATAACGAGCGGGAATAATGTACGTCACCGCGCGGATCACCGCGGGCATACTGTCGATCTGAAAAATAAAGCCGGAAAGCATAATCGACGGCAGAAAAGCGGCGTTCAGGGCGACCTGAGCGGCATTGAACTGGTTGCGGGTAATCGTGGAAATCAGCAGCCCCATCCCCAGGGTACTGAGTAAAACAGGCTGGAGATAAAAACAGAATCAGCAGCGACCCGCGATACGGCACGCCGAGAATAAACACTGACACCAGCATACACAGCAACATCGCCAGCATCCCGAGAAAGTAATAAGGGATCAGCTTACACAGCAGCAGTTCCGTGCGGGTAATCTCCGTAGAGAGCAGAGCCTCCATGGTGCCGCGTTCCCATTCTCGCGCCACCACCAGCGAGGTGAGAATCGCGCCGATGACCGTCATGATAATGGTCACCGCACCGGGGATAATGAAGTGCTGGCTAATCGCCGCCGGGTTAAACCAGTAGCGGGTTTGTACATCAATAAGCGGTTCAAAAGTCTGCCCGTTGTCCTCCGCTCGCTGCATTTGCCAGATCTGCCAGATCCCTTCGACATACCCTGTACAAAGTTAGCGGTATTCGGCTCACTGCCGTCGGTGATCACCTGAATCGGTGCGGTGGCGTTGGCGCGCTCCATCTGTTCCGCAAAATCCACCGGAATAACCACCAGACCGCGAATTTTCCCCGCCTGCATTTTGGCGATCAGTTCCTGACGGTTATCGCTGATGGTGGCGTCGATGTAGGGCGAACCGGTCATGGTGTGGTGAAATCCAGCGCCGCTTCGCTACGCTGTTCCAGTAAAATCCCGACCCGCAGCTTGCTGGAGTCGAGGTTAATGCCGTAACCAAAATAAACAGCAGTAGCAGCGGGATCACTACCGCAATCAGCCAGCTACTCGGATCGCGAACGATCTGCCGCGTCTCTTTAACGCACAGCGCCCGTACGCGACGCCAGGACAGGATCGGGTTACTCATTGCTATGCTCCTTATCCCAGTCGTGGATCAACTGAATAAAGGCTTGCTCCATGGTGGGATCGGGTTGCTCATCGTTAGCCGACTGTGCTTTCAAATCGTCCGGCGTGCCGCTGGCGATTAATTTCCCGCGGTACACCAGGCCGATGCGGTCGCAATATTCCGCTTCATCCATAAAGTGGGTGGTGACCATCACCGTGACGCCTTTCTCTACCATGCTGTTGATGTGCAGCCAAAATTCACGGCGGGTGAGGGGTCAACGCCGGAAGTCGGTTCGTCGAGAAACAGAATGTCCGGTTCATGCATCAGCGAACAGGCCAGCGCCAGCCGCTGTTTAAAACCTAATGGCAGTTCATCGGTGGCGTGGGAGGCGATACTTTTCAGGCCGAACGCCTCGCTCATGCGGGAGATTTTTCGTTCTGCGCCCGACCGCGTAAGCCATACACACCAGAGAAAAAGCGTAAATTCTGTTCGACCGTCAGGTTACCGTAGAGCGAAAATTTTTGCGCCATATAGCCGAGATGCTGGCGCGCTTTACCGGAACTCTCTTTCAGATCCATCCCCAGCACCAGCGCCTGGCCGGAAGTCGGCACCAGCAAACCGCACATCATCTTAAAGGTGGTCGATTTACCCGCGCCGTTTGGCCCCAGCAAACCAAAAATCTCCCCACGTTTAACGGCAAAGTTGACGTGATCGGTGGCGGCAAAATCCCCAAATTTCTTGGTCAGTTCTTTCGCTTCGATCACCGTCTCGCCGGGTGTGCCTTCTACCGTATGTAATATTGCGCCCAGCGGCGATTCCGAGGTTCCGGCACCGCCCAGCAAATCAATAAACGCATCTTCAAAACGCGGCGTAGTTTCGTTGATGTTGATTTCCGGCATCCCGTCGGCATGGCGAATATCGTCTGGTGTGGCCTCTTTTTGAGGATCAGACGTACCGATTTCCCCTGAATCATGCCGTCGCTGACCTGCGGCAGTTTCAAGGCGCGTTGCAACAGTTTGCGGTTGCCCTCGTGTGGACTGGTCATCAGAAAGCTGCGTCCGGCCATGGTTTGTGTCAGGGCTTTTGGTTCTCCCTGATACAGCAACTCGCCTTCGTTCATCAGTAACACGTCACGGCACTGCTCGGCTTCGTCGAGATACGAGGTACTCCAGAGGATTAACATCCCTTCGCCCGCCAGCTCATGCACCA\n'
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/t2-polished.fa
--- a/test-data/t2-polished.fa Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/t2-polished.fa Fri May 29 13:29:14 2020 -0400
b
b'@@ -1,2 +1,2 @@\n >tig00000001\n-AGATGCTTTGAAAGAAACGCAGAATAGATCTCTATGTAATGATATGGAATACTCTGGTATTGTCTGTAAAGATACTAATGGAAAATATTTTGCATCTAAGGCAGAAACTGATAATTTAAGAAAGGAGTCATATCCTCTGAAAAGAAAATGTCCCACAGGTACAGATAGAGTTGCTGCTTATCATACTCACGGTGCAGATAGTCATGGCGATTATGTTGATGAATTTTTTCAAGTAGCGATAAAATCTTGTAAGAAGTAAAGATAATAATCTTGAAGCATTTTATCTCGCAACACCTGATGGACGATTTGAGGCGCTTAATAATAAAGGAGAATATATTTTATCAGAAATAGTGTCCCGGGATTGAGTTCAGTATGCATACCGTATCATGATTAATTTTAGTGCTTTTATTAGTGGGGCCTATAGGAGATTCAATGAAATATAGTTCAATATTTTCGATGCTTTCATTTTTATACTATTTGCCTGTAATGAGACAGCTGTTTACGGTTCTGATGAAAACATTATTTTTATGAGGTATGTGGAAAAATTACATTTAGATAAATACTCTGTTAAAAATACGGTAAAACTGAAACAATGGCGATACAATTAGCTGAAATATATGTTAGGTATCGCTATGGCGAACGGATTGCAGAAGAAAAACCATATTTAATTACGGAACTACCAGATAGTTGGGTTGTTGAGGGAGCAAAGTTACCTTATGAAGTTGCGGGTGGTGTATTTATTATAGAAATTAATAAGAAAAATGGATGTGTTTTGAATTTCCTACATAGTAAATAATGCTGGCGCTGATGGATGCGGATGGAAACATTGCGTGGAGCGGGGAGTATGATGAGTGGGGCAACCAGCTGAATGAAGAGAACCCGCATCACCTGCACCAGCCGTACCGGCTGCCGGGGCAGCAGTATGATAAGGAGTCGGGGCTGTACTACAACCGGAACCGGTACTACGATCCGTTGCAGGGCGGTATATCACCAGGACCCGATAGGGCTGAGGGGGATGGAGTCTGTATGCGTATCCGCTGAATCCGGTGAATGGTATTGATCCATTAGGTTAAGTCCCGCAGATGTAGCGCTAATAAGAAGAAAAGATCAACTAAACCATCAAAGAGCATGGGATATATTATCTGATACTTATGAAGATATGAAGAGATTAAATTTAGGTGGGACTGATCAATTTTTCCATTGTATGGCATTTTGTCGAGTGTCTAAATTAAATGACGCTGGTGTTAGCCGATCGGCGAAAGGGCTGGGTTATGAAAAAGAGATTAGAGATTACGGGTTAAATCTGTTCGGTATGTACGGCAGAAAAGTAAAGCTATCCCATTCTGAAATGATTGAAGATAATAAAAAGACTTGGCTGTAAATGACCATGGGTTGACATGTCCATCAACAACAGATTGCTCAGATAGATGTAGTGATTATATTAATCCAGCATAAAAAACGATAAAGGCTTTACAAGATGCTGGCTATCTCAAGTAATCTATCAAAGATGATAATATTTATTTTTGCTATTATAATCATTGTTGTTTTATGCGTAATTACTTATCTTTATTTATACAAAGATGAATCTCTTGTAAGTAAACATTACATAAACTATATGGCAATACCAGAAAATGATGGAGTTTTTACATGGCTCCCAGATTTTTTCCGCACGTAGCGGTGGATATATCAATATACACAAATGTAGAAGATGATTATTTTTTCTTATTTTCCCTAACAAATGATGATGGGGTAGGTTTAAGAAAACATTGACAGTGAGGGCCAGGGAACAAGTGGCGAAAATCGTATCAAAGAATGATCCAGATACAAAAAAGTGTGGTGTAAATATGGTAAGATACCAGGGCAAGGGATGGTGTAAACCTTTTTTGTTGGTGAAATTAATGTTACGCATTATTTTATAACAAATATTGGAGCTGGATTGCCTGATGCTTGTGCAGAGTAATTGCTTGAATTAAGAGTCTATCCCATATCGAAGTCGTCAACTTCGTAGTGAGGAAAAGTAAAATTCCTGACTGAGAAAAGACATGTCGGCTATTGTGTAAAGCCATATAGCTCAGACGATGAATATCTACTCGTATTCAGTTGTTTATTGAGGGTGAGTTCCGACCCTGAAACAACAAATAAAATGAACAGTCAGAGAGTTTACATAGAATTGCACTGGTCTTTTACGATATCTGACATTGTGTAATACATATTCAGCCATGCATTAATTAAAATGTTACGTGTTTAATGTGAGTCCCTATCTGAAAATAAATAATCCTTCCGGATTAAAATAAATTCTTGCCGGGAAAGAAAGAGGAAATAAACCATTAGCGGAAAACCAGCGGCACGCCAGGGTGACATGCCCCAGTACGGCGGCCCGATAGTCCAGGGTTCAGCGGGGTACTGATAGTCGCGCCGACTGCATCGAGAACTCCATACAACTTTTACGGAACGTCACCTGATAAAATTTGTTCAGTATCGTCTTTGAGATTCTCAAGGTTATGGAGTAACCAGACAGAACGGACACCACTGTCGGAGATAAAACCCCTGTTTACGCAGCTCGTTGCTGGCCCGGTGCTGACCATGTGTCGGGAAAGCGACGGCGTAATCAACAACAGCCTGTTCAGTTGCCTCATCGGTACGGTTCTTAAGGTTAGGAGCGCGGCGACTACGATTAATCTGCGCATCCACACCGCCTTCAGCGACCAGTTCGCGGTAACGATAAAACGTATCACGCGAAACGCCCATGATTTTACAGGCTTTTGATACGTTGCTGAGTTCTTCAGCCAGATTGAGCAAACCGGCTTTGTGTTTGATGACGGGATTGGCAGTATGAAGCATGAGAGTTACCTCTTGTTTTGGATAAGGATTCGACACTCATATCAAAACCGGTAACTCTCAACCTTTCAAGGCCATGTGTCAGATCAAGTCGCGACTAATACAAATACGTCCCTCATTACCGCGCCTTAACCCATTCCGCCACTTCCGCCCACTCACCGCGAAAGACAACTTTTTCCGCTTTTTCTCAAGCTGATAGCGATACATCGGGTCGTAATATTCTTCAAGTAACGGCACCAGCCAGGCCAGATGACCGTCGGTGCTGCCGGTGGTGAGTTGCGTTGTCAGTGCTGCATCCAGCCTTGCAGCCAGTTCGTTATAGCGCTGTAGCCCCAGCCGACGCTTAATCGCCGAAAGTCCGTGATGCAGGTATTCGCAATACTCCTGCCAGCCCTGTTCGTCGCCGTACGCGTGGGTAAAATCATGATGCATACGCAAGAAATACTCTTCGTTCAGGCGCTCAAGACGGATCTCAAACGGATCTTCTACCACCGCAATCGCCGCCTGAGTCATTCGCTCGCGCAGGCATTCCGGCAGGTGATTCGAACCGATCATCCGGCTTTCGTCTTCCAGCACCCACAGGCGCAAATTCTGACGGGCGTCGGTTTTAGCATTTCGGCAGCCAGCAGGTTTTCAAAACTCGCCTGGCTAAGTTGTGGTTGTAACGTGCGACCAAACGCCGAACCGCGATGACGCGCCAACCCTTCCAGATCAACACCGTTCGGCTGTTGCTGCACTAACAGCGTTTTACCGCTGCCGGTACAACCGCCAATCAGCACTATCGGTTTTGTGCCAGTTCAATAGTCGCCTGAATCGCGGTCTGGCGCAGTGCCTTATAACCGCCTTCCACCAGCGGATAATCAATCCCCGCTGCATGCAACCAGCTTTGCACAATATGTGAGCGCTGACCGCCACGGGCGCAGCAGAGAATACCTTGCGGATTTTGCAGGCACGCTGCCCGCCAGGCGTCCATGCGCTGCTGACGAATTTCACCCGCCACCAGTTTATGTCCCAGCGCCAGCGCTGCGTCTGAGCCTTGCTGTTTATAGCAGGTGCCAACGGCGGCGCGTTCATCGTTATTCATTAACGGCAGATTGATAGCGGCGGGCATTGCGCCGTGCTCAAACTCG'..b'GGTAAAGCCTTTGTGAATATTGATGGTGAGCACCTTGAACGAAAATTGTTGTGTTTGATCGGGCATAATTTTCCTGTCTTTGCCTCTTATCTCATTGAAATAGTGTAGTCGGCGTCACAAAAGGTGCGGTCTTACGGAATTTTCCGTAAAGTTCGGTACTCTGAGTAAGTAGAGATAAATTCTTCAGGAGAGAAGCCATGAAGTGGCAACAACGTGTTCGTGTCGCAACGGGTCTAAGTTGCTGGCAGATTATGTTGCATTTACTGGTAGTGGCGCTGCTGGTGGTGGGCTGGATGAGTAAGACTCTGGTTCACGTCGGCGTGGGATTATGCGCACTGTATTGTGTCACGGTAGTGATGATGCTGGTGTTTCAGCGCCACCCCGAGCAACGCTGGCGTGAGGTGGCAGACGTGCTGGAAGAGCTGACCACGACCTGGTATTTTGGCGCAGCGCTGATTGTGCTGTGGCTGTTGTCCCGCGTTCTGGAAAACAACTTTTGCTGGCAATTGCAGGGCTGGCAATCCTTGCCGGCCCGGCGGTAGTGTCTTTGCTGGCGAAAGATAAGAAGTTACATCACCTTACGTCTAAACATCGCGTACGCCGCTGACCCTGTCGTGGCCGTTATCACCAGTAGCGGCCACAAACTATTCCACACAATATCCAGACTCGCATCCTTCAAATAAATCTGCTTGGTAATGTCCGTAAAGTGGCGAATAGGGTTAATCCACGTCAGGTTTTGCAGCCATACCGGCATGTTTTCCACCGGAGAAACGTAACCGGAAAGGAGAATGGCGGGCATCATAAAGACAAACACGCCGATAAACGCCTGCTGTTGTGTTGAACAGAGTGATGAAATCAACAGACCGAATCCCACCAGCGATAAACCATAAATCACCATCGTAAAGTAGAACAGCGCCAGCGATCCGGCGAAGGGATTTGATACGCCCAGATACCAATCGCCAGCACAATGGTGGCCTGGAAGGTGGCGACAATTAACGCCGGTACGGCTTTGCCGATGAAGATCTGCCAGGTGGTGAGCGGCGAAACCAGTAGCTGATCGAGCGTACCTTGTTCACGTTCGCGGGCGACGGAAAGTGAAGTGACGATCATTACGCCGATAGTGGTGATCATGGCGATCAGTGACGGCACCACAAACCATTTGTAGTCGAGATTCGGGTTATACCAGTTGCGTACCACCAGCTCGCTGTTGTTAGGTTTCGGTTTTCCTTCCAGCAGCTCCTGCTGATAATTTTGACGATCTGTTGCAGGTAGTTGGCGGCAATTTGCGCACTGTTGGAGTTACGCCCGTCGAGGATCAACTGCAAAGGCGCGGTCTGGAAGGTATCCAGTTTGCGCGAGAAGTCAGCCGGGAAACGCACCAGTAGTAACGCCTTTGTGTGTCGATGGTTGGGCGGATCTCCTGTGGGCTTTTCAGCAGCAGCACATGAGTAAAGGCGCTGGCGCGGGCAAAACGTTGGGTCAGCTCCACCGAATGCTCGCCGTTATCTTCATCGTAGATGGCGATGGTGGCGTTAGTCACTTCCAGCGTGGCGGCGAACGGGAACAGGATCACCTGAATTAGCACGGGTAAAATCAGAATCGCGCGGGTTTGCGGTTCGCGCAGCAACGACTGCAACTCTTTGCGGATTAACGTCCATAAGCGATGAAACATGCTCTTCTCCTAATCCAGCCGACGTTTGGTTTTCAGCCACGTCAGGCCGATAAACATCACCGCCGAAGCGATCAAAAACAGCACGTTTACCACCAGCACCACTGGAATATTCCCGGCGAGGAACAGGCTTTGCAGGGTGCTGACGAAATAACGAGCGGGAATAATGTACGTCACCGCGCGGATCACCGCGGGCATACTGTCGATCTGAAAAATAAAGCCGGAAAGCATAATCGACGGCAGAAAAGCGGCGTTCAGGGCGACCTGAGCGGCATTGAACTGGTTGCGGGTAATCGTGGAAATCAGCAGCCCCATCCCCAGGGTACTGAGTAAAACAGGCTGGAGATAAAAACAGAATCAGCAGCGACCCGCGATACGGCACGCCGAGAATAAACACTGACACCAGCATACACAGCAACATCGCCAGCATCCCGAGAAAGTAATAAGGGATCAGCTTACACAGCAGCAGTTCCGTGCGGGTAATCTCCGTAGAGAGCAGAGCCTCCATGGTGCCGCGTTCCCATTCTCGCGCCACCACCAGCGAGGTGAGAATCGCGCCGATGACCGTCATGATAATGGTCACCGCACCGGGGATAATGAAGTGCTGGCTAATCGCCGCCGGGTTAAACCAGTAGCGGGTTTGTACATCAATAAGCGGTTCAAAAGTCTGCCCGTTGTCCTCCGCTCGCTGCATTTGCCAGATCTGCCAGATCCCTTCGACATACCCTGTACAAAGTTAGCGGTATTCGGCTCACTGCCGTCGGTGATCACCTGAATCGGTGCGGTGGCGTTGGCGCGCTCCATCTGTTCCGCAAAATCCACCGGAATAACCACCAGACCGCGAATTTTCCCCGCCTGCATTTTGGCGATCAGTTCCTGACGGTTATCGCTGATGGTGGCGTCGATGTAGGGCGAACCGGTCATGGTGTGGTGAAATCCAGCGCCGCTTCGCTACGCTGTTCCAGTAAAATCCCGACCCGCAGCTTGCTGGAGTCGAGGTTAATGCCGTAACCAAAATAAACAGCAGTAGCAGCGGGATCACTACCGCAATCAGCCAGCTACTCGGATCGCGAACGATCTGCCGCGTCTCTTTAACGCACAGCGCCCGTACGCGACGCCAGGACAGGATCGGGTTACTCATTGCTATGCTCCTTATCCCAGTCGTGGATCAACTGAATAAAGGCTTGCTCCATGGTGGGATCGGGTTGCTCATCGTTAGCCGACTGTGCTTTCAAATCGTCCGGCGTGCCGCTGGCGATTAATTTCCCGCGGTACACCAGGCCGATGCGGTCGCAATATTCCGCTTCATCCATAAAGTGGGTGGTGACCATCACCGTGACGCCTTTCTCTACCATGCTGTTGATGTGCAGCCAAAATTCACGGCGGGTGAGGGGTCAACGCCGGAAGTCGGTTCGTCGAGAAACAGAATGTCCGGTTCATGCATCAGCGAACAGGCCAGCGCCAGCCGCTGTTTAAAACCTAATGGCAGTTCATCGGTGGCGTGGGAGGCGATACTTTTCAGGCCGAACGCCTCGCTCATGCGGGAGATTTTTCGTTCTGCGCCCGACCGCGTAAGCCATACACACCAGAGAAAAAGCGTAAATTCTGTTCGACCGTCAGGTTACCGTAGAGCGAAAATTTTTGCGCCATATAGCCGAGATGCTGGCGCGCTTTACCGGAACTCTCTTTCAGATCCATCCCCAGCACCAGCGCCTGGCCGGAAGTCGGCACCAGCAAACCGCACATCATCTTAAAGGTGGTCGATTTACCCGCGCCGTTTGGCCCCAGCAAACCAAAAATCTCCCCACGTTTAACGGCAAAGTTGACGTGATCGGTGGCGGCAAAATCCCCAAATTTCTTGGTCAGTTCTTTCGCTTCGATCACCGTCTCGCCGGGTGTGCCTTCTACCGTATGTAATATTGCGCCCAGCGGCGATTCCGAGGTTCCGGCACCGCCCAGCAAATCAATAAACGCATCTTCAAAACGCGGCGTAGTTTCGTTGATGTTGATTTCCGGCATCCCGTCGGCATGGCGAATATCGTCTGGTGTGGCCTCTTTTTGAGGATCAGACGTACCGATTTCCCCTGAATCATGCCGTCGCTGACCTGCGGCAGTTTCAAGGCGCGTTGCAACAGTTTGCGGTTGCCCTCGTGTGGACTGGTCATCAGAAAGCTGCGTCCGGCCATGGTTTGTGTCAGGGCTTTTGGTTCTCCCTGATACAGCAACTCGCCTTCGTTCATCAGTAACACGTCACGGCACTGCTCGGCTTCGTCGAGATACGAGGTACTCCAGAGGATTAACATCCCTTCGCCCGCCAGCTCATGCACCA\n'
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/t2-variants.vcf
--- a/test-data/t2-variants.vcf Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/t2-variants.vcf Fri May 29 13:29:14 2020 -0400
b
@@ -2,10 +2,14 @@
 ##nanopolish_window=tig00000001:198000-202000
 ##INFO=<ID=TotalReads,Number=1,Type=Integer,Description="The number of event-space reads used to call the variant">
 ##INFO=<ID=SupportFraction,Number=1,Type=Float,Description="The fraction of event-space reads that support the variant">
+##INFO=<ID=SupportFractionByStrand,Number=2,Type=Float,Description="Fraction of event-space reads that support the variant for each strand">
 ##INFO=<ID=BaseCalledReadsWithVariant,Number=1,Type=Integer,Description="The number of base-space reads that support the variant">
 ##INFO=<ID=BaseCalledFraction,Number=1,Type=Float,Description="The fraction of base-space reads that support the variant">
 ##INFO=<ID=AlleleCount,Number=1,Type=Integer,Description="The inferred number of copies of the allele">
-##INFO=<ID=SupportFractionByBase,Number=4,Type=Integer,Description="The fraction of reads supporting A,C,G,T at this position">
+##INFO=<ID=StrandSupport,Number=4,Type=Integer,Description="Number of reads supporting the REF and ALT allele, by strand">
+##INFO=<ID=StrandFisherTest,Number=1,Type=Integer,Description="Strand bias fisher test">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="StrandOddsRatio test from GATK">
+##INFO=<ID=RefContext,Number=1,Type=String,Description="The reference sequence context surrounding the variant call">
+##INFO=<ID=SupportFractionByBase,Number=4,Type=Float,Description="The fraction of reads supporting A,C,G,T at this position">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
-tig00000001 198435 . G A 22.3 PASS BaseCalledReadsWithVariant=10;BaseCalledFraction=0.238095;TotalReads=42;AlleleCount=1;SupportFraction=0.388965;SupportFractionByBase=0.210,0.030,0.718,0.041 GT 0/1
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/t3_polished.fa
--- a/test-data/t3_polished.fa Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/t3_polished.fa Fri May 29 13:29:14 2020 -0400
b
b'@@ -1,2 +1,2 @@\n >tig00000001\n-AGATGCTTTGAAAGAAACGCAGAATAGATCTCTATGTAATGATATGGAATACTCTGGTATTGTCTGTAAAGATACTAATGGAAAATATTTTGCATCTAAGGCAGAAACTGATAATTTAAGAAAGGAGTCATATCCTCTGAAAAGAAAATGTCCCACAGGTACAGATAGAGTTGCTGCTTATCATACTCACGGTGCAGATAGTCATGGCGATTATGTTGATGAATTTTTTCAAGTAGCGATAAAATCTTGTAAGAAGTAAAGATAATAATCTTGAAGCATTTTATCTCGCAACACCTGATGGACGATTTGAGGCGCTTAATAATAAAGGAGAATATATTTTATCAGAAATAGTGTCCCGGGATTGAGTTCAGTATGCATACCGTATCATGATTAATTTTAGTGCTTTTATTAGTGGGGCCTATAGGAGATTCAATGAAATATAGTTCAATATTTTCGATGCTTTCATTTTTATACTATTTGCCTGTAATGAGACAGCTGTTTACGGTTCTGATGAAAACATTATTTTTATGAGGTATGTGGAAAAATTACATTTAGATAAATACTCTGTTAAAAATACGGTAAAACTGAAACAATGGCGATACAATTAGCTGAAATATATGTTAGGTATCGCTATGGCGAACGGATTGCAGAAGAAAAACCATATTTAATTACGGAACTACCAGATAGTTGGGTTGTTGAGGGAGCAAAGTTACCTTATGAAGTTGCGGGTGGTGTATTTATTATAGAAATTAATAAGAAAAATGGATGTGTTTTGAATTTCCTACATAGTAAATAATGCTGGCGCTGATGGATGCGGATGGAAACATTGCGTGGAGCGGGGAGTATGATGAGTGGGGCAACCAGCTGAATGAAGAGAACCCGCATCACCTGCACCAGCCGTACCGGCTGCCGGGGCAGCAGTATGATAAGGAGTCGGGGCTGTACTACAACCGGAACCGGTACTACGATCCGTTGCAGGGCGGTATATCACCAGGACCCGATAGGGCTGAGGGGGATGGAGTCTGTATGCGTATCCGCTGAATCCGGTGAATGGTATTGATCCATTAGGTTAAGTCCCGCAGATGTAGCGCTAATAAGAAGAAAAGATCAACTAAACCATCAAAGAGCATGGGATATATTATCTGATACTTATGAAGATATGAAGAGATTAAATTTAGGTGGGACTGATCAATTTTTCCATTGTATGGCATTTTGTCGAGTGTCTAAATTAAATGACGCTGGTGTTAGCCGATCGGCGAAAGGGCTGGGTTATGAAAAAGAGATTAGAGATTACGGGTTAAATCTGTTCGGTATGTACGGCAGAAAAGTAAAGCTATCCCATTCTGAAATGATTGAAGATAATAAAAAGACTTGGCTGTAAATGACCATGGGTTGACATGTCCATCAACAACAGATTGCTCAGATAGATGTAGTGATTATATTAATCCAGCATAAAAAACGATAAAGGCTTTACAAGATGCTGGCTATCTCAAGTAATCTATCAAAGATGATAATATTTATTTTTGCTATTATAATCATTGTTGTTTTATGCGTAATTACTTATCTTTATTTATACAAAGATGAATCTCTTGTAAGTAAACATTACATAAACTATATGGCAATACCAGAAAATGATGGAGTTTTTACATGGCTCCCAGATTTTTTCCGCACGTAGCGGTGGATATATCAATATACACAAATGTAGAAGATGATTATTTTTTCTTATTTTCCCTAACAAATGATGATGGGGTAGGTTTAAGAAAACATTGACAGTGAGGGCCAGGGAACAAGTGGCGAAAATCGTATCAAAGAATGATCCAGATACAAAAAAGTGTGGTGTAAATATGGTAAGATACCAGGGCAAGGGATGGTGTAAACCTTTTTTGTTGGTGAAATTAATGTTACGCATTATTTTATAACAAATATTGGAGCTGGATTGCCTGATGCTTGTGCAGAGTAATTGCTTGAATTAAGAGTCTATCCCATATCGAAGTCGTCAACTTCGTAGTGAGGAAAAGTAAAATTCCTGACTGAGAAAAGACATGTCGGCTATTGTGTAAAGCCATATAGCTCAGACGATGAATATCTACTCGTATTCAGTTGTTTATTGAGGGTGAGTTCCGACCCTGAAACAACAAATAAAATGAACAGTCAGAGAGTTTACATAGAATTGCACTGGTCTTTTACGATATCTGACATTGTGTAATACATATTCAGCCATGCATTAATTAAAATGTTACGTGTTTAATGTGAGTCCCTATCTGAAAATAAATAATCCTTCCGGATTAAAATAAATTCTTGCCGGGAAAGAAAGAGGAAATAAACCATTAGCGGAAAACCAGCGGCACGCCAGGGTGACATGCCCCAGTACGGCGGCCCGATAGTCCAGGGTTCAGCGGGGTACTGATAGTCGCGCCGACTGCATCGAGAACTCCATACAACTTTTACGGAACGTCACCTGATAAAATTTGTTCAGTATCGTCTTTGAGATTCTCAAGGTTATGGAGTAACCAGACAGAACGGACACCACTGTCGGAGATAAAACCCCTGTTTACGCAGCTCGTTGCTGGCCCGGTGCTGACCATGTGTCGGGAAAGCGACGGCGTAATCAACAACAGCCTGTTCAGTTGCCTCATCGGTACGGTTCTTAAGGTTAGGAGCGCGGCGACTACGATTAATCTGCGCATCCACACCGCCTTCAGCGACCAGTTCGCGGTAACGATAAAACGTATCACGCGAAACGCCCATGATTTTACAGGCTTTTGATACGTTGCTGAGTTCTTCAGCCAGATTGAGCAAACCGGCTTTGTGTTTGATGACGGGATTGGCAGTATGAAGCATGAGAGTTACCTCTTGTTTTGGATAAGGATTCGACACTCATATCAAAACCGGTAACTCTCAACCTTTCAAGGCCATGTGTCAGATCAAGTCGCGACTAATACAAATACGTCCCTCATTACCGCGCCTTAACCCATTCCGCCACTTCCGCCCACTCACCGCGAAAGACAACTTTTTCCGCTTTTTCTCAAGCTGATAGCGATACATCGGGTCGTAATATTCTTCAAGTAACGGCACCAGCCAGGCCAGATGACCGTCGGTGCTGCCGGTGGTGAGTTGCGTTGTCAGTGCTGCATCCAGCCTTGCAGCCAGTTCGTTATAGCGCTGTAGCCCCAGCCGACGCTTAATCGCCGAAAGTCCGTGATGCAGGTATTCGCAATACTCCTGCCAGCCCTGTTCGTCGCCGTACGCGTGGGTAAAATCATGATGCATACGCAAGAAATACTCTTCGTTCAGGCGCTCAAGACGGATCTCAAACGGATCTTCTACCACCGCAATCGCCGCCTGAGTCATTCGCTCGCGCAGGCATTCCGGCAGGTGATTCGAACCGATCATCCGGCTTTCGTCTTCCAGCACCCACAGGCGCAAATTCTGACGGGCGTCGGTTTTAGCATTTCGGCAGCCAGCAGGTTTTCAAAACTCGCCTGGCTAAGTTGTGGTTGTAACGTGCGACCAAACGCCGAACCGCGATGACGCGCCAACCCTTCCAGATCAACACCGTTCGGCTGTTGCTGCACTAACAGCGTTTTACCGCTGCCGGTACAACCGCCAATCAGCACTATCGGTTTTGTGCCAGTTCAATAGTCGCCTGAATCGCGGTCTGGCGCAGTGCCTTATAACCGCCTTCCACCAGCGGATAATCAATCCCCGCTGCATGCAACCAGCTTTGCACAATATGTGAGCGCTGACCGCCACGGGCGCAGCAGAGAATACCTTGCGGATTTTGCAGGCACGCTGCCCGCCAGGCGTCCATGCGCTGCTGACGAATTTCACCCGCCACCAGTTTATGTCCCAGCGCCAGCGCTGCGTCTGAGCCTTGCTGTTTATAGCAGGTGCCAACGGCGGCGCGTTCATCGTTATTCATTAACGGCAGATTGATAGCGGCGGGCATTGCGCCGTGCTCAAACTCG'..b'GGTAAAGCCTTTGTGAATATTGATGGTGAGCACCTTGAACGAAAATTGTTGTGTTTGATCGGGCATAATTTTCCTGTCTTTGCCTCTTATCTCATTGAAATAGTGTAGTCGGCGTCACAAAAGGTGCGGTCTTACGGAATTTTCCGTAAAGTTCGGTACTCTGAGTAAGTAGAGATAAATTCTTCAGGAGAGAAGCCATGAAGTGGCAACAACGTGTTCGTGTCGCAACGGGTCTAAGTTGCTGGCAGATTATGTTGCATTTACTGGTAGTGGCGCTGCTGGTGGTGGGCTGGATGAGTAAGACTCTGGTTCACGTCGGCGTGGGATTATGCGCACTGTATTGTGTCACGGTAGTGATGATGCTGGTGTTTCAGCGCCACCCCGAGCAACGCTGGCGTGAGGTGGCAGACGTGCTGGAAGAGCTGACCACGACCTGGTATTTTGGCGCAGCGCTGATTGTGCTGTGGCTGTTGTCCCGCGTTCTGGAAAACAACTTTTGCTGGCAATTGCAGGGCTGGCAATCCTTGCCGGCCCGGCGGTAGTGTCTTTGCTGGCGAAAGATAAGAAGTTACATCACCTTACGTCTAAACATCGCGTACGCCGCTGACCCTGTCGTGGCCGTTATCACCAGTAGCGGCCACAAACTATTCCACACAATATCCAGACTCGCATCCTTCAAATAAATCTGCTTGGTAATGTCCGTAAAGTGGCGAATAGGGTTAATCCACGTCAGGTTTTGCAGCCATACCGGCATGTTTTCCACCGGAGAAACGTAACCGGAAAGGAGAATGGCGGGCATCATAAAGACAAACACGCCGATAAACGCCTGCTGTTGTGTTGAACAGAGTGATGAAATCAACAGACCGAATCCCACCAGCGATAAACCATAAATCACCATCGTAAAGTAGAACAGCGCCAGCGATCCGGCGAAGGGATTTGATACGCCCAGATACCAATCGCCAGCACAATGGTGGCCTGGAAGGTGGCGACAATTAACGCCGGTACGGCTTTGCCGATGAAGATCTGCCAGGTGGTGAGCGGCGAAACCAGTAGCTGATCGAGCGTACCTTGTTCACGTTCGCGGGCGACGGAAAGTGAAGTGACGATCATTACGCCGATAGTGGTGATCATGGCGATCAGTGACGGCACCACAAACCATTTGTAGTCGAGATTCGGGTTATACCAGTTGCGTACCACCAGCTCGCTGTTGTTAGGTTTCGGTTTTCCTTCCAGCAGCTCCTGCTGATAATTTTGACGATCTGTTGCAGGTAGTTGGCGGCAATTTGCGCACTGTTGGAGTTACGCCCGTCGAGGATCAACTGCAAAGGCGCGGTCTGGAAGGTATCCAGTTTGCGCGAGAAGTCAGCCGGGAAACGCACCAGTAGTAACGCCTTTGTGTGTCGATGGTTGGGCGGATCTCCTGTGGGCTTTTCAGCAGCAGCACATGAGTAAAGGCGCTGGCGCGGGCAAAACGTTGGGTCAGCTCCACCGAATGCTCGCCGTTATCTTCATCGTAGATGGCGATGGTGGCGTTAGTCACTTCCAGCGTGGCGGCGAACGGGAACAGGATCACCTGAATTAGCACGGGTAAAATCAGAATCGCGCGGGTTTGCGGTTCGCGCAGCAACGACTGCAACTCTTTGCGGATTAACGTCCATAAGCGATGAAACATGCTCTTCTCCTAATCCAGCCGACGTTTGGTTTTCAGCCACGTCAGGCCGATAAACATCACCGCCGAAGCGATCAAAAACAGCACGTTTACCACCAGCACCACTGGAATATTCCCGGCGAGGAACAGGCTTTGCAGGGTGCTGACGAAATAACGAGCGGGAATAATGTACGTCACCGCGCGGATCACCGCGGGCATACTGTCGATCTGAAAAATAAAGCCGGAAAGCATAATCGACGGCAGAAAAGCGGCGTTCAGGGCGACCTGAGCGGCATTGAACTGGTTGCGGGTAATCGTGGAAATCAGCAGCCCCATCCCCAGGGTACTGAGTAAAACAGGCTGGAGATAAAAACAGAATCAGCAGCGACCCGCGATACGGCACGCCGAGAATAAACACTGACACCAGCATACACAGCAACATCGCCAGCATCCCGAGAAAGTAATAAGGGATCAGCTTACACAGCAGCAGTTCCGTGCGGGTAATCTCCGTAGAGAGCAGAGCCTCCATGGTGCCGCGTTCCCATTCTCGCGCCACCACCAGCGAGGTGAGAATCGCGCCGATGACCGTCATGATAATGGTCACCGCACCGGGGATAATGAAGTGCTGGCTAATCGCCGCCGGGTTAAACCAGTAGCGGGTTTGTACATCAATAAGCGGTTCAAAAGTCTGCCCGTTGTCCTCCGCTCGCTGCATTTGCCAGATCTGCCAGATCCCTTCGACATACCCTGTACAAAGTTAGCGGTATTCGGCTCACTGCCGTCGGTGATCACCTGAATCGGTGCGGTGGCGTTGGCGCGCTCCATCTGTTCCGCAAAATCCACCGGAATAACCACCAGACCGCGAATTTTCCCCGCCTGCATTTTGGCGATCAGTTCCTGACGGTTATCGCTGATGGTGGCGTCGATGTAGGGCGAACCGGTCATGGTGTGGTGAAATCCAGCGCCGCTTCGCTACGCTGTTCCAGTAAAATCCCGACCCGCAGCTTGCTGGAGTCGAGGTTAATGCCGTAACCAAAATAAACAGCAGTAGCAGCGGGATCACTACCGCAATCAGCCAGCTACTCGGATCGCGAACGATCTGCCGCGTCTCTTTAACGCACAGCGCCCGTACGCGACGCCAGGACAGGATCGGGTTACTCATTGCTATGCTCCTTATCCCAGTCGTGGATCAACTGAATAAAGGCTTGCTCCATGGTGGGATCGGGTTGCTCATCGTTAGCCGACTGTGCTTTCAAATCGTCCGGCGTGCCGCTGGCGATTAATTTCCCGCGGTACACCAGGCCGATGCGGTCGCAATATTCCGCTTCATCCATAAAGTGGGTGGTGACCATCACCGTGACGCCTTTCTCTACCATGCTGTTGATGTGCAGCCAAAATTCACGGCGGGTGAGGGGTCAACGCCGGAAGTCGGTTCGTCGAGAAACAGAATGTCCGGTTCATGCATCAGCGAACAGGCCAGCGCCAGCCGCTGTTTAAAACCTAATGGCAGTTCATCGGTGGCGTGGGAGGCGATACTTTTCAGGCCGAACGCCTCGCTCATGCGGGAGATTTTTCGTTCTGCGCCCGACCGCGTAAGCCATACACACCAGAGAAAAAGCGTAAATTCTGTTCGACCGTCAGGTTACCGTAGAGCGAAAATTTTTGCGCCATATAGCCGAGATGCTGGCGCGCTTTACCGGAACTCTCTTTCAGATCCATCCCCAGCACCAGCGCCTGGCCGGAAGTCGGCACCAGCAAACCGCACATCATCTTAAAGGTGGTCGATTTACCCGCGCCGTTTGGCCCCAGCAAACCAAAAATCTCCCCACGTTTAACGGCAAAGTTGACGTGATCGGTGGCGGCAAAATCCCCAAATTTCTTGGTCAGTTCTTTCGCTTCGATCACCGTCTCGCCGGGTGTGCCTTCTACCGTATGTAATATTGCGCCCAGCGGCGATTCCGAGGTTCCGGCACCGCCCAGCAAATCAATAAACGCATCTTCAAAACGCGGCGTAGTTTCGTTGATGTTGATTTCCGGCATCCCGTCGGCATGGCGAATATCGTCTGGTGTGGCCTCTTTTTGAGGATCAGACGTACCGATTTCCCCTGAATCATGCCGTCGCTGACCTGCGGCAGTTTCAAGGCGCGTTGCAACAGTTTGCGGTTGCCCTCGTGTGGACTGGTCATCAGAAAGCTGCGTCCGGCCATGGTTTGTGTCAGGGCTTTTGGTTCTCCCTGATACAGCAACTCGCCTTCGTTCATCAGTAACACGTCACGGCACTGCTCGGCTTCGTCGAGATACGAGGTACTCCAGAGGATTAACATCCCTTCGCCCGCCAGCTCATGCACCA\n'
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/t3_variants.vcf
--- a/test-data/t3_variants.vcf Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/t3_variants.vcf Fri May 29 13:29:14 2020 -0400
b
@@ -2,20 +2,26 @@
 ##nanopolish_window=tig00000001:200000-202000
 ##INFO=<ID=TotalReads,Number=1,Type=Integer,Description="The number of event-space reads used to call the variant">
 ##INFO=<ID=SupportFraction,Number=1,Type=Float,Description="The fraction of event-space reads that support the variant">
+##INFO=<ID=SupportFractionByStrand,Number=2,Type=Float,Description="Fraction of event-space reads that support the variant for each strand">
 ##INFO=<ID=BaseCalledReadsWithVariant,Number=1,Type=Integer,Description="The number of base-space reads that support the variant">
 ##INFO=<ID=BaseCalledFraction,Number=1,Type=Float,Description="The fraction of base-space reads that support the variant">
 ##INFO=<ID=AlleleCount,Number=1,Type=Integer,Description="The inferred number of copies of the allele">
+##INFO=<ID=StrandSupport,Number=4,Type=Integer,Description="Number of reads supporting the REF and ALT allele, by strand">
+##INFO=<ID=StrandFisherTest,Number=1,Type=Integer,Description="Strand bias fisher test">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="StrandOddsRatio test from GATK">
+##INFO=<ID=RefContext,Number=1,Type=String,Description="The reference sequence context surrounding the variant call">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
-tig00000001 200061 . T TA 22.4 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.583757 GT 1
-tig00000001 200180 . C CA 30.6 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.596279 GT 1
-tig00000001 200484 . G GA 25.3 PASS TotalReads=63;AlleleCount=1;SupportFraction=0.569915 GT 1
-tig00000001 200672 . T TA 94.1 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.431464 GT 1
-tig00000001 200776 . C CA 82.3 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.362655 GT 1
-tig00000001 200796 . T TAA 117.1 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.268565 GT 1
-tig00000001 201007 . A AG 31.0 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.604821 GT 1
-tig00000001 201216 . A AT 85.8 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.457699 GT 1
-tig00000001 201273 . G GT 25.2 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.588877 GT 1
-tig00000001 201554 . G GC 49.9 PASS TotalReads=76;AlleleCount=1;SupportFraction=0.632029 GT 1
-tig00000001 201588 . C CG 125.4 PASS TotalReads=75;AlleleCount=1;SupportFraction=0.387616 GT 1
-tig00000001 201712 . C CA 21.1 PASS TotalReads=74;AlleleCount=1;SupportFraction=0.57415 GT 1
+tig00000001 200061 . T TA 36.4 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.334093;SupportFractionByStrand=0.354592,0.312974;StrandSupport=22,23,12,10;StrandFisherTest=1;SOR=0.818803;RefContext=TTGAATAAAAA GT 1
+tig00000001 200180 . C CA 45.3 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.637264;SupportFractionByStrand=0.75298,0.521547;StrandSupport=8,16,25,17;StrandFisherTest=9;SOR=0.838011;RefContext=GAGGGCAAAAT GT 1
+tig00000001 200389 . T TA 33.2 PASS TotalReads=64;AlleleCount=1;SupportFraction=0.618644;SupportFractionByStrand=0.625518,0.611327;StrandSupport=12,12,21,19;StrandFisherTest=0;SOR=0.75277;RefContext=CAACATAAAAA GT 1
+tig00000001 200484 . G GA 36.3 PASS TotalReads=63;AlleleCount=1;SupportFraction=0.600783;SupportFractionByStrand=0.593951,0.607834;StrandSupport=13,12,19,19;StrandFisherTest=0;SOR=0.641151;RefContext=TGCGCGAAAAG GT 1
+tig00000001 200672 . T TA 106.6 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.41069;SupportFractionByStrand=0.507758,0.304229;StrandSupport=17,22,17,9;StrandFisherTest=9;SOR=1.30379;RefContext=AGTAATAAAAC GT 1
+tig00000001 200776 . C CA 96.0 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.343276;SupportFractionByStrand=0.355235,0.330195;StrandSupport=23,21,12,11;StrandFisherTest=0;SOR=0.798543;RefContext=TTTCGCAAAAT GT 1
+tig00000001 200796 . T TAAA 163.5 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.273151;SupportFractionByStrand=0.306456,0.237765;StrandSupport=24,24,10,8;StrandFisherTest=2;SOR=0.992084;RefContext=GGGTGTAAAAG GT 1
+tig00000001 201007 . A AG 45.9 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.343541;SupportFractionByStrand=0.341375,0.345774;StrandSupport=22,21,11,11;StrandFisherTest=0;SOR=0.674028;RefContext=ACAAAAGGGGA GT 1
+tig00000001 201216 . A AT 101.1 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.443739;SupportFractionByStrand=0.364917,0.520243;StrandSupport=21,16,12,18;StrandFisherTest=5;SOR=0.983873;RefContext=TGGTGATTTTA GT 1
+tig00000001 201273 . G GT 39.5 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.343151;SupportFractionByStrand=0.346289,0.340197;StrandSupport=21,22,11,12;StrandFisherTest=0;SOR=0.666106;RefContext=TGGCGGTTTTT GT 1
+tig00000001 201554 . G GC 64.8 PASS TotalReads=76;AlleleCount=1;SupportFraction=0.386911;SupportFractionByStrand=0.359184,0.414639;StrandSupport=24,22,14,16;StrandFisherTest=1;SOR=0.76503;RefContext=TCGGTGCCCCA GT 1
+tig00000001 201588 . C CG 135.2 PASS TotalReads=75;AlleleCount=1;SupportFraction=0.375988;SupportFractionByStrand=0.37007,0.382065;StrandSupport=24,23,14,14;StrandFisherTest=0;SOR=0.655214;RefContext=CCCGGCGGGTG GT 1
+tig00000001 201712 . C CA 34.7 PASS TotalReads=74;AlleleCount=1;SupportFraction=0.608624;SupportFractionByStrand=0.68759,0.529659;StrandSupport=12,17,25,20;StrandFisherTest=6;SOR=0.748413;RefContext=CCGGACAAAAC GT 1
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/t4_polished.fa
--- a/test-data/t4_polished.fa Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/t4_polished.fa Fri May 29 13:29:14 2020 -0400
b
b'@@ -1,2 +1,2 @@\n >tig00000001\n-AGATGCTTTGAAAGAAACGCAGAATAGATCTCTATGTAATGATATGGAATACTCTGGTATTGTCTGTAAAGATACTAATGGAAAATATTTTGCATCTAAGGCAGAAACTGATAATTTAAGAAAGGAGTCATATCCTCTGAAAAGAAAATGTCCCACAGGTACAGATAGAGTTGCTGCTTATCATACTCACGGTGCAGATAGTCATGGCGATTATGTTGATGAATTTTTTCAAGTAGCGATAAAATCTTGTAAGAAGTAAAGATAATAATCTTGAAGCATTTTATCTCGCAACACCTGATGGACGATTTGAGGCGCTTAATAATAAAGGAGAATATATTTTATCAGAAATAGTGTCCCGGGATTGAGTTCAGTATGCATACCGTATCATGATTAATTTTAGTGCTTTTATTAGTGGGGCCTATAGGAGATTCAATGAAATATAGTTCAATATTTTCGATGCTTTCATTTTTATACTATTTGCCTGTAATGAGACAGCTGTTTACGGTTCTGATGAAAACATTATTTTTATGAGGTATGTGGAAAAATTACATTTAGATAAATACTCTGTTAAAAATACGGTAAAACTGAAACAATGGCGATACAATTAGCTGAAATATATGTTAGGTATCGCTATGGCGAACGGATTGCAGAAGAAAAACCATATTTAATTACGGAACTACCAGATAGTTGGGTTGTTGAGGGAGCAAAGTTACCTTATGAAGTTGCGGGTGGTGTATTTATTATAGAAATTAATAAGAAAAATGGATGTGTTTTGAATTTCCTACATAGTAAATAATGCTGGCGCTGATGGATGCGGATGGAAACATTGCGTGGAGCGGGGAGTATGATGAGTGGGGCAACCAGCTGAATGAAGAGAACCCGCATCACCTGCACCAGCCGTACCGGCTGCCGGGGCAGCAGTATGATAAGGAGTCGGGGCTGTACTACAACCGGAACCGGTACTACGATCCGTTGCAGGGCGGTATATCACCAGGACCCGATAGGGCTGAGGGGGATGGAGTCTGTATGCGTATCCGCTGAATCCGGTGAATGGTATTGATCCATTAGGTTAAGTCCCGCAGATGTAGCGCTAATAAGAAGAAAAGATCAACTAAACCATCAAAGAGCATGGGATATATTATCTGATACTTATGAAGATATGAAGAGATTAAATTTAGGTGGGACTGATCAATTTTTCCATTGTATGGCATTTTGTCGAGTGTCTAAATTAAATGACGCTGGTGTTAGCCGATCGGCGAAAGGGCTGGGTTATGAAAAAGAGATTAGAGATTACGGGTTAAATCTGTTCGGTATGTACGGCAGAAAAGTAAAGCTATCCCATTCTGAAATGATTGAAGATAATAAAAAGACTTGGCTGTAAATGACCATGGGTTGACATGTCCATCAACAACAGATTGCTCAGATAGATGTAGTGATTATATTAATCCAGCATAAAAAACGATAAAGGCTTTACAAGATGCTGGCTATCTCAAGTAATCTATCAAAGATGATAATATTTATTTTTGCTATTATAATCATTGTTGTTTTATGCGTAATTACTTATCTTTATTTATACAAAGATGAATCTCTTGTAAGTAAACATTACATAAACTATATGGCAATACCAGAAAATGATGGAGTTTTTACATGGCTCCCAGATTTTTTCCGCACGTAGCGGTGGATATATCAATATACACAAATGTAGAAGATGATTATTTTTTCTTATTTTCCCTAACAAATGATGATGGGGTAGGTTTAAGAAAACATTGACAGTGAGGGCCAGGGAACAAGTGGCGAAAATCGTATCAAAGAATGATCCAGATACAAAAAAGTGTGGTGTAAATATGGTAAGATACCAGGGCAAGGGATGGTGTAAACCTTTTTTGTTGGTGAAATTAATGTTACGCATTATTTTATAACAAATATTGGAGCTGGATTGCCTGATGCTTGTGCAGAGTAATTGCTTGAATTAAGAGTCTATCCCATATCGAAGTCGTCAACTTCGTAGTGAGGAAAAGTAAAATTCCTGACTGAGAAAAGACATGTCGGCTATTGTGTAAAGCCATATAGCTCAGACGATGAATATCTACTCGTATTCAGTTGTTTATTGAGGGTGAGTTCCGACCCTGAAACAACAAATAAAATGAACAGTCAGAGAGTTTACATAGAATTGCACTGGTCTTTTACGATATCTGACATTGTGTAATACATATTCAGCCATGCATTAATTAAAATGTTACGTGTTTAATGTGAGTCCCTATCTGAAAATAAATAATCCTTCCGGATTAAAATAAATTCTTGCCGGGAAAGAAAGAGGAAATAAACCATTAGCGGAAAACCAGCGGCACGCCAGGGTGACATGCCCCAGTACGGCGGCCCGATAGTCCAGGGTTCAGCGGGGTACTGATAGTCGCGCCGACTGCATCGAGAACTCCATACAACTTTTACGGAACGTCACCTGATAAAATTTGTTCAGTATCGTCTTTGAGATTCTCAAGGTTATGGAGTAACCAGACAGAACGGACACCACTGTCGGAGATAAAACCCCTGTTTACGCAGCTCGTTGCTGGCCCGGTGCTGACCATGTGTCGGGAAAGCGACGGCGTAATCAACAACAGCCTGTTCAGTTGCCTCATCGGTACGGTTCTTAAGGTTAGGAGCGCGGCGACTACGATTAATCTGCGCATCCACACCGCCTTCAGCGACCAGTTCGCGGTAACGATAAAACGTATCACGCGAAACGCCCATGATTTTACAGGCTTTTGATACGTTGCTGAGTTCTTCAGCCAGATTGAGCAAACCGGCTTTGTGTTTGATGACGGGATTGGCAGTATGAAGCATGAGAGTTACCTCTTGTTTTGGATAAGGATTCGACACTCATATCAAAACCGGTAACTCTCAACCTTTCAAGGCCATGTGTCAGATCAAGTCGCGACTAATACAAATACGTCCCTCATTACCGCGCCTTAACCCATTCCGCCACTTCCGCCCACTCACCGCGAAAGACAACTTTTTCCGCTTTTTCTCAAGCTGATAGCGATACATCGGGTCGTAATATTCTTCAAGTAACGGCACCAGCCAGGCCAGATGACCGTCGGTGCTGCCGGTGGTGAGTTGCGTTGTCAGTGCTGCATCCAGCCTTGCAGCCAGTTCGTTATAGCGCTGTAGCCCCAGCCGACGCTTAATCGCCGAAAGTCCGTGATGCAGGTATTCGCAATACTCCTGCCAGCCCTGTTCGTCGCCGTACGCGTGGGTAAAATCATGATGCATACGCAAGAAATACTCTTCGTTCAGGCGCTCAAGACGGATCTCAAACGGATCTTCTACCACCGCAATCGCCGCCTGAGTCATTCGCTCGCGCAGGCATTCCGGCAGGTGATTCGAACCGATCATCCGGCTTTCGTCTTCCAGCACCCACAGGCGCAAATTCTGACGGGCGTCGGTTTTAGCATTTCGGCAGCCAGCAGGTTTTCAAAACTCGCCTGGCTAAGTTGTGGTTGTAACGTGCGACCAAACGCCGAACCGCGATGACGCGCCAACCCTTCCAGATCAACACCGTTCGGCTGTTGCTGCACTAACAGCGTTTTACCGCTGCCGGTACAACCGCCAATCAGCACTATCGGTTTTGTGCCAGTTCAATAGTCGCCTGAATCGCGGTCTGGCGCAGTGCCTTATAACCGCCTTCCACCAGCGGATAATCAATCCCCGCTGCATGCAACCAGCTTTGCACAATATGTGAGCGCTGACCGCCACGGGCGCAGCAGAGAATACCTTGCGGATTTTGCAGGCACGCTGCCCGCCAGGCGTCCATGCGCTGCTGACGAATTTCACCCGCCACCAGTTTATGTCCCAGCGCCAGCGCTGCGTCTGAGCCTTGCTGTTTATAGCAGGTGCCAACGGCGGCGCGTTCATCGTTATTCATTAACGGCAGATTGATAGCGGCGGGCATTGCGCCGTGCTCAAACTCG'..b'GGTAAAGCCTTTGTGAATATTGATGGTGAGCACCTTGAACGAAAATTGTTGTGTTTGATCGGGCATAATTTTCCTGTCTTTGCCTCTTATCTCATTGAAATAGTGTAGTCGGCGTCACAAAAGGTGCGGTCTTACGGAATTTTCCGTAAAGTTCGGTACTCTGAGTAAGTAGAGATAAATTCTTCAGGAGAGAAGCCATGAAGTGGCAACAACGTGTTCGTGTCGCAACGGGTCTAAGTTGCTGGCAGATTATGTTGCATTTACTGGTAGTGGCGCTGCTGGTGGTGGGCTGGATGAGTAAGACTCTGGTTCACGTCGGCGTGGGATTATGCGCACTGTATTGTGTCACGGTAGTGATGATGCTGGTGTTTCAGCGCCACCCCGAGCAACGCTGGCGTGAGGTGGCAGACGTGCTGGAAGAGCTGACCACGACCTGGTATTTTGGCGCAGCGCTGATTGTGCTGTGGCTGTTGTCCCGCGTTCTGGAAAACAACTTTTGCTGGCAATTGCAGGGCTGGCAATCCTTGCCGGCCCGGCGGTAGTGTCTTTGCTGGCGAAAGATAAGAAGTTACATCACCTTACGTCTAAACATCGCGTACGCCGCTGACCCTGTCGTGGCCGTTATCACCAGTAGCGGCCACAAACTATTCCACACAATATCCAGACTCGCATCCTTCAAATAAATCTGCTTGGTAATGTCCGTAAAGTGGCGAATAGGGTTAATCCACGTCAGGTTTTGCAGCCATACCGGCATGTTTTCCACCGGAGAAACGTAACCGGAAAGGAGAATGGCGGGCATCATAAAGACAAACACGCCGATAAACGCCTGCTGTTGTGTTGAACAGAGTGATGAAATCAACAGACCGAATCCCACCAGCGATAAACCATAAATCACCATCGTAAAGTAGAACAGCGCCAGCGATCCGGCGAAGGGATTTGATACGCCCAGATACCAATCGCCAGCACAATGGTGGCCTGGAAGGTGGCGACAATTAACGCCGGTACGGCTTTGCCGATGAAGATCTGCCAGGTGGTGAGCGGCGAAACCAGTAGCTGATCGAGCGTACCTTGTTCACGTTCGCGGGCGACGGAAAGTGAAGTGACGATCATTACGCCGATAGTGGTGATCATGGCGATCAGTGACGGCACCACAAACCATTTGTAGTCGAGATTCGGGTTATACCAGTTGCGTACCACCAGCTCGCTGTTGTTAGGTTTCGGTTTTCCTTCCAGCAGCTCCTGCTGATAATTTTGACGATCTGTTGCAGGTAGTTGGCGGCAATTTGCGCACTGTTGGAGTTACGCCCGTCGAGGATCAACTGCAAAGGCGCGGTCTGGAAGGTATCCAGTTTGCGCGAGAAGTCAGCCGGGAAACGCACCAGTAGTAACGCCTTTGTGTGTCGATGGTTGGGCGGATCTCCTGTGGGCTTTTCAGCAGCAGCACATGAGTAAAGGCGCTGGCGCGGGCAAAACGTTGGGTCAGCTCCACCGAATGCTCGCCGTTATCTTCATCGTAGATGGCGATGGTGGCGTTAGTCACTTCCAGCGTGGCGGCGAACGGGAACAGGATCACCTGAATTAGCACGGGTAAAATCAGAATCGCGCGGGTTTGCGGTTCGCGCAGCAACGACTGCAACTCTTTGCGGATTAACGTCCATAAGCGATGAAACATGCTCTTCTCCTAATCCAGCCGACGTTTGGTTTTCAGCCACGTCAGGCCGATAAACATCACCGCCGAAGCGATCAAAAACAGCACGTTTACCACCAGCACCACTGGAATATTCCCGGCGAGGAACAGGCTTTGCAGGGTGCTGACGAAATAACGAGCGGGAATAATGTACGTCACCGCGCGGATCACCGCGGGCATACTGTCGATCTGAAAAATAAAGCCGGAAAGCATAATCGACGGCAGAAAAGCGGCGTTCAGGGCGACCTGAGCGGCATTGAACTGGTTGCGGGTAATCGTGGAAATCAGCAGCCCCATCCCCAGGGTACTGAGTAAAACAGGCTGGAGATAAAAACAGAATCAGCAGCGACCCGCGATACGGCACGCCGAGAATAAACACTGACACCAGCATACACAGCAACATCGCCAGCATCCCGAGAAAGTAATAAGGGATCAGCTTACACAGCAGCAGTTCCGTGCGGGTAATCTCCGTAGAGAGCAGAGCCTCCATGGTGCCGCGTTCCCATTCTCGCGCCACCACCAGCGAGGTGAGAATCGCGCCGATGACCGTCATGATAATGGTCACCGCACCGGGGATAATGAAGTGCTGGCTAATCGCCGCCGGGTTAAACCAGTAGCGGGTTTGTACATCAATAAGCGGTTCAAAAGTCTGCCCGTTGTCCTCCGCTCGCTGCATTTGCCAGATCTGCCAGATCCCTTCGACATACCCTGTACAAAGTTAGCGGTATTCGGCTCACTGCCGTCGGTGATCACCTGAATCGGTGCGGTGGCGTTGGCGCGCTCCATCTGTTCCGCAAAATCCACCGGAATAACCACCAGACCGCGAATTTTCCCCGCCTGCATTTTGGCGATCAGTTCCTGACGGTTATCGCTGATGGTGGCGTCGATGTAGGGCGAACCGGTCATGGTGTGGTGAAATCCAGCGCCGCTTCGCTACGCTGTTCCAGTAAAATCCCGACCCGCAGCTTGCTGGAGTCGAGGTTAATGCCGTAACCAAAATAAACAGCAGTAGCAGCGGGATCACTACCGCAATCAGCCAGCTACTCGGATCGCGAACGATCTGCCGCGTCTCTTTAACGCACAGCGCCCGTACGCGACGCCAGGACAGGATCGGGTTACTCATTGCTATGCTCCTTATCCCAGTCGTGGATCAACTGAATAAAGGCTTGCTCCATGGTGGGATCGGGTTGCTCATCGTTAGCCGACTGTGCTTTCAAATCGTCCGGCGTGCCGCTGGCGATTAATTTCCCGCGGTACACCAGGCCGATGCGGTCGCAATATTCCGCTTCATCCATAAAGTGGGTGGTGACCATCACCGTGACGCCTTTCTCTACCATGCTGTTGATGTGCAGCCAAAATTCACGGCGGGTGAGGGGTCAACGCCGGAAGTCGGTTCGTCGAGAAACAGAATGTCCGGTTCATGCATCAGCGAACAGGCCAGCGCCAGCCGCTGTTTAAAACCTAATGGCAGTTCATCGGTGGCGTGGGAGGCGATACTTTTCAGGCCGAACGCCTCGCTCATGCGGGAGATTTTTCGTTCTGCGCCCGACCGCGTAAGCCATACACACCAGAGAAAAAGCGTAAATTCTGTTCGACCGTCAGGTTACCGTAGAGCGAAAATTTTTGCGCCATATAGCCGAGATGCTGGCGCGCTTTACCGGAACTCTCTTTCAGATCCATCCCCAGCACCAGCGCCTGGCCGGAAGTCGGCACCAGCAAACCGCACATCATCTTAAAGGTGGTCGATTTACCCGCGCCGTTTGGCCCCAGCAAACCAAAAATCTCCCCACGTTTAACGGCAAAGTTGACGTGATCGGTGGCGGCAAAATCCCCAAATTTCTTGGTCAGTTCTTTCGCTTCGATCACCGTCTCGCCGGGTGTGCCTTCTACCGTATGTAATATTGCGCCCAGCGGCGATTCCGAGGTTCCGGCACCGCCCAGCAAATCAATAAACGCATCTTCAAAACGCGGCGTAGTTTCGTTGATGTTGATTTCCGGCATCCCGTCGGCATGGCGAATATCGTCTGGTGTGGCCTCTTTTTGAGGATCAGACGTACCGATTTCCCCTGAATCATGCCGTCGCTGACCTGCGGCAGTTTCAAGGCGCGTTGCAACAGTTTGCGGTTGCCCTCGTGTGGACTGGTCATCAGAAAGCTGCGTCCGGCCATGGTTTGTGTCAGGGCTTTTGGTTCTCCCTGATACAGCAACTCGCCTTCGTTCATCAGTAACACGTCACGGCACTGCTCGGCTTCGTCGAGATACGAGGTACTCCAGAGGATTAACATCCCTTCGCCCGCCAGCTCATGCACCA\n'
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/t4_variants.vcf
--- a/test-data/t4_variants.vcf Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/t4_variants.vcf Fri May 29 13:29:14 2020 -0400
b
@@ -2,20 +2,26 @@
 ##nanopolish_window=tig00000001:200000-202000
 ##INFO=<ID=TotalReads,Number=1,Type=Integer,Description="The number of event-space reads used to call the variant">
 ##INFO=<ID=SupportFraction,Number=1,Type=Float,Description="The fraction of event-space reads that support the variant">
+##INFO=<ID=SupportFractionByStrand,Number=2,Type=Float,Description="Fraction of event-space reads that support the variant for each strand">
 ##INFO=<ID=BaseCalledReadsWithVariant,Number=1,Type=Integer,Description="The number of base-space reads that support the variant">
 ##INFO=<ID=BaseCalledFraction,Number=1,Type=Float,Description="The fraction of base-space reads that support the variant">
 ##INFO=<ID=AlleleCount,Number=1,Type=Integer,Description="The inferred number of copies of the allele">
+##INFO=<ID=StrandSupport,Number=4,Type=Integer,Description="Number of reads supporting the REF and ALT allele, by strand">
+##INFO=<ID=StrandFisherTest,Number=1,Type=Integer,Description="Strand bias fisher test">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="StrandOddsRatio test from GATK">
+##INFO=<ID=RefContext,Number=1,Type=String,Description="The reference sequence context surrounding the variant call">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
-tig00000001 200061 . T TA 22.4 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.583757 GT 1
-tig00000001 200180 . C CA 30.6 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.596279 GT 1
-tig00000001 200484 . G GA 25.3 PASS TotalReads=63;AlleleCount=1;SupportFraction=0.569915 GT 1
-tig00000001 200672 . T TA 94.1 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.431464 GT 1
-tig00000001 200776 . C CA 82.3 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.362655 GT 1
-tig00000001 200796 . T TAA 117.1 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.268565 GT 1
-tig00000001 201007 . A AG 31.0 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.604821 GT 1
-tig00000001 201216 . A AT 85.8 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.457699 GT 1
-tig00000001 201273 . G GT 25.2 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.588877 GT 1
-tig00000001 201554 . G GC 49.9 PASS TotalReads=76;AlleleCount=1;SupportFraction=0.632029 GT 1
-tig00000001 201588 . C CG 125.4 PASS TotalReads=75;AlleleCount=1;SupportFraction=0.387616 GT 1
-tig00000001 201712 . C CA 21.1 PASS TotalReads=74;AlleleCount=1;SupportFraction=0.57415 GT 1
+tig00000001 200061 . T TA 36.4 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.334093;SupportFractionByStrand=0.354592,0.312974;StrandSupport=22,23,12,10;StrandFisherTest=1;SOR=0.818803;RefContext=TTGAATAAAAA GT 1
+tig00000001 200180 . C CA 45.3 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.637264;SupportFractionByStrand=0.75298,0.521547;StrandSupport=8,16,25,17;StrandFisherTest=9;SOR=0.838011;RefContext=GAGGGCAAAAT GT 1
+tig00000001 200389 . T TA 33.2 PASS TotalReads=64;AlleleCount=1;SupportFraction=0.618644;SupportFractionByStrand=0.625518,0.611327;StrandSupport=12,12,21,19;StrandFisherTest=0;SOR=0.75277;RefContext=CAACATAAAAA GT 1
+tig00000001 200484 . G GA 36.3 PASS TotalReads=63;AlleleCount=1;SupportFraction=0.600783;SupportFractionByStrand=0.593951,0.607834;StrandSupport=13,12,19,19;StrandFisherTest=0;SOR=0.641151;RefContext=TGCGCGAAAAG GT 1
+tig00000001 200672 . T TA 106.6 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.41069;SupportFractionByStrand=0.507758,0.304229;StrandSupport=17,22,17,9;StrandFisherTest=9;SOR=1.30379;RefContext=AGTAATAAAAC GT 1
+tig00000001 200776 . C CA 96.0 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.343276;SupportFractionByStrand=0.355235,0.330195;StrandSupport=23,21,12,11;StrandFisherTest=0;SOR=0.798543;RefContext=TTTCGCAAAAT GT 1
+tig00000001 200796 . T TAAA 163.5 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.273151;SupportFractionByStrand=0.306456,0.237765;StrandSupport=24,24,10,8;StrandFisherTest=2;SOR=0.992084;RefContext=GGGTGTAAAAG GT 1
+tig00000001 201007 . A AG 45.9 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.343541;SupportFractionByStrand=0.341375,0.345774;StrandSupport=22,21,11,11;StrandFisherTest=0;SOR=0.674028;RefContext=ACAAAAGGGGA GT 1
+tig00000001 201216 . A AT 101.1 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.443739;SupportFractionByStrand=0.364917,0.520243;StrandSupport=21,16,12,18;StrandFisherTest=5;SOR=0.983873;RefContext=TGGTGATTTTA GT 1
+tig00000001 201273 . G GT 39.5 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.343151;SupportFractionByStrand=0.346289,0.340197;StrandSupport=21,22,11,12;StrandFisherTest=0;SOR=0.666106;RefContext=TGGCGGTTTTT GT 1
+tig00000001 201554 . G GC 64.8 PASS TotalReads=76;AlleleCount=1;SupportFraction=0.386911;SupportFractionByStrand=0.359184,0.414639;StrandSupport=24,22,14,16;StrandFisherTest=1;SOR=0.76503;RefContext=TCGGTGCCCCA GT 1
+tig00000001 201588 . C CG 135.2 PASS TotalReads=75;AlleleCount=1;SupportFraction=0.375988;SupportFractionByStrand=0.37007,0.382065;StrandSupport=24,23,14,14;StrandFisherTest=0;SOR=0.655214;RefContext=CCCGGCGGGTG GT 1
+tig00000001 201712 . C CA 34.7 PASS TotalReads=74;AlleleCount=1;SupportFraction=0.608624;SupportFractionByStrand=0.68759,0.529659;StrandSupport=12,17,25,20;StrandFisherTest=6;SOR=0.748413;RefContext=CCGGACAAAAC GT 1
b
diff -r de5b3d8f5b90 -r 63af3144371a test-data/variants.vcf
--- a/test-data/variants.vcf Sun Jun 23 06:04:27 2019 -0400
+++ b/test-data/variants.vcf Fri May 29 13:29:14 2020 -0400
b
@@ -2,20 +2,26 @@
 ##nanopolish_window=tig00000001:200000-202000
 ##INFO=<ID=TotalReads,Number=1,Type=Integer,Description="The number of event-space reads used to call the variant">
 ##INFO=<ID=SupportFraction,Number=1,Type=Float,Description="The fraction of event-space reads that support the variant">
+##INFO=<ID=SupportFractionByStrand,Number=2,Type=Float,Description="Fraction of event-space reads that support the variant for each strand">
 ##INFO=<ID=BaseCalledReadsWithVariant,Number=1,Type=Integer,Description="The number of base-space reads that support the variant">
 ##INFO=<ID=BaseCalledFraction,Number=1,Type=Float,Description="The fraction of base-space reads that support the variant">
 ##INFO=<ID=AlleleCount,Number=1,Type=Integer,Description="The inferred number of copies of the allele">
+##INFO=<ID=StrandSupport,Number=4,Type=Integer,Description="Number of reads supporting the REF and ALT allele, by strand">
+##INFO=<ID=StrandFisherTest,Number=1,Type=Integer,Description="Strand bias fisher test">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="StrandOddsRatio test from GATK">
+##INFO=<ID=RefContext,Number=1,Type=String,Description="The reference sequence context surrounding the variant call">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
-tig00000001 200061 . T TA 22.4 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.583757 GT 1
-tig00000001 200180 . C CA 30.6 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.596279 GT 1
-tig00000001 200484 . G GA 25.3 PASS TotalReads=63;AlleleCount=1;SupportFraction=0.569915 GT 1
-tig00000001 200672 . T TA 94.1 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.431464 GT 1
-tig00000001 200776 . C CA 82.3 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.362655 GT 1
-tig00000001 200796 . T TAA 117.1 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.268565 GT 1
-tig00000001 201007 . A AG 31.0 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.604821 GT 1
-tig00000001 201216 . A AT 85.8 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.457699 GT 1
-tig00000001 201273 . G GT 25.2 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.588877 GT 1
-tig00000001 201554 . G GC 49.9 PASS TotalReads=76;AlleleCount=1;SupportFraction=0.632029 GT 1
-tig00000001 201588 . C CG 125.4 PASS TotalReads=75;AlleleCount=1;SupportFraction=0.387616 GT 1
-tig00000001 201712 . C CA 21.1 PASS TotalReads=74;AlleleCount=1;SupportFraction=0.57415 GT 1
+tig00000001 200061 . T TA 36.4 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.334093;SupportFractionByStrand=0.354592,0.312974;StrandSupport=22,23,12,10;StrandFisherTest=1;SOR=0.818803;RefContext=TTGAATAAAAA GT 1
+tig00000001 200180 . C CA 45.3 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.637264;SupportFractionByStrand=0.75298,0.521547;StrandSupport=8,16,25,17;StrandFisherTest=9;SOR=0.838011;RefContext=GAGGGCAAAAT GT 1
+tig00000001 200389 . T TA 33.2 PASS TotalReads=64;AlleleCount=1;SupportFraction=0.618644;SupportFractionByStrand=0.625518,0.611327;StrandSupport=12,12,21,19;StrandFisherTest=0;SOR=0.75277;RefContext=CAACATAAAAA GT 1
+tig00000001 200484 . G GA 36.3 PASS TotalReads=63;AlleleCount=1;SupportFraction=0.600783;SupportFractionByStrand=0.593951,0.607834;StrandSupport=13,12,19,19;StrandFisherTest=0;SOR=0.641151;RefContext=TGCGCGAAAAG GT 1
+tig00000001 200672 . T TA 106.6 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.41069;SupportFractionByStrand=0.507758,0.304229;StrandSupport=17,22,17,9;StrandFisherTest=9;SOR=1.30379;RefContext=AGTAATAAAAC GT 1
+tig00000001 200776 . C CA 96.0 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.343276;SupportFractionByStrand=0.355235,0.330195;StrandSupport=23,21,12,11;StrandFisherTest=0;SOR=0.798543;RefContext=TTTCGCAAAAT GT 1
+tig00000001 200796 . T TAAA 163.5 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.273151;SupportFractionByStrand=0.306456,0.237765;StrandSupport=24,24,10,8;StrandFisherTest=2;SOR=0.992084;RefContext=GGGTGTAAAAG GT 1
+tig00000001 201007 . A AG 45.9 PASS TotalReads=65;AlleleCount=1;SupportFraction=0.343541;SupportFractionByStrand=0.341375,0.345774;StrandSupport=22,21,11,11;StrandFisherTest=0;SOR=0.674028;RefContext=ACAAAAGGGGA GT 1
+tig00000001 201216 . A AT 101.1 PASS TotalReads=67;AlleleCount=1;SupportFraction=0.443739;SupportFractionByStrand=0.364917,0.520243;StrandSupport=21,16,12,18;StrandFisherTest=5;SOR=0.983873;RefContext=TGGTGATTTTA GT 1
+tig00000001 201273 . G GT 39.5 PASS TotalReads=66;AlleleCount=1;SupportFraction=0.343151;SupportFractionByStrand=0.346289,0.340197;StrandSupport=21,22,11,12;StrandFisherTest=0;SOR=0.666106;RefContext=TGGCGGTTTTT GT 1
+tig00000001 201554 . G GC 64.8 PASS TotalReads=76;AlleleCount=1;SupportFraction=0.386911;SupportFractionByStrand=0.359184,0.414639;StrandSupport=24,22,14,16;StrandFisherTest=1;SOR=0.76503;RefContext=TCGGTGCCCCA GT 1
+tig00000001 201588 . C CG 135.2 PASS TotalReads=75;AlleleCount=1;SupportFraction=0.375988;SupportFractionByStrand=0.37007,0.382065;StrandSupport=24,23,14,14;StrandFisherTest=0;SOR=0.655214;RefContext=CCCGGCGGGTG GT 1
+tig00000001 201712 . C CA 34.7 PASS TotalReads=74;AlleleCount=1;SupportFraction=0.608624;SupportFractionByStrand=0.68759,0.529659;StrandSupport=12,17,25,20;StrandFisherTest=6;SOR=0.748413;RefContext=CCGGACAAAAC GT 1