Repository 'bcftools_plugin_split_vep'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/bcftools_plugin_split_vep

Changeset 0:3b6cd8086498 (2022-07-23)
Next changeset 1:70276425d001 (2022-09-20)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit c45135e52ae5039e09272ac6f504d0ceb574aa70
added:
README.md
bcftools_plugin_split_vep.xml
macros.xml
test-data/23andme.fa
test-data/annotate.hdr
test-data/annotate.tab
test-data/annotate.vcf
test-data/annotate2.tab
test-data/annotate2.vcf
test-data/annotate3.vcf
test-data/annots.bcf
test-data/annots.vcf
test-data/annots2.vcf
test-data/check.vcf
test-data/cnv.vcf
test-data/cnv_baf_only.vcf
test-data/cnv_pairwise_summary.tab
test-data/cnv_summary.tab
test-data/color_chrs.dat
test-data/color_chrs_unrelated.dat
test-data/concat.1.a.vcf
test-data/concat.1.b.vcf
test-data/concat.2.a.vcf
test-data/concat.2.b.vcf
test-data/consensus.fa
test-data/consensus.tab
test-data/consensus.vcf
test-data/convert.23andme
test-data/convert.gs.gt.gen
test-data/convert.gs.gt.samples
test-data/convert.gvcf.vcf
test-data/convert.hls.gt.hap
test-data/convert.hls.gt.legend
test-data/convert.hls.gt.samples
test-data/convert.hs.gt.hap
test-data/convert.hs.gt.samples
test-data/convert.vcf
test-data/csq.fa
test-data/csq.gff3
test-data/csq.vcf
test-data/fasta_indexes.loc
test-data/filter.1.vcf
test-data/filter.2.vcf
test-data/filter.3.vcf
test-data/fixploidy.ploidy
test-data/fixploidy.samples
test-data/fixploidy.vcf
test-data/gvcf.fa
test-data/isec.a.vcf
test-data/isec.b.vcf
test-data/merge.2.a.vcf
test-data/merge.2.b.vcf
test-data/merge.3.a.vcf
test-data/merge.3.b.vcf
test-data/merge.4.a.vcf
test-data/merge.4.b.vcf
test-data/merge.a.vcf
test-data/merge.b.vcf
test-data/merge.c.vcf
test-data/mpileup.1.bam
test-data/mpileup.1.bam.bai
test-data/mpileup.2.bam
test-data/mpileup.2.bam.bai
test-data/mpileup.3.bam
test-data/mpileup.3.bam.bai
test-data/mpileup.3.cram
test-data/mpileup.X.vcf
test-data/mpileup.ploidy
test-data/mpileup.ref.fa
test-data/mpileup.samples
test-data/mpileup.vcf
test-data/norm.fa
test-data/norm.merge.vcf
test-data/norm.setref.vcf
test-data/norm.split.vcf
test-data/norm.vcf
test-data/plugin1.vcf
test-data/plugin2.vcf
test-data/plugin_frameshifts.bed
test-data/query.out
test-data/query.vcf
test-data/reheader.hdr
test-data/reheader.samples
test-data/reheader.samples2
test-data/reheader.vcf
test-data/roh.vcf
test-data/stats.a.vcf
test-data/stats.b.vcf
test-data/summary.pdf
test-data/test-cache/23andme.fa
test-data/test-cache/23andme.fa.fai
test-data/test-cache/consensus.fa
test-data/test-cache/consensus.fa.fai
test-data/test-cache/csq.fa
test-data/test-cache/csq.fa.fai
test-data/test-cache/gvcf.fa
test-data/test-cache/gvcf.fa.fai
test-data/test-cache/mpileup.ref.fa
test-data/test-cache/mpileup.ref.fa.fai
test-data/test-cache/norm.fa
test-data/test-cache/norm.fa.fai
test-data/view.GL.vcf
test-data/view.bcf
test-data/view.vcf
tool-data/fasta_indexes.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 3b6cd8086498 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Sat Jul 23 13:49:21 2022 +0000
[
@@ -0,0 +1,68 @@
+# bcftools (v1.3)
+
+Copied from branch bcftools1.2:
+
+This aims to be a "faithful" rendering of the bcftools suite. I.e. options are
+presented essentially as closely to the command line version as is useful.
+
+This may not appeal to all, if you'd like to see smaller and more dedicated
+tools (e.g. "intersect", "union" and "complement" being separate tools instead
+of all of them included in the "isec" tool), please feel free to file an issue.
+
+Updated for bcftools v1.3
+
+This was extended from the bcftools1.2 branch then greatly hand edited to
+group params and manage param interactions.
+
+In the macros.xml there are macros and tokens to handle file input and output.
+These use the datatypes currently available in galaxy: Vcf and Bcf
+The macros take care of bgzip and indexing of inputs.
+
+The convert command was split into 2 tools, "convert to vcf" and "convert from vcf"
+
+## TODO:
+
+- stats needs a matplotlib tool dependency and pdflatex for generating a pdf of plots
+- cnv needs a matplotlib tool dependency for generating images, then a means to consolidate those.
+- cnv needs an input.vcf for testing, runs with bcftools cnv -s "HG00101" -o 'HG00101/' -p 5 mpileup.vcf
+- roh needs a more useful input.vcf for testing
+- plugin color chrs
+- plugin frameshifts
+
+## Status
+
+The wrappers were automatically generated in bulk. That doesn't get them 100%
+of the way there (e.g. meaningful test cases), so the rest of the process is a
+bit slower.
+
+- [x] annotate
+- [x] call
+- [ ] cnv (needs real test data, needs plotting)
+- [x] concat
+- [x] consensus
+- [x] convert from vcf
+- [x] convert to vcf
+- [x] filter
+- [x] gtcheck
+- [x] isec
+- [x] merge
+- [x] norm
+- [x] query
+- [x] query list samples
+- [x] reheader
+- [x] roh
+- [x] stats (needs plotting)
+- [x] view
+- [ ] +color chrs
+- [x] +counts
+- [x] +dosage
+- [x] +fill an ac
+- [x] +fill tags
+- [x] +fixploidy
+- [ ] +frameshifts
+- [x] +impute info
+- [x] +mendelian
+- [x] +missing2ref
+- [x] +setgt
+- [x] +tag2tag
+- [x] +vcf2sex
b
diff -r 000000000000 -r 3b6cd8086498 bcftools_plugin_split_vep.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_plugin_split_vep.xml Sat Jul 23 13:49:21 2022 +0000
[
@@ -0,0 +1,112 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool name="bcftools @EXECUTABLE@" id="bcftools_plugin_@PLUGIN_ID@" version="@TOOL_VERSION@">
+    <description>plugin Extracts fields from structured annotations such as INFO/CSQ</description>
+    <macros>
+        <token name="@EXECUTABLE@">split-vep</token>
+        <token name="@PLUGIN_ID@">split_vep</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools" />
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <command detect_errors="aggressive"><![CDATA[
+@PREPARE_ENV@
+@PREPARE_INPUT_FILE@
+#set $section = $sec_restrict
+@PREPARE_TARGETS_FILE@
+@PREPARE_REGIONS_FILE@
+
+bcftools plugin @EXECUTABLE@
+## VCF input section
+#set $section = $sec_restrict
+@INCLUDE@
+@EXCLUDE@
+@REGIONS@
+@TARGETS@
+
+@OUTPUT_TYPE@
+
+## Primary Input/Outputs
+@INPUT_FILE@
+## Plugin options
+## ToDo: Parameters not wrapped yet: -A, -f (not for BCF/VCF output!), -l, -S, -x
+-a '$a'
+-c '$c'
+$d
+#if $p:
+    -p '$p'
+#end if
+#if $s:
+    -s '$s'
+#end if
+> '$output_file'
+]]>
+    </command>
+    <inputs>
+        <expand macro="macro_input" />
+        <param argument="-a" type="text" value="CSQ" label="Select INFO annotation to parse" help="Usually, annotations are located in the CSQ, ANN or EFF INFO field, though any arbitrary tag may be entered here." />
+        <param argument="-c" type="text" label="Enter fields to be extracted, listed either as indexes or names" help="The default type of the new annotation is String but can be also set to Integer/Int or Float/Real by adding ':type' to the field name or index. Examples can be found in the tool help." />
+        <param argument="-d" type="boolean" label="Enable Output per transcript/allele consequences on a new line rather than as comma-separated fields on a single line" truevalue="-d" falsevalue="" />
+        <param argument="-p" type="text" optional="true" label="Enter prefix of INFO annotations to be created after splitting the CSQ string (optional)" />
+        <param argument="-s" type="text" optional="true" label="Select transcripts to extract by type and/or consequence (optional)" help="Argument has the form TR:CSQ. TR = transcript:   worst,primary,all (default: all). CSQ = consequence: any,missense,missense+,etc (default: any). Examples can be found in the tool help." />
+        
+        <section name="sec_restrict" expanded="false" title="Restrict to">
+            <expand macro="macro_restrict" />
+            <expand macro="macro_restrict" type="target" label_type="Target" />
+            <expand macro="macro_include" />
+            <expand macro="macro_exclude" />
+        </section>
+        <expand macro="macro_select_output_type" />
+    </inputs>
+    <outputs>
+        <expand macro="macro_vcf_output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" ftype="vcf" value="plugin2.vcf" />
+            <param name="a" value="ANN" />
+            <param name="c" value="IMPACT,gnomAD_AF:Float" />
+            <param name="p" value="NW_" />
+            <param name="s" value="worst" />
+            <param name="output_type" value="v" />
+            <output name="output_file" ftype="vcf">
+                <assert_contents>
+                    <has_line_matching expression="##INFO=&lt;ID=NW_IMPACT,.+" />
+                    <has_line_matching expression="##INFO=&lt;ID=NW_gnomAD_AF,.+" />
+                    <has_line_matching expression=".+NW_IMPACT=MODERATE;NW_gnomAD_AF=0\.08.+" />
+                    <has_line_matching expression=".+NW_IMPACT=LOW;NW_gnomAD_AF=0\.9443.+" />
+                    <has_line_matching expression=".+NW_IMPACT=MODIFIER;NW_gnomAD_AF=\..+" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+=====================================
+ bcftools @EXECUTABLE@ plugin
+=====================================
+
+This plugin extracts fields from structured annotations such as ``INFO/CSQ`` created by bcftools/csq or VEP. These fields are then added to the VCF as new ``INFO`` fields.
+
+Examples::
+
+   # Extract Consequence, IMPACT and gene SYMBOL of the most severe consequence into
+   # INFO annotations starting with the prefix "vep". For brevity, the columns can
+   # be given also as 0-based indexes
+   bcftools +split-vep -c Consequence,IMPACT,SYMBOL -s worst -p vep file.vcf.gz
+   bcftools +split-vep -c 1-3 -s worst -p vep file.vcf.gz
+
+   # Extract gnomAD_AF subfield into a new INFO/gnomAD_AF annotation of Type=Float so that
+   # numeric filtering can be used.
+   bcftools +split-vep -c gnomAD_AF:Float file.vcf.gz -i 'gnomAD_AF<0.001'
+
+   # Similar to above, but add the annotation only if the consequence severity is missense
+   # or equivalent.
+   bcftools +split-vep -c gnomAD_AF:Float -s :missense file.vcf.gz
+
+@REGIONS_HELP@
+@TARGETS_HELP@
+@EXPRESSIONS_HELP@
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 3b6cd8086498 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sat Jul 23 13:49:21 2022 +0000
[
b'@@ -0,0 +1,723 @@\n+<macros>\n+  <token name="@TOOL_VERSION@">1.10</token>\n+  <xml name="bio_tools">\n+      <xrefs>\n+          <xref type="bio.tools">bcftools</xref>\n+      </xrefs>\n+  </xml>\n+  <xml name="requirements">\n+    <requirements>\n+      <requirement type="package" version="@TOOL_VERSION@">bcftools</requirement>\n+      <requirement type="package" version="1.10">htslib</requirement>\n+      <yield />\n+    </requirements>\n+  </xml>\n+  <xml name="samtools_requirement">\n+      <requirement type="package" version="1.10">samtools</requirement>\n+  </xml>\n+  <xml name="matplotlib_requirement">\n+      <requirement type="package" version="3.4.3">matplotlib</requirement>\n+  </xml>\n+  <xml name="version_command">\n+    <version_command>bcftools 2&gt;&amp;1 | grep \'Version:\'</version_command>\n+  </xml>\n+\n+  <xml name="citations">\n+    <citations>\n+      <citation type="doi">10.1093/bioinformatics/btp352</citation>\n+      <yield />\n+    </citations>\n+  </xml>\n+  <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token>\n+  <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token>\n+  <token name="@THREADS@">\n+  --threads \\${GALAXY_SLOTS:-4}\n+  </token>\n+  <token name="@PREPARE_ENV@">\n+<![CDATA[\n+export BCFTOOLS_PLUGINS=`which bcftools | sed \'s,bin/bcftools,libexec/bcftools,\'`;\n+]]>\n+  </token>\n+  <xml name="macro_input">\n+    <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf" label="VCF/BCF Data" />\n+  </xml>\n+  <token name="@PREPARE_INPUT_FILE@">\n+<![CDATA[\n+## May need to symlink input if there is an associated\n+#set $input_vcf = \'input.vcf.gz\'\n+#if $input_file.is_of_type(\'vcf\')\n+  bgzip -c \'$input_file\' > $input_vcf &&\n+  bcftools index $input_vcf &&\n+#elif $input_file.is_of_type(\'vcf_bgzip\')\n+  ln -s \'$input_file\' $input_vcf &&\n+  #if $input_file.metadata.tabix_index:\n+    ln -s \'${input_file.metadata.tabix_index}\' ${input_vcf}.tbi 
&&\n+  #else\n+    bcftools index $input_vcf &&\n+  #end if\n+#elif $input_file.is_of_type(\'bcf\')\n+  #set $input_vcf = \'input.bcf\'\n+  ln -s \'$input_file\' $input_vcf &&\n+  #if $input_file.metadata.bcf_index:\n+    ln -s \'${input_file.metadata.bcf_index}\' ${input_vcf}.csi &&\n+  #else\n+    bcftools index $input_vcf &&\n+  #end if\n+#end if\n+]]>\n+  </token>\n+  <token name="@INPUT_FILE@">\n+$input_vcf\n+  </token>\n+\n+  <xml name="macro_inputs">\n+    <param name="input_files" type="data" format="vcf,vcf_bgzip,bcf" label="Other VCF/BCF Datasets" multiple="True" />\n+  </xml>\n+  <token name="@PREPARE_INPUT_FILES@">\n+<![CDATA[\n+## May need to symlink input if there is an associated\n+#set $input_vcfs = []\n+#set $vcfs_list_file = \'vcfs_list\'\n+#for (i, input_file) in enumerate($input_files):\n+  #set $input_vcf = \'input\' + str($i) + \'.vcf.gz\'\n+  #if $input_file.is_of_type(\'vcf\')\n+    bgzip -c \'$input_file\' > $input_vcf &&\n+    bcftools index $input_vcf &&\n+  #elif $input_file.is_of_type(\'vcf_bgzip\')\n+    ln -s \'$input_file\' $input_vcf &&\n+    #if $input_file.metadata.tabix_index:\n+      ln -s \'${input_file.metadata.tabix_index}\' ${input_vcf}.tbi &&\n+    #else\n+      bcftools index $input_vcf &&\n+    #end if\n+  #elif $input_file.is_of_type(\'bcf\')\n+    #set $input_vcf = \'input\' + str($i) + \'.bcf.gz\'\n+    ln -s \'$input_file\' $input_vcf &&\n+    #if $input_file.metadata.bcf_index:\n+      ln -s \'${input_file.metadata.bcf_index}\' ${input_vcf}.csi &&\n+    #else\n+      bcftools index $input_vcf &&\n+    #end if\n+  #end if\n+  echo \'$input_vcf\' >> $vcfs_list_file &&\n+  $input_vcfs.append($input_vcf)\n+#end for\n+]]>\n+  </token>\n+  <token name="@INPUT_FILES@">\n+#echo \' \'.join($input_vcfs)#\n+  </token>\n+  <token name="@INPUT_LIST_FILE@">\n+$vcfs_list_file\n+  </token>\n+\n+  <xml name="test_using_reference" token_select_from="history" token_ref="">\n+    <conditional name="reference_source">\n+        <param 
name="reference_source_selector" value="@SELECT_FROM@" />\n+        <param name="fasta_ref" ftype="fasta" value="@REF@" />\n+    </conditional>\n+  </xml>\n+\n+  <xml name="macro_fas'..b' or not. For duplicate positions, only the first  |\n+|            | SNP record will be considered and appear on output.            |\n++------------+----------------------------------------------------------------+\n+| indels     | all indel records are compatible, regardless of whether the    |\n+|            | REF and ALT alleles match or not. For duplicate positions,     |\n+|            | only the first indel record will be considered and appear on   |\n+|            | output.                                                        |\n++------------+----------------------------------------------------------------+\n+| both       | abbreviation of "-c indels  -c snps"                           |\n++------------+----------------------------------------------------------------+\n+| id         | only records with identical ID column are compatible.          |\n+|            | Supportedby bcftools merge only.                               
|\n++------------+----------------------------------------------------------------+\n+  </token>\n+\n+  <token name="@EXPRESSIONS_HELP@">\n+      <![CDATA[\n+Expressions\n+-----------\n+\n+Valid expressions may contain:\n+\n+-  numerical constants, string constants\n+\n+   ::\n+\n+      1, 1.0, 1e-4\n+      "String"\n+\n+-  arithmetic operators\n+\n+   ::\n+\n+      +,*,-,/\n+\n+-  comparison operators\n+\n+   ::\n+\n+      == (same as =), >, >=, <=, <, !=\n+\n+-  regex operators "~" and its negation "!~"\n+\n+   ::\n+\n+      INFO/HAYSTACK ~ "needle"\n+\n+-  parentheses\n+\n+   ::\n+\n+      (, )\n+\n+-  logical operators\n+\n+   ::\n+\n+      && (same as &), ||,  |\n+\n+-  INFO tags, FORMAT tags, column names\n+\n+   ::\n+\n+      INFO/DP or DP\n+      FORMAT/DV, FMT/DV, or DV\n+      FILTER, QUAL, ID, REF, ALT[0]\n+\n+-  1 (or 0) to test the presence (or absence) of a flag\n+\n+   ::\n+\n+      FlagA=1 && FlagB=0\n+\n+-  "." to test missing values\n+\n+   ::\n+\n+      DP=".", DP!=".", ALT="."\n+\n+-  missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression\n+\n+   ::\n+\n+      GT="."\n+\n+-  TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)\n+\n+   ::\n+\n+      TYPE="indel" | TYPE="snp"\n+\n+-  array subscripts, "*" for any field\n+\n+   ::\n+\n+      (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3\n+      DP4[*] == 0\n+      CSQ[*] ~ "missense_variant.*deleterious"\n+\n+-  function on FORMAT tags (over samples) and INFO tags (over vector fields)\n+\n+   ::\n+\n+      MAX, MIN, AVG, SUM, STRLEN, ABS\n+\n+-  variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes\n+\n+   ::\n+\n+      N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN\n+\n+**Notes:**\n+\n+-  String 
comparisons and regular expressions are case-insensitive\n+-  If the subscript "*" is used in regular expression search, the whole field\n+   is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be\n+   true for the string vector INFO/STR=AB,CD.\n+-  Variables and function names are case-insensitive, but not tag names. For\n+   example, "qual" can be used instead of "QUAL", "strlen()" instead of\n+   "STRLEN()" , but not "dp" instead of "DP".\n+\n+**Examples:**\n+\n+   ::\n+\n+      MIN(DV)>5\n+      MIN(DV/DP)>0.3\n+      MIN(DP)>10 & MIN(DV)>3\n+      FMT/DP>10  & FMT/GQ>10 .. both conditions must be satisfied within one sample\n+      FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples\n+      QUAL>10 |  FMT/GQ>10   .. selects only GQ>10 samples\n+      QUAL>10 || FMT/GQ>10   .. selects all samples at QUAL>10 sites\n+      TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)\n+      MIN(DP)>35 && AVG(GQ)>50\n+      ID=@file       .. selects lines with ID present in the file\n+      ID!=@~/file    .. skip lines with ID present in the ~/file\n+      MAF[0]<0.05    .. select rare variants at 5% cutoff\n+  ]]></token>\n+</macros>\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/23andme.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/23andme.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,14 @@
+>1
+CACGTNACGGCTGAAGTCCAAGGTAC
+CGTATCGAGTTCACAGTCGATAGCTC
+GATCGATAGCATCGCTAGCNNNACTA
+CGATCGATCGCTCTCCGTAACACTCA
+AAAACGATCGATCGACTGCTCTTTAG
+CGATGACTTTAGGGGAAAAA
+>2
+CGCTCAGCCGTACAGCCGAGCAGGAC
+ACGCTATTTTAGATCGACTGGCTNNG
+CGCTAGCTACGCTTTAGCACGAGAA
+>Y
+NNNGCATACGTGTCCATCACGATGAT
+AGCGATGATCGATC
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annotate.hdr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.hdr Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,4 @@
+##INFO=<ID=T_STR,Number=1,Type=String,Description="Test String">
+##INFO=<ID=T_INT,Number=.,Type=Integer,Description="Test Integer">
+##INFO=<ID=T_FLOAT,Number=.,Type=Float,Description="Test Float">
+##INFO=<ID=INDEL,Number=0,Type=Float,Description="Test Flag">
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annotate.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.tab Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,18 @@
+3 3212016 CTT C,CT indel_3212016 . . . 1
+4 3258448 TACACACAC T indel_3258448 . . . 1
+4 4000000 T C id1 . . . 1
+4 4000001 T C,A id2 . . . 1
+2 3199812 G GTT,GT indel_3199812 . . . 1
+1 3000150 C CT indel_3000150 . . . 1
+1 3000150 C T snp_3000150 999 1,2 1e-10,2e-10 .
+1 3000151 C T snp_3000151 999 1 2e-10 .
+1 3062915 G T,C snp_3062915 999 1 2e-10 .
+1 3062915 GTTT G indel_3062915 . . . 1
+1 3106154 A C snp_3106154 999 1 2e-10 .
+1 3106154 C CT indel_3106154 . . . 1
+1 3106154 CAAA C indel_3106154 . . . 1
+1 3157410 GA G indel_3157410 . . . 1
+1 3162006 GAA G indel_3162006 . . . 1
+1 3177144 G . ref_3177144 999 1 2e-10 .
+1 3177144 G T snp_3177144 999 1 2e-10 0
+1 3184885 TAAAA TA,T indel_3184885 . . . 1
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annotate.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##test=<ID=4,IE=5>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000000 . T A,C 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000001 . T A 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annotate2.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.tab Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,2 @@
+2 3000000 3199812 region_3000000_3199812
+1 3000150 3106154 region_3000150_3106154
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annotate2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="Test type">
+##INFO=<ID=IINT,Number=1,Type=Integer,Description="Test type">
+##INFO=<ID=IFLT,Number=1,Type=Float,Description="Test type">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test type">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FINT,Number=1,Type=Integer,Description="Test type">
+##FORMAT=<ID=FFLT,Number=1,Type=Float,Description="Test type">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test type">
+##FILTER=<ID=q11,Description="Quality below 10">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 3000001 xx C T 11 PASS FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:xxx 0/0:11:1.1:x 0/0:11:1.1:x
+1 3000002 . C T . . . GT . . .
+1 3000003 xx C T 11 q11 FLAG;IINT=.;IFLT=.;ISTR=. GT:FINT:FFLT:FSTR 0/0:.:.:. 0/0:.:.:. 0/0:.:.:.
+1 3000004 xx C T 11 q11 FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:x 0/0:11:1.1:xxx 0/0:11:1.1:xxx
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annotate3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate3.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,21 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype LK">
+##FORMAT=<ID=X,Number=1,Type=Integer,Description="rmme">
+##FORMAT=<ID=Y,Number=1,Type=Integer,Description="rmme">
+##FORMAT=<ID=AA,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=AA,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=BB,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=X,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=Y,Number=1,Type=Integer,Description="rmme">
+##FILTER=<ID=fltA,Description="rmme">
+##FILTER=<ID=fltB,Description="rmme">
+##FILTER=<ID=fltX,Description="rmme">
+##FILTER=<ID=fltY,Description="rmme">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 id C . 20 . AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000001 id C . 20 PASS AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000002 id C . 20 fltY;fltA;fltB;fltX BB=2;X=3;Y=4;AA=1 GT:Y:X:PL:AA 0/1:3:1:2:1 0/1:3:1:2:1
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annots.bcf
b
Binary file test-data/annots.bcf has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annots.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,37 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##test=<ID=4,IE=5>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 id1 C T 99 PASS STR=id1;AN=4;AC=0 GT:GQ 0|0:999 0|0:999
+1 3000151 id2 C T 99 PASS STR=id2;AN=4;AC=0 GT:DP:GQ 0|0:99:999 0|0:99:999
+1 3062915 idIndel GTTT G 99 PASS DP4=1,2,3,4;AN=4;AC=0;INDEL;STR=testIndel GT:GQ:DP:GL 0|0:999:99:-99,-9,-99 0|0:999:99:-99,-9,-99
+1 3062915 idSNP G T,C 99 PASS STR=testSNP;TEST=5;DP4=1,2,3,4;AN=3;AC=0,0 GT:TT:GQ:DP:GL 0|0:9,9:999:99:-99,-9,-99,-99,-9,-99 0:9,9:999:99:-99,-9,-99
+1 3106154 id4 CAAA C 99 PASS STR=id4;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3106154 id5 C CT 99 PASS STR=id5;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3157410 id6 GA GC,G 99 PASS STR=id6;AN=4;AC=0 GT:GQ:DP 0|0:99:99 0|0:99:99
+1 3162006 id7 GAA GG 99 PASS STR=id7;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id8 G T 99 PASS STR=id8;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id9 G . 99 PASS STR=id9;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3184885 id10 TAAAA TA,T 99 PASS STR=id10;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+2 3199812 id11 G GTT,GT 99 PASS STR=id11;AN=4;AC=0,0 GT:GQ:DP 0|0:999:99 0|0:999:99
+3 3212016 id12 CTT C,CT 99 PASS STR=id12;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+4 3258448 id13 TACACACAC T 99 PASS STR=id13;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
b
diff -r 000000000000 -r 3b6cd8086498 test-data/annots2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots2.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=q99,Description="Quality below 10">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="Test type">
+##INFO=<ID=IINT,Number=.,Type=Integer,Description="Test type">
+##INFO=<ID=IFLT,Number=.,Type=Float,Description="Test type">
+##INFO=<ID=ISTR,Number=.,Type=String,Description="Test type">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FINT,Number=.,Type=Integer,Description="Test type">
+##FORMAT=<ID=FFLT,Number=.,Type=Float,Description="Test type">
+##FORMAT=<ID=FSTR,Number=.,Type=String,Description="Test type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B A
+1 3000001 . C T . . . GT . .
+1 3000002 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000003 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000004 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
b
diff -r 000000000000 -r 3b6cd8086498 test-data/check.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/check.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,38 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:409:35:-20,-5,-20,-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . G A 59.2 PASS AN=4;AC=1 GT:GQ:DP 0/1:245:32 0/0:245:32
+1 3157410 . G A 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . G A 60.2 PASS AN=4;AC=3 GT:GQ:DP 1/1:212:22 0/1:212:22
+1 3177144 . GT G 45 PASS AN=4;AC=2 GT:GQ:DP 0/1:150:30 0/1:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258449 . GCAAA GA,G 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258450 . AAAAGAAAAAG A,AAAAAAG 59.9 PASS DP=60;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258451 . AAA AGT 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258452 . AAA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . AACA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . ACA AAGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258454 . AACA AACA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 3b6cd8086498 test-data/cnv.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cnv.vcf Sat Jul 23 13:49:21 2022 +0000
b
b'@@ -0,0 +1,192 @@\n+##fileformat=VCFv4.2\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GC,Number=1,Type=Float,Description="GenCall score">\n+##FORMAT=<ID=IA,Number=1,Type=Float,Description="Intensity of the A Allele">\n+##FORMAT=<ID=IB,Number=1,Type=Float,Description="Intensity of the B Allele">\n+##FORMAT=<ID=BAF,Number=1,Type=Float,Description="B Allele Frequency">\n+##FORMAT=<ID=LRR,Number=1,Type=Float,Description="Log R Ratio">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ttest\n+10\t135656\trs10904561\tT\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.0415:0.5176:0.543:0.661:0.9477\n+10\t135708\trs7917054\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.2441:0.501:0.742:0.659:0.9604\n+10\t178434\trs7089889\tT\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.1440:0.4833:0.836:0.827:0.8884\n+10\t188805\trs12146291\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.2144:0.4715:0.776:0.743:0.9610\n+10\t203471\trs10903451\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.4201:0.0000:0.619:0.022:0.9337\n+10\t252693\trs2379078\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.2673:0.0068:0.806:0.081:0.9018\n+10\t273344\trs2448378\tA\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.7407:0.0000:0.497:0.012:0.9411\n+10\t279248\trs2496278\tC\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.4228:0.6268:0.285:0.958:0.6772\n+10\t291134\trs10508201\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.1085:0.4723:0.317:0.444:0.9106\n+10\t293358\texm2249181\tA\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0097:0.0044:1.507:0.061:0.3992\n+10\t313504\trs4880568\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:-0.5836:0.9803:0.017:0.321:0.9547\n+10\t317501\trs2018975\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.3392:0:1.069:0.039:0.7852\n+10\t323283\trs3740304\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.1836:0.5588:1.053:1.110:0.8640\n+10\t326894\trs3125027\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0517:0.0005:1.300:0.026:0.8916\n+10\t327162\texm805022\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1197:0:0.995:0.015:0.5
328\n+10\t329493\trs3815985\tC\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.0480:0.5187:0.824:0.794:0.9109\n+10\t338553\trs3125031\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.3387:0.4696:0.777:1.004:0.7901\n+10\t354301\trs35198327\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.1338:0.4929:0.903:0.996:0.8546\n+10\t375475\tvariant.11341\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0747:0.000199999999999978:1.132:0.012:0.4773\n+10\t377161\trs7070654\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.1388:0:0.582:0.020:0.9458\n+10\t387060\trs7904155\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:0.0615:0.9945:0.080:1.883:0.8310\n+10\t390962\texm805090\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0087:9.9999999999989e-05:1.226:0.063:0.6950\n+10\t390983\texm805091\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.3606:0:0.398:0.035:0.4585\n+10\t394615\texm805105\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0170:0:1.345:0.020:0.4108\n+10\t402390\texm805131\tG\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0341:0.0000:1.127:0.023:0.8735\n+10\t403792\texm805136\tG\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1428:0.0000:0.908:0.019:0.8483\n+10\t410501\texm805166\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0189:0:1.162:0.050:0.4628\n+10\t413010\trs9787422\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.0871:0.5334:0.908:0.961:0.8338\n+10\t423555\trs12245012\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.1264:0.00149999999999995:1.658:0.040:0.8015\n+10\t423671\trs10904067\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.3622:0.5566:0.326:0.370:0.9306\n+10\t431161\texm2271240\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.0175:0.4838:0.935:1.004:0.8308\n+10\t435971\texm2249233\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.2716:0.0000:1.217:0.042:0.3992\n+10\t435990\texm805214\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1427:0:1.107:0.031:0.4295\n+10\t453567\trs4881254\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.1831:0.5079:1.032:1.425:0.8149\n+10\t462885\trs10904173\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.0930:0.4819:0.568:0.489:0.9332\
n+10\t465046\texm805254\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1108:0:1.041:0.038:0.8704\n+10\t468599\trs12415961\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.1647:0.5021:1.192:0.825:0.8051\n+10\t486935\texm805280\tG\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1369:0.0186:0.643:0.060:0.4804\n+10\t487973\trs4881313\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:0.0644:0.5628:0.747:0.992:0.8463\n+10\t521431\texm2271'..b'16650\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0897:0:1.053:0.009:0.9557\n+11\t615782\trs17221309\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.2800:0.00319999999999998:1.764:0.035:0.7820\n+11\t635857\trs7069611\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0450:0.0013:0.593:0.017:0.9595\n+11\t669358\trs7898821\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1257:0:1.694:0.067:0.7270\n+11\t673037\trs816563\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:0.1368:0.9065:0.167:0.495:0.4895\n+11\t680428\trs2124585\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:-0.0590:0.5212:0.660:1.123:0.7133\n+11\t712617\trs17136372\tA\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:-0.0174:0.9992:0.028:0.975:0.9556\n+11\t738630\trs1750792\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0269:0:1.575:0.025:0.8126\n+11\t749238\trs553326\tG\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.1758:0.00229999999999997:0.955:0.019:0.9300\n+11\t754033\trs2265090\tC\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1347:0:1.181:0.025:0.9176\n+11\t757222\trs10904546\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.3035:0.0000:1.975:0.056:0.8047\n+11\t766105\trs7906313\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1672:0.0116:0.960:0.062:0.9381\n+11\t776027\trs11253377\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:-0.1474:0.9964:0.046:1.017:0.9087\n+11\t787896\trs1769242\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.2049:0.0000:0.965:0.029:0.9107\n+11\t800213\trs2254501\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:0.2422:0.9918:0.027:0.612:0.9374\n+11\t802445\trs1769215\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0188:0.00370000000000004:0.859:0.024:0.9303\n+11\t
803721\trs2790381\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:-0.0421:0.9798:0.029:0.485:0.9700\n+11\t811876\trs12414585\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.0717:0.516:0.374:0.276:0.9351\n+11\t813426\trs11253424\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.5256:0:0.333:0.000:0.8993\n+11\t815149\trs7084027\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/1:0.3183:0.9944:0.037:1.547:0.8419\n+11\t820868\trs10752019\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0749:0.0118:1.349:0.070:0.8882\n+11\t833528\texm2271241\tT\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:-0.0329:0.4896:0.569:0.681:0.9348\n+11\t834736\trs4881530\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0453:0.00249999999999995:1.578:0.040:0.8076\n+11\t836115\trs11253444\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:-0.1380:0.5199:0.596:0.670:0.9248\n+11\t838179\trs1536337\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0534:0.0000:1.318:0.095:0.7967\n+11\t858022\trs9124\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.5565:0:0.715:0.012:0.9397\n+11\t858924\texm805329\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0866:0:0.873:0.021:0.9056\n+11\t859047\texm805338\tC\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.1868:0:0.917:0.007:0.5296\n+11\t860687\texm805350\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1509:0:1.172:0.059:0.3777\n+11\t860726\texm805354\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0224:0.0000:2.255:0.138:0.3747\n+11\t860970\texm805360\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1069:0.0000:1.859:0.070:0.7352\n+11\t871110\texm805378\tG\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0701:0.0000:1.373:0.002:0.4607\n+11\t871746\texm805386\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1743:0.00239999999999996:0.597:0.017:0.5312\n+11\t875350\texm805392\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.1814:0:0.924:0.018:0.5191\n+11\t888899\texm805400\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0585:0.00470000000000004:1.182:0.028:0.5116\n+11\t888916\texm805401\tT\tC\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.1408:0.0000:1.480:0.041:0.4128\n+11\t894838\trs12249828
\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.2119:0.4839:1.145:1.256:0.8432\n+11\t903950\trs11253471\tA\tG\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/1:-0.0047:0.4747:0.562:0.549:0.9470\n+11\t909757\texm805413\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0614:0:1.538:0.027:0.4097\n+11\t909766\texm805415\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0578:0:1.502:0.023:0.4128\n+11\t910074\texm805420\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0503:0.0071:1.001:0.057:0.5002\n+11\t910081\texm805421\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.1352:0.002:1.125:0.033:0.4254\n+11\t927331\trs11253489\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t1/0:0.0370:0.5325:0.976:1.435:0.7729\n+11\t931618\texm805430\tG\tA\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:0.0474:0:1.222:0.009:0.4871\n+11\t931631\texm805431\tC\tT\t.\t.\t.\tGT:LRR:BAF:IA:IB:GC\t0/0:-0.0819:0.00390000000000001:0.827:0.023:0.5296\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/cnv_baf_only.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cnv_baf_only.vcf Sat Jul 23 13:49:21 2022 +0000
b
b'@@ -0,0 +1,188 @@\n+##fileformat=VCFv4.2\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=BAF,Number=1,Type=Float,Description="B Allele Frequency">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ttest\n+10\t135656\trs10904561\tT\tG\t.\t.\t.\tGT:BAF\t0/1:0.5176\n+10\t135708\trs7917054\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.501\n+10\t178434\trs7089889\tT\tG\t.\t.\t.\tGT:BAF\t0/1:0.4833\n+10\t188805\trs12146291\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.4715\n+10\t203471\trs10903451\tA\tG\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t252693\trs2379078\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0068\n+10\t273344\trs2448378\tA\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t279248\trs2496278\tC\tA\t.\t.\t.\tGT:BAF\t1/0:0.6268\n+10\t291134\trs10508201\tT\tC\t.\t.\t.\tGT:BAF\t0/1:0.4723\n+10\t293358\texm2249181\tA\tC\t.\t.\t.\tGT:BAF\t0/0:0.0044\n+10\t313504\trs4880568\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9803\n+10\t317501\trs2018975\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t323283\trs3740304\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.5588\n+10\t326894\trs3125027\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0005\n+10\t327162\texm805022\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t329493\trs3815985\tC\tA\t.\t.\t.\tGT:BAF\t1/0:0.5187\n+10\t338553\trs3125031\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4696\n+10\t354301\trs35198327\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4929\n+10\t375475\tvariant.11341\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.000199999999999978\n+10\t377161\trs7070654\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t387060\trs7904155\tG\tA\t.\t.\t.\tGT:BAF\t1/1:0.9945\n+10\t390962\texm805090\tG\tA\t.\t.\t.\tGT:BAF\t0/0:9.9999999999989e-05\n+10\t390983\texm805091\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t394615\texm805105\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t402390\texm805131\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t403792\texm805136\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t410501\texm805166\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t413010\trs9787422\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5334\n+10\t423555\trs12245012\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00149999999999995\n+10\t423671\trs10904067\tC\
tT\t.\t.\t.\tGT:BAF\t1/0:0.5566\n+10\t431161\texm2271240\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4838\n+10\t435971\texm2249233\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t435990\texm805214\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t453567\trs4881254\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5079\n+10\t462885\trs10904173\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.4819\n+10\t465046\texm805254\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t468599\trs12415961\tT\tC\t.\t.\t.\tGT:BAF\t0/1:0.5021\n+10\t486935\texm805280\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0186\n+10\t487973\trs4881313\tT\tC\t.\t.\t.\tGT:BAF\t0/1:0.5628\n+10\t521431\texm2271370\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9952\n+10\t521723\trs4881336\tC\tT\t.\t.\t.\tGT:BAF\t1/1:1\n+10\t547567\trs816599\tA\tG\t.\t.\t.\tGT:BAF\t1/1:1.0000\n+10\t554186\trs12251997\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.00309999999999999\n+10\t556129\trs10466270\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.5142\n+10\t566379\trs11252926\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5024\n+10\t588406\trs10904450\tA\tG\t.\t.\t.\tGT:BAF\t0/0:0.0006\n+10\t592283\trs816646\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.5006\n+10\t601089\trs816650\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t615782\trs17221309\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00319999999999998\n+10\t635857\trs7069611\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0013\n+10\t669358\trs7898821\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t673037\trs816563\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9065\n+10\t680428\trs2124585\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.5212\n+10\t712617\trs17136372\tA\tC\t.\t.\t.\tGT:BAF\t1/1:0.9992\n+10\t738630\trs1750792\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t749238\trs553326\tG\tT\t.\t.\t.\tGT:BAF\t0/0:0.00229999999999997\n+10\t754033\trs2265090\tC\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t757222\trs10904546\tA\tG\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t766105\trs7906313\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0116\n+10\t776027\trs11253377\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9964\n+10\t787896\trs1769242\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t800213\trs2254501\tG\tA\t.\t.\t.\tGT:BAF\t1/1:0.9918\n+10\t802445\trs1769215\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.00370000000000
004\n+10\t803721\trs2790381\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9798\n+10\t811876\trs12414585\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.516\n+10\t813426\trs11253424\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t815149\trs7084027\tA\tG\t.\t.\t.\tGT:BAF\t1/1:0.9944\n+10\t820868\trs10752019\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.0118\n+10\t833528\texm2271241\tT\tG\t.\t.\t.\tGT:BAF\t0/1:0.4896\n+10\t834736\trs4881530\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00249999999999995\n+10\t836115\trs11253444\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.5199\n+10\t838179\trs1536337\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+10\t858022\trs9124\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t858924\texm805329\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t859047\texm805338\tC\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t860687\texm805350\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+10\t860726\texm'..b'\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t323283\trs3740304\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.5588\n+11\t326894\trs3125027\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0005\n+11\t327162\texm805022\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t329493\trs3815985\tC\tA\t.\t.\t.\tGT:BAF\t1/0:0.5187\n+11\t338553\trs3125031\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4696\n+11\t354301\trs35198327\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4929\n+11\t375475\tvariant.11341\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.000199999999999978\n+11\t377161\trs7070654\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t387060\trs7904155\tG\tA\t.\t.\t.\tGT:BAF\t1/1:0.9945\n+11\t390962\texm805090\tG\tA\t.\t.\t.\tGT:BAF\t0/0:9.9999999999989e-05\n+11\t390983\texm805091\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t394615\texm805105\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t402390\texm805131\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t403792\texm805136\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t410501\texm805166\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t413010\trs9787422\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5334\n+11\t423555\trs12245012\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00149999999999995\n+11\t423671\trs10904067\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5566\n+11\t431161\texm2271240\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4838\n+11\t435971\texm2249233\tT\tC\t.\t.\t.\tGT:BA
F\t0/0:0.0000\n+11\t435990\texm805214\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t453567\trs4881254\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5079\n+11\t462885\trs10904173\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.4819\n+11\t465046\texm805254\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t468599\trs12415961\tT\tC\t.\t.\t.\tGT:BAF\t0/1:0.5021\n+11\t486935\texm805280\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0186\n+11\t487973\trs4881313\tT\tC\t.\t.\t.\tGT:BAF\t0/1:0.5628\n+11\t521431\texm2271370\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9952\n+11\t521723\trs4881336\tC\tT\t.\t.\t.\tGT:BAF\t1/1:1\n+11\t547567\trs816599\tA\tG\t.\t.\t.\tGT:BAF\t1/1:1.0000\n+11\t554186\trs12251997\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.00309999999999999\n+11\t556129\trs10466270\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.5142\n+11\t566379\trs11252926\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.5024\n+11\t588406\trs10904450\tA\tG\t.\t.\t.\tGT:BAF\t0/0:0.0006\n+11\t592283\trs816646\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.5006\n+11\t601089\trs816650\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t615782\trs17221309\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00319999999999998\n+11\t635857\trs7069611\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0013\n+11\t669358\trs7898821\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t673037\trs816563\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9065\n+11\t680428\trs2124585\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.5212\n+11\t712617\trs17136372\tA\tC\t.\t.\t.\tGT:BAF\t1/1:0.9992\n+11\t738630\trs1750792\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t749238\trs553326\tG\tT\t.\t.\t.\tGT:BAF\t0/0:0.00229999999999997\n+11\t754033\trs2265090\tC\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t757222\trs10904546\tA\tG\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t766105\trs7906313\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0116\n+11\t776027\trs11253377\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9964\n+11\t787896\trs1769242\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t800213\trs2254501\tG\tA\t.\t.\t.\tGT:BAF\t1/1:0.9918\n+11\t802445\trs1769215\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.00370000000000004\n+11\t803721\trs2790381\tC\tT\t.\t.\t.\tGT:BAF\t1/1:0.9798\n+11\t811876\trs12414585\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.516\n+11\t813426\trs
11253424\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t815149\trs7084027\tA\tG\t.\t.\t.\tGT:BAF\t1/1:0.9944\n+11\t820868\trs10752019\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.0118\n+11\t833528\texm2271241\tT\tG\t.\t.\t.\tGT:BAF\t0/1:0.4896\n+11\t834736\trs4881530\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00249999999999995\n+11\t836115\trs11253444\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.5199\n+11\t838179\trs1536337\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t858022\trs9124\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t858924\texm805329\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t859047\texm805338\tC\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t860687\texm805350\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t860726\texm805354\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t860970\texm805360\tA\tG\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t871110\texm805378\tG\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t871746\texm805386\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0.00239999999999996\n+11\t875350\texm805392\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t888899\texm805400\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.00470000000000004\n+11\t888916\texm805401\tT\tC\t.\t.\t.\tGT:BAF\t0/0:0.0000\n+11\t894838\trs12249828\tC\tT\t.\t.\t.\tGT:BAF\t1/0:0.4839\n+11\t903950\trs11253471\tA\tG\t.\t.\t.\tGT:BAF\t0/1:0.4747\n+11\t909757\texm805413\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t909766\texm805415\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t910074\texm805420\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.0071\n+11\t910081\texm805421\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.002\n+11\t927331\trs11253489\tG\tA\t.\t.\t.\tGT:BAF\t1/0:0.5325\n+11\t931618\texm805430\tG\tA\t.\t.\t.\tGT:BAF\t0/0:0\n+11\t931631\texm805431\tC\tT\t.\t.\t.\tGT:BAF\t0/0:0.00390000000000001\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/cnv_pairwise_summary.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cnv_pairwise_summary.tab Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,6 @@
+# This file was produced by: bcftools cnv*
+# The command line was: bcftools cnv .+ test .+ test *
+#
+# RG, Regions *
+RG 10 135656 931631 2 2 21.7 92 27 92 27
+RG 11 135656 931631 2 2 21.7 92 27 92 27
b
diff -r 000000000000 -r 3b6cd8086498 test-data/cnv_summary.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cnv_summary.tab Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,7 @@
+# RG, Regions *
+# This file was produced by: bcftools cnv*
+# The command line was: bcftools cnv *
+#
+# RG, Regions *
+RG 10 135656 931631 2 22.6 92 27
+RG 11 135656 931631 2 22.6 92 27
b
diff -r 000000000000 -r 3b6cd8086498 test-data/color_chrs.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/color_chrs.dat Sat Jul 23 13:49:21 2022 +0000
[
@@ -0,0 +1,5 @@
+# SG, shared segment [2]Chromosome [3]Start [4]End [5]B:1 [6]B:2
+# SW, number of switches [3]Sample [4]Chromosome [5]nHets [5]nSwitches [6]switch rate
+SG 1 1 999 A:1 C:1
+SW A 1 0 0 0.000000
+SW C 1 0 0 0.000000
b
diff -r 000000000000 -r 3b6cd8086498 test-data/color_chrs_unrelated.dat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/color_chrs_unrelated.dat Sat Jul 23 13:49:21 2022 +0000
[
@@ -0,0 +1,5 @@
+# SG, shared segment [2]Chromosome [3]Start [4]End [5]B:1 [6]B:2
+# SW, number of switches [3]Sample [4]Chromosome [5]nHets [5]nSwitches [6]switch rate
+SG 1 1 999 A:1 -
+SW A 1 0 0 0.000000
+SW (null) 1 0 0 0.000000
b
diff -r 000000000000 -r 3b6cd8086498 test-data/concat.1.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=Fail,Description="Test">
+##FILTER=<ID=q10,Description="Quality below 10">
+##INFO=<ID=XX,Number=1,Type=Integer,Description="Test">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=2,length=62435964>
+##contig=<ID=1,length=62435964>
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=<ID=X,Description="Represents allele(s) other than observed.">
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 XX=11;DP=35 GT:GQ:DP 0/1:409:35
+1 110 . C T,G 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 110 . CAAA C 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
b
diff -r 000000000000 -r 3b6cd8086498 test-data/concat.1.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,19 @@
+##fileformat=VCFv4.0
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=<ID=X,Description="Represents allele(s) other than observed.">
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=3,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+3 142 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+3 152 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+3 162 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+3 172 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+3 182 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+3 192 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
b
diff -r 000000000000 -r 3b6cd8086498 test-data/concat.2.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.0
+##INFO=<ID=XX,Number=1,Type=Integer,Description="Test">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FILTER=<ID=Fail,Description="Fail">
+##contig=<ID=1,length=62435964>
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+2 140 . A G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
+1 110 . C T,G 1792 Fail XX=11;DP=32 GT:GQ:DP 0/1:245:32
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
b
diff -r 000000000000 -r 3b6cd8086498 test-data/concat.2.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=1,length=62435964>
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
b
diff -r 000000000000 -r 3b6cd8086498 test-data/consensus.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,20 @@
+>1:2-501
+TACCATATGTGACATATAAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGC
+AGAAAAGGAAGACTTAAAAAGAGTCAGTACTAACCTACATAATATATACAATGTTCATTA
+AATAATAAAATGAGCTCATCATACTTAGGTCATCATAAATATATCTGAAATTCACAAATA
+TTGATCAAATGGTAAAATAGACAAGTAGATTTTAATAGGTTAAACAATTACTGATTCTCT
+TGAAAGAATAAATTTAATATGAGACCTATTTCATTATAATGAACTCACAAATTAGAAACT
+TCACACTGGGGGCTGGAGAGATGGCTCAGTAGTTAAGAACACTGACTGCTCTTCTGAAGG
+TCCTGAGTTCAAATCCCAGCAACCACATGGTGACTTACAACCATCTGTAATGACATCTGA
+TGCCCTCTGGTGTGTCTGAAGACAGCTACAGTGTACTTACATAAAATAATAAATAAATCT
+TTAAAAACAAAAAAAAAGAA
+>2
+GAAGATCTTTTCCTTATTAAGGATCTGAAGCTCTGTAGATTTGTATTCTATTAAACATGG
+AGAGATTAGTGATTTTCCATATTCTTTAAGTCATTTTAGAGTAATGTGTTCTTAAGATAA
+ATCAGAAAAACAAAAACTTGTGCTTTCCTGTTTGAAAAACAAACAGCTGTGGGGAATGGT
+GTCGGGACAGCCTTTTTATAAAATTTTTCTAAATAATGTTGAGGCTTTGATACGTCAAAG
+TTATATTTCAAATGGAATCACTTAGACCTCGTTTCTGAGTGTCAATGGCCATATTGGGGA
+TTTGCTGCTGCCAATGACAGCACACCCTGGGAATGCCCCAACTACTTACTACAAAGCAGT
+GTTACATGGAGAAGATCTTCAAGAGTCTTTTTGCTAGATCTTTCCTTGGCTTTTGATGTG
+ACTCCTCTCAATAAAATCCACAGTAATATAGTGAGTGGTCTCCTGCTCCAAACCAGTATT
+TCAGACACAGTTAATCCAGAC
b
diff -r 000000000000 -r 3b6cd8086498 test-data/consensus.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.tab Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,3 @@
+1 421 480
+2 1   60
+2 241   300
b
diff -r 000000000000 -r 3b6cd8086498 test-data/consensus.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,18 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##ALT=<ID=DEL,Description="Deletion">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA001
+1 5 . C a . PASS . GT 0/1
+1 5 . C t . PASS . GT 0/1
+1 7 . T a . PASS . GT .
+1 10 . G a . PASS . GT 0/1
+1 12 . GACA ga . PASS . GT 0/1
+1 16 . T taaa . PASS . GT 1/1
+1 19 . A c . PASS . GT 0/1
+1 61 . C a . PASS . GT 0/1
+2 61 . AGAG aa . PASS . GT 0/1
+2 119 . AAA t . PASS . GT 0/1
+2 179 . G gacgtacgt . PASS . GT 0/1
+2 200 . A <DEL> . PASS END=210 GT 1/0
+2 481 . T c,a . PASS . GT 0/2
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.23andme
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.23andme Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,28 @@
+# rsid chromosome position genotype
+rs001 1 2 AA
+rs002 1 10 AG
+rs003 1 14 AG
+rs004 1 24 TC
+rs005 1 44 CG
+rs006 1 53 GG
+rs007 1 60 GG
+rs008 1 62 CC
+rs009 1 75 AA
+rs010 1 80 GG
+rs011 1 89 TT
+rs012 1 96 --
+rs013 1 99 CC
+rs014 1 102 GG
+rs015 1 112 TT
+rs016 2 5 CC
+rs017 2 11 CT
+rs018 2 16 CC
+rs019 2 20 GG
+rs020 2 33 CT
+rs021 2 39 AA
+rs022 2 44 CC
+rs023 2 48 CC
+rs024 2 55 AA
+rs025 2 59 CT
+rs026 Y 12 T
+rs027 Y 20 C
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.gs.gt.gen
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.gs.gt.gen Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,32 @@
+X:2698560_G_A X:2698560_G_A 2698560 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698630_A_G X:2698630_A_G 2698630 A G 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698758_CAA_C X:2698758_CAA_C 2698758 CAA C 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698769_AAG_A X:2698769_AAG_A 2698769 AAG A 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698789_C_G X:2698789_C_G 2698789 C G 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698822_A_C X:2698822_A_C 2698822 A C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698831_G_A X:2698831_G_A 2698831 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698889_T_C X:2698889_T_C 2698889 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698923_G_A X:2698923_G_A 2698923 G A 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698953_A_AGG X:2698953_A_AGG 2698953 A AGG 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698954_G_A X:2698954_G_A 2698954 G A 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0
+X:2699002_C_A X:2699002_C_A 2699002 C A 1 0 0 1 0 0 0.33 0.33 0.33 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699025_T_C X:2699025_T_C 2699025 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699091_G_A X:2699091_G_A 2699091 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699187_T_C X:2699187_T_C 2699187 T C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699188_G_C X:2699188_G_C 2699188 G C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699189_T_C X:2699189_T_C 2699189 T C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699217_C_T X:2699217_C_T 2699217 C T 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699246_C_A X:2699246_C_A 2699246 C A 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0
+X:2699275_T_G X:2699275_T_G 2699275 T G 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699350_A_T X:2699350_A_T 2699350 A T 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699360_T_C X:2699360_T_C 2699360 T C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699450_A_C X:2699450_A_C 2699450 A C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699507_T_C X:2699507_T_C 2699507 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699555_C_A X:2699555_C_A 2699555 C A 1 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0
+X:2699645_G_T X:2699645_G_T 2699645 G T 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0
+X:2699676_G_A X:2699676_G_A 2699676 G A 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699728_C_T X:2699728_C_T 2699728 C T 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699775_C_A X:2699775_C_A 2699775 C A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699898_C_CT X:2699898_C_CT 2699898 C CT 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699968_A_G X:2699968_A_G 2699968 A G 0.5 0.0 0.5 1 0 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 1 0
+X:2699970_T_C X:2699970_T_C 2699970 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.gs.gt.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.gs.gt.samples Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,12 @@
+ID_1 ID_2 missing
+0 0 0
+NA00001 NA00001 0
+NA00002 NA00002 0
+NA00003 NA00003 0
+NA00004 NA00004 0
+NA00005 NA00005 0
+NA00006 NA00006 0
+NA00007 NA00007 0
+NA00008 NA00008 0
+NA00009 NA00009 0
+NA00010 NA00010 0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.gvcf.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.gvcf.vcf Sat Jul 23 13:49:21 2022 +0000
[
b'@@ -0,0 +1,105 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping">\n+##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.999 or higher that read contains indicated allele vs all other intersecting indel alleles)">\n+##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel.">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">\n+##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">\n+##INFO=<ID=SNVSB,Number=1,Type=Float,Description="SNV site strand bias">\n+##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">\n+##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">\n+##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. 
RUs are not reported if longer than 20 bases.">\n+##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference.">\n+##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele.">\n+##FILTER=<ID=IndelConflict,Description="Locus is in region with conflicting indel calls">\n+##FILTER=<ID=SiteConflict,Description="Site genotype conflicts with proximal indel call. This is typically a heterozygous SNV call made inside of a heterozygous deletion">\n+##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present">\n+##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3">\n+##FILTER=<ID=HighSNVSB,Description="SNV strand bias value (SNVSB) exceeds 10">\n+##FILTER=<ID=HighREFREP,Description="Locus contains an indel allele occurring in a homopolymer or dinucleotide track with a reference repeat greater than 8">\n+##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">\n+##contig=<ID=22,length=450>\n+##SnvTheta=0.001\n+##IndelTheta=0.0001\n+##MaxDepth_1=114.18\n+##MaxDepth_10=131.73\n+##MaxDepth_11=117.27\n+##MaxDepth_12=116.97\n+##MaxDepth_13=102.24\n+##MaxDepth_14=101.55\n+##MaxDepth_15=95.22\n+##MaxDepth_16=111.33\n+##MaxDepth_17=112.59\n+##MaxDepth_18=121.86\n+##MaxDepth_19=111.12\n+##MaxDepth_2=121.83\n+##MaxDepth_20=111.24\n+##MaxDepth_21=98.43\n+##MaxDepth_22=76.23\n+##MaxDepth_3=120.09\n+##MaxDepth_4=124.50\n+##MaxDepth_5=119.82\n+##MaxDepth_6=122.22\n+##MaxDepth_7=120.27\n+##MaxDepth_8=120.45\n+##MaxDepth_9=102.48\n+##MaxDepth_M=7005.66\n+##MaxDepth_X=61.05\n+##MaxDepth_Y=37.17\n+##FILTER=<ID=IndelSizeFilter,Description="Indel is outside reportable size range. 
Insertion range: [1,3], Deletion range: [1,11]">\n+##gvcftools_version="0.16"\n+##FILTER=<ID=HAPLOID_CONFLICT,Description="Locus has heterozygous genotype in a haploid region.">\n+##FORMAT=<ID=OPL,Number=.,Type=Integer,Description="Original PL value before ploidy correction">\n+##INFO=<ID=phastCons,Number=0,Type=Flag,Description="overlaps a phastCons element">\n+##INFO=<ID=AA,Number=1,T'..b'CLNORIGIN,Number=.,Type=String,Description="String that describes the origin of the variant allele. Possible values: unknown, germline, somatic, inherited, paternal, maternal, de-novo, biparental, uniparental, not-tested, test-inconclusive, other. Multiple values for an allele are pipe-delimited">\n+##INFO=<ID=CLNSRC,Number=.,Type=String,Description="Variant clinical source or channel. Multiple values for an allele are pipe-delimited">\n+##INFO=<ID=CLNSRCID,Number=.,Type=String,Description="Identifier used by source defined in CLNSRC. Multiple values are pipe-delimited">\n+##INFO=<ID=CLNGENEINFO,Number=.,Type=String,Description="Gene symbol(s) and NCBI GeneID(s). The gene symbol and ID are delimited by a colon and multiple such pairs are pipe-delimited, Example SYMBOL1:GeneID1|SYMBOL2:GeneID2">\n+##INFO=<ID=CLNDBN,Number=.,Type=String,Description="Disease name used by the database specified by CLNSRC. Values corresponding to each CLNACC entry are pipe-delimited">\n+##INFO=<ID=CLNDSDB,Number=.,Type=String,Description="Colon-delimited list of variant disease database name(s). Multiple values from a single database are pipe-delimited">\n+##INFO=<ID=CLNDSDBID,Number=.,Type=String,Description="Colon-delimited list of variant disease database identifier(s). Multiple values from a single database are pipe-delimited">\n+##INFO=<ID=CSQ,Number=A,Type=String,Description="Consequence type as predicted by VEP. 
Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|EXON|INTRON|HGNC|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|DISTANCE|CANONICAL|SIFT|PolyPhen|GMAF|ENSP|DOMAINS|CCDS|HGVSc|HGVSp|CELL_TYPE">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE99\n+22\t1\t.\tN\t.\t0\tLowGQX\tEND=9;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t.:.:0:0\n+22\t10\t.\tC\t.\t0\tLowGQX\tEND=20;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:5:2:0\n+22\t21\t.\tC\t.\t0\tLowGQX\tEND=26;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:10:4:0\n+22\t27\t.\tC\t.\t0\tLowGQX\tEND=42;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:15:6:0\n+22\t43\t.\tG\t.\t0\tLowGQX\tEND=50;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:21:8:0\n+22\t51\t.\tC\t.\t0\tPASS\tEND=55;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:30:11:0\n+22\t56\t.\tG\t.\t0\tPASS\tEND=72;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:42:15:0\n+22\t73\t.\tT\t.\t0\tPASS\tEND=85;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:54:19:0\n+22\t86\t.\tG\tC\t23\tLowGQX\tSNVSB=0;SNVHPOL=2\tGT:GQ:GQX:DP:DPF:AD\t0/1:56:23:22:0:16,6\n+22\t87\t.\tT\t.\t0\tPASS\tEND=101;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:69:24:0\n+22\t102\t.\tA\t.\t0\tPASS\tEND=140;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:84:29:0\n+22\t141\t.\tG\t.\t0\tPASS\tEND=185;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:90:31:0\n+22\t186\t.\tT\t.\t0\tPASS\tEND=187;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:93:32:2\n+22\t188\t.\tT\t.\t0\tPASS\tEND=204;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:102:35:0\n+22\t205\t.\tT\t.\t0\tPASS\t.\tGT:GQX:DP:DPF\t0/0:72:36:0\n+22\t206\t.\tT\t.\t0\tPASS\tEND=231;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:87:30:0\n+22\t232\t.\tA\t.\t0\tPASS\tEND=249;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:66:23:0\n+22\t250\t.\tG\t.\t0\tPASS\tEND=257;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:87:30:0\n+22\t258\t.\tA\t.\t0\tPASS\tEND=269;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:114:39:0\n+22\t270\t.\tG\t.\t0\tPASS\tEND=279;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:150:51:0\n+22\t280\t.\tA\t.\t
0\tPASS\tEND=314;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:166:63:0\n+22\t315\t.\tC\t.\t0\tPASS\tEND=316;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:220:74:0\n+22\t317\t.\tT\t.\t0\tHighDepth\tEND=342;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:229:77:0\n+22\t343\t.\tT\t.\t0\tHighDepth\tEND=377;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:241:81:0\n+22\t378\t.\tT\t.\t0\tHighDepth\tEND=384;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:200:75:0\n+22\t385\t.\tG\t.\t0\tPASS\tEND=388;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:217:73:0\n+22\t389\t.\tC\t.\t0\tHighDepth\tEND=390;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:223:75:0\n+22\t391\t.\tT\t.\t0\tPASS\t.\tGT:GQX:DP:DPF\t0/0:223:75:1\n+22\t392\t.\tT\t.\t0\tHighDepth\tEND=397;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:223:75:0\n+22\t398\t.\tT\t.\t0\tPASS\tEND=420;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:178:60:0\n+22\t421\t.\tC\t.\t0\tPASS\tEND=450;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:142:54:0\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.hls.gt.hap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hls.gt.hap Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,35 @@
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0
+0 0 0 0 ? ? 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0* 1*
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 1 1 0 1 1 1 1 0 0 1 0 0 0 1 0 0 1 0
+0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 - 1 - 1 - 0 - 1 - 1 1 1 0 0 1 0 0 0 1
+0 - 1 - 0 - 0 - 0 - 0 0 0 0 0 1 0 0 0 0
+0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+? - 0 - 0 - 1 - 0 - 0 1 0 0 0 0 0 1 1 0
+0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.hls.gt.legend
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hls.gt.legend Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,36 @@
+id position a0 a1
+X:2698560_G_A 2698560 G A
+X:2698630_A_G 2698630 A G
+X:2698758_CAA_C 2698758 CAA C
+X:2698769_AAG_A 2698769 AAG A
+X:2698770_AG_A 2698770 AG A
+X:2698770_AG_AAGG 2698770 AG AAGG
+X:2698789_C_G 2698789 C G
+X:2698822_A_C 2698822 A C
+X:2698831_G_A 2698831 G A
+X:2698889_T_C 2698889 T C
+X:2698923_G_A 2698923 G A
+X:2698953_A_AGG 2698953 A AGG
+X:2698954_G_A 2698954 G A
+X:2699002_C_A 2699002 C A
+X:2699025_T_C 2699025 T C
+X:2699091_G_A 2699091 G A
+X:2699187_T_C 2699187 T C
+X:2699188_G_C 2699188 G C
+X:2699189_T_C 2699189 T C
+X:2699217_C_T 2699217 C T
+X:2699246_C_A 2699246 C A
+X:2699275_T_G 2699275 T G
+X:2699350_A_T 2699350 A T
+X:2699360_T_C 2699360 T C
+X:2699450_A_C 2699450 A C
+X:2699507_T_C 2699507 T C
+X:2699555_C_A 2699555 C A
+X:2699645_G_T 2699645 G T
+X:2699676_G_A 2699676 G A
+X:2699728_C_T 2699728 C T
+X:2699775_C_A 2699775 C A
+X:2699898_C_CT 2699898 C CT
+X:2699968_A_G 2699968 A G
+X:2699970_T_C 2699970 T C
+X:2699990_C_<INS:ME:LINE1>_2700054 2699990 C <INS:ME:LINE1>
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.hls.gt.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hls.gt.samples Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,11 @@
+sample population group sex
+NA00001 NA00001 NA00001 2
+NA00002 NA00002 NA00002 2
+NA00003 NA00003 NA00003 2
+NA00004 NA00004 NA00004 2
+NA00005 NA00005 NA00005 2
+NA00006 NA00006 NA00006 2
+NA00007 NA00007 NA00007 2
+NA00008 NA00008 NA00008 2
+NA00009 NA00009 NA00009 2
+NA00010 NA00010 NA00010 2
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.hs.gt.hap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hs.gt.hap Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,35 @@
+X:2698560_G_A X:2698560_G_A 2698560 G A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698630_A_G X:2698630_A_G 2698630 A G 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698758_CAA_C X:2698758_CAA_C 2698758 CAA C 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698769_AAG_A X:2698769_AAG_A 2698769 AAG A 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+X:2698770_AG_A X:2698770_AG_A 2698770 AG A 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
+X:2698770_AG_AAGG X:2698770_AG_AAGG 2698770 AG AAGG 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698789_C_G X:2698789_C_G 2698789 C G 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698822_A_C X:2698822_A_C 2698822 A C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698831_G_A X:2698831_G_A 2698831 G A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698889_T_C X:2698889_T_C 2698889 T C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698923_G_A X:2698923_G_A 2698923 G A 1 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+X:2698953_A_AGG X:2698953_A_AGG 2698953 A AGG 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698954_G_A X:2698954_G_A 2698954 G A 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0
+X:2699002_C_A X:2699002_C_A 2699002 C A 0 0 0 0 ? ? 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699025_T_C X:2699025_T_C 2699025 T C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699091_G_A X:2699091_G_A 2699091 G A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699187_T_C X:2699187_T_C 2699187 T C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0* 1*
+X:2699188_G_C X:2699188_G_C 2699188 G C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699189_T_C X:2699189_T_C 2699189 T C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699217_C_T X:2699217_C_T 2699217 C T 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699246_C_A X:2699246_C_A 2699246 C A 1 0 1 1 0 1 1 1 1 0 0 1 0 0 0 1 0 0 1 0
+X:2699275_T_G X:2699275_T_G 2699275 T G 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1
+X:2699350_A_T X:2699350_A_T 2699350 A T 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699360_T_C X:2699360_T_C 2699360 T C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699450_A_C X:2699450_A_C 2699450 A C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699507_T_C X:2699507_T_C 2699507 T C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699555_C_A X:2699555_C_A 2699555 C A 0 - 1 - 1 - 0 - 1 - 1 1 1 0 0 1 0 0 0 1
+X:2699645_G_T X:2699645_G_T 2699645 G T 0 - 1 - 0 - 0 - 0 - 0 0 0 0 0 1 0 0 0 0
+X:2699676_G_A X:2699676_G_A 2699676 G A 0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+X:2699728_C_T X:2699728_C_T 2699728 C T 0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+X:2699775_C_A X:2699775_C_A 2699775 C A 0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+X:2699898_C_CT X:2699898_C_CT 2699898 C CT 0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+X:2699968_A_G X:2699968_A_G 2699968 A G ? - 0 - 0 - 1 - 0 - 0 1 0 0 0 0 0 1 1 0
+X:2699970_T_C X:2699970_T_C 2699970 T C 0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+X:2699990_C_<INS:ME:LINE1>_2700054 X:2699990_C_<INS:ME:LINE1>_2700054 2699990 C <INS:ME:LINE1> 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.hs.gt.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hs.gt.samples Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,12 @@
+ID_1 ID_2 missing
+0 0 0
+NA00001 NA00001 0
+NA00002 NA00002 0
+NA00003 NA00003 0
+NA00004 NA00004 0
+NA00005 NA00005 0
+NA00006 NA00006 0
+NA00007 NA00007 0
+NA00008 NA00008 0
+NA00009 NA00009 0
+NA00010 NA00010 0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/convert.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.vcf Sat Jul 23 13:49:21 2022 +0000
b
b'@@ -0,0 +1,40 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=GP,Number=G,Type=Float,Description="Estimated Genotype Probability">\n+##contig=<ID=X,assembly=b37,length=155270560>\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003\tNA00004\tNA00005\tNA00006\tNA00007\tNA00008\tNA00009\tNA00010\n+X\t2698560\t.\tG\tA\t102\t.\t.\tGT:PL:GP\t0|0:0,21,177:1,0,0\t0|0:0,30,206:1,0,0\t0|0:0,21,177:1,0,0\t0|0:0,15,132:1,0,0\t0|0:0,9,90:1,0,0\t0|0:0,15,114:1,0,0\t0|0:0,15,118:1,0,0\t0|0:0,15,133:1,0,0\t0|0:0,15,144:1,0,0\t0|0:0,24,191:1,0,0\n+X\t2698630\t.\tA\tG\t537\t.\t.\tGT:PL:GP\t0|0:0,21,186:1,0,0\t0|0:0,21,176:1,0,0\t0|0:0,15,106:1,0,0\t0|0:0,18,127:1,0,0\t0|0:0,6,62:1,0,0\t0|0:0,15,146:1,0,0\t0|0:0,18,141:1,0,0\t0|0:0,21,173:1,0,0\t0|0:0,12,119:1,0,0\t0|0:0,15,145:1,0,0\n+X\t2698758\t.\tCAA\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,6,16:0.8292,0.1708,0\t0|1:0,0,0:0.0278,0.5743,0.3979\t0|0:0,0,0:0.6336,0.3664,0\t0|0:0,3,8:0.8611,0.1389,0\t0|0:0,0,8:0.7628,0.2372,0\t0|0:0,9,18:1,0,0\t0|0:0,9,23:1,0,0\t0|0:0,9,15:0.9855,0.0145,0\t0|0:0,6,10:1,0,0\t0|0:0,21,33:1,0,0\n+X\t2698769\t.\tAAG\tA\t999\t.\t.\tGT:PL:GP\t1|0:17,0,7:0.0069,0.9931,0\t1|1:0,0,0:0.0004,0.0892,0.9104\t0|1:17,3,0:0.0045,0.9954,0.0001\t1|0:11,0,2:0.0085,0.9915,0\t1|0:11,0,15:0.0003,0.9997,0\t0|0:0,15,40:1,0,0\t0|0:0,9,23:1,0,0\t0|0:0,15,25:0.8474,0.1526,0\t0|0:0,15,34:1,0,0\t0|0:0,33,56:1,0,0\n+X\t2698770\t.\tAG\tA,AAGG\t999\t.\t.\tGT:PL:GP\t0|0:0,12,103,12,103,103:0.925,0.0717,0,0.0033,0,0\t0|1:0,3,21,3,21,21:0.4944,0.368,0.0018,0.1343,0.0013,0.0002\t0|0:0,0,0,0,0,0:0.5458,0.4085,0,0.0457,0,0\t0|0:0,3,36,3,36,36:0.8126,0.1758,0,0.0116,0,0\t1|0:37,0,86,49,92,130:0,1,0,0,0,0\t0|0:0,15,125,15,125,125:1,0,0,0,0,0\t0|0:0,9,105,9,105,105:1,0,0,0,0,0\t0|0:0,15,109,15,109,109:0.9964,0.0034,
0,0.0002,0,0\t0|0:0,15,137,15,137,137:1,0,0,0,0,0\t0|0:0,33,215,33,215,215:1,0,0,0,0,0\n+X\t2698789\t.\tC\tG\t153\t.\t.\tGT:PL:GP\t0|0:0,21,152:1,0,0\t0|0:0,21,131:1,0,0\t0|0:0,12,113:1,0,0\t0|0:0,12,104:1,0,0\t0|0:0,21,137:1,0,0\t0|0:0,15,118:0.9999,0.0001,0\t0|0:0,15,111:1,0,0\t0|0:0,24,152:1,0,0\t0|0:0,18,147:1,0,0\t0|0:0,33,183:1,0,0\n+X\t2698822\t.\tA\tC\t85.2\t.\t.\tGT:PL:GP\t0|0:0,21,167:1,0,0\t0|0:0,21,171:1,0,0\t0|0:0,21,158:1,0,0\t0|0:0,18,154:1,0,0\t0|0:0,15,135:1,0,0\t0|0:0,15,132:1,0,0\t0|0:0,21,168:1,0,0\t0|0:0,21,175:1,0,0\t0|0:0,15,142:1,0,0\t0|0:0,21,172:1,0,0\n+X\t2698831\t.\tG\tA\t303\t.\t.\tGT:PL:GP\t0|0:0,15,129:1,0,0\t0|0:0,27,179:1,0,0\t0|0:0,24,196:1,0,0\t0|0:0,21,158:1,0,0\t0|0:0,18,154:1,0,0\t0|0:0,12,112:1,0,0\t0|0:0,24,162:1,0,0\t0|0:0,21,168:1,0,0\t0|0:0,9,95:1,0,0\t0|0:0,21,164:1,0,0\n+X\t2698889\t.\tT\tC\t74.4\t.\t.\tGT:PL:GP\t0|0:0,27,193:1,0,0\t0|0:0,45,255:1,0,0\t0|0:0,21,190:1,0,0\t0|0:0,36,254:1,0,0\t0|0:0,30,226:1,0,0\t0|0:0,36,253:1,0,0\t0|0:0,18,156:1,0,0\t0|0:0,9,87:1,0,0\t0|0:0,9,98:1,0,0\t0|0:0,24,205:1,0,0\n+X\t2698923\t.\tG\tA\t999\t.\t.\tGT:PL:GP\t1|0:62,0,133:0,1,0\t0|1:164,0,91:0,1,0\t0|1:35,0,73:0,1,0\t1|0:91,0,108:0,1,0\t1|0:67,0,71:0,1,0\t0|0:0,30,187:1,0,0\t0|0:0,9,73:1,0,0\t0|0:0,12,99:1,0,0\t0|0:0,18,153:1,0,0\t0|0:0,18,138:1,0,0\n+X\t2698953\t.\tA\tAGG\t267\t.\t.\tGT:PL:GP\t0|0:0,27,111:1,0,0\t0|0:0,33,124:1,0,0\t0|0:0,12,62:1,0,0\t0|0:0,15,86:1,0,0\t0|0:0,12,58:1,0,0\t0|0:0,15,69:1,0,0\t0|0:0,6,34:1,0,0\t0|0:0,18,83:1,0,0\t0|0:0,18,80:1,0,0\t0|0:0,15,74:1,0,0\n+X\t2698954\t.\tG\tA\t999\t.\t.\tGT:PL:GP\t1|0:69,0,139:0,1,0\t1|1:199,24,0:0,0,1\t0|1:15,0,82:0,1,0\t1|0:32,0,76:0,1,0\t1|0:16,0,80:0,1,0\t0|0:0,15,131:1,0,0\t0|0:0,6,58:1,0,0\t0|1:99,0,39:0,1,0\t0|0:0,18,163:1,0,0\t0|0:0,15,136:1,0,0\n+X\t2699002\t.\tC\tA\t65.1\t.\t.\tGT:PL:GP\t0|0:0,18,144:1,0,0\t0|0:0,12,115:1,0,0\t.|.:0,12,120:1,0,0\t0|0:0,15,131:1,0,0\t0|0:0,6,29:1,0,0\t0|0:0,9,95:1,0,0\t0|0:0,9,79:1,0,0\t0|0:0,24,188:1,0,0\t0|0:0,15,124:1,0,0\t0|0:0
,9,93:1,0,0\n+X\t2699025\t.\tT\tC\t44.9\t.\t.\tGT:PL:GP\t0|0:0,24,189:1,0,0\t0|0:0,12,98:1,0,0\t0|0:0,15,130:1,0,0\t0|0:0,15,113:1,0,0\t0|0:0,6,63:1,0,0\t0|0:0,24,198:1,0,0\t0|0:0,12,92:1,0,0\t0|0:0,24,197:1,0,0\t0|0:0,9,97:1,0,0\t0|0:0,12,108:1,0,0\n+X\t2699091\t.\tG\tA\t45\t.\t.\tGT:PL:GP\t0|0:0,18,'..b'0,0\t0|0:0,12,101:1,0,0\t0|0:0,12,97:1,0,0\t0|0:0,24,188:1,0,0\t0|0:0,24,194:1,0,0\t0|0:0,15,127:1,0,0\t0|0:0,21,169:1,0,0\t0|0:0,15,129:1,0,0\t0|0:0,21,171:1,0,0\n+X\t2699187\t.\tT\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,24,200:1,0,0\t0|0:0,24,191:1,0,0\t1|0:48,0,85:0,1,0\t0|0:0,15,145:1,0,0\t0|1:58,0,45:0,1,0\t1|0:61,0,50:0,1,0\t1|0:22,0,51:0,1,0\t0|0:0,27,211:1,0,0\t0|0:0,9,96:0.9999,0.0001,0\t0/1:23,0,160:0,1,0\n+X\t2699188\t.\tG\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,24,194:1,0,0\t0|0:0,24,167:1,0,0\t1|0:48,0,78:0,1,0\t0|0:0,15,131:1,0,0\t0|1:63,0,40:0,1,0\t1|0:50,0,44:0,1,0\t1|0:22,0,48:0,1,0\t0|0:0,27,212:1,0,0\t0|0:0,9,87:0.9999,0.0001,0\t0|1:23,0,154:0,1,0\n+X\t2699189\t.\tT\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,24,199:1,0,0\t0|0:0,24,176:1,0,0\t1|0:44,0,87:0,1,0\t0|0:0,15,136:1,0,0\t0|1:62,0,46:0,1,0\t1|0:61,0,46:0,1,0\t1|0:22,0,49:0,1,0\t0|0:0,27,212:1,0,0\t0|0:0,9,93:0.9999,0.0001,0\t0|1:23,0,164:0,1,0\n+X\t2699217\t.\tC\tT\t60.3\t.\t.\tGT:PL:GP\t0|0:0,18,158:1,0,0\t0|0:0,18,119:1,0,0\t0|0:0,21,152:1,0,0\t0|0:0,21,162:1,0,0\t0|0:0,12,102:1,0,0\t0|0:0,18,144:1,0,0\t0|0:0,12,108:1,0,0\t0|0:0,18,146:1,0,0\t0|0:0,12,98:1,0,0\t0|0:0,18,155:1,0,0\n+X\t2699246\t.\tC\tA\t999\t.\t.\tGT:PL:GP\t1|0:128,0,15:0,0.9998,0.0002\t1|1:147,21,0:0,0.0001,0.9999\t0|1:130,0,5:0,0.9977,0.0023\t1|1:237,33,0:0,0,1\t1|0:45,0,75:0,1,0\t0|1:145,0,49:0,1,0\t0|0:0,15,109:1,0,0\t0|1:13,0,63:0.0002,0.9998,0\t0|0:0,30,178:0.9953,0.0047,0\t1|0:120,0,57:0,1,0\n+X\t2699275\t.\tT\tG\t999\t.\t.\tGT:PL:GP\t0|0:0,18,165:0.9998,0.0002,0\t0|0:0,18,152:1,0,0\t1|0:0,9,95:0.0023,0.9977,0\t0|0:0,33,239:1,0,0\t0|1:125,0,40:0,1,0\t1|1:205,27,0:0,0,1\t1|0:69,0,43:0,1,0\t0|0:0,15,139:1,0,0\t0|0:0,30,219:1,0,0\t0|1:96,0,54:0,1
,0\n+X\t2699350\t.\tA\tT\t999\t.\t.\tGT:PL:GP\t0|0:0,27,206:1,0,0\t0|0:0,15,139:1,0,0\t1|0:54,0,25:0,1,0\t0|0:0,12,117:0.9996,0.0004,0\t0|1:79,0,73:0,1,0\t1|0:48,0,82:0,1,0\t1|0:68,0,45:0,1,0\t0|0:0,30,216:1,0,0\t0|0:0,27,224:1,0,0\t0|1:48,0,80:0,1,0\n+X\t2699360\t.\tT\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,21,184:1,0,0\t0|0:0,15,133:1,0,0\t1|0:53,0,21:0,1,0\t0|0:0,12,114:0.9996,0.0004,0\t0|1:20,0,66:0,1,0\t1|0:40,0,93:0,1,0\t1|0:52,0,66:0,1,0\t0|0:0,30,220:1,0,0\t0|0:0,21,191:1,0,0\t0|1:20,0,83:0,1,0\n+X\t2699450\t.\tA\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,12,124:1,0,0\t0|0:0,6,55:0.9976,0.0024,0\t1|0:99,0,42:0,1,0\t0|0:0,21,186:0.9999,0.0001,0\t0|1:64,0,100:0,1,0\t1|0:38,0,177:0,1,0\t1|0:16,0,103:0,1,0\t0|0:0,24,202:1,0,0\t0|0:0,12,119:1,0,0\t0|1:75,0,115:0,1,0\n+X\t2699507\t.\tT\tC\t195\t.\t.\tGT:PL:GP\t0|0:0,15,133:1,0,0\t0|0:0,12,122:1,0,0\t0|0:0,6,60:1,0,0\t0|0:0,18,123:1,0,0\t0|0:0,15,145:1,0,0\t0|0:0,21,173:1,0,0\t0|0:0,21,178:1,0,0\t0|0:0,24,200:1,0,0\t0|0:0,12,125:1,0,0\t0|0:0,24,189:1,0,0\n+X\t2699555\t.\tC\tA\t999\t.\t.\tGT:PL:GP\t0:0,156:1,0\t1:58,19:0,1\t1:51,0:0,1\t0:0,91:1,0\t1:89,0:0,1\t1|1:132,15,0:0,0,1\t1|0:99,0,68:0,1,0\t0|1:101,0,101:0,1,0\t0|0:0,18,161:0.9998,0.0002,0\t0|1:118,0,72:0,1,0\n+X\t2699645\t.\tG\tT\t999\t.\t.\tGT:PL:GP\t0:0,95:1,0\t1:49,0:0,1\t0:0,58:1,0\t0:0,64:1,0\t0:0,113:1,0\t0|0:0,18,158:1,0,0\t0|0:0,18,146:1,0,0\t0|1:68,0,136:0,1,0\t0|0:0,30,210:1,0,0\t0|0:0,27,186:1,0,0\n+X\t2699676\t.\tG\tA\t999\t.\t.\tGT:PL:GP\t0:0,84:1,0\t0:0,87:1,0\t1:35,0:0,1\t0:0,28:1,0\t1:114,0:0,1\t1|0:99,0,72:0,1,0\t1|0:48,0,89:0,1,0\t0|0:0,18,155:1,0,0\t0|0:0,24,191:1,0,0\t0|1:99,0,61:0,1,0\n+X\t2699728\t.\tC\tT\t69.7\t.\t.\tGT:PL:GP\t0:0,58:1,0\t0:0,64:1,0\t0:0,33:1,0\t0:0,69:1,0\t0:0,81:1,0\t0|0:0,27,183:1,0,0\t0|0:0,45,220:1,0,0\t0|0:0,30,161:1,0,0\t0|0:0,15,110:1,0,0\t0|0:0,21,156:1,0,0\n+X\t2699775\t.\tC\tA\t71.1\t.\t.\tGT:PL:GP\t0:0,62:1,0\t0:0,101:1,0\t0:0,130:1,0\t0:0,141:1,0\t0:0,54:1,0\t0|0:0,30,203:1,0,0\t0|0:0,39,208:1,0,0\t0|0:0,30,177:1,0,0\t0|0:0,18
,132:1,0,0\t0|0:0,15,103:1,0,0\n+X\t2699898\t.\tC\tCT\t999\t.\t.\tGT:PL:GP\t0:0,32:1,0\t0:0,11:1,0\t1:11,0:0,1\t0:0,11:1,0\t1:31,0:0,1\t1|0:11,0,24:0.0438,0.9562,0\t1|0:8,0,17:0,1,0\t0|0:0,33,72:1,0,0\t0|0:0,27,69:1,0,0\t0|1:11,4,12:0.0003,0.9997,0\n+X\t2699968\t.\tA\tG\t999\t.\t.\tGT:PL:GP\t.:0,84:1,0\t0:0,32:1,0\t0:0,57:1,0\t1:131,0:0,1\t0:0,66:1,0\t0|1:89,0,44:0,1,0\t0|0:0,18,157:1,0,0\t0|0:0,45,255:1,0,0\t0|1:75,0,109:0,1,0\t1|0:98,0,62:0,1,0\n+X\t2699970\t.\tT\tC\t55.3\t.\t.\tGT:PL:GP\t0:0,68:1,0\t0:0,34:1,0\t0:0,32:1,0\t0:0,162:1,0\t0:0,63:1,0\t0|0:0,15,149:1,0,0\t0|0:0,21,181:1,0,0\t0|0:0,45,255:1,0,0\t0|0:0,27,207:1,0,0\t0|0:0,24,196:1,0,0\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/csq.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/csq.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,34 @@
+>1
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+>2
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+>3
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
b
diff -r 000000000000 -r 3b6cd8086498 test-data/csq.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/csq.gff3 Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,40 @@
+##gff-version   3
+#! This file shows which fields are used and required by `bcftools +csq`. It is a trimmed version 
+#! of the GFF3 format, see an example of the full format here
+#!      ftp://ftp.ensembl.org/pub/grch37/release-84/gff3/homo_sapiens/
+#!
+###
+1 . gene 90 200 . + . ID=gene:ENSG00000000001;Name=XYZ;biotype=protein_coding
+1 . transcript 90 200 . + . ID=transcript:ENST00000000001;Parent=gene:ENSG00000000001;biotype=protein_coding
+1 . exon 90 110 . + . Parent=transcript:ENST00000000001
+1 . five_prime_UTR 90 98 . + . Parent=transcript:ENST00000000001
+1 . CDS 99 110 . + 1 Parent=transcript:ENST00000000001
+1 . exon 120 130 . + . Parent=transcript:ENST00000000001
+1 . CDS 120 130 . + 1 Parent=transcript:ENST00000000001
+1 . exon 140 150 . + . Parent=transcript:ENST00000000001
+1 . CDS 140 150 . + 2 Parent=transcript:ENST00000000001
+1 . exon 160 200 . + . Parent=transcript:ENST00000000001
+1 . CDS 160 171 . + 0 Parent=transcript:ENST00000000001
+1 . three_prime_UTR 172 200 . + . Parent=transcript:ENST00000000001
+2 . gene 80 200 . - . ID=gene:ENSG00000000002;Name=ABC;biotype=protein_coding
+2 . transcript 80 200 . - . ID=transcript:ENST00000000002;Parent=gene:ENSG00000000002;biotype=protein_coding
+2 . exon 80 110 . - . Parent=transcript:ENST00000000002
+2 . three_prime_UTR 80 98 . - . Parent=transcript:ENST00000000002
+2 . CDS 99 110 . - 0 Parent=transcript:ENST00000000002
+2 . exon 120 130 . - . Parent=transcript:ENST00000000002
+2 . CDS 120 130 . - 2 Parent=transcript:ENST00000000002
+2 . exon 140 150 . - . Parent=transcript:ENST00000000002
+2 . CDS 140 150 . - 1 Parent=transcript:ENST00000000002
+2 . exon 160 200 . - . Parent=transcript:ENST00000000002
+2 . CDS 160 171 . - 1 Parent=transcript:ENST00000000002
+2 . five_prime_UTR 172 200 . - . Parent=transcript:ENST00000000002
+3 . lincRNA_gene 20 50 . - . ID=gene:ENSG00000000004;Name=mir-007;biotype=lincRNA
+3 . lincRNA 20 50 . - . ID=transcript:ENSG00000000004;Parent=gene:ENSG00000000004;biotype=lincRNA
+3 . gene 100 200 . - . ID=gene:ENSG00000000003;Name=QWRTY;biotype=protein_coding
+3 . transcript 100 200 . - . ID=transcript:ENST00000000003;Parent=gene:ENSG00000000003;biotype=protein_coding
+3 . exon 100 110 . - . Parent=transcript:ENST00000000003
+3 . three_prime_UTR 100 105 . - . Parent=transcript:ENST00000000003
+3 . CDS 106 110 . - 0 Parent=transcript:ENST00000000003
+3 . exon 160 200 . - . Parent=transcript:ENST00000000003
+3 . CDS 160 171 . - 0 Parent=transcript:ENST00000000003
+3 . five_prime_UTR 172 200 . - . Parent=transcript:ENST00000000003
b
diff -r 000000000000 -r 3b6cd8086498 test-data/csq.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/csq.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,40 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=EXP,Number=1,Type=String,Description="Expected consequence">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SmplAAA SmplBBB
+1 90 . C T . . EXP=5_prime_utr|XYZ|ENST00000000001|protein_coding GT 1|0 1|1
+1 102 . C T 1 . EXP=synonymous|XYZ|ENST00000000001|protein_coding|+|1Y|102C>T GT 1|0 1|0
+1 103 . G A 1 . EXP=missense|XYZ|ENST00000000001|protein_coding|+|2V>2I|103G>A GT 1|0 0|0
+1 103 . G C 1 . EXP=missense|XYZ|ENST00000000001|protein_coding|+|2V>2L|103G>C GT 0|0 1|0
+1 107 . G A 1 . EXP=missense|XYZ|ENST00000000001|protein_coding|+|3R>3Q|107G>A+108T>A GT 1|0 1|0
+1 108 . T A 1 . EXP=splice_region|XYZ|ENST00000000001|protein_coding,@107 GT 1|0 1|0
+1 121 . ACG A . . EXP=inframe_deletion|XYZ|ENST00000000001|protein_coding|+|5TY>5I|121ACG>A+124TA>T,splice_region|XYZ|ENST00000000001|protein_coding GT 1|0 1|0
+1 124 . TA T . . EXP=@121 GT 1|0 1|0
+1 128 . T C 1 . EXP=missense|XYZ|ENST00000000001|protein_coding|+|7V>6A|128T>C+129A>C,splice_region|XYZ|ENST00000000001|protein_coding GT 1|0 0/0
+1 129 . A C 1 . EXP=splice_region|XYZ|ENST00000000001|protein_coding,@128 GT 1|0 0/0
+1 140 . TA AACG . . EXP=inframe_insertion|XYZ|ENST00000000001|protein_coding|+|8LR>7QRR|140TA>AACG+142C>CC,splice_region|XYZ|ENST00000000001|protein_coding GT 1|0 0|0
+1 142 . C CC . . EXP=splice_region|XYZ|ENST00000000001|protein_coding,@140 GT 1|0 0|0
+1 145 . AC TA . . EXP=stop_gained|XYZ|ENST00000000001|protein_coding|+|10T>10*|145AC>TA GT 1|0 0|0
+1 160 . TA T . . EXP=*frameshift|XYZ|ENST00000000001|protein_coding|+|12YVRT>12SYV|160TA>T,splice_region|XYZ|ENST00000000001|protein_coding GT 1|0 0|0
+1 190 . C T . . EXP=3_prime_utr|XYZ|ENST00000000001|protein_coding GT 1|0 0|0
+2 97 . A C . . EXP=3_prime_utr|ABC|ENST00000000002|protein_coding GT 1|0 0|0
+2 105 . AC A . . EXP=@121 GT 1|0 0|0
+2 121 . AC A . . EXP=frameshift|ABC|ENST00000000002|protein_coding|-|11VVRTY>11*|105AC>A+121AC>A,splice_region|ABC|ENST00000000002|protein_coding GT 1|0 0|0
+2 126 . C CTT . . EXP=@127 GT 1|0 0|0
+2 127 . G GG . . EXP=inframe_insertion|ABC|ENST00000000002|protein_coding|-|9T>8TK|126C>CTT+127G>GG GT 1|0 0|0
+2 144 . TAC T . . EXP=@148 GT 1|0 0|0
+2 148 . TA T . . EXP=inframe_deletion|ABC|ENST00000000002|protein_coding|-|5YV>5T|144TAC>T+148TA>T,splice_region|ABC|ENST00000000002|protein_coding GT 1|0 0|0
+2 164 . T G . . EXP=missense|ABC|ENST00000000002|protein_coding|-|3T>3P|164T>G GT 1|0 0|0
+2 165 . A C . . EXP=synonymous|ABC|ENST00000000002|protein_coding|-|2R|165A>C GT 1|0 0|0
+2 169 . A G . . EXP=@170 GT 1|0 0|0
+2 170 . C T . . EXP=missense|ABC|ENST00000000002|protein_coding|-|1V>1T|169A>G+170C>T GT 1|0 0|0
+2 199 . G T . . EXP=5_prime_utr|ABC|ENST00000000002|protein_coding GT 1|0 0|0
+3 20 . T A . . EXP=non_coding|mir-007||lincRNA GT 1|0 0|0
+3 109 . ACGTACGT A 1 . EXP=splice_acceptor|QWRTY|ENST00000000003|protein_coding GT 1|0 0|0
+3 113 . A T . . EXP=splice_region|QWRTY|ENST00000000003|protein_coding GT 1|0 0|0
+3 120 . T A . . EXP=intron|QWRTY||protein_coding GT 1|0 0|0
+3 152 . T A . . EXP=splice_region|QWRTY|ENST00000000003|protein_coding GT 1|0 0|0
+3 159 . G A . . EXP=splice_donor|QWRTY|ENST00000000003|protein_coding GT 1|0 0|0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/fasta_indexes.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,6 @@
+23andme ? test genome ${__HERE__}/test-cache/23andme.fa
+consensus ? test genome ${__HERE__}/test-cache/consensus.fa
+csq ? test genome ${__HERE__}/test-cache/csq.fa
+gvcf ? test genome ${__HERE__}/test-cache/gvcf.fa
+mpileup ? test genome ${__HERE__}/test-cache/mpileup.ref.fa
+norm ? test genome ${__HERE__}/test-cache/norm.fa
b
diff -r 000000000000 -r 3b6cd8086498 test-data/filter.1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.1.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##test=<xx=A,yy=B,zz=C>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##readme=AAAAAA
+##readme=BBBBBB
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 1000 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1001 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1003 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1006 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1007 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2000 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2001 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2003 . T TC 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2005 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2006 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 1001 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 1004 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 1008 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 2001 . A AT 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 2003 . A AT 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 2006 . A AT 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
b
diff -r 000000000000 -r 3b6cd8086498 test-data/filter.2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.2.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:25:35:-20,-5,-20 0/1:45:11:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:25:300
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:25:12 0/1:245:310
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:20
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:20 1/2:322:10
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS AN=4;AC=2 GT 0/1 0/1
b
diff -r 000000000000 -r 3b6cd8086498 test-data/filter.3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.3.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.2
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=STR,Number=A,Type=String,Description="Testing string and Number=A in INFO">
+##INFO=<ID=TXT0,Number=1,Type=String,Description="Testing in INFO">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##SAMPLE=<ID=NORMAL,SampleName=B,Description="Less-than (\"<\") and greater-than (\">\") quoting nonsense where double brackets would do just fine",softwareName=<Nonsense,Software>,softwareVer=<119,65>,softwareParam=<.>,MetadataResource=http://somewhere.com/path>
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="test">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=STR,Number=1,Type=String,Description="Test">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3162006 . GAA G,GA 238 q20 DP=19;AN=4;AC=1,1;XRF=1e6,2e6,3e6;XRI=1111,2222,3333;XRS=ABC,DEF,GHI;XAF=1e6,2e6;XAI=1111,2222;XAS=ABC,DEF;XGF=1e6,2e6,3e6,4e6,5e6,6e6;XGI=11,22,33,44,55,66;XGS=ABC,DEF,GHI,JKL,MNO,PQR;TXT=ABC,DEF,GHI GT:GQ:DP:STR 0/1:589:19:XX 0/2:1:1:YY
+1 3162007 . TAGGG CAGGG,CAGGT 238 q20 AO=52101,113;CIGAR=1X4M,1X3M1X;TXT0=text GT:FGS:FGI:FGF:FAS:FAI:FAF:FRS:FRI:FRF 0/1:AAAAAA,BBBBB,CCCC,DDD,EE,F:1,2,3,4,5,6:1e-1,2e-2,3e-3,4e-4,5e-5,6e-6:AAA,B:1,2:1e-1,2e-2:A,BB,CCC:1,2,3:1e-1,2e-2,3e-3 2:AAAAAA,BBB,C:1,2,3:1e-1,2e-2,3e-3:AAA,B:1,2:1e-1,2e-2:A,BB,CCC:1,2,3:1e-1,2e-2,3e-3
b
diff -r 000000000000 -r 3b6cd8086498 test-data/fixploidy.ploidy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fixploidy.ploidy Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,9 @@
+1 3000001 3000001 X 0
+1 3000001 3000001 Y 1
+1 3000001 3000001 Z 2
+1 3000002 3000002 X 3
+1 3000002 3000002 Y 4
+1 3000002 3000002 Z 5
+1 3000004 3000004 X 6
+1 3000004 3000004 Y 7
+1 3000004 3000004 Z 8
b
diff -r 000000000000 -r 3b6cd8086498 test-data/fixploidy.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fixploidy.samples Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,3 @@
+A X
+B Y
+C Z
b
diff -r 000000000000 -r 3b6cd8086498 test-data/fixploidy.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fixploidy.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="Test type">
+##INFO=<ID=IINT,Number=1,Type=Integer,Description="Test type">
+##INFO=<ID=IFLT,Number=1,Type=Float,Description="Test type">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test type">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FINT,Number=1,Type=Integer,Description="Test type">
+##FORMAT=<ID=FFLT,Number=1,Type=Float,Description="Test type">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test type">
+##FILTER=<ID=q11,Description="Quality below 10">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 3000001 xx C CT 11 PASS FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:xxx 0/0:11:1.1:x 0|0:11:1.1:x
+1 3000002 . C CTT . . . GT ./. ./. .|.
+1 3000003 xx C CTTT 11 q11 FLAG;IINT=.;IFLT=.;ISTR=. GT:FINT:FFLT:FSTR 0/0:.:.:. 0/0:.:.:. 0|0:.:.:.
+1 3000004 xx C CTTTT 11 q11 FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:x 0/0:11:1.1:xxx 0|0:11:1.1:xxx
b
diff -r 000000000000 -r 3b6cd8086498 test-data/gvcf.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gvcf.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,9 @@
+>22
+NNNNNNNNCCTTGGCCAAGTCACTTCCTCCTTCAGGAACATTGCAGTGGGCCTAAGTGCC
+TCCTCTCGGGACTGGTATGGGGACGGTCATGCAATCTGGACAACATTCACCTTTAAAAGT
+TTATTGATCTTTTGTGACATGCACGTGGGTTCCCAGTAGCAAGAAACTAAAGGGTCGCAG
+GCCGGTTTCTGCTAATTTCTTTAATTCCAAGACAGTCTCAAATATTTTCTTATTAACTTC
+CTGGAGGGAGGCTTATCATTCTCTCTTTTGGATGATTCTAAGTACCAGCTAAAATACAGC
+TATCATTCATTTTCCTTGATTTGGGAGCCTAATTTCTTTAATTTAGTATGCAAGAAAACC
+AATTTGGAAATATCAACTGTTTTGGAAACCTTAGACCTAGGTCATCCTTAGTAAGATCTT
+CCCATTTATATAAATACTTGCAAGTAGTAGTGCCATAATT
b
diff -r 000000000000 -r 3b6cd8086498 test-data/isec.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/isec.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##test=<xx=A,yy=B,zz=C>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##readme=AAAAAA
+##readme=BBBBBB
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 3062915 . GTTT G 1806 q10 DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 3062915 . G T 1806 q10 DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 3106154 . CAAA C 1792 PASS DP=32;AN=2;AC=1 GT:GQ:DP 0/1:245:32
+1 3106154 . C T,CT 1792 PASS DP=32;AN=2;AC=1 GT:GQ:DP 0/1:245:32
+1 3157410 . GA G 628 q10 DP=21;AN=2;AC=2 GT:GQ:DP 1/1:21:21
+1 3162006 . GAA G 1016 PASS DP=22;AN=2;AC=1 GT:GQ:DP 0/1:212:22
+1 3177144 . GT G 727 PASS DP=30;AN=2;AC=1 GT:GQ:DP 0/1:150:30
+1 3184885 . TAAAA TA,T 246 PASS DP=10;AN=2;AC=1,1 GT:GQ:DP 1/2:12:10
+2 3199812 . G GTT,GT 481 PASS DP=26;AN=2;AC=1,1 GT:GQ:DP 1/2:322:26
+3 3212016 . CTT C,CT 565 PASS DP=26;AN=2;AC=1,1 GT:GQ:DP 1/2:91:26
+4 3212016 . TACACACAC T 325 PASS DP=31;AN=2;AC=1 GT:GQ:DP 0/1:325:31
+4 3258448 . TACACACAC T 325 PASS DP=31;AN=2;AC=1 GT:GQ:DP 0/1:325:31
b
diff -r 000000000000 -r 3b6cd8086498 test-data/isec.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/isec.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B
+1 3062915 . G A 376 q20 DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:376:14:-10,0,-10
+1 3062915 . GTTT GT 376 q20 DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:376:14:-10,0,-10
+1 3106154 . C T 677 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3106154 . CAAAA C 677 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3157410 . GA G 249 PASS DP=11;AN=2;AC=1 GT:GQ:DP 0/1:49:11
+1 3162006 . GAA G 663 PASS DP=19;AN=2;AC=1 GT:GQ:DP 0/1:589:19
+1 3177144 . GT G 460 PASS DP=24;AN=2;AC=1 GT:GQ:DP 0/1:236:24
+1 3184885 . TAAA T 598 PASS DP=16;AN=2;AC=1 GT:GQ:DP 0/1:435:16
+2 3188209 . GA G 162 . DP=15;AN=2;AC=1 GT:GQ:DP 0/1:162:15
+3 3199812 . G GTT,GT 353 PASS DP=19;AN=2;AC=1,1 GT:GQ:DP 1/2:188:19
+4 3212016 . CTT C 677 q20 DP=15;AN=2;AC=1 GT:GQ:DP 0/1:158:15
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.2.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.2.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,30 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000150 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C A 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3106154 . C CC 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C A 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200000 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200010 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200020 . C G,T 59.2 PASS AN=4;AC=2 GT:GL ./.:1,2,3,4,5,6 .:1,2,3
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.2.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.2.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,30 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C G 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000150 . C G 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C G 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3106154 . C CCC 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200000 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200010 . c A,T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200020 . C T,G 59.2 PASS AN=4;AC=2 GT:GL ./.:1,4,6,2,5,3 .:1,3,2
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.3.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.3.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,26 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ 0/1:245 0/1:245
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.3.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.3.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,26 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C CG 59.2 PASS AN=4;AC=2;TR=10,20;TA=10;TG=10,20,30 GT:GQ 0/1:245 0/1:245
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.4.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.4.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=XR,Number=R,Type=Integer,Description="Some description">
+##FORMAT=<ID=XA,Number=A,Type=Integer,Description="Some description">
+##FORMAT=<ID=XG,Number=G,Type=Integer,Description="Some description">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 id1 C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XA:XG 0/1:245:0,1:1:0,1,2 0/1:245:1,2:2:0,1,2
+1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ:XR:XA:XG 0/1:245:1,2:2:0,1,2 0/1:245:2,3:3:1,2,3
+1 3000002 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000002 id2 C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GL:XR:XA:XG 0/1:245:.:.:. 0/1:245:.:.:.
+1 3000002 id3 C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GL:XR:XA:XG 0/1:245:.:.:. 0/1:245:.
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.4.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.4.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FORMAT=<ID=XR,Number=R,Type=Integer,Description="Some description">
+##FORMAT=<ID=XA,Number=A,Type=Integer,Description="Some description">
+##FORMAT=<ID=XG,Number=G,Type=Integer,Description="Some description">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D
+1 3000000 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000000 id1 C A 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XG:XA 0/1:245:4,5:3,4,5:5 0/1:245:6,7:6,7,8:7
+1 3000002 id3 C A 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XG:XA 0/1:245:. 0/1:245:1,2:1,2,3:2
+1 3000002 id2 C A 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XG:XA 0/1:245:. 0/1:245:.:.:.
+1 3000002 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T . PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=STR,Number=1,Type=String,Description="Testing string and Number=A in INFO">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##SAMPLE=<ID=NORMAL,SampleName=B,Description="Less-than (\"<\") and greater-than (\">\") quoting nonsense where double brackets would do just fine",softwareName=<Nonsense,Software>,softwareVer=<119,65>,softwareParam=<.>,MetadataResource=http://somewhere.com/path>
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FORMAT=<ID=STR,Number=1,Type=String,Description="Testing string in format">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B
+1 3062915 idSNP G A 24.6 q20 DP=14;DP4=1,2,3,4;AN=2;STR=.;AC=1 GT:GQ:DP:STR:GL 0/1:376:14:ABC:-10,0,-10
+1 3062915 id1D GTT GT 101 q20 DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL:STR 0/1:376:14:-10,0,-10:DEF
+1 3062915 id3D GTTT G 84.6 q20 TXT=AA;DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:376:14:-10,0,-10
+1 3106154 . C T 999 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3106154 . CAAAA C 15.4 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3157410 . GAC G 36.8 PASS DP=11;AN=2;AC=1 GT:GQ:DP 0/1:49:11
+1 3162006 . GAA G 238 PASS DP=19;AN=2;AC=1;XRF=1e6,2e6;XRI=1111,2222;XRS=AAA,BBB;XAF=1e6;XAI=1111;XAS=AAA;XGF=1e6,2e6,3e6;XGI=1111,2222,3333;XGS=A,B,C GT:GQ:DP 0/1:589:19
+1 3177144 . G T 999 PASS DP=24;AN=2;AC=1 GT:GQ:DP 0/1:236:24
+1 3177144 . GT G 999 PASS DP=24;AN=2;AC=1 GT:GQ:DP 0/1:236:24
+1 3184885 . TAAA T 25.8 PASS DP=16;AN=2;AC=1 GT:GQ:DP 0/1:435:16
+2 3188209 . GA G 41.5 . DP=15;AN=2;AC=1 GT:GQ:DP 0/1:162:15
+3 3199812 . GA GTT,GT 17.5 PASS DP=19;AN=2;AC=1,1 GT:GQ:DP 1/2:188:19
+4 3212016 . CTT C 999 q20 DP=15;AN=2;AC=1 GT:GQ:DP 0/1:158:15
b
diff -r 000000000000 -r 3b6cd8086498 test-data/merge.c.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.c.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,43 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=INTA,Number=A,Type=Integer,Description="Testing Number=A in INFO">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D
+1 3062915 id3D GTTT G 48.7 q10 TXT=BB;DP=999;DP4=4,3,2,1;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G C,T 419 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1;INTA=1,2 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:409:35:-20,-5,-20,-20,-5,-20
+1 3062915 id2D GTT G 999 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3106154 . CAAA C 72.6 PASS AN=0;AC=0 GT:GQ:DP .:245:32 ./.:245:32
+1 3106154 . C CT 459 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . G T 46.7 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA GA 206 PASS AN=4;AC=2;XRF=1e6,5e5;XRI=1111,5555;XRS=AAA,DDD;XAF=5e5;XAI=5555;XAS=DDD;XGF=1e6,5e5,9e9;XGI=1111,5555,9999;XGS=A,E,F GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G . 364 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 8.42 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 291 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 52.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T . PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.1.bam
b
Binary file test-data/mpileup.1.bam has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.1.bam.bai
b
Binary file test-data/mpileup.1.bam.bai has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.2.bam
b
Binary file test-data/mpileup.2.bam has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.2.bam.bai
b
Binary file test-data/mpileup.2.bam.bai has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.3.bam
b
Binary file test-data/mpileup.3.bam has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.3.bam.bai
b
Binary file test-data/mpileup.3.bam.bai has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.3.cram
b
Binary file test-data/mpileup.3.cram has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.X.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.X.vcf Sat Jul 23 13:49:21 2022 +0000
b
b'@@ -0,0 +1,4127 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##samtoolsVersion=1.1-19-g6b249e2+htslib-1.1-74-g845c515\n+##samtoolsCommand=samtools mpileup -uvDV -b xxx//mpileup.bam.list -f xxx//mpileup.ref.fa.gz\n+##reference=file://xxx//mpileup.ref.fa.gz\n+##contig=<ID=X,length=81195210>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=I16,Number=16,Type=Float,Description="Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h">\n+##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality 
bases">\n+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00100\tHG00101\tHG00102\n+X\t1\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t2\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,439,17587,0,0,319,9251,0,0,226,5030,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t3\t.\tG\t<*>\t0\t.\tDP=11;I16=11,0,0,0,431,16971,0,0,319,9251,0,0,229,5111,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t4\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,423,16417,0,0,319,9251,0,0,232,5202,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,71:3:0\n+X\t5\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,450,18520,0,0,319,9251,0,0,234,5252,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t6\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,403,14847,0,0,319,9251,0,0,236,5310,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t7\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,446,18114,0,0,319,9251,0,0,237,5327,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t8\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,465,19677,0,0,319,9251,0,0,238,5354,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t9\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,447,18205,0,0,319,9251,0,0,239,5391,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t10\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,426,16756,0,0,319,9251,0,0,240,5438,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,69:3:0\n+X\t11\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,413,15603,0,0,319,9251,0,0,241,5495,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t12\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,438,17506,0,0,319,9251,0,0,242,5562,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t13\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,437,17463,0
,0,319,9251,0,0,243,5639,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t14\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,453,18715,0,0,319,9251,0,0,242,5628,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t15\t.\tG\t<*>\t0\t.\tDP=11;I1'..b'1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4072\t.\tG\t<*>\t0\t.\tDP=5;I16=2,2,0,0,138,4974,0,0,194,9938,0,0,55,987,0,0;QS=1,0;MQSB=0;MQ0F=0\tPL:DP:DV\t0,12,122:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4073\t.\tG\t<*>\t0\t.\tDP=5;I16=3,2,0,0,156,5082,0,0,254,13538,0,0,60,994,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,136:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4074\t.\tC\t<*>\t0\t.\tDP=5;I16=3,2,0,0,160,5602,0,0,254,13538,0,0,56,928,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4075\t.\tT\t<*>\t0\t.\tDP=5;I16=3,2,0,0,187,7069,0,0,254,13538,0,0,52,870,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,155:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4076\t.\tG\t<*>\t0\t.\tDP=5;I16=3,2,0,0,174,6298,0,0,254,13538,0,0,48,820,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,149:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4077\t.\tA\t<*>\t0\t.\tDP=4;I16=3,1,0,0,138,4810,0,0,194,9938,0,0,44,728,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,121:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4078\t.\tT\t<*>\t0\t.\tDP=4;I16=3,1,0,0,143,5173,0,0,194,9938,0,0,40,644,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,124:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4079\t.\tA\t<*>\t0\t.\tDP=4;I16=3,1,0,0,121,3847,0,0,194,9938,0,0,36,568,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,107:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4080\t.\tT\t<*>\t0\t.\tDP=4;I16=3,0,0,0,106,3778,0,0,134,6338,0,0,25,451,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,9,87:3:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4081\t.\tT\t<*>\t0\t.\tDP=4;I16=3,1,0,0,106,2934,0,0,194,9938,0,0,28,440,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,94:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4082\t.\tC\t<*>\t0\t.\tDP=3;I16=2,1,0,0,110,4042,0,0,134,6338,0,0,25,387,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,103:3:0\t0,0,0:0:0\t0,
0,0:0:0\n+X\t4083\t.\tC\t<*>\t0\t.\tDP=3;I16=2,1,0,0,104,3648,0,0,134,6338,0,0,22,340,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,98:3:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4084\t.\tA\t<*>\t0\t.\tDP=2;I16=1,1,0,0,78,3050,0,0,97,4969,0,0,20,298,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,74:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4085\t.\tC\t<*>\t0\t.\tDP=2;I16=1,1,0,0,62,1940,0,0,97,4969,0,0,18,260,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,62:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4086\t.\tG\t<*>\t0\t.\tDP=2;I16=1,1,0,0,56,1640,0,0,97,4969,0,0,16,226,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,56:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4087\t.\tC\t<*>\t0\t.\tDP=2;I16=1,1,0,0,69,2405,0,0,97,4969,0,0,14,196,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,68:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4088\t.\tA\t<*>\t0\t.\tDP=1;I16=1,0,0,0,39,1521,0,0,37,1369,0,0,13,169,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4089\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,12,144,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4090\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,33,1089,0,0,37,1369,0,0,11,121,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,33:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4091\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,10,100,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4092\t.\tG\t<*>\t0\t.\tDP=1;I16=1,0,0,0,37,1369,0,0,37,1369,0,0,9,81,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4093\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,8,64,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4094\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,40,1600,0,0,37,1369,0,0,7,49,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4095\t.\tA\t<*>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,6,36,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4096\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,5,25,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4097\t.\tA\t<*>\t0\t.\tDP=1;
I16=1,0,0,0,35,1225,0,0,37,1369,0,0,4,16,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4098\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,31,961,0,0,37,1369,0,0,3,9,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,31:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4099\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,2,4,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4100\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,27,729,0,0,37,1369,0,0,1,1,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,27:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4101\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,26,676,0,0,37,1369,0,0,0,0,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,26:1:0\t0,0,0:0:0\t0,0,0:0:0\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.ploidy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.ploidy Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,4 @@
+X 1 1000 M 1
+X 3104 5000 M 1
+* * * M 2
+* * * F 2
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.ref.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.ref.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,71 @@
+>17 17:1-4200
+AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA
+TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA
+GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA
+GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC
+GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA
+GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA
+AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA
+AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG
+GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC
+ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG
+TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT
+GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG
+CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA
+AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA
+TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC
+CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG
+TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG
+CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA
+GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC
+CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT
+TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC
+CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC
+GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC
+CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA
+GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA
+GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT
+TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC
+CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG
+CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA
+CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC
+TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG
+TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA
+GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC
+CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC
+GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC
+AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC
+ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG
+GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG
+CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC
+GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA
+AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG
+AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG
+CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA
+ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG
+CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT
+CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT
+TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG
+ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG
+TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT
+TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG
+CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG
+CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG
+ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG
+GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA
+ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA
+GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG
+GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG
+GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT
+AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC
+TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT
+ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG
+TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC
+ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC
+CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG
+TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT
+GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT
+GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT
+ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT
+TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT
+CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.samples Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,3 @@
+HG00100 F
+HG00101 M
+HG00102 F
b
diff -r 000000000000 -r 3b6cd8086498 test-data/mpileup.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.vcf Sat Jul 23 13:49:21 2022 +0000
b
b'@@ -0,0 +1,4127 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##samtoolsVersion=1.1-19-g6b249e2+htslib-1.1-74-g845c515\n+##samtoolsCommand=samtools mpileup -uvDV -b xxx//mpileup.bam.list -f xxx//mpileup.ref.fa.gz\n+##reference=file://xxx//mpileup.ref.fa.gz\n+##contig=<ID=17,length=81195210>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=I16,Number=16,Type=Float,Description="Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h">\n+##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality 
bases">\n+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00100\tHG00101\tHG00102\n+17\t1\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0,.,.,.,.,.,.,.,.,.:5:0\t.:3:0\t.:3:0\n+17\t2\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0,.,.,.,.,.,.,.,.,.:5:0\t.:3:0\t.:3:0\n+17\t3\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0,.,.,.,.,.,.,.,.,.:5:0\t.:3:0\t.:3:0\n+17\t4\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:5:0\t.:3:0\t.:3:0\n+17\t5\t.\tA\tX,G\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,0,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0:5:0\t.:3:0\t.:3:0\n+17\t6\t.\tA\tX,G\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,0,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0:5:0\t.:3:0\t.:3:0\n+17\t7\t.\tA\tX,G\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,0,1;MQ0F=0\tPL:DP:DV\t1,2,3,4,5,6:5:0\t.:3:0\t.:3:0\n+17\t8\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,465,19677,0,0,319,9251,0,0,238,5354,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t9\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,447,18205,0,0,319,9251,0,0,239,5391,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t10\t.\tA\t<X>\t0\t.\tDP=11;I16=11,0,0,0,426,16756,0,0,319,9251,0,0,240,5438,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,69:3:0\n+17\t11\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,413,15603,0,0,319,9251,0,0,241,5495,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t12\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,438,17506,0,0,319,9251,0,0,242,5562,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,1
00:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t13\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,437,17463,0,0,319,9251,0,0,243,5639,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t14\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,453,18715,0,0,319,9251,0,0,242,562'..b':DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4072\t.\tG\t<X>\t0\t.\tDP=5;I16=2,2,0,0,138,4974,0,0,194,9938,0,0,55,987,0,0;QS=1,0;MQSB=0;MQ0F=0\tPL:DP:DV\t0,12,122:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4073\t.\tG\t<X>\t0\t.\tDP=5;I16=3,2,0,0,156,5082,0,0,254,13538,0,0,60,994,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,136:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4074\t.\tC\t<X>\t0\t.\tDP=5;I16=3,2,0,0,160,5602,0,0,254,13538,0,0,56,928,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4075\t.\tT\t<X>\t0\t.\tDP=5;I16=3,2,0,0,187,7069,0,0,254,13538,0,0,52,870,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,155:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4076\t.\tG\t<X>\t0\t.\tDP=5;I16=3,2,0,0,174,6298,0,0,254,13538,0,0,48,820,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,149:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4077\t.\tA\t<X>\t0\t.\tDP=4;I16=3,1,0,0,138,4810,0,0,194,9938,0,0,44,728,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,121:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4078\t.\tT\t<X>\t0\t.\tDP=4;I16=3,1,0,0,143,5173,0,0,194,9938,0,0,40,644,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,124:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4079\t.\tA\t<X>\t0\t.\tDP=4;I16=3,1,0,0,121,3847,0,0,194,9938,0,0,36,568,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,107:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4080\t.\tT\t<X>\t0\t.\tDP=4;I16=3,0,0,0,106,3778,0,0,134,6338,0,0,25,451,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,9,87:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4081\t.\tT\t<X>\t0\t.\tDP=4;I16=3,1,0,0,106,2934,0,0,194,9938,0,0,28,440,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,94:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4082\t.\tC\t<X>\t0\t.\tDP=3;I16=2,1,0,0,110,4042,0,0,134,6338,0,0,25,387,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,103:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4083\t.\tC\t<X>\t0\
t.\tDP=3;I16=2,1,0,0,104,3648,0,0,134,6338,0,0,22,340,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,98:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4084\t.\tA\t<X>\t0\t.\tDP=2;I16=1,1,0,0,78,3050,0,0,97,4969,0,0,20,298,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,74:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4085\t.\tC\t<X>\t0\t.\tDP=2;I16=1,1,0,0,62,1940,0,0,97,4969,0,0,18,260,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,62:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4086\t.\tG\t<X>\t0\t.\tDP=2;I16=1,1,0,0,56,1640,0,0,97,4969,0,0,16,226,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,56:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4087\t.\tC\t<X>\t0\t.\tDP=2;I16=1,1,0,0,69,2405,0,0,97,4969,0,0,14,196,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,68:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4088\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,39,1521,0,0,37,1369,0,0,13,169,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4089\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,12,144,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4090\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,33,1089,0,0,37,1369,0,0,11,121,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,33:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4091\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,10,100,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4092\t.\tG\t<X>\t0\t.\tDP=1;I16=1,0,0,0,37,1369,0,0,37,1369,0,0,9,81,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4093\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,8,64,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4094\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,40,1600,0,0,37,1369,0,0,7,49,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4095\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,6,36,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4096\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,5,25,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4097\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,122
5,0,0,37,1369,0,0,4,16,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4098\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,31,961,0,0,37,1369,0,0,3,9,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,31:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4099\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,2,4,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4100\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,27,729,0,0,37,1369,0,0,1,1,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,27:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4101\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,26,676,0,0,37,1369,0,0,0,0,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,26:1:0\t0,0,0:0:0\t0,0,0:0:0\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/norm.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,26 @@
+>20 20:1339000-1339300
+AGGATGGGGCTCATTAATAGAGCTCCACTTGTCTCCAGAATCACTGGTGAGGAAGGGGAG
+TGTTGCCCCCACATTCGTGCACAGCAGGGATGGTTCACCGAACTCCACACCAGTCTCTGC
+AGAGCCTGTTGGGGAGAGGAGGGCTGTGGTTTCTTTGATGGTGTTCACCTGGAGTAGAGC
+AAGTATTGTCAAAAGGGTCATCCTCGGAGGTTGCAGTGAGCCGAGATCGCACCATTGCAC
+TGCAGCCTGGGAGACAGAGCAAGACTCCATCTCAAAAAAAAAAAAAAAAAAAAAGGCCAT
+C
+>1 1:10143-10443
+CTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACC
+CTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCCTA
+ACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCA
+ACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTA
+ACCCTAACCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC
+>2 1:1382388-1382602
+GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTA
+TTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTT
+GAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTG
+CAAGCTCCACCTCCTGGGTTCACGCCATTCTCCTG
+>3 madeup
+ACTGGACACGTGGACACACACACACACACACACACACACACAGTCAAACCACCTACCAGA
+>4 20:8917026-8917085
+TCCCCTCTTGACCTCTCTCTATTTTTTTTTTTTTTTCTGAGATGGATTTTTGCTCTTGTT
+>5 20:18724313-18724343
+GTCTCAAAAAAAAAAAAAAAAAAAAGAAAAG
+>21
+TTTATTATTATTATTATTAAATTGAATTTATTTAGTGTACATACATTCATGTGTATTGTG
b
diff -r 000000000000 -r 3b6cd8086498 test-data/norm.merge.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.merge.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,61 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+##FILTER=<ID=FAIL1,Description="Failed filter 1">
+##FILTER=<ID=FAIL2,Description="Failed filter 2">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAAACCCTAAA TAA 999 PASS INDEL;AN=4;AC=2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06;XRI=1111,2222;XRS=AAA,BBB;XAF=1e+06;XAI=1111;XAS=AAA;XGF=1e+06,2e+06,3e+06;XGI=1111,2222,3333;XGS=A,B,C GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/0:1,2,3:1:1e+06,2e+06:1111,2222:AAAA,BBB:1e+06:1111:A:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC 1/0:1,2,3:1:1e+06,2e+06:1111,2222:AAAA,BBB:1e+06:1111:A:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC
+1 105 . TAAACCCTAAA TAACCCTAAA 999 PASS INDEL;AN=4;AC=2;DP=19;ISTR=SomeString;XRF=1e+06,500000;XRI=1111,5555;XRS=AAA,DDD;XAF=500000;XAI=5555;XAS=DDD;XGF=1e+06,500000,9e+09;XGI=1111,5555,9999;XGS=A,E,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 0/1:1,4,6:1:1e+06,500000:1111,5555:AAAA,CC:500000:5555:BB:1e+06,500000,9e+09:1111,5555,9999:A,EEEE,FFFFF 0/1:1,4,6:1:1e+06,500000:1111,5555:AAAA,CC:500000:5555:BB:1e+06,500000,9e+09:1111,5555,9999:A,EEEE,FFFFF
+2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC 999 FAIL1 INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 114 . TC TTC 999 PASS INDEL;AN=4;AC=2 GT:DP 0/1:1 0/1:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC . PASS INDEL;AN=4;AC=2 GT:PL:DP 1/0:1,2,3:1 1/0:1,2,3:1
+20 5 id0001 TGGG TG . PASS INDEL;AN=4;AC=2 GT:PL:DP 0/1:1,4,6:1 0/1:1,4,6:1
+20 5 id0002 TGGG TGGGG . PASS INDEL;AN=4;AC=0 GT:PL:DP 0/0:1,7,10:1 0/0:1,7,10:1
+20 5 . TGGG AC . PASS INDEL;AN=4;AC=0 GT:PL:DP 0/0:1,11,15:1 0/0:1,11,15:1
+20 59 id0003 AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2 GT:PL:DP 1/0:0,3,5:1 1/0:0,3,5:1
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2 GT:PL:DP 0/1:0,3,5:1 0/1:0,3,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 275 . A C 0 FAIL1 INDEL;AN=2;AC=0 GT:PL:DP:FGF:FGI:FGS:FSTR 0:0,0:0:1e+06,2e+06:1111,2222:A,BB:WORD 0:0,0:0:1e+06,2e+06:1111,2222:A,BB:WORD
+20 275 . A G 0 FAIL2 INDEL;AN=2;AC=2 GT:PL:DP:FGF:FGI:FGS:FSTR 1:0,0:0:1e+06,3e+06:1111,3333:A,CCC:WORD 1:0,0:0:1e+06,3e+06:1111,3333:A,CCC:WORD
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 300 . A C 998 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 300 . A G 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/norm.setref.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.setref.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,45 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAACCCTAAA TAAACCCTAAA,TAA 999 PASS INDEL;AN=4;AC=2,2;DP=19 GT 1/2 1/2
+2 101 . . c 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 105 . n <DEL> 999 PASS END=112;AN=4;AC=3 GT:DP 0/1:1 1/1:1
+2 115 . t c 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . c g 999 PASS INDEL;AN=4;AC=1 GT 0/0 0/1
+20 3 . gact gatg 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 10 . . . 999 PASS INDEL;AN=4;AC=1 GT 1/0 0/0
+20 275 . g c,a,t,aaa 999 PASS INDEL;AN=2;AC=0,2,0,0 GT 2 2
b
diff -r 000000000000 -r 3b6cd8086498 test-data/norm.split.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.split.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,53 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##contig=<ID=21,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
+2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1
+20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 275 . A C,G 999 PASS INDEL;AN=2;AC=0,2 GT:PL:DP:FGF:FGI:FGS:FSTR 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 300 . T C,G 999 PASS INDEL;AN=0;AC=0,0 GT:PL:DP ./.:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0
+21 1 id TTTA TTTATTATTA,TTTATTATTATTATTATTATTA,T,TTATTATTA 999 PASS INDEL;AN=0;AC=0,0,0,0 GT:DP ./.:0 ./.:0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/norm.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,57 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
+2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1
+20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 275 . A C,G 999 PASS INDEL;AN=2;AC=0,2 GT:PL:DP:FGF:FGI:FGS:FSTR 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+3 10 . GTGGAC GTGGACACAC,GTGGACAC,GTGGACACACAC,GTGG,GTGGACACACACAC,ATGGACACACAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+3 15 . CACA CAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+4 21 . ATTTTTTTTTTTTTTTC ATTTTTTTTTTTTTTC,ATTTTTTTTTTTTTTTT,ATTTTTTTTTTTTTTTTC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+5 22 . A AGA 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/plugin1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plugin1.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##test=<ID=4,IE=5>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS . GT:GQ ./.:245 ./.:245
+1 3000151 . C T 59.2 PASS . GT:DP:GQ ./.:32:245 ./.:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS . GT:GQ:DP ./.:245:32 ./.:245:32
+1 3106154 . C CT 59.2 PASS . GT:GQ:DP ./.:245:32 ./.:245:32
+1 3157410 . GA G 90.6 q10 . GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS . GT:GQ:DP ./.:212:22 ./.:212:22
+1 3177144 . G T 45 PASS . GT:GQ:DP ./.:150:30 ./.:150:30
+1 3177144 . G . 45 PASS . GT:GQ:DP ./.:150:30 ./.:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS . GT:GQ:DP ./.:12:10 ./.:12:10
+2 3199812 . G GTT,GT 82.7 PASS . GT:GQ:DP ./.:322:26 ./.:322:26
+3 3212016 . CTT C,CT 79 PASS . GT:GQ:DP ./.:91:26 ./.:91:26
+4 3258448 . TACACACAC T 59.9 PASS . GT:GQ:DP ./.:325:31 ./.:325:31
b
diff -r 000000000000 -r 3b6cd8086498 test-data/plugin2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plugin2.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype code">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
+##FORMAT=<ID=ADF,Number=R,Type=Integer,Description="Read depth for each allele on the forward strand">
+##FORMAT=<ID=ADR,Number=R,Type=Integer,Description="Read depth for each allele on the reverse strand">
+##INFO=<ID=ANN,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|SIFT|PolyPhen|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|CAROL|Condel">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
+chr12 247923 . A G . PASS ANN=G|missense_variant|MODERATE|IQSEC3|ENSG00000120645|Transcript|ENST00000326261|protein_coding|4/15||||1394|1394|465|D/G|gAt/gGt|rs75677829||1||HGNC|29193|tolerated(0.23)|benign(0.015)|0.08|0.2314|0.05139|0.1233|0.01977|0.09129|0.08025|0.07976|0.07557||||Neutral(0.763)|neutral(0.042),G|missense_variant|MODERATE|IQSEC3|ENSG00000120645|Transcript|ENST00000382841|protein_coding|3/13||||897|485|162|D/G|gAt/gGt|rs75677829||1||HGNC|29193|tolerated_low_confidence(0.32)|benign(0.036)|0.08|0.2314|0.05139|0.1233|0.01977|0.09129|0.08025|0.07976|0.07557||||Neutral(0.659)|neutral(0.026),G|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-598F7.4|ENSG00000249695|Transcript|ENST00000505893|antisense||7/9||||||||rs75677829||-1||Clone_based_vega_gene||||0.08|0.2314|0.05139|0.1233|0.01977|0.09129|0.08025|0.07976|0.07557|||||,G|downstream_gene_variant|MODIFIER|RP11-598F7.4|ENSG00000249695|Transcript|ENST00000508953|antisense||||||||||rs75677829|777|-1||Clone_based_vega_gene||||0.08|0.2314|0.05139|0.1233|0.01977|0.09129|0.08025|0.07976|0.07557|||||,G|missense_variant|MODERATE|IQSEC3|ENSG00000120645|Transcript|ENST00000538872|protein_coding|4/14||||1512|1394|465|D/G|gAt/gGt|rs75677829||1||HGNC|29193|tolerated(0.23)|benign(0.015)|0.08|0.2314|0.05139|0.1233|0.01977|0.09129|0.08025|0.07976|0.07557||||Neutral(0.763)|neutral(0.042) GT:GQ:DP:AD:ADF:ADR 0/1:.:14:8,6:7,6:1,0 0/1:.:41:12,29:10,25:2,4
+chr12 284058 . T C . PASS ANN=C|synonymous_variant|LOW|IQSEC3|ENSG00000120645|Transcript|ENST00000326261|protein_coding|14/15||||3408|3408|1136|G|ggT/ggC|rs216230&COSV58289061||1||HGNC|29193|||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|downstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000382841|protein_coding||||||||||rs216230&COSV58289061|3562|1||HGNC|29193|||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|non_coding_transcript_exon_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000537151|processed_transcript|3/4||||378|||||rs216230&COSV58289061||1||HGNC|29193|||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-598F7.6|ENSG00000256540|Transcript|ENST00000537295|lincRNA||1/2||||||||rs216230&COSV58289061||-1||Clone_based_vega_gene||||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-598F7.6|ENSG00000256540|Transcript|ENST00000537961|lincRNA||1/1||||||||rs216230&COSV58289061||-1||Clone_based_vega_gene||||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|synonymous_variant|LOW|IQSEC3|ENSG00000120645|Transcript|ENST00000538872|protein_coding|14/14||||3526|3408|1136|G|ggT/ggC|rs216230&COSV58289061||1||HGNC|29193|||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|downstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000540907|retained_intron||||||||||rs216230&COSV58289061|3426|1||HGNC|29193|||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1||,C|upstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000544511|processed_transcript||||||||||rs216230&COSV58289061|1590|1||HGNC|29193|||0.9443|0.9477|0.967|0.9298|0.8657|0.944|0.9597|0.9377|0.9158||0&1|0&1|| GT:GQ:DP:AD:ADF:ADR 1/1:.:13:0,13:0,5:0,8 1/1:.:48:0,48:0,14:0,34
+chr12 285027 . C A . PASS ANN=A|3_prime_UTR_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000326261|protein_coding|14/15||||4377|||||rs7960096||1||HGNC|29193||||||||||||||||,A|downstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000382841|protein_coding||||||||||rs7960096|4531|1||HGNC|29193||||||||||||||||,A|downstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000537151|processed_transcript||||||||||rs7960096|502|1||HGNC|29193||||||||||||||||,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-598F7.6|ENSG00000256540|Transcript|ENST00000537295|lincRNA||1/2||||||||rs7960096||-1||Clone_based_vega_gene|||||||||||||||||,A|intron_variant&non_coding_transcript_variant|MODIFIER|RP11-598F7.6|ENSG00000256540|Transcript|ENST00000537961|lincRNA||1/1||||||||rs7960096||-1||Clone_based_vega_gene|||||||||||||||||,A|3_prime_UTR_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000538872|protein_coding|14/14||||4495|||||rs7960096||1||HGNC|29193||||||||||||||||,A|downstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000540907|retained_intron||||||||||rs7960096|4395|1||HGNC|29193||||||||||||||||,A|upstream_gene_variant|MODIFIER|IQSEC3|ENSG00000120645|Transcript|ENST00000544511|processed_transcript||||||||||rs7960096|621|1||HGNC|29193|||||||||||||||| GT:GQ:DP:AD:ADF:ADR 0/1:.:25:15,10:10,7:5,3 0/1:.:19:8,11:3,8:5,3
b
diff -r 000000000000 -r 3b6cd8086498 test-data/plugin_frameshifts.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plugin_frameshifts.bed Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,1 @@
+1 3106150 3106160
b
diff -r 000000000000 -r 3b6cd8086498 test-data/query.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.out Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,13 @@
+1 3062915 GTTT G 1,2,3,4 4 0/1 GTTT/G 0/1 GTTT/G
+1 3062915 G C,T 1,2,3,4 4 0/1 G/C 0/2 G/T
+1 3062915 GTT G 1,2,3,4 4 0/1 GTT/G 0/1 GTT/G
+1 3106154 CAAA C . 0 . . ./. ./.
+1 3106154 C CT . 4 0/1 C/CT 0/1 C/CT
+1 3157410 G T . 4 1/1 T/T 1/1 T/T
+1 3162006 GAA G . 4 0/1 GAA/G 0/1 GAA/G
+1 3177144 G . . 4 0/0 G/G 0/0 G/G
+1 3184885 TAAAA TA,T . 4 1/2 TA/T 1/2 TA/T
+2 3199812 G GTT,GT . 4 1/2 GTT/GT 1/2 GTT/GT
+3 3212016 CTT C,CT . 4 1/2 C/CT 1/2 C/CT
+4 3258448 TACACACAC T . 4 0/1 TACACACAC/T 0/1 TACACACAC/T
+4 3258449 A C . 4 1/1 C/C 0/1 A/C
b
diff -r 000000000000 -r 3b6cd8086498 test-data/query.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##contig=<ID=2,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D
+1 3062915 id3D GTTT G 48.7 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:109:25:-10,-5,-20
+1 3062915 idSNP G C,T 419 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:109:35:-10,-5,-20,-20,-5,-20
+1 3062915 id2D GTT G 999 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:109:25:-10,-5,-20
+1 3106154 . CAAA C 72.6 PASS AN=0;AC=0 GT:GQ:DP .:245:32 ./.:145:22
+1 3106154 . C CT 459 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:145:22
+1 3157410 . G T 46.7 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:11:11
+1 3162006 . GAA G 206 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:112:12
+1 3177144 . G . 364 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:20
+1 3184885 . TAAAA TA,T 8.42 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:22:20 1/2:12:10
+2 3199812 . G GTT,GT 291 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:122:16
+3 3212016 . CTT C,CT 52.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:11:16
+4 3258448 . TACACACAC T 123 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:125:11
+4 3258449 . A C 123 PASS AN=4;AC=3 GT:GQ:DP 1/1:325:31 0/1:125:11
b
diff -r 000000000000 -r 3b6cd8086498 test-data/reheader.hdr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.hdr Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,19 @@
+##fileformat=VCFv4.2
+##INFO=<ID=DP2,Number=2,Type=Integer,Description="Depth">
+##FILTER=<ID=Test,Description="Test filter">
+##FORMAT=<ID=DP2,Number=2,Type=Integer,Description="Depth">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=12,length=123456789>
+##contig=<ID=20,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA0001 BBB0002
b
diff -r 000000000000 -r 3b6cd8086498 test-data/reheader.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.samples Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,2 @@
+AAA
+BBB
b
diff -r 000000000000 -r 3b6cd8086498 test-data/reheader.samples2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.samples2 Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,2 @@
+XY00002 BBB
+XY00001 AAA
b
diff -r 000000000000 -r 3b6cd8086498 test-data/reheader.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,54 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##FILTER=<ID=Test,Description="Test Filter">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1
+20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 Test AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+3 10 . GTGGAC GTGGACACAC,GTGGACAC,GTGGACACACAC,GTGG,GTGGACACACACAC,ATGGACACACAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+3 15 . CACA CAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+4 21 . ATTTTTTTTTTTTTTTC ATTTTTTTTTTTTTTC,ATTTTTTTTTTTTTTTT,ATTTTTTTTTTTTTTTTC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+5 22 . A AGA 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/roh.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/roh.vcf Sat Jul 23 13:49:21 2022 +0000
b
b'@@ -0,0 +1,8895 @@\n+##fileformat=VCFv4.2\n+##reference=test\n+##contig=<ID=1,length=249250621>\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample\n+1\t14907\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t14930\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t15211\t.\tT\tG\t.\tPASS\t.\tGT\t0/1\n+1\t16378\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t69511\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t129285\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t133129\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t133483\t.\tG\tT\t.\tPASS\t.\tGT\t0/1\n+1\t136048\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t753405\t.\tC\tA\t.\tPASS\t.\tGT\t1/1\n+1\t753425\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t753474\t.\tC\tG\t.\tPASS\t.\tGT\t0/1\n+1\t808631\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t808922\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t808928\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t814668\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t824094\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t866319\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t876499\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t880238\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t881627\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t881918\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t883625\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t887560\t.\tA\tC\t.\tPASS\t.\tGT\t1/1\n+1\t887801\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t888639\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t888659\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t889158\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t892745\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t894573\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t897325\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t897564\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t898323\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t900505\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t908823\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t909238\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t909309\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t909419\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t909555\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t909768\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t911916\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t914876\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t915227\t.\t
A\tG\t.\tPASS\t.\tGT\t1/1\n+1\t943468\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t943687\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t948870\t.\tC\tG\t.\tPASS\t.\tGT\t1/1\n+1\t948921\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t949235\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t949608\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t949654\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t949925\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t951628\t.\tA\tT\t.\tPASS\t.\tGT\t1/1\n+1\t957640\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t957898\t.\tG\tT\t.\tPASS\t.\tGT\t0/1\n+1\t977203\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t977330\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t977570\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t979748\t.\tA\tT\t.\tPASS\t.\tGT\t0/1\n+1\t980460\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t981087\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t982722\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t982941\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t982994\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t985797\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t986038\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t986286\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t986443\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t987200\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t1019841\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1021415\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t1102563\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1119657\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1158631\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1225579\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t1229286\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1246004\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1249187\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1254136\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1254436\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1254443\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1254841\t.\tC\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1262591\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t1262966\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t1263144\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1263457\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1268847\t.\tT\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1269554\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1269888\t.\tC\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1275291\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1277533\t.\tT\tC
\t.\tPASS\t.\tGT\t1/1\n+1\t1278237\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1284490\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1289911\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1293764\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1296691\t.\tC\tG\t.\tPASS\t.\tGT\t0/1\n+1\t1296818\t.\tC\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1308982\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1314172\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t1314245\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t1387667\t.\tC\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1395983\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1469546\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1478880\t.\tT\tG\t.\tPASS\t.\tGT\t1/1\n+1\t1479057\t.\tA\tT\t.\tPASS\t.\tGT\t0/1\n+1\t1479082\t.\tA\tT\t.\tPASS\t.\tGT\t0/1\n+1\t1558792\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1560103\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t1562437\t.\tC\tG\t.\tPASS\t.\tGT\t0/1\n+1\t1562536\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1562895\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t1574019\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t1574076\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1575616\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t1575836\t.\tC\tG\t.\tPASS\t.\tGT\t0/1\n+1\t1576636\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t1580738\t.\tT\tA\t.\tPASS\t.\tGT\t0/1\n+1\t1581096\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1'..b'.\tGT\t0/1\n+1\t248112836\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248112915\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248113026\t.\tT\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248113040\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248113121\t.\tC\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248113206\t.\tC\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248128929\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248129240\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248138006\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248138013\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248138217\t.\tT\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248138520\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248138531\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248138577\t.\tC\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248138588\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248138642\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248138848\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248153578\t.\tT\tA\t.\tPASS\
t.\tGT\t0/1\n+1\t248153613\t.\tA\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248154224\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248166472\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248166526\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248167268\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248185883\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248202344\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248224099\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248224216\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248224294\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248224398\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248224569\t.\tG\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248224754\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248247308\t.\tA\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248247463\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248262644\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248285441\t.\tG\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248285487\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248343543\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248343945\t.\tC\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248343990\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248344442\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248366702\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248367415\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248402758\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248436611\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248436616\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248436805\t.\tT\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248436972\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248457979\t.\tC\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248458717\t.\tC\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248458974\t.\tT\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248486895\t.\tG\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248486913\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248487300\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248487338\t.\tC\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248487638\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248487768\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248511928\t.\tG\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248512064\t.\tT\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248512094\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248512637\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248512749\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248512767\t.\tG\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248
513023\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248513088\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248513191\t.\tA\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248550976\t.\tT\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248569282\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248569369\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248570440\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248604452\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248604465\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248604542\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248604614\t.\tA\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248604765\t.\tT\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248604937\t.\tT\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248605395\t.\tT\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248637046\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248651867\t.\tC\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248651905\t.\tA\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248651927\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248651959\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248685253\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248722722\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248722723\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248722777\t.\tT\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248722883\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248722888\t.\tC\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248737329\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248789454\t.\tG\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248789504\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248790075\t.\tA\tG\t.\tPASS\t.\tGT\t0/1\n+1\t248801556\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248801566\t.\tT\tG\t.\tPASS\t.\tGT\t1/1\n+1\t248801592\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248801897\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248802763\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t248813135\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248813729\t.\tA\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248814052\t.\tT\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248814080\t.\tA\tC\t.\tPASS\t.\tGT\t0/1\n+1\t248814126\t.\tG\tA\t.\tPASS\t.\tGT\t1/1\n+1\t248845097\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248845411\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t248845458\t.\tC\tT\t.\tPASS\t.\tGT\t1/1\n+1\t248845471\t.\tT\tC\t.\tPASS\t.\tGT\t1/1\n+1\t248845499\t.\tA\tG\t.\tPASS\t.\tGT\t1/1\n+1\t249107065\t.\tC\tT\t.\t
PASS\t.\tGT\t0/1\n+1\t249110906\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t249149747\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t249150116\t.\tG\tC\t.\tPASS\t.\tGT\t0/1\n+1\t249150330\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n+1\t249210707\t.\tG\tA\t.\tPASS\t.\tGT\t0/1\n+1\t249211350\t.\tT\tC\t.\tPASS\t.\tGT\t0/1\n+1\t249230768\t.\tC\tT\t.\tPASS\t.\tGT\t0/1\n'
b
diff -r 000000000000 -r 3b6cd8086498 test-data/stats.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats.a.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 1000 . G A . PASS . GT 0|0 0|1 1|1
+1 1001 . G A . PASS . GT 0/0 0|1 1|1
+1 1002 . G A . PASS . GT 0|0 0|1 1|1
b
diff -r 000000000000 -r 3b6cd8086498 test-data/stats.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats.b.vcf Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 1000 . G A . PASS . GT 0/1 0/0 0/0
+1 1001 . G A . PASS . GT 0/0 0/0 0/0
+1 1002 . G A . PASS . GT 0/0 0/1 0/0
b
diff -r 000000000000 -r 3b6cd8086498 test-data/summary.pdf
b
Binary file test-data/summary.pdf has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/23andme.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/23andme.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,14 @@
+>1
+CACGTNACGGCTGAAGTCCAAGGTAC
+CGTATCGAGTTCACAGTCGATAGCTC
+GATCGATAGCATCGCTAGCNNNACTA
+CGATCGATCGCTCTCCGTAACACTCA
+AAAACGATCGATCGACTGCTCTTTAG
+CGATGACTTTAGGGGAAAAA
+>2
+CGCTCAGCCGTACAGCCGAGCAGGAC
+ACGCTATTTTAGATCGACTGGCTNNG
+CGCTAGCTACGCTTTAGCACGAGAA
+>Y
+NNNGCATACGTGTCCATCACGATGAT
+AGCGATGATCGATC
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/23andme.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/23andme.fa.fai Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,3 @@
+1 150 3 26 27
+2 77 162 26 27
+Y 40 245 26 27
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/consensus.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/consensus.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,20 @@
+>1:2-501
+TACCATATGTGACATATAAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGC
+AGAAAAGGAAGACTTAAAAAGAGTCAGTACTAACCTACATAATATATACAATGTTCATTA
+AATAATAAAATGAGCTCATCATACTTAGGTCATCATAAATATATCTGAAATTCACAAATA
+TTGATCAAATGGTAAAATAGACAAGTAGATTTTAATAGGTTAAACAATTACTGATTCTCT
+TGAAAGAATAAATTTAATATGAGACCTATTTCATTATAATGAACTCACAAATTAGAAACT
+TCACACTGGGGGCTGGAGAGATGGCTCAGTAGTTAAGAACACTGACTGCTCTTCTGAAGG
+TCCTGAGTTCAAATCCCAGCAACCACATGGTGACTTACAACCATCTGTAATGACATCTGA
+TGCCCTCTGGTGTGTCTGAAGACAGCTACAGTGTACTTACATAAAATAATAAATAAATCT
+TTAAAAACAAAAAAAAAGAA
+>2
+GAAGATCTTTTCCTTATTAAGGATCTGAAGCTCTGTAGATTTGTATTCTATTAAACATGG
+AGAGATTAGTGATTTTCCATATTCTTTAAGTCATTTTAGAGTAATGTGTTCTTAAGATAA
+ATCAGAAAAACAAAAACTTGTGCTTTCCTGTTTGAAAAACAAACAGCTGTGGGGAATGGT
+GTCGGGACAGCCTTTTTATAAAATTTTTCTAAATAATGTTGAGGCTTTGATACGTCAAAG
+TTATATTTCAAATGGAATCACTTAGACCTCGTTTCTGAGTGTCAATGGCCATATTGGGGA
+TTTGCTGCTGCCAATGACAGCACACCCTGGGAATGCCCCAACTACTTACTACAAAGCAGT
+GTTACATGGAGAAGATCTTCAAGAGTCTTTTTGCTAGATCTTTCCTTGGCTTTTGATGTG
+ACTCCTCTCAATAAAATCCACAGTAATATAGTGAGTGGTCTCCTGCTCCAAACCAGTATT
+TCAGACACAGTTAATCCAGAC
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/consensus.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/consensus.fa.fai Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,2 @@
+1:2-501 500 9 60 61
+2 501 521 60 61
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/csq.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/csq.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,34 @@
+>1
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+>2
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+>3
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/csq.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/csq.fa.fai Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,3 @@
+1 600 3 60 61
+2 600 616 60 61
+3 600 1229 60 61
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/gvcf.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gvcf.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,9 @@
+>22
+NNNNNNNNCCTTGGCCAAGTCACTTCCTCCTTCAGGAACATTGCAGTGGGCCTAAGTGCC
+TCCTCTCGGGACTGGTATGGGGACGGTCATGCAATCTGGACAACATTCACCTTTAAAAGT
+TTATTGATCTTTTGTGACATGCACGTGGGTTCCCAGTAGCAAGAAACTAAAGGGTCGCAG
+GCCGGTTTCTGCTAATTTCTTTAATTCCAAGACAGTCTCAAATATTTTCTTATTAACTTC
+CTGGAGGGAGGCTTATCATTCTCTCTTTTGGATGATTCTAAGTACCAGCTAAAATACAGC
+TATCATTCATTTTCCTTGATTTGGGAGCCTAATTTCTTTAATTTAGTATGCAAGAAAACC
+AATTTGGAAATATCAACTGTTTTGGAAACCTTAGACCTAGGTCATCCTTAGTAAGATCTT
+CCCATTTATATAAATACTTGCAAGTAGTAGTGCCATAATT
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/gvcf.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gvcf.fa.fai Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,1 @@
+22 460 4 60 61
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/mpileup.ref.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/mpileup.ref.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,71 @@
+>17 17:1-4200
+AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA
+TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA
+GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA
+GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC
+GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA
+GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA
+AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA
+AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG
+GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC
+ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG
+TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT
+GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG
+CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA
+AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA
+TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC
+CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG
+TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG
+CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA
+GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC
+CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT
+TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC
+CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC
+GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC
+CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA
+GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA
+GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT
+TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC
+CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG
+CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA
+CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC
+TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG
+TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA
+GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC
+CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC
+GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC
+AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC
+ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG
+GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG
+CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC
+GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA
+AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG
+AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG
+CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA
+ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG
+CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT
+CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT
+TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG
+ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG
+TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT
+TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG
+CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG
+CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG
+ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG
+GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA
+ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA
+GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG
+GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG
+GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT
+AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC
+TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT
+ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG
+TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC
+ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC
+CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG
+TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT
+GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT
+GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT
+ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT
+TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT
+CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/mpileup.ref.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/mpileup.ref.fa.fai Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,1 @@
+17 4200 14 60 61
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/norm.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/norm.fa Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,26 @@
+>20 20:1339000-1339300
+AGGATGGGGCTCATTAATAGAGCTCCACTTGTCTCCAGAATCACTGGTGAGGAAGGGGAG
+TGTTGCCCCCACATTCGTGCACAGCAGGGATGGTTCACCGAACTCCACACCAGTCTCTGC
+AGAGCCTGTTGGGGAGAGGAGGGCTGTGGTTTCTTTGATGGTGTTCACCTGGAGTAGAGC
+AAGTATTGTCAAAAGGGTCATCCTCGGAGGTTGCAGTGAGCCGAGATCGCACCATTGCAC
+TGCAGCCTGGGAGACAGAGCAAGACTCCATCTCAAAAAAAAAAAAAAAAAAAAAGGCCAT
+C
+>1 1:10143-10443
+CTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACC
+CTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCCTA
+ACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCA
+ACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTA
+ACCCTAACCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC
+>2 1:1382388-1382602
+GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTA
+TTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTT
+GAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTG
+CAAGCTCCACCTCCTGGGTTCACGCCATTCTCCTG
+>3 madeup
+ACTGGACACGTGGACACACACACACACACACACACACACACAGTCAAACCACCTACCAGA
+>4 20:8917026-8917085
+TCCCCTCTTGACCTCTCTCTATTTTTTTTTTTTTTTCTGAGATGGATTTTTGCTCTTGTT
+>5 20:18724313-18724343
+GTCTCAAAAAAAAAAAAAAAAAAAAGAAAAG
+>21
+TTTATTATTATTATTATTAAATTGAATTTATTTAGTGTACATACATTCATGTGTATTGTG
b
diff -r 000000000000 -r 3b6cd8086498 test-data/test-cache/norm.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/norm.fa.fai Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,7 @@
+20 301 23 60 61
+1 300 347 60 61
+2 215 673 60 61
+3 60 902 60 61
+4 60 985 60 61
+5 31 1070 31 32
+21 60 1106 60 61
b
diff -r 000000000000 -r 3b6cd8086498 test-data/view.GL.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.GL.vcf Sat Jul 23 13:49:21 2022 +0000
[
@@ -0,0 +1,29 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+11 2343543 . A . 999 PASS . GL 0,-25.5,-25.5 0,-25.5,-25.5 0,-25.5,-25.5
+11 5464562 . C T 999 PASS . GL 0,0,0 0,0,0 0,0,0
+20 76962 rs6111385 T C 999 PASS . GL -25.5,0,-25.5 -25.5,-25.5,0 -25.5,-25.5,0
+20 126310 . ACC A 999 StrandBias;EndDistBias . GL -25.5,0,-13.2 -25.5,0,-13.9 -25.5,-21.3,0
+20 138125 rs2298108 G T 999 PASS . GL -13.5,0,-16.3 -14,0,-25.5 -25.5,-19.9,0
+20 138148 rs2298109 C T 999 PASS . GL -19.5,0,-25.5 -19.2,0,-25.5 -25.5,-23.5,0
+20 271225 . T TTTA,TA 999 StrandBias . GL -15.1,-5.3,-20.3,0,-5.2,-15.9 -25.5,0,-21.3,-25.5,-25.5,-25.5 -25.5,-25.5,-25.5,-25.5,0,-24.1
+20 304568 . C T 999 PASS . GL -9.5,0,-25.5 -19.2,0,-25.5 -25.5,-9.5,0
+20 326891 . A AC 999 PASS . GL -25.5,0,-13.2 -25.5,0,-13.9 .,.,.
+X 2928329 rs62584840 C T 999 PASS . GL 0,-5.6 0,-8.1 -7.3,0,-1.9
+X 2933066 rs61746890 G C 999 PASS . GL 0,-25.5 0,-25.5 -25.5,-25.5,-25.5
+X 2942109 rs5939407 T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,-15.7,0
+X 3048719 . T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,0,-15.7
+Y 8657215 . C A 999 PASS . GL 0,-25.5 -25.5,0 .
+Y 10011673 rs78249411 G A 999 MinAB . GL -12.6,-10.1 -9.5,0 .
b
diff -r 000000000000 -r 3b6cd8086498 test-data/view.bcf
b
Binary file test-data/view.bcf has changed
b
diff -r 000000000000 -r 3b6cd8086498 test-data/view.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.vcf Sat Jul 23 13:49:21 2022 +0000
[
@@ -0,0 +1,46 @@
+##fileformat=VCFv4.1
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)">
+##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads">
+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+11 2343543 . A . 999 PASS DP=100223 GT:PL:DP:GQ 0/0:0,255,255:193:99 0/0:0,255,255:211:99 0/0:0,255,255:182:99
+11 5464562 . C T 999 PASS DP=0 GT:PL:DP:GQ ./.:0,0,0:.:. ./.:0,0,0:.:. ./.:0,0,0:.:.
+20 76962 rs6111385 T C 999 PASS DP4=110138,70822,421911,262673;DP=911531;Dels=0;FS=21.447;HWE=0.491006;ICF=-0.01062;MQ0=1;MQ=46;PV4=2.5e-09,0,0,1;QD=22.31 GT:PL:DP:GQ 0/1:255,0,255:193:99 1/1:255,255,0:211:99 1/1:255,255,0:182:99
+20 126310 . ACC A 999 StrandBias;EndDistBias DP4=125718,95950,113812,80890;DP=461867;HWE=0.24036;ICF=0.01738;INDEL;IS=374,0.937343;MQ=49;PV4=9e-30,1,0,3.8e-13;QD=0.0172;AN=6;AC=4 GT:DP:GQ:PL 0/1:117:99:255,0,132 0/1:111:99:255,0,139 1/1:78:99:255,213,0
+20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=6;AC=4 GT:PL:DP:GQ 0/1:135,0,163:66:99 0/1:140,0,255:71:99 1/1:255,199,0:66:99
+20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=6;AC=4 GT:PL:DP:GQ 0/1:195,0,255:87:99 0/1:192,0,255:82:99 1/1:255,235,0:78:99
+20 271225 . T TTTA,TA 999 StrandBias DP4=29281,42401,27887,29245;DP=272732;INDEL;IS=95,0.748031;MQ=47;PV4=0,1,0,1;QD=0.0948;AN=6;AC=2,2 GT:DP:GQ:PL 0/2:33:49:151,53,203,0,52,159 0/1:51:99:255,0,213,255,255,255 1/2:47:99:255,255,255,255,0,241
+20 304568 . C T 999 PASS DP4=16413,4543,945,156;DP=43557;Dels=0;FS=3200;HWE=0.076855;ICF=0.0213;MQ0=0;MQ=50;PV4=0,0,0,1;QD=15.45;AN=6;AC=4 GT:PL:DP:GQ 0|1:95,0,255:90:99 0|1:192,0,255:13:99 1|1:255,95,0:60:99
+20 326891 . A AC 999 PASS DP4=125718,95950,113812,80890;DP=461867;HWE=0.24036;ICF=0.01738;INDEL;IS=374,0.937343;MQ=49;PV4=9e-30,1,0,3.8e-13;QD=0.0172;AN=4;AC=2 GT:DP:GQ:PL 0|1:117:99:255,0,132 0|1:111:99:255,0,139 ./.:.:.:.,.,.
+X 2928329 rs62584840 C T 999 PASS DP4=302,9137,32,1329;DP=11020;Dels=0;FS=13.38;HWE=0.284332;ICF=0.0253;MQ0=0;MQ=49;PV4=0.094,0,0,1;QD=18.61;AN=4;AC=1 GT:PL:DP:GQ 0:0,56:2:73 0:0,81:3:98 0/1:73,0,19:4:30
+X 2933066 rs61746890 G C 999 PASS DP4=69865,100561,461,783;DP=173729;Dels=0;FS=10.833;MQ0=0;MQ=50;PV4=0.005,3.6e-14,0,1;QD=15.33;AN=4;AC=1 GT:PL:DP:GQ 0:0,255:39:99 0:0,255:37:99 0/1:255,255,255:62:99
+X 2942109 rs5939407 T C 999 PASS DP4=23273,27816,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=4;AC=3 GT:PL:DP:GQ 0:0,255:20:99 1:255,0:33:99 1/1:255,157,0:52:99
+X 3048719 . T C 999 PASS DP4=13263,27466,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=4;AC=3 GT:PL:DP:GQ 0:0,255:20:99 1:255,0:33:99 0|1:255,0,157:52:99
+Y 8657215 . C A 999 PASS DP4=74915,114274,1948,2955;DP=195469;Dels=0;FS=3.181;MQ0=0;MQ=50;PV4=0.86,1,0,1;QD=33.77;AN=2;AC=1 GT:PL:DP:GQ 0:0,255:47:99 1:255,0:64:99 .
+Y 10011673 rs78249411 G A 999 MinAB DP4=47351,30839,178796,279653;DP=550762;Dels=0;FS=41.028;MQ0=37362;MQ=26;PV4=0,0,0,1;QD=17.45;AN=2;AC=2 GT:PL:DP:GQ 1:126,101:146:37 1:95,0:130:99 .
b
diff -r 000000000000 -r 3b6cd8086498 tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
b
diff -r 000000000000 -r 3b6cd8086498 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files to the format introduced in revision 4550:535d276c92bc -->
+<tables>
+    <!-- Location of SAMTools indexes for FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 3b6cd8086498 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sat Jul 23 13:49:21 2022 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of FASTA index files for testing -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>