Repository 'bcftools_query'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/bcftools_query

Changeset 0:2fea169065ec (2016-07-06)
Next changeset 1:1fc5e47cb4a3 (2016-07-13)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit ef90c4602bdb83ea7455946c9d175ea27284e643
added:
README.md
bcftools_query.xml
macros.xml
test-data/23andme.fa
test-data/annotate.hdr
test-data/annotate.tab
test-data/annotate.vcf
test-data/annotate2.tab
test-data/annotate2.vcf
test-data/annotate3.vcf
test-data/annotate4.vcf
test-data/annots.vcf
test-data/annots2.vcf
test-data/annots4.tab
test-data/annots4.vcf
test-data/check.vcf
test-data/concat.1.a.vcf
test-data/concat.1.b.vcf
test-data/concat.2.a.vcf
test-data/concat.2.b.vcf
test-data/concat.3.0.vcf
test-data/concat.3.a.vcf
test-data/concat.3.b.vcf
test-data/concat.3.c.vcf
test-data/concat.3.d.vcf
test-data/concat.3.e.vcf
test-data/concat.3.f.vcf
test-data/consensus.1.chain
test-data/consensus.1.out
test-data/consensus.fa
test-data/consensus.tab
test-data/consensus.vcf
test-data/convert.23andme
test-data/convert.gs.gt.gen
test-data/convert.gs.gt.samples
test-data/convert.gvcf.vcf
test-data/convert.hls.gt.hap
test-data/convert.hls.gt.legend
test-data/convert.hls.gt.samples
test-data/convert.hs.gt.hap
test-data/convert.hs.gt.samples
test-data/convert.vcf
test-data/filter.1.vcf
test-data/filter.2.vcf
test-data/filter.3.vcf
test-data/filter.4.vcf
test-data/fixploidy.ploidy
test-data/fixploidy.samples
test-data/fixploidy.vcf
test-data/gvcf.fa
test-data/isec.a.vcf
test-data/isec.b.vcf
test-data/merge.2.a.vcf
test-data/merge.2.b.vcf
test-data/merge.3.a.vcf
test-data/merge.3.b.vcf
test-data/merge.4.a.vcf
test-data/merge.4.b.vcf
test-data/merge.a.vcf
test-data/merge.b.vcf
test-data/merge.c.vcf
test-data/mpileup.X.vcf
test-data/mpileup.c.X.vcf
test-data/mpileup.c.vcf
test-data/mpileup.ploidy
test-data/mpileup.samples
test-data/mpileup.tab
test-data/mpileup.vcf
test-data/norm.fa
test-data/norm.merge.vcf
test-data/norm.setref.vcf
test-data/norm.split.vcf
test-data/norm.vcf
test-data/plugin1.vcf
test-data/query.filter.vcf
test-data/query.out
test-data/query.vcf
test-data/reheader.hdr
test-data/reheader.samples
test-data/reheader.samples2
test-data/reheader.vcf
test-data/stats.a.vcf
test-data/stats.b.vcf
test-data/vcf2sex.vcf
test-data/view.GL.vcf
test-data/view.bcf
test-data/view.bcf.csi
test-data/view.filter.vcf
test-data/view.vcf
tool_dependencies.xml
b
diff -r 000000000000 -r 2fea169065ec README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Wed Jul 06 07:08:15 2016 -0400
[
@@ -0,0 +1,68 @@
+# bcftools (v1.3)
+
+Copied from branch bcftools1.2:
+
+This aims to be a "faithful" rendering of the bcftools suite. I.e. options are
+presented essentially as closely to the command line version as is useful. 
+
+This may not appeal to all, if you'd like to see smaller and more dedicated
+tools (e.g. "intersect", "union" and "complement" being separate tools instead
+of all of them included in the "isec" tool,) please feel free to file an issue.
+
+Updated for bcftools v1.3
+
+This was extended from the bcftools1.2 branch, then heavily hand-edited to
+group params and manage param interactions.
+
+In the macros.xml there are macros and tokens to handle file input and output.  
+These use the datatypes currently available in galaxy: Vcf and Bcf
+The macros take care of bgzip and indexing of inputs.  
+
+The convert command was split into 2 tools, "convert to vcf" and "convert from vcf"
+
+## TODO:
+
+- stats needs a matplotlib tool dependency  and pdflatex for generating a pdf of plots
+- cnv needs a matplotlib tool dependency for generating images, then a means to consolidate those.
+- cnv needs an input.vcf for testing, runs with bcftools cnv -s "HG00101" -o 'HG00101/' -p 5 mpileup.vcf
+- roh needs a more useful input.vcf for testing
+- plugin color chrs
+- plugin frameshifts 
+
+## Status
+
+The wrappers were automatically generated in bulk. That doesn't get them 100%
+of the way there (e.g. meaningful test cases), so the rest of the process is a
+bit slower.
+
+- [x] annotate
+- [x] call
+- [ ] cnv (needs real test data, needs plotting)
+- [x] concat
+- [x] consensus
+- [x] convert from vcf
+- [x] convert to vcf
+- [x] filter
+- [x] gtcheck
+- [x] isec
+- [x] merge
+- [x] norm
+- [x] query
+- [x] query list samples
+- [x] reheader
+- [x] roh
+- [x] stats (needs plotting)
+- [x] view
+- [ ] +color chrs
+- [x] +counts
+- [x] +dosage
+- [x] +fill an ac
+- [x] +fill tags
+- [x] +fixploidy
+- [ ] +frameshifts
+- [x] +impute info
+- [x] +mendelian
+- [x] +missing2ref
+- [x] +setgt
+- [x] +tag2tag
+- [x] +vcf2sex
b
diff -r 000000000000 -r 2fea169065ec bcftools_query.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_query.xml Wed Jul 06 07:08:15 2016 -0400
[
@@ -0,0 +1,126 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@.0">
+    <description>Extracts fields from VCF/BCF file and prints them in user-defined format</description>
+    <macros>
+        <token name="@EXECUTABLE@">query</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!--
+      Command template: Cheetah directives and shell text inside CDATA.
+      Order of operations:
+        1. @PREPARE_ENV@ (macros.xml) exports BCFTOOLS_PLUGINS, derived from
+           the location of the bcftools binary.
+        2. @PREPARE_INPUT_FILES@ (macros.xml) loops over $input_files, stages
+           each one under a local name (bgzip plus index for vcf, symlink for
+           bcf) and appends that name to the list file that
+           @INPUT_LIST_FILE@ expands to.
+        3. $section is re-pointed before each group of tokens; the shared
+           tokens presumably read their values from $section (their
+           definitions are in macros.xml) - TODO confirm for each token.
+        4. bcftools reads the staged inputs through its vcf-list option and
+           standard output is redirected to $output_file.
+      NOTE(review): the format value is interpolated between single quotes
+      and the param has its sanitizer disabled, so a single quote typed into
+      the format field would terminate the shell quoting early.
+    -->
+    <command detect_errors="aggressive"><![CDATA[
+@PREPARE_ENV@
+@PREPARE_INPUT_FILES@
+#set $section = $sec_restrict
+@PREPARE_TARGETS_FILE@
+
+bcftools @EXECUTABLE@
+
+## Query section
+#set $section = $sec_query
+  --format '${section.format}'
+
+${section.allow_undef_tags}
+
+${section.print_header}
+
+## Default section
+#set $section = $sec_restrict
+@INCLUDE@
+@EXCLUDE@
+@COLLAPSE@
+@REGIONS@
+@SAMPLES@
+@TARGETS@
+
+## Primary Input/Outputs
+--vcf-list @INPUT_LIST_FILE@
+> $output_file
+]]>
+    </command>
+    <inputs>
+        <!-- One or more VCF/BCF datasets (macro defined in macros.xml). -->
+        <expand macro="macro_inputs" />
+
+        <!-- Optional restrictions; each control comes from a shared macro. -->
+        <section name="sec_restrict"
+                 expanded="false"
+                 title="Restrict to">
+            <expand macro="macro_collapse" />
+            <expand macro="macro_samples" />
+            <expand macro="macro_regions" />
+            <expand macro="macro_targets" />
+            <expand macro="macro_include" />
+            <expand macro="macro_exclude" />
+        </section>
+
+        <!-- Options specific to the query subcommand. -->
+        <section name="sec_query"
+                 expanded="true"
+                 title="Query Options">
+            <!-- Required format string; passed to bcftools unsanitized. -->
+            <param name="format"
+                   type="text"
+                   value=""
+                   label="Format for Output">
+                <help>
+                <![CDATA[ 
+                Example: %CHROM\t%POS\t%REF\t%ALT{0}\n
+                ( NOTE TAB: '\t' and new line character: '\n' )
+                ]]>
+                </help>
+                <sanitizer sanitize="False"/>
+                <validator type="empty_field" />
+            </param>
+            <!-- Referenced by the output's change_format rule; it is not
+                 placed on the bcftools command line. -->
+            <param name="tsv"
+                   type="boolean"
+                   truevalue="True"
+                   falsevalue="False"
+                   checked="false"
+                   label="Output is tab-delimited"
+                   help="Fields in your Format are separated by a TAB character: \t" />
+            <!-- The two flags below are emitted verbatim by the command template. -->
+            <param name="allow_undef_tags"
+                   type="boolean"
+                   truevalue="--allow-undef-tags"
+                   falsevalue=""
+                   label="Allow Undefined Tags"
+                   help="Print &quot;.&quot; for undefined tags" />
+            <param name="print_header"
+                   type="boolean"
+                   truevalue="--print-header"
+                   falsevalue=""
+                   label="Print Header Line" />
+        </section>
+
+    </inputs>
+    <!--
+      Single text output. It is switched to the tabular datatype when the
+      user ticks "Output is tab-delimited". That checkbox lives inside the
+      sec_query section, so the change_format rule must use the
+      section-qualified name: a bare "tsv" cannot be resolved against the
+      nested parameters and the rule would silently never match.
+    -->
+    <outputs>
+        <data name="output_file" format="txt">
+            <change_format>
+                <when input="sec_query.tsv" value="True" format="tabular" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Runs a per-sample query over query.vcf and compares the result
+             with the stored query.out. -->
+        <test>
+            <param name="input_files"
+                   value="query.vcf"
+                   ftype="vcf" />
+            <param name="format"
+                   value="%CHROM\t%POS\t%REF\t%ALT\t%DP4\t%AN[\t%GT\t%TGT]\n" />
+            <param name="tsv"
+                   value="True" />
+            <output name="output_file"
+                    file="query.out" />
+        </test>
+    </tests>
+    <!--
+      User-facing help, written as reStructuredText inside CDATA.
+      @EXECUTABLE@ is the token declared at the top of this tool ("query").
+      @BCFTOOLS_MANPAGE@, @BCFTOOLS_WIKI@ and @EXPRESSIONS_HELP@ are tokens
+      from macros.xml; the remaining @..._HELP@ tokens are presumably defined
+      there as well. The manpage token is followed by "#" and the executable
+      name, which forms an anchor into the bcftools manual page.
+    -->
+    <help><![CDATA[ 
+=====================================
+ bcftools @EXECUTABLE@
+=====================================
+
+Extracts fields from VCF/BCF file and prints them in user-defined format
+
+  Format:
+
+  ::
+
+    ``%CHROM``          The CHROM column (similarly also other columns: POS, ID, REF, ALT, QUAL, FILTER)
+    ``%INFO/TAG``       Any tag in the INFO column
+    ``%TYPE``           Variant type (REF, SNP, MNP, INDEL, OTHER)
+    ``%MASK``           Indicates presence of the site in other files (with multiple files)
+    ``%TAG{INT}``       Curly brackets to subscript vectors (0-based)
+    ``%FIRST_ALT``      Alias for %ALT{0}
+    ``[]``              The brackets loop over all samples
+    ``%GT``             Genotype (e.g. 0/1)
+    ``%TGT``            Translated genotype (e.g. C/A)
+    ``%IUPACGT``        Genotype translated to IUPAC ambiguity codes (e.g. M instead of C/A)
+    ``%LINE``           Prints the whole line
+    ``%SAMPLE``         Sample name
+
+  Examples:
+
+  ::
+
+    ``bcftools query -f '%CHROM  %POS  %REF  %ALT{0}\n' file.vcf.gz``
+    ``bcftools query -f '%CHROM\t%POS\t%REF\t%ALT[\t%SAMPLE=%GT]\n' file.vcf.gz``
+
+@COLLAPSE_HELP@
+@REGIONS_HELP@
+@TARGETS_HELP@
+@EXPRESSIONS_HELP@
+
+@BCFTOOLS_MANPAGE@#@EXECUTABLE@
+
+@BCFTOOLS_WIKI@
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 2fea169065ec macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jul 06 07:08:15 2016 -0400
[
b'@@ -0,0 +1,674 @@\n+\n+<macros>\n+  <token name="@VERSION@">1.3</token>\n+  <xml name="stdio">\n+    <stdio>\n+      <exit_code range="1:" />\n+      <exit_code range=":-1" />\n+      <regex match="Error:" />\n+      <regex match="Exception:" />\n+    </stdio>\n+  </xml>\n+  <xml name="requirements">\n+    <requirements>\n+      <requirement type="package" version="1.3">bcftools</requirement>\n+      <!-- conda dependency -->\n+      <requirement type="package" version="1.3">htslib</requirement>\n+      <requirement type="package" version="0.2.6">tabix</requirement>\n+      <requirement type="package" version="1.2">samtools</requirement>\n+    </requirements>\n+  </xml>\n+  <xml name="version_command">\n+    <version_command>bcftools 2&gt;&amp;1 | grep \'Version:\'</version_command>\n+  </xml>\n+  \n+  <xml name="citations">\n+    <citations>\n+      <citation type="doi">10.1093/bioinformatics/btp352</citation>\n+      <yield />\n+    </citations>\n+  </xml>\n+  <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token>\n+  <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token>\n+  <token name="@THREADS@">\n+  --threads \\${GALAXY_SLOTS:-4}\n+  </token>\n+  <token name="@PREPARE_ENV@">\n+<![CDATA[\n+export BCFTOOLS_PLUGINS=`which bcftools | sed \'s,bin/bcftools,libexec/bcftools,\'`;\n+]]>\n+  </token>\n+  <xml name="macro_input">\n+    <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" />\n+  </xml>\n+  <token name="@PREPARE_INPUT_FILE@">\n+<![CDATA[\n+## May need to symlink input if there is an associated\n+#set $input_vcf = \'input.vcf.gz\'\n+#if $input_file.datatype.file_ext == \'vcf\'\n+  bgzip -c "$input_file" > $input_vcf &&\n+  bcftools index $input_vcf &&\n+#elif $input_file.datatype.file_ext == \'vcf_bgzip\'\n+  ln -s "$input_file" $input_vcf\n+#elif $input_file.datatype.file_ext == \'bcf\'\n+  #set $input_vcf = \'input.bcf\'\n+  ln -s "$input_file" 
$input_vcf && \n+  #if $input_file.metadata.bcf_index:\n+    ln -s $input_file.metadata.bcf_index ${input_vcf}.csi &&\n+  #else \n+    bcftools index $input_vcf &&\n+  #end if\n+#elif $input_file.datatype.file_ext == \'bcf_bgzip\'\n+  ln -s "$input_file" $input_vcf\n+#end if\n+]]>\n+  </token>\n+  <token name="@INPUT_FILE@">\n+$input_vcf\n+  </token>\n+\n+  <xml name="macro_inputs">\n+    <param name="input_files" type="data" format="vcf,bcf" label="Other VCF/BCF Datasets" multiple="True" />\n+  </xml>\n+  <token name="@PREPARE_INPUT_FILES@">\n+<![CDATA[\n+## May need to symlink input if there is an associated\n+#set $input_vcfs = []\n+#set $vcfs_list_file = \'vcfs_list\'\n+#for (i,input_file) in enumerate($input_files):\n+  #set $input_vcf = \'input\' + str($i) + \'.vcf.gz\'\n+  echo \'$input_vcf\' >> $vcfs_list_file &&\n+  #if $input_file.datatype.file_ext == \'vcf\'\n+    bgzip -c "$input_file" > $input_vcf &&\n+    bcftools index $input_vcf &&\n+  #elif $input_file.datatype.file_ext == \'vcf_bgz\'\n+    ln -s "$input_file" $input_vcf\n+  #elif $input_file.datatype.file_ext == \'bcf\'\n+    #set $input_vcf = \'input\' + str($i) + \'.bcf.gz\'\n+    ## bgzip -c "$input_file" > $input_vcf &&\n+    ln -s "$input_file" $input_vcf &&\n+    #if $input_file.metadata.bcf_index:\n+      ln -s $input_file.metadata.bcf_index ${input_vcf}.csi &&\n+    #else \n+      bcftools index $input_vcf &&\n+    #end if\n+  #elif $input_file.datatype.file_ext == \'bcfvcf_bgz\'\n+    ln -s "$input_file" $input_vcf &&\n+  #end if\n+  $input_vcfs.append($input_vcf)\n+#end for\n+]]>\n+  </token>\n+  <token name="@INPUT_FILES@">\n+#echo \' \'.join($input_vcfs)#\n+  </token>\n+  <token name="@INPUT_LIST_FILE@">\n+$vcfs_list_file\n+  </token>\n+\n+  <xml name="macro_fasta_ref">\n+    <param name="fasta_ref" type="data" format="data" label="Fasta Ref" optional="True" help="reference sequence in fasta format" />\n+  </xml>\n+  <token name="@PREPARE_FASTA_REF@">\n+<![CDATA[\n+#set $input_fa_ref = 
None\n+#if \'fasta_ref\' in $section and $section.fasta_ref:\n+  #set $input_fa_ref = \'ref.fa\'\n+  ln -s $section.fasta_ref $input_fa_ref &&\n+  samtools faidx $input_'..b'or duplicate positions, only the first  |\n+|            | SNP record will be considered and appear on output.            |\n++------------+----------------------------------------------------------------+\n+| indels     | all indel records are compatible, regardless of whether the    |\n+|            | REF and ALT alleles match or not. For duplicate positions,     |\n+|            | only the first indel record will be considered and appear on   |\n+|            | output.                                                        |\n++------------+----------------------------------------------------------------+\n+| both       | abbreviation of "-c indels  -c snps"                           |\n++------------+----------------------------------------------------------------+\n+| id         | only records with identical ID column are compatible.          |\n+|            | Supportedby bcftools merge only.                               
|\n++------------+----------------------------------------------------------------+\n+\n+  </token>\n+\n+  <token name="@EXPRESSIONS_HELP@">\n+      <![CDATA[\n+Expressions\n+-----------\n+\n+Valid expressions may contain:\n+\n+-  numerical constants, string constants\n+\n+   ::\n+\n+      1, 1.0, 1e-4\n+      "String"\n+\n+-  arithmetic operators\n+\n+   ::\n+\n+      +,*,-,/\n+\n+-  comparison operators\n+\n+   ::\n+\n+      == (same as =), >, >=, <=, <, !=\n+\n+-  regex operators "~" and its negation "!~"\n+\n+   ::\n+\n+      INFO/HAYSTACK ~ "needle"\n+\n+-  parentheses\n+\n+   ::\n+\n+      (, )\n+\n+-  logical operators\n+\n+   ::\n+\n+      && (same as &), ||,  |\n+\n+-  INFO tags, FORMAT tags, column names\n+\n+   ::\n+\n+      INFO/DP or DP\n+      FORMAT/DV, FMT/DV, or DV\n+      FILTER, QUAL, ID, REF, ALT[0]\n+\n+-  1 (or 0) to test the presence (or absence) of a flag\n+\n+   ::\n+\n+      FlagA=1 && FlagB=0\n+\n+-  "." to test missing values\n+\n+   ::\n+\n+      DP=".", DP!=".", ALT="."\n+\n+-  missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression\n+\n+   ::\n+\n+      GT="."\n+\n+-  TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)\n+\n+   ::\n+\n+      TYPE="indel" | TYPE="snp"\n+\n+-  array subscripts, "*" for any field\n+\n+   ::\n+\n+      (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3\n+      DP4[*] == 0\n+      CSQ[*] ~ "missense_variant.*deleterious"\n+\n+-  function on FORMAT tags (over samples) and INFO tags (over vector fields)\n+\n+   ::\n+\n+      MAX, MIN, AVG, SUM, STRLEN, ABS\n+\n+-  variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes\n+\n+   ::\n+\n+      N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN\n+\n+**Notes:**\n+\n+-  
String comparisons and regular expressions are case-insensitive\n+-  If the subscript "*" is used in regular expression search, the whole field\n+   is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be\n+   true for the string vector INFO/STR=AB,CD.\n+-  Variables and function names are case-insensitive, but not tag names. For\n+   example, "qual" can be used instead of "QUAL", "strlen()" instead of\n+   "STRLEN()" , but not "dp" instead of "DP".\n+\n+**Examples:**\n+\n+   ::\n+\n+      MIN(DV)>5\n+      MIN(DV/DP)>0.3\n+      MIN(DP)>10 & MIN(DV)>3\n+      FMT/DP>10  & FMT/GQ>10 .. both conditions must be satisfied within one sample\n+      FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples\n+      QUAL>10 |  FMT/GQ>10   .. selects only GQ>10 samples\n+      QUAL>10 || FMT/GQ>10   .. selects all samples at QUAL>10 sites\n+      TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)\n+      MIN(DP)>35 && AVG(GQ)>50\n+      ID=@file       .. selects lines with ID present in the file\n+      ID!=@~/file    .. skip lines with ID present in the ~/file\n+      MAF[0]<0.05    .. select rare variants at 5% cutoff\n+\n+]]></token>\n+\n+\n+\n+\n+</macros>\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/23andme.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/23andme.fa Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,14 @@
+>1
+CACGTNACGGCTGAAGTCCAAGGTAC
+CGTATCGAGTTCACAGTCGATAGCTC
+GATCGATAGCATCGCTAGCNNNACTA
+CGATCGATCGCTCTCCGTAACACTCA
+AAAACGATCGATCGACTGCTCTTTAG
+CGATGACTTTAGGGGAAAAA
+>2
+CGCTCAGCCGTACAGCCGAGCAGGAC
+ACGCTATTTTAGATCGACTGGCTNNG
+CGCTAGCTACGCTTTAGCACGAGAA
+>Y
+NNNGCATACGTGTCCATCACGATGAT
+AGCGATGATCGATC
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate.hdr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.hdr Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,4 @@
+##INFO=<ID=T_STR,Number=1,Type=String,Description="Test String">
+##INFO=<ID=T_INT,Number=.,Type=Integer,Description="Test Integer">
+##INFO=<ID=T_FLOAT,Number=.,Type=Float,Description="Test Float">
+##INFO=<ID=INDEL,Number=0,Type=Float,Description="Test Flag">
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.tab Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,18 @@
+3 3212016 CTT C,CT indel_3212016 . . . 1
+4 3258448 TACACACAC T indel_3258448 . . . 1
+4 4000000 T C id1 . . . 1
+4 4000001 T C,A id2 . . . 1
+2 3199812 G GTT,GT indel_3199812 . . . 1
+1 3000150 C CT indel_3000150 . . . 1
+1 3000150 C T snp_3000150 999 1,2 1e-10,2e-10 .
+1 3000151 C T snp_3000151 999 1 2e-10 .
+1 3062915 G T,C snp_3062915 999 1 2e-10 .
+1 3062915 GTTT G indel_3062915 . . . 1
+1 3106154 A C snp_3106154 999 1 2e-10 .
+1 3106154 C CT indel_3106154 . . . 1
+1 3106154 CAAA C indel_3106154 . . . 1
+1 3157410 GA G indel_3157410 . . . 1
+1 3162006 GAA G indel_3162006 . . . 1
+1 3177144 G . ref_3177144 999 1 2e-10 .
+1 3177144 G T snp_3177144 999 1 2e-10 0
+1 3184885 TAAAA TA,T indel_3184885 . . . 1
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##test=<ID=4,IE=5>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000000 . T A,C 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000001 . T A 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate2.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.tab Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,2 @@
+2 3000000 3199812 region_3000000_3199812
+1 3000150 3106154 region_3000150_3106154
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="Test type">
+##INFO=<ID=IINT,Number=1,Type=Integer,Description="Test type">
+##INFO=<ID=IFLT,Number=1,Type=Float,Description="Test type">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test type">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FINT,Number=1,Type=Integer,Description="Test type">
+##FORMAT=<ID=FFLT,Number=1,Type=Float,Description="Test type">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test type">
+##FILTER=<ID=q11,Description="Quality below 10">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 3000001 xx C T 11 PASS FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:xxx 0/0:11:1.1:x 0/0:11:1.1:x
+1 3000002 . C T . . . GT . . .
+1 3000003 xx C T 11 q11 FLAG;IINT=.;IFLT=.;ISTR=. GT:FINT:FFLT:FSTR 0/0:.:.:. 0/0:.:.:. 0/0:.:.:.
+1 3000004 xx C T 11 q11 FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:x 0/0:11:1.1:xxx 0/0:11:1.1:xxx
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate3.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,21 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype LK">
+##FORMAT=<ID=X,Number=1,Type=Integer,Description="rmme">
+##FORMAT=<ID=Y,Number=1,Type=Integer,Description="rmme">
+##FORMAT=<ID=AA,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=AA,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=BB,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=X,Number=1,Type=Integer,Description="rmme">
+##INFO=<ID=Y,Number=1,Type=Integer,Description="rmme">
+##FILTER=<ID=fltA,Description="rmme">
+##FILTER=<ID=fltB,Description="rmme">
+##FILTER=<ID=fltX,Description="rmme">
+##FILTER=<ID=fltY,Description="rmme">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 id C . 20 . AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000001 id C . 20 PASS AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000002 id C . 20 fltY;fltA;fltB;fltX BB=2;X=3;Y=4;AA=1 GT:Y:X:PL:AA 0/1:3:1:2:1 0/1:3:1:2:1
b
diff -r 000000000000 -r 2fea169065ec test-data/annotate4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate4.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##INFO=<ID=FR,Number=R,Type=Float,Description="test">
+##INFO=<ID=FA,Number=A,Type=Float,Description="test">
+##INFO=<ID=IA,Number=A,Type=Integer,Description="test">
+##INFO=<ID=IR,Number=R,Type=Integer,Description="test">
+##INFO=<ID=SA,Number=A,Type=String,Description="test">
+##INFO=<ID=SR,Number=R,Type=String,Description="test">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1 . C T . . .
+1 2 . C T,G . . FA=.,9.9;FR=.,9.9,.;IA=.,99;IR=.,99,.;SA=.,99;SR=.,99,.
+1 3 . C A,T . . .
b
diff -r 000000000000 -r 2fea169065ec test-data/annots.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,37 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##test=<ID=4,IE=5>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 id1 C T 99 PASS STR=id1;AN=4;AC=0 GT:GQ 0|0:999 0|0:999
+1 3000151 id2 C T 99 PASS STR=id2;AN=4;AC=0 GT:DP:GQ 0|0:99:999 0|0:99:999
+1 3062915 idIndel GTTT G 99 PASS DP4=1,2,3,4;AN=4;AC=0;INDEL;STR=testIndel GT:GQ:DP:GL 0|0:999:99:-99,-9,-99 0|0:999:99:-99,-9,-99
+1 3062915 idSNP G T,C 99 PASS STR=testSNP;TEST=5;DP4=1,2,3,4;AN=3;AC=0,0 GT:TT:GQ:DP:GL 0|0:9,9:999:99:-99,-9,-99,-99,-9,-99 0:9,9:999:99:-99,-9,-99
+1 3106154 id4 CAAA C 99 PASS STR=id4;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3106154 id5 C CT 99 PASS STR=id5;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3157410 id6 GA GC,G 99 PASS STR=id6;AN=4;AC=0 GT:GQ:DP 0|0:99:99 0|0:99:99
+1 3162006 id7 GAA GG 99 PASS STR=id7;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id8 G T 99 PASS STR=id8;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id9 G . 99 PASS STR=id9;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3184885 id10 TAAAA TA,T 99 PASS STR=id10;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+2 3199812 id11 G GTT,GT 99 PASS STR=id11;AN=4;AC=0,0 GT:GQ:DP 0|0:999:99 0|0:999:99
+3 3212016 id12 CTT C,CT 99 PASS STR=id12;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+4 3258448 id13 TACACACAC T 99 PASS STR=id13;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
b
diff -r 000000000000 -r 2fea169065ec test-data/annots2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots2.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=q99,Description="Quality below 10">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="Test type">
+##INFO=<ID=IINT,Number=.,Type=Integer,Description="Test type">
+##INFO=<ID=IFLT,Number=.,Type=Float,Description="Test type">
+##INFO=<ID=ISTR,Number=.,Type=String,Description="Test type">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FINT,Number=.,Type=Integer,Description="Test type">
+##FORMAT=<ID=FFLT,Number=.,Type=Float,Description="Test type">
+##FORMAT=<ID=FSTR,Number=.,Type=String,Description="Test type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B A
+1 3000001 . C T . . . GT . .
+1 3000002 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000003 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000004 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
b
diff -r 000000000000 -r 2fea169065ec test-data/annots4.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots4.tab Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,3 @@
+1 1 C A,T,G 0,1.1,0 1.1,0,2.2,0 0,1,0 1,0,2,0 X,11,XXX 1,XX,222,XXX
+1 2 C T,G 1.1,2.2 1.1,2.2,3.3 1,2 1,2,3 11,2 111,22,3
+1 3 C T 1.1 1.1,2.2 1 1,2 11 11,2
b
diff -r 000000000000 -r 2fea169065ec test-data/annots4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots4.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##INFO=<ID=FA,Number=A,Type=Float,Description="test">
+##INFO=<ID=FR,Number=R,Type=Float,Description="test">
+##INFO=<ID=IA,Number=A,Type=Integer,Description="test">
+##INFO=<ID=IR,Number=R,Type=Integer,Description="test">
+##INFO=<ID=SA,Number=A,Type=String,Description="test">
+##INFO=<ID=SR,Number=R,Type=String,Description="test">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1 . C A,T,G . . FA=0,1.1,0;FR=1.1,0,2.2,0;IA=0,1,0;IR=1,0,2,0;SA=X,11,XXX;SR=1,XX,222,XXX
+1 2 . C T,G . . FA=1.1,2.2;FR=1.1,2.2,3.3;IA=1,2;IR=1,2,3;SA=11,2;SR=111,22,3
+1 3 . C T . . FA=1.1;FR=1.1,2.2;IA=1;IR=1,2;SA=11;SR=11,2
b
diff -r 000000000000 -r 2fea169065ec test-data/check.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/check.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,38 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:409:35:-20,-5,-20,-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . G A 59.2 PASS AN=4;AC=1 GT:GQ:DP 0/1:245:32 0/0:245:32
+1 3157410 . G A 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . G A 60.2 PASS AN=4;AC=3 GT:GQ:DP 1/1:212:22 0/1:212:22
+1 3177144 . GT G 45 PASS AN=4;AC=2 GT:GQ:DP 0/1:150:30 0/1:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258449 . GCAAA GA,G 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258450 . AAAAGAAAAAG A,AAAAAAG 59.9 PASS DP=60;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258451 . AAA AGT 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258452 . AAA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . AACA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . ACA AAGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258454 . AACA AACA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.1.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=Fail,Description="Test">
+##FILTER=<ID=q10,Description="Quality below 10">
+##INFO=<ID=XX,Number=1,Type=Integer,Description="Test">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=2,length=62435964>
+##contig=<ID=1,length=62435964>
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=<ID=X,Description="Represents allele(s) other than observed.">
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 XX=11;DP=35 GT:GQ:DP 0/1:409:35
+1 110 . C T,G 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 110 . CAAA C 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.1.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,19 @@
+##fileformat=VCFv4.0
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=<ID=X,Description="Represents allele(s) other than observed.">
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=3,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+3 142 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+3 152 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+3 162 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+3 172 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+3 182 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+3 192 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.2.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.0
+##INFO=<ID=XX,Number=1,Type=Integer,Description="Test">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FILTER=<ID=Fail,Description="Fail">
+##contig=<ID=1,length=62435964>
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+2 140 . A G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
+1 110 . C T,G 1792 Fail XX=11;DP=32 GT:GQ:DP 0/1:245:32
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.2.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=1,length=62435964>
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.0.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.0.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=9,length=62435964>
+##contig=<ID=1,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+9 202 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0|1:409:35 0|1
+9 212 . C T,G 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+9 212 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+9 222 . GA G 628 q10 DP=21 GT:GQ:DP 0|1:21:21 0|1
+9 232 . G T 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+9 232 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+9 242 . GT G 727 PASS DP=30 GT:GQ:DP 0|1:150:30 0|1
+9 252 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+9 262 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0|1:409:35 0|1
+1 110 . C T,G 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 0|1:21:21 0|1
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0|1:150:30 0|1
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
b'@@ -0,0 +1,223 @@\n+##fileformat=VCFv4.0\n+##INFO=<ID=T1,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T2,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T3,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T4,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T5,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T6,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T7,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T8,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T9,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T10,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T11,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T12,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T13,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T14,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T15,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T16,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T17,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T18,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T19,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T20,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T21,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T22,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T23,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T24,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T25,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T26,Number=1,Type=Integer,Description="Test Tag: Big 
header">\n+##INFO=<ID=T27,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T28,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T29,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T30,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T31,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T32,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T33,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T34,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T35,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T36,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T37,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T38,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T39,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T40,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T41,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T42,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T43,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T44,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T45,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T46,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T47,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T48,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T49,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T50,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T51,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T52,Number=1,Type=Integer,Description="Test Tag: Big 
header">\n+##INFO=<ID=T53,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T54,Number=1,Type=Integer,Descript'..b'er=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T161,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T162,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T163,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T164,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T165,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T166,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T167,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T168,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T169,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T170,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T171,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T172,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T173,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T174,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T175,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T176,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T177,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T178,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T179,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T180,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T181,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T182,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T183,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T184,Number=1,Type=Integer,Description="Test 
Tag: Big header">\n+##INFO=<ID=T185,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T186,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T187,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T188,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T189,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T190,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T191,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T192,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T193,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T194,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T195,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T196,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T197,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T198,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=T199,Number=1,Type=Integer,Description="Test Tag: Big header">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">\n+##FILTER=<ID=q10,Description="Quality below 
10">\n+##contig=<ID=2,length=62435964>\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tA\tB\n+2\t170\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t180\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t190\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t200\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t210\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t220\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t230\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t240\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t250\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t260\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t270\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t280\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t290\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t300\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t310\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n+2\t320\t.\tT\tA\t246\tPASS\tDP=10\tGT\t1|0\t0|1\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.c.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.c.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,22 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+2 280 . T A 246 PASS DP=10 GT 1|0 0|1
+2 290 . T A 246 PASS DP=10 GT 1|0 1|0
+2 300 . T A 246 PASS DP=10 GT 0|1 1|0
+2 310 . T A 246 PASS DP=10 GT 0|1 1|0
+2 320 . T A 246 PASS DP=10 GT 0|1 1|0
+2 330 . T A 246 PASS DP=10 GT 0|1 1|0
+2 340 . T A 246 PASS DP=10 GT 0|1 1|0
+2 350 . T A 246 PASS DP=10 GT 0|1 1|0
+2 360 . T A 246 PASS DP=10 GT 0|1 1|0
+2 370 . T A 246 PASS DP=10 GT 0|1 1|0
+2 380 . T A 246 PASS DP=10 GT 0|1 1|0
+2 390 . T A 246 PASS DP=10 GT 0|1 1|0
+2 400 . T A 246 PASS DP=10 GT 0|1 1|0
+2 410 . T A 246 PASS DP=10 GT 0|1 1|0
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.d.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.d.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,20 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+2 300 . T A 246 PASS DP=10 GT 1|0 1|0
+2 320 . T A 246 PASS DP=10 GT 1|0 1|0
+2 330 . T A 246 PASS DP=10 GT 1|0 1|0
+2 340 . T A 246 PASS DP=10 GT 1|0 1|0
+2 350 . T A 246 PASS DP=10 GT 1|0 1|0
+2 360 . T A 246 PASS DP=10 GT 1|0 1|0
+2 370 . T A 246 PASS DP=10 GT 1|0 1|0
+2 380 . T A 246 PASS DP=10 GT 1|0 1|0
+2 390 . T A 246 PASS DP=10 GT 1|0 1|0
+2 490 . T A 246 PASS DP=10 GT 1|0 1|0
+2 500 . T A 246 PASS DP=10 GT 1|0 1|0
+2 510 . T A 246 PASS DP=10 GT 1|0 1|0
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.e.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.e.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,20 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=2,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+2 310 . T A 246 PASS DP=10 GT 0|1 0|1
+2 320 . T A 246 PASS DP=10 GT 0|1 0|1
+2 330 . T A 246 PASS DP=10 GT 0|1 0|1
+2 340 . T A 246 PASS DP=10 GT 0|1 0|1
+2 350 . T A 246 PASS DP=10 GT 0|1 0|1
+2 360 . T A 246 PASS DP=10 GT 0|1 0|1
+2 370 . T A 246 PASS DP=10 GT 0|1 0|1
+2 380 . T A 246 PASS DP=10 GT 0|1 0|1
+2 390 . T A 246 PASS DP=10 GT 0|1 0|1
+2 490 . T A 246 PASS DP=10 GT 0|1 0|1
+2 500 . T A 246 PASS DP=10 GT 0|1 0|1
+2 510 . T A 246 PASS DP=10 GT 0|1 0|1
b
diff -r 000000000000 -r 2fea169065ec test-data/concat.3.f.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.f.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,20 @@
+##fileformat=VCFv4.0
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FILTER=<ID=q10,Description="Quality below 10">
+##contig=<ID=3,length=62435964>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+3 380 . T A 246 PASS DP=10 GT 1|0 1|0
+3 390 . T A 246 PASS DP=10 GT 1|0 1|0
+3 400 . T A 246 PASS DP=10 GT 1|0 1|0
+3 410 . T A 246 PASS DP=10 GT 1|0 1|0
+3 420 . T A 246 PASS DP=10 GT 1|0 1|0
+3 430 . T A 246 PASS DP=10 GT 1|0 1|0
+3 440 . T A 246 PASS DP=10 GT 1|0 1|0
+3 450 . T A 246 PASS DP=10 GT 1|0 1|0
+3 460 . T A 246 PASS DP=10 GT 1|0 1|0
+3 470 . T A 246 PASS DP=10 GT 1|0 1|0
+3 480 . T A 246 PASS DP=10 GT 1|0 1|0
+3 490 . T A 246 PASS DP=10 GT 1|0 1|0
b
diff -r 000000000000 -r 2fea169065ec test-data/consensus.1.chain
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.1.chain Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,12 @@
+chain 497 1 501 + 1 501 1 502 + 1 502 1
+11 3 1
+1 0 3
+485
+
+chain 485 2 501 + 0 501 2 495 + 0 495 2
+61 3 1
+54 3 1
+58 0 8
+21 10 0
+291
+
b
diff -r 000000000000 -r 2fea169065ec test-data/consensus.1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.1.out Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,20 @@
+>1:2-501
+TACAAAATATGATAAAATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTG
+AAGAAAAGGAAGACTTAAAAAGAGTCAGTACTAACCTACATAATATATACAATGTTCATT
+AAATAATAAAATGAGCTCATCATACTTAGGTCATCATAAATATATCTGAAATTCACAAAT
+ATTGATCAAATGGTAAAATAGACAAGTAGATTTTAATAGGTTAAACAATTACTGATTCTC
+TTGAAAGAATAAATTTAATATGAGACCTATTTCATTATAATGAACTCACAAATTAGAAAC
+TTCACACTGGGGGCTGGAGAGATGGCTCAGTAGTTAAGAACACTGACTGCTCTTCTGAAG
+GTCCTGAGTTCAAATCCCAGCAACCACATGGTGACTTACAACCATCTGTAATGACATCTG
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+TTTAAAAACAAAAAAAAAGAA
+>2
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+AAATTAGTGATTTTCCATATTCTTTAAGTCATTTTAGAGTAATGTGTTCTTAAGATTTCA
+GAAAAACAAAAACTTGTGCTTTCCTGTTTGAAAAACAAACAGCTGTGGGGAATGGACGTA
+CGTTGTCGGGACAGCCTTTTTATAAAATAATGTTGAGGCTTTGATACGTCAAAGNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTGCT
+GCTGCCAATGACAGCACACCCTGGGAATGCCCCAACTACTTACTACAAAGCAGTGTTACA
+TGGAGAAGATCTTCAAGAGTCTTTTTGCTAGATCTTTCCTTGGCTTTTGATGTGACTCCT
+CTCAATAAAATCCACAGTAATATAGTGAGTGGTCTCCTGCTCCAAACCAGTATTCCAGAC
+ACAGTTAATCCAGAC
b
diff -r 000000000000 -r 2fea169065ec test-data/consensus.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.fa Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,20 @@
+>1:2-501
+TACCATATGTGACATATAAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGC
+AGAAAAGGAAGACTTAAAAAGAGTCAGTACTAACCTACATAATATATACAATGTTCATTA
+AATAATAAAATGAGCTCATCATACTTAGGTCATCATAAATATATCTGAAATTCACAAATA
+TTGATCAAATGGTAAAATAGACAAGTAGATTTTAATAGGTTAAACAATTACTGATTCTCT
+TGAAAGAATAAATTTAATATGAGACCTATTTCATTATAATGAACTCACAAATTAGAAACT
+TCACACTGGGGGCTGGAGAGATGGCTCAGTAGTTAAGAACACTGACTGCTCTTCTGAAGG
+TCCTGAGTTCAAATCCCAGCAACCACATGGTGACTTACAACCATCTGTAATGACATCTGA
+TGCCCTCTGGTGTGTCTGAAGACAGCTACAGTGTACTTACATAAAATAATAAATAAATCT
+TTAAAAACAAAAAAAAAGAA
+>2
+GAAGATCTTTTCCTTATTAAGGATCTGAAGCTCTGTAGATTTGTATTCTATTAAACATGG
+AGAGATTAGTGATTTTCCATATTCTTTAAGTCATTTTAGAGTAATGTGTTCTTAAGATAA
+ATCAGAAAAACAAAAACTTGTGCTTTCCTGTTTGAAAAACAAACAGCTGTGGGGAATGGT
+GTCGGGACAGCCTTTTTATAAAATTTTTCTAAATAATGTTGAGGCTTTGATACGTCAAAG
+TTATATTTCAAATGGAATCACTTAGACCTCGTTTCTGAGTGTCAATGGCCATATTGGGGA
+TTTGCTGCTGCCAATGACAGCACACCCTGGGAATGCCCCAACTACTTACTACAAAGCAGT
+GTTACATGGAGAAGATCTTCAAGAGTCTTTTTGCTAGATCTTTCCTTGGCTTTTGATGTG
+ACTCCTCTCAATAAAATCCACAGTAATATAGTGAGTGGTCTCCTGCTCCAAACCAGTATT
+TCAGACACAGTTAATCCAGAC
b
diff -r 000000000000 -r 2fea169065ec test-data/consensus.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.tab Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,3 @@
+1 421 480
+2 1   60
+2 241   300
b
diff -r 000000000000 -r 2fea169065ec test-data/consensus.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/consensus.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,18 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##ALT=<ID=DEL,Description="Deletion">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA001
+1 5 . C a . PASS . GT 0/1
+1 5 . C t . PASS . GT 0/1
+1 7 . T a . PASS . GT .
+1 10 . G a . PASS . GT 0/1
+1 12 . GACA ga . PASS . GT 0/1
+1 16 . T taaa . PASS . GT 1/1
+1 19 . A c . PASS . GT 0/1
+1 61 . C a . PASS . GT 0/1
+2 61 . AGAG aa . PASS . GT 0/1
+2 119 . AAA t . PASS . GT 0/1
+2 179 . G gacgtacgt . PASS . GT 0/1
+2 200 . A <DEL> . PASS END=210 GT 1/0
+2 481 . T c,a . PASS . GT 0/2
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.23andme
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.23andme Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,28 @@
+# rsid chromosome position genotype
+rs001 1 2 AA
+rs002 1 10 AG
+rs003 1 14 AG
+rs004 1 24 TC
+rs005 1 44 CG
+rs006 1 53 GG
+rs007 1 60 GG
+rs008 1 62 CC
+rs009 1 75 AA
+rs010 1 80 GG
+rs011 1 89 TT
+rs012 1 96 --
+rs013 1 99 CC
+rs014 1 102 GG
+rs015 1 112 TT
+rs016 2 5 CC
+rs017 2 11 CT
+rs018 2 16 CC
+rs019 2 20 GG
+rs020 2 33 CT
+rs021 2 39 AA
+rs022 2 44 CC
+rs023 2 48 CC
+rs024 2 55 AA
+rs025 2 59 CT
+rs026 Y 12 T
+rs027 Y 20 C
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.gs.gt.gen
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.gs.gt.gen Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,32 @@
+X:2698560_G_A X:2698560_G_A 2698560 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698630_A_G X:2698630_A_G 2698630 A G 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698758_CAA_C X:2698758_CAA_C 2698758 CAA C 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698769_AAG_A X:2698769_AAG_A 2698769 AAG A 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698789_C_G X:2698789_C_G 2698789 C G 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698822_A_C X:2698822_A_C 2698822 A C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698831_G_A X:2698831_G_A 2698831 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698889_T_C X:2698889_T_C 2698889 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698923_G_A X:2698923_G_A 2698923 G A 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698953_A_AGG X:2698953_A_AGG 2698953 A AGG 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2698954_G_A X:2698954_G_A 2698954 G A 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0
+X:2699002_C_A X:2699002_C_A 2699002 C A 1 0 0 1 0 0 0.33 0.33 0.33 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699025_T_C X:2699025_T_C 2699025 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699091_G_A X:2699091_G_A 2699091 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699187_T_C X:2699187_T_C 2699187 T C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699188_G_C X:2699188_G_C 2699188 G C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699189_T_C X:2699189_T_C 2699189 T C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699217_C_T X:2699217_C_T 2699217 C T 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699246_C_A X:2699246_C_A 2699246 C A 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0
+X:2699275_T_G X:2699275_T_G 2699275 T G 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699350_A_T X:2699350_A_T 2699350 A T 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699360_T_C X:2699360_T_C 2699360 T C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699450_A_C X:2699450_A_C 2699450 A C 1 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699507_T_C X:2699507_T_C 2699507 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699555_C_A X:2699555_C_A 2699555 C A 1 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0
+X:2699645_G_T X:2699645_G_T 2699645 G T 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0
+X:2699676_G_A X:2699676_G_A 2699676 G A 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699728_C_T X:2699728_C_T 2699728 C T 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699775_C_A X:2699775_C_A 2699775 C A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
+X:2699898_C_CT X:2699898_C_CT 2699898 C CT 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0
+X:2699968_A_G X:2699968_A_G 2699968 A G 0.5 0.0 0.5 1 0 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 1 0
+X:2699970_T_C X:2699970_T_C 2699970 T C 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.gs.gt.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.gs.gt.samples Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,12 @@
+ID_1 ID_2 missing
+0 0 0
+NA00001 NA00001 0
+NA00002 NA00002 0
+NA00003 NA00003 0
+NA00004 NA00004 0
+NA00005 NA00005 0
+NA00006 NA00006 0
+NA00007 NA00007 0
+NA00008 NA00008 0
+NA00009 NA00009 0
+NA00010 NA00010 0
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.gvcf.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.gvcf.vcf Wed Jul 06 07:08:15 2016 -0400
[
b'@@ -0,0 +1,105 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping">\n+##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.999 or higher that read contains indicated allele vs all other intersecting indel alleles)">\n+##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel.">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">\n+##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">\n+##INFO=<ID=SNVSB,Number=1,Type=Float,Description="SNV site strand bias">\n+##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">\n+##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">\n+##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. 
RUs are not reported if longer than 20 bases.">\n+##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference.">\n+##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele.">\n+##FILTER=<ID=IndelConflict,Description="Locus is in region with conflicting indel calls">\n+##FILTER=<ID=SiteConflict,Description="Site genotype conflicts with proximal indel call. This is typically a heterozygous SNV call made inside of a heterozygous deletion">\n+##FILTER=<ID=LowGQX,Description="Locus GQX is less than 30 or not present">\n+##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.3">\n+##FILTER=<ID=HighSNVSB,Description="SNV strand bias value (SNVSB) exceeds 10">\n+##FILTER=<ID=HighREFREP,Description="Locus contains an indel allele occurring in a homopolymer or dinucleotide track with a reference repeat greater than 8">\n+##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">\n+##contig=<ID=22,length=450>\n+##SnvTheta=0.001\n+##IndelTheta=0.0001\n+##MaxDepth_1=114.18\n+##MaxDepth_10=131.73\n+##MaxDepth_11=117.27\n+##MaxDepth_12=116.97\n+##MaxDepth_13=102.24\n+##MaxDepth_14=101.55\n+##MaxDepth_15=95.22\n+##MaxDepth_16=111.33\n+##MaxDepth_17=112.59\n+##MaxDepth_18=121.86\n+##MaxDepth_19=111.12\n+##MaxDepth_2=121.83\n+##MaxDepth_20=111.24\n+##MaxDepth_21=98.43\n+##MaxDepth_22=76.23\n+##MaxDepth_3=120.09\n+##MaxDepth_4=124.50\n+##MaxDepth_5=119.82\n+##MaxDepth_6=122.22\n+##MaxDepth_7=120.27\n+##MaxDepth_8=120.45\n+##MaxDepth_9=102.48\n+##MaxDepth_M=7005.66\n+##MaxDepth_X=61.05\n+##MaxDepth_Y=37.17\n+##FILTER=<ID=IndelSizeFilter,Description="Indel is outside reportable size range. 
Insertion range: [1,3], Deletion range: [1,11]">\n+##gvcftools_version="0.16"\n+##FILTER=<ID=HAPLOID_CONFLICT,Description="Locus has heterozygous genotype in a haploid region.">\n+##FORMAT=<ID=OPL,Number=.,Type=Integer,Description="Original PL value before ploidy correction">\n+##INFO=<ID=phastCons,Number=0,Type=Flag,Description="overlaps a phastCons element">\n+##INFO=<ID=AA,Number=1,T'..b'CLNORIGIN,Number=.,Type=String,Description="String that describes the origin of the variant allele. Possible values: unknown, germline, somatic, inherited, paternal, maternal, de-novo, biparental, uniparental, not-tested, test-inconclusive, other. Multiple values for an allele are pipe-delimited">\n+##INFO=<ID=CLNSRC,Number=.,Type=String,Description="Variant clinical source or channel. Multiple values for an allele are pipe-delimited">\n+##INFO=<ID=CLNSRCID,Number=.,Type=String,Description="Identifier used by source defined in CLNSRC. Multiple values are pipe-delimited">\n+##INFO=<ID=CLNGENEINFO,Number=.,Type=String,Description="Gene symbol(s) and NCBI GeneID(s). The gene symbol and ID are delimited by a colon and multiple such pairs are pipe-delimited, Example SYMBOL1:GeneID1|SYMBOL2:GeneID2">\n+##INFO=<ID=CLNDBN,Number=.,Type=String,Description="Disease name used by the database specified by CLNSRC. Values corresponding to each CLNACC entry are pipe-delimited">\n+##INFO=<ID=CLNDSDB,Number=.,Type=String,Description="Colon-delimited list of variant disease database name(s). Multiple values from a single database are pipe-delimited">\n+##INFO=<ID=CLNDSDBID,Number=.,Type=String,Description="Colon-delimited list of variant disease database identifier(s). Multiple values from a single database are pipe-delimited">\n+##INFO=<ID=CSQ,Number=A,Type=String,Description="Consequence type as predicted by VEP. 
Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|EXON|INTRON|HGNC|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|DISTANCE|CANONICAL|SIFT|PolyPhen|GMAF|ENSP|DOMAINS|CCDS|HGVSc|HGVSp|CELL_TYPE">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE99\n+22\t1\t.\tN\t.\t0\tLowGQX\tEND=9;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t.:.:0:0\n+22\t10\t.\tC\t.\t0\tLowGQX\tEND=20;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:5:2:0\n+22\t21\t.\tC\t.\t0\tLowGQX\tEND=26;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:10:4:0\n+22\t27\t.\tC\t.\t0\tLowGQX\tEND=42;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:15:6:0\n+22\t43\t.\tG\t.\t0\tLowGQX\tEND=50;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:21:8:0\n+22\t51\t.\tC\t.\t0\tPASS\tEND=55;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:30:11:0\n+22\t56\t.\tG\t.\t0\tPASS\tEND=72;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:42:15:0\n+22\t73\t.\tT\t.\t0\tPASS\tEND=85;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:54:19:0\n+22\t86\t.\tG\tC\t23\tLowGQX\tSNVSB=0;SNVHPOL=2\tGT:GQ:GQX:DP:DPF:AD\t0/1:56:23:22:0:16,6\n+22\t87\t.\tT\t.\t0\tPASS\tEND=101;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:69:24:0\n+22\t102\t.\tA\t.\t0\tPASS\tEND=140;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:84:29:0\n+22\t141\t.\tG\t.\t0\tPASS\tEND=185;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:90:31:0\n+22\t186\t.\tT\t.\t0\tPASS\tEND=187;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:93:32:2\n+22\t188\t.\tT\t.\t0\tPASS\tEND=204;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:102:35:0\n+22\t205\t.\tT\t.\t0\tPASS\t.\tGT:GQX:DP:DPF\t0/0:72:36:0\n+22\t206\t.\tT\t.\t0\tPASS\tEND=231;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:87:30:0\n+22\t232\t.\tA\t.\t0\tPASS\tEND=249;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:66:23:0\n+22\t250\t.\tG\t.\t0\tPASS\tEND=257;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:87:30:0\n+22\t258\t.\tA\t.\t0\tPASS\tEND=269;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:114:39:0\n+22\t270\t.\tG\t.\t0\tPASS\tEND=279;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:150:51:0\n+22\t280\t.\tA\t.\t
0\tPASS\tEND=314;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:166:63:0\n+22\t315\t.\tC\t.\t0\tPASS\tEND=316;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:220:74:0\n+22\t317\t.\tT\t.\t0\tHighDepth\tEND=342;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:229:77:0\n+22\t343\t.\tT\t.\t0\tHighDepth\tEND=377;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:241:81:0\n+22\t378\t.\tT\t.\t0\tHighDepth\tEND=384;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:200:75:0\n+22\t385\t.\tG\t.\t0\tPASS\tEND=388;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:217:73:0\n+22\t389\t.\tC\t.\t0\tHighDepth\tEND=390;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:223:75:0\n+22\t391\t.\tT\t.\t0\tPASS\t.\tGT:GQX:DP:DPF\t0/0:223:75:1\n+22\t392\t.\tT\t.\t0\tHighDepth\tEND=397;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:223:75:0\n+22\t398\t.\tT\t.\t0\tPASS\tEND=420;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:178:60:0\n+22\t421\t.\tC\t.\t0\tPASS\tEND=450;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:142:54:0\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.hls.gt.hap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hls.gt.hap Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,35 @@
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0
+0 0 0 0 ? ? 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0* 1*
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 0 1 1 0 1 1 1 1 0 0 1 0 0 0 1 0 0 1 0
+0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 - 1 - 1 - 0 - 1 - 1 1 1 0 0 1 0 0 0 1
+0 - 1 - 0 - 0 - 0 - 0 0 0 0 0 1 0 0 0 0
+0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+? - 0 - 0 - 1 - 0 - 0 1 0 0 0 0 0 1 1 0
+0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.hls.gt.legend
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hls.gt.legend Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,36 @@
+id position a0 a1
+X:2698560_G_A 2698560 G A
+X:2698630_A_G 2698630 A G
+X:2698758_CAA_C 2698758 CAA C
+X:2698769_AAG_A 2698769 AAG A
+X:2698770_AG_A 2698770 AG A
+X:2698770_AG_AAGG 2698770 AG AAGG
+X:2698789_C_G 2698789 C G
+X:2698822_A_C 2698822 A C
+X:2698831_G_A 2698831 G A
+X:2698889_T_C 2698889 T C
+X:2698923_G_A 2698923 G A
+X:2698953_A_AGG 2698953 A AGG
+X:2698954_G_A 2698954 G A
+X:2699002_C_A 2699002 C A
+X:2699025_T_C 2699025 T C
+X:2699091_G_A 2699091 G A
+X:2699187_T_C 2699187 T C
+X:2699188_G_C 2699188 G C
+X:2699189_T_C 2699189 T C
+X:2699217_C_T 2699217 C T
+X:2699246_C_A 2699246 C A
+X:2699275_T_G 2699275 T G
+X:2699350_A_T 2699350 A T
+X:2699360_T_C 2699360 T C
+X:2699450_A_C 2699450 A C
+X:2699507_T_C 2699507 T C
+X:2699555_C_A 2699555 C A
+X:2699645_G_T 2699645 G T
+X:2699676_G_A 2699676 G A
+X:2699728_C_T 2699728 C T
+X:2699775_C_A 2699775 C A
+X:2699898_C_CT 2699898 C CT
+X:2699968_A_G 2699968 A G
+X:2699970_T_C 2699970 T C
+X:2699990_C_<INS:ME:LINE1>_2700054 2699990 C <INS:ME:LINE1>
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.hls.gt.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hls.gt.samples Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,11 @@
+sample population group sex
+NA00001 NA00001 NA00001 2
+NA00002 NA00002 NA00002 2
+NA00003 NA00003 NA00003 2
+NA00004 NA00004 NA00004 2
+NA00005 NA00005 NA00005 2
+NA00006 NA00006 NA00006 2
+NA00007 NA00007 NA00007 2
+NA00008 NA00008 NA00008 2
+NA00009 NA00009 NA00009 2
+NA00010 NA00010 NA00010 2
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.hs.gt.hap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hs.gt.hap Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,35 @@
+X:2698560_G_A X:2698560_G_A 2698560 G A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698630_A_G X:2698630_A_G 2698630 A G 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698758_CAA_C X:2698758_CAA_C 2698758 CAA C 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698769_AAG_A X:2698769_AAG_A 2698769 AAG A 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+X:2698770_AG_A X:2698770_AG_A 2698770 AG A 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
+X:2698770_AG_AAGG X:2698770_AG_AAGG 2698770 AG AAGG 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698789_C_G X:2698789_C_G 2698789 C G 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698822_A_C X:2698822_A_C 2698822 A C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698831_G_A X:2698831_G_A 2698831 G A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698889_T_C X:2698889_T_C 2698889 T C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698923_G_A X:2698923_G_A 2698923 G A 1 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
+X:2698953_A_AGG X:2698953_A_AGG 2698953 A AGG 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2698954_G_A X:2698954_G_A 2698954 G A 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0
+X:2699002_C_A X:2699002_C_A 2699002 C A 0 0 0 0 ? ? 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699025_T_C X:2699025_T_C 2699025 T C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699091_G_A X:2699091_G_A 2699091 G A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699187_T_C X:2699187_T_C 2699187 T C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0* 1*
+X:2699188_G_C X:2699188_G_C 2699188 G C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699189_T_C X:2699189_T_C 2699189 T C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699217_C_T X:2699217_C_T 2699217 C T 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699246_C_A X:2699246_C_A 2699246 C A 1 0 1 1 0 1 1 1 1 0 0 1 0 0 0 1 0 0 1 0
+X:2699275_T_G X:2699275_T_G 2699275 T G 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1
+X:2699350_A_T X:2699350_A_T 2699350 A T 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699360_T_C X:2699360_T_C 2699360 T C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699450_A_C X:2699450_A_C 2699450 A C 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
+X:2699507_T_C X:2699507_T_C 2699507 T C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X:2699555_C_A X:2699555_C_A 2699555 C A 0 - 1 - 1 - 0 - 1 - 1 1 1 0 0 1 0 0 0 1
+X:2699645_G_T X:2699645_G_T 2699645 G T 0 - 1 - 0 - 0 - 0 - 0 0 0 0 0 1 0 0 0 0
+X:2699676_G_A X:2699676_G_A 2699676 G A 0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+X:2699728_C_T X:2699728_C_T 2699728 C T 0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+X:2699775_C_A X:2699775_C_A 2699775 C A 0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+X:2699898_C_CT X:2699898_C_CT 2699898 C CT 0 - 0 - 1 - 0 - 1 - 1 0 1 0 0 0 0 0 0 1
+X:2699968_A_G X:2699968_A_G 2699968 A G ? - 0 - 0 - 1 - 0 - 0 1 0 0 0 0 0 1 1 0
+X:2699970_T_C X:2699970_T_C 2699970 T C 0 - 0 - 0 - 0 - 0 - 0 0 0 0 0 0 0 0 0 0
+X:2699990_C_<INS:ME:LINE1>_2700054 X:2699990_C_<INS:ME:LINE1>_2700054 2699990 C <INS:ME:LINE1> 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.hs.gt.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.hs.gt.samples Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,12 @@
+ID_1 ID_2 missing
+0 0 0
+NA00001 NA00001 0
+NA00002 NA00002 0
+NA00003 NA00003 0
+NA00004 NA00004 0
+NA00005 NA00005 0
+NA00006 NA00006 0
+NA00007 NA00007 0
+NA00008 NA00008 0
+NA00009 NA00009 0
+NA00010 NA00010 0
b
diff -r 000000000000 -r 2fea169065ec test-data/convert.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert.vcf Wed Jul 06 07:08:15 2016 -0400
b
b'@@ -0,0 +1,40 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=GP,Number=G,Type=Float,Description="Estimated Genotype Probability">\n+##contig=<ID=X,assembly=b37,length=155270560>\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003\tNA00004\tNA00005\tNA00006\tNA00007\tNA00008\tNA00009\tNA00010\n+X\t2698560\t.\tG\tA\t102\t.\t.\tGT:PL:GP\t0|0:0,21,177:1,0,0\t0|0:0,30,206:1,0,0\t0|0:0,21,177:1,0,0\t0|0:0,15,132:1,0,0\t0|0:0,9,90:1,0,0\t0|0:0,15,114:1,0,0\t0|0:0,15,118:1,0,0\t0|0:0,15,133:1,0,0\t0|0:0,15,144:1,0,0\t0|0:0,24,191:1,0,0\n+X\t2698630\t.\tA\tG\t537\t.\t.\tGT:PL:GP\t0|0:0,21,186:1,0,0\t0|0:0,21,176:1,0,0\t0|0:0,15,106:1,0,0\t0|0:0,18,127:1,0,0\t0|0:0,6,62:1,0,0\t0|0:0,15,146:1,0,0\t0|0:0,18,141:1,0,0\t0|0:0,21,173:1,0,0\t0|0:0,12,119:1,0,0\t0|0:0,15,145:1,0,0\n+X\t2698758\t.\tCAA\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,6,16:0.8292,0.1708,0\t0|1:0,0,0:0.0278,0.5743,0.3979\t0|0:0,0,0:0.6336,0.3664,0\t0|0:0,3,8:0.8611,0.1389,0\t0|0:0,0,8:0.7628,0.2372,0\t0|0:0,9,18:1,0,0\t0|0:0,9,23:1,0,0\t0|0:0,9,15:0.9855,0.0145,0\t0|0:0,6,10:1,0,0\t0|0:0,21,33:1,0,0\n+X\t2698769\t.\tAAG\tA\t999\t.\t.\tGT:PL:GP\t1|0:17,0,7:0.0069,0.9931,0\t1|1:0,0,0:0.0004,0.0892,0.9104\t0|1:17,3,0:0.0045,0.9954,0.0001\t1|0:11,0,2:0.0085,0.9915,0\t1|0:11,0,15:0.0003,0.9997,0\t0|0:0,15,40:1,0,0\t0|0:0,9,23:1,0,0\t0|0:0,15,25:0.8474,0.1526,0\t0|0:0,15,34:1,0,0\t0|0:0,33,56:1,0,0\n+X\t2698770\t.\tAG\tA,AAGG\t999\t.\t.\tGT:PL:GP\t0|0:0,12,103,12,103,103:0.925,0.0717,0,0.0033,0,0\t0|1:0,3,21,3,21,21:0.4944,0.368,0.0018,0.1343,0.0013,0.0002\t0|0:0,0,0,0,0,0:0.5458,0.4085,0,0.0457,0,0\t0|0:0,3,36,3,36,36:0.8126,0.1758,0,0.0116,0,0\t1|0:37,0,86,49,92,130:0,1,0,0,0,0\t0|0:0,15,125,15,125,125:1,0,0,0,0,0\t0|0:0,9,105,9,105,105:1,0,0,0,0,0\t0|0:0,15,109,15,109,109:0.9964,0.0034,
0,0.0002,0,0\t0|0:0,15,137,15,137,137:1,0,0,0,0,0\t0|0:0,33,215,33,215,215:1,0,0,0,0,0\n+X\t2698789\t.\tC\tG\t153\t.\t.\tGT:PL:GP\t0|0:0,21,152:1,0,0\t0|0:0,21,131:1,0,0\t0|0:0,12,113:1,0,0\t0|0:0,12,104:1,0,0\t0|0:0,21,137:1,0,0\t0|0:0,15,118:0.9999,0.0001,0\t0|0:0,15,111:1,0,0\t0|0:0,24,152:1,0,0\t0|0:0,18,147:1,0,0\t0|0:0,33,183:1,0,0\n+X\t2698822\t.\tA\tC\t85.2\t.\t.\tGT:PL:GP\t0|0:0,21,167:1,0,0\t0|0:0,21,171:1,0,0\t0|0:0,21,158:1,0,0\t0|0:0,18,154:1,0,0\t0|0:0,15,135:1,0,0\t0|0:0,15,132:1,0,0\t0|0:0,21,168:1,0,0\t0|0:0,21,175:1,0,0\t0|0:0,15,142:1,0,0\t0|0:0,21,172:1,0,0\n+X\t2698831\t.\tG\tA\t303\t.\t.\tGT:PL:GP\t0|0:0,15,129:1,0,0\t0|0:0,27,179:1,0,0\t0|0:0,24,196:1,0,0\t0|0:0,21,158:1,0,0\t0|0:0,18,154:1,0,0\t0|0:0,12,112:1,0,0\t0|0:0,24,162:1,0,0\t0|0:0,21,168:1,0,0\t0|0:0,9,95:1,0,0\t0|0:0,21,164:1,0,0\n+X\t2698889\t.\tT\tC\t74.4\t.\t.\tGT:PL:GP\t0|0:0,27,193:1,0,0\t0|0:0,45,255:1,0,0\t0|0:0,21,190:1,0,0\t0|0:0,36,254:1,0,0\t0|0:0,30,226:1,0,0\t0|0:0,36,253:1,0,0\t0|0:0,18,156:1,0,0\t0|0:0,9,87:1,0,0\t0|0:0,9,98:1,0,0\t0|0:0,24,205:1,0,0\n+X\t2698923\t.\tG\tA\t999\t.\t.\tGT:PL:GP\t1|0:62,0,133:0,1,0\t0|1:164,0,91:0,1,0\t0|1:35,0,73:0,1,0\t1|0:91,0,108:0,1,0\t1|0:67,0,71:0,1,0\t0|0:0,30,187:1,0,0\t0|0:0,9,73:1,0,0\t0|0:0,12,99:1,0,0\t0|0:0,18,153:1,0,0\t0|0:0,18,138:1,0,0\n+X\t2698953\t.\tA\tAGG\t267\t.\t.\tGT:PL:GP\t0|0:0,27,111:1,0,0\t0|0:0,33,124:1,0,0\t0|0:0,12,62:1,0,0\t0|0:0,15,86:1,0,0\t0|0:0,12,58:1,0,0\t0|0:0,15,69:1,0,0\t0|0:0,6,34:1,0,0\t0|0:0,18,83:1,0,0\t0|0:0,18,80:1,0,0\t0|0:0,15,74:1,0,0\n+X\t2698954\t.\tG\tA\t999\t.\t.\tGT:PL:GP\t1|0:69,0,139:0,1,0\t1|1:199,24,0:0,0,1\t0|1:15,0,82:0,1,0\t1|0:32,0,76:0,1,0\t1|0:16,0,80:0,1,0\t0|0:0,15,131:1,0,0\t0|0:0,6,58:1,0,0\t0|1:99,0,39:0,1,0\t0|0:0,18,163:1,0,0\t0|0:0,15,136:1,0,0\n+X\t2699002\t.\tC\tA\t65.1\t.\t.\tGT:PL:GP\t0|0:0,18,144:1,0,0\t0|0:0,12,115:1,0,0\t.|.:0,12,120:1,0,0\t0|0:0,15,131:1,0,0\t0|0:0,6,29:1,0,0\t0|0:0,9,95:1,0,0\t0|0:0,9,79:1,0,0\t0|0:0,24,188:1,0,0\t0|0:0,15,124:1,0,0\t0|0:0
,9,93:1,0,0\n+X\t2699025\t.\tT\tC\t44.9\t.\t.\tGT:PL:GP\t0|0:0,24,189:1,0,0\t0|0:0,12,98:1,0,0\t0|0:0,15,130:1,0,0\t0|0:0,15,113:1,0,0\t0|0:0,6,63:1,0,0\t0|0:0,24,198:1,0,0\t0|0:0,12,92:1,0,0\t0|0:0,24,197:1,0,0\t0|0:0,9,97:1,0,0\t0|0:0,12,108:1,0,0\n+X\t2699091\t.\tG\tA\t45\t.\t.\tGT:PL:GP\t0|0:0,18,'..b'0,0\t0|0:0,12,101:1,0,0\t0|0:0,12,97:1,0,0\t0|0:0,24,188:1,0,0\t0|0:0,24,194:1,0,0\t0|0:0,15,127:1,0,0\t0|0:0,21,169:1,0,0\t0|0:0,15,129:1,0,0\t0|0:0,21,171:1,0,0\n+X\t2699187\t.\tT\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,24,200:1,0,0\t0|0:0,24,191:1,0,0\t1|0:48,0,85:0,1,0\t0|0:0,15,145:1,0,0\t0|1:58,0,45:0,1,0\t1|0:61,0,50:0,1,0\t1|0:22,0,51:0,1,0\t0|0:0,27,211:1,0,0\t0|0:0,9,96:0.9999,0.0001,0\t0/1:23,0,160:0,1,0\n+X\t2699188\t.\tG\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,24,194:1,0,0\t0|0:0,24,167:1,0,0\t1|0:48,0,78:0,1,0\t0|0:0,15,131:1,0,0\t0|1:63,0,40:0,1,0\t1|0:50,0,44:0,1,0\t1|0:22,0,48:0,1,0\t0|0:0,27,212:1,0,0\t0|0:0,9,87:0.9999,0.0001,0\t0|1:23,0,154:0,1,0\n+X\t2699189\t.\tT\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,24,199:1,0,0\t0|0:0,24,176:1,0,0\t1|0:44,0,87:0,1,0\t0|0:0,15,136:1,0,0\t0|1:62,0,46:0,1,0\t1|0:61,0,46:0,1,0\t1|0:22,0,49:0,1,0\t0|0:0,27,212:1,0,0\t0|0:0,9,93:0.9999,0.0001,0\t0|1:23,0,164:0,1,0\n+X\t2699217\t.\tC\tT\t60.3\t.\t.\tGT:PL:GP\t0|0:0,18,158:1,0,0\t0|0:0,18,119:1,0,0\t0|0:0,21,152:1,0,0\t0|0:0,21,162:1,0,0\t0|0:0,12,102:1,0,0\t0|0:0,18,144:1,0,0\t0|0:0,12,108:1,0,0\t0|0:0,18,146:1,0,0\t0|0:0,12,98:1,0,0\t0|0:0,18,155:1,0,0\n+X\t2699246\t.\tC\tA\t999\t.\t.\tGT:PL:GP\t1|0:128,0,15:0,0.9998,0.0002\t1|1:147,21,0:0,0.0001,0.9999\t0|1:130,0,5:0,0.9977,0.0023\t1|1:237,33,0:0,0,1\t1|0:45,0,75:0,1,0\t0|1:145,0,49:0,1,0\t0|0:0,15,109:1,0,0\t0|1:13,0,63:0.0002,0.9998,0\t0|0:0,30,178:0.9953,0.0047,0\t1|0:120,0,57:0,1,0\n+X\t2699275\t.\tT\tG\t999\t.\t.\tGT:PL:GP\t0|0:0,18,165:0.9998,0.0002,0\t0|0:0,18,152:1,0,0\t1|0:0,9,95:0.0023,0.9977,0\t0|0:0,33,239:1,0,0\t0|1:125,0,40:0,1,0\t1|1:205,27,0:0,0,1\t1|0:69,0,43:0,1,0\t0|0:0,15,139:1,0,0\t0|0:0,30,219:1,0,0\t0|1:96,0,54:0,1
,0\n+X\t2699350\t.\tA\tT\t999\t.\t.\tGT:PL:GP\t0|0:0,27,206:1,0,0\t0|0:0,15,139:1,0,0\t1|0:54,0,25:0,1,0\t0|0:0,12,117:0.9996,0.0004,0\t0|1:79,0,73:0,1,0\t1|0:48,0,82:0,1,0\t1|0:68,0,45:0,1,0\t0|0:0,30,216:1,0,0\t0|0:0,27,224:1,0,0\t0|1:48,0,80:0,1,0\n+X\t2699360\t.\tT\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,21,184:1,0,0\t0|0:0,15,133:1,0,0\t1|0:53,0,21:0,1,0\t0|0:0,12,114:0.9996,0.0004,0\t0|1:20,0,66:0,1,0\t1|0:40,0,93:0,1,0\t1|0:52,0,66:0,1,0\t0|0:0,30,220:1,0,0\t0|0:0,21,191:1,0,0\t0|1:20,0,83:0,1,0\n+X\t2699450\t.\tA\tC\t999\t.\t.\tGT:PL:GP\t0|0:0,12,124:1,0,0\t0|0:0,6,55:0.9976,0.0024,0\t1|0:99,0,42:0,1,0\t0|0:0,21,186:0.9999,0.0001,0\t0|1:64,0,100:0,1,0\t1|0:38,0,177:0,1,0\t1|0:16,0,103:0,1,0\t0|0:0,24,202:1,0,0\t0|0:0,12,119:1,0,0\t0|1:75,0,115:0,1,0\n+X\t2699507\t.\tT\tC\t195\t.\t.\tGT:PL:GP\t0|0:0,15,133:1,0,0\t0|0:0,12,122:1,0,0\t0|0:0,6,60:1,0,0\t0|0:0,18,123:1,0,0\t0|0:0,15,145:1,0,0\t0|0:0,21,173:1,0,0\t0|0:0,21,178:1,0,0\t0|0:0,24,200:1,0,0\t0|0:0,12,125:1,0,0\t0|0:0,24,189:1,0,0\n+X\t2699555\t.\tC\tA\t999\t.\t.\tGT:PL:GP\t0:0,156:1,0\t1:58,19:0,1\t1:51,0:0,1\t0:0,91:1,0\t1:89,0:0,1\t1|1:132,15,0:0,0,1\t1|0:99,0,68:0,1,0\t0|1:101,0,101:0,1,0\t0|0:0,18,161:0.9998,0.0002,0\t0|1:118,0,72:0,1,0\n+X\t2699645\t.\tG\tT\t999\t.\t.\tGT:PL:GP\t0:0,95:1,0\t1:49,0:0,1\t0:0,58:1,0\t0:0,64:1,0\t0:0,113:1,0\t0|0:0,18,158:1,0,0\t0|0:0,18,146:1,0,0\t0|1:68,0,136:0,1,0\t0|0:0,30,210:1,0,0\t0|0:0,27,186:1,0,0\n+X\t2699676\t.\tG\tA\t999\t.\t.\tGT:PL:GP\t0:0,84:1,0\t0:0,87:1,0\t1:35,0:0,1\t0:0,28:1,0\t1:114,0:0,1\t1|0:99,0,72:0,1,0\t1|0:48,0,89:0,1,0\t0|0:0,18,155:1,0,0\t0|0:0,24,191:1,0,0\t0|1:99,0,61:0,1,0\n+X\t2699728\t.\tC\tT\t69.7\t.\t.\tGT:PL:GP\t0:0,58:1,0\t0:0,64:1,0\t0:0,33:1,0\t0:0,69:1,0\t0:0,81:1,0\t0|0:0,27,183:1,0,0\t0|0:0,45,220:1,0,0\t0|0:0,30,161:1,0,0\t0|0:0,15,110:1,0,0\t0|0:0,21,156:1,0,0\n+X\t2699775\t.\tC\tA\t71.1\t.\t.\tGT:PL:GP\t0:0,62:1,0\t0:0,101:1,0\t0:0,130:1,0\t0:0,141:1,0\t0:0,54:1,0\t0|0:0,30,203:1,0,0\t0|0:0,39,208:1,0,0\t0|0:0,30,177:1,0,0\t0|0:0,18
,132:1,0,0\t0|0:0,15,103:1,0,0\n+X\t2699898\t.\tC\tCT\t999\t.\t.\tGT:PL:GP\t0:0,32:1,0\t0:0,11:1,0\t1:11,0:0,1\t0:0,11:1,0\t1:31,0:0,1\t1|0:11,0,24:0.0438,0.9562,0\t1|0:8,0,17:0,1,0\t0|0:0,33,72:1,0,0\t0|0:0,27,69:1,0,0\t0|1:11,4,12:0.0003,0.9997,0\n+X\t2699968\t.\tA\tG\t999\t.\t.\tGT:PL:GP\t.:0,84:1,0\t0:0,32:1,0\t0:0,57:1,0\t1:131,0:0,1\t0:0,66:1,0\t0|1:89,0,44:0,1,0\t0|0:0,18,157:1,0,0\t0|0:0,45,255:1,0,0\t0|1:75,0,109:0,1,0\t1|0:98,0,62:0,1,0\n+X\t2699970\t.\tT\tC\t55.3\t.\t.\tGT:PL:GP\t0:0,68:1,0\t0:0,34:1,0\t0:0,32:1,0\t0:0,162:1,0\t0:0,63:1,0\t0|0:0,15,149:1,0,0\t0|0:0,21,181:1,0,0\t0|0:0,45,255:1,0,0\t0|0:0,27,207:1,0,0\t0|0:0,24,196:1,0,0\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/filter.1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.1.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##test=<xx=A,yy=B,zz=C>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##readme=AAAAAA
+##readme=BBBBBB
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 1000 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1001 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1003 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1006 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 1007 . G A 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2000 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2001 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2003 . T TC 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2005 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 2006 . T C 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 1001 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 1004 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 1008 . GT G 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 2001 . A AT 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 2003 . A AT 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+2 2006 . A AT 1806 . DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
b
diff -r 000000000000 -r 2fea169065ec test-data/filter.2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.2.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:25:35:-20,-5,-20 0/1:45:11:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:25:300
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:25:12 0/1:245:310
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:20
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:20 1/2:322:10
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS AN=4;AC=2 GT 0/1 0/1
b
diff -r 000000000000 -r 2fea169065ec test-data/filter.3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.3.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.2
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=STR,Number=A,Type=String,Description="Testing string and Number=A in INFO">
+##INFO=<ID=TXT0,Number=1,Type=String,Description="Testing in INFO">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##SAMPLE=<ID=NORMAL,SampleName=B,Description="Less-than (\"<\") and greater-than (\">\") quoting nonsense where double brackets would do just fine",softwareName=<Nonsense,Software>,softwareVer=<119,65>,softwareParam=<.>,MetadataResource=http://somewhere.com/path>
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="test">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=STR,Number=1,Type=String,Description="Test">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3162006 . GAA G,GA 238 q20 DP=19;AN=4;AC=1,1;XRF=1e6,2e6,3e6;XRI=1111,2222,3333;XRS=ABC,DEF,GHI;XAF=1e6,2e6;XAI=1111,2222;XAS=ABC,DEF;XGF=1e6,2e6,3e6,4e6,5e6,6e6;XGI=11,22,33,44,55,66;XGS=ABC,DEF,GHI,JKL,MNO,PQR;TXT=ABC,DEF,GHI GT:GQ:DP:STR 0/1:589:19:XX 0/2:1:1:YY
+1 3162007 . TAGGG CAGGG,CAGGT 238 q20 AO=52101,113;CIGAR=1X4M,1X3M1X;TXT0=text GT:FGS:FGI:FGF:FAS:FAI:FAF:FRS:FRI:FRF 0/1:AAAAAA,BBBBB,CCCC,DDD,EE,F:1,2,3,4,5,6:1e-1,2e-2,3e-3,4e-4,5e-5,6e-6:AAA,B:1,2:1e-1,2e-2:A,BB,CCC:1,2,3:1e-1,2e-2,3e-3 2:AAAAAA,BBB,C:1,2,3:1e-1,2e-2,3e-3:AAA,B:1,2:1e-1,2e-2:A,BB,CCC:1,2,3:1e-1,2e-2,3e-3
b
diff -r 000000000000 -r 2fea169065ec test-data/filter.4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter.4.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=135006516>
+##INFO=<ID=TEST1,Number=1,Type=Integer,Description="Test1">
+##INFO=<ID=TEST2,Number=1,Type=Float,Description="Test2">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=TEST3,Number=1,Type=Integer,Description="Test3">
+##FORMAT=<ID=TEST4,Number=1,Type=Float,Description="Test4">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2
+chr1 1 . A T 100 . TEST1=10;TEST2=10 GT:TEST3:TEST4 0/1:10:10 0/1:30:30
+chr1 2 . A T 100 . TEST1=50;TEST2=50 GT:TEST3:TEST4 0/1:20:20 0/1:40:40
b
diff -r 000000000000 -r 2fea169065ec test-data/fixploidy.ploidy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fixploidy.ploidy Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,9 @@
+1 3000001 3000001 X 0
+1 3000001 3000001 Y 1
+1 3000001 3000001 Z 2
+1 3000002 3000002 X 3
+1 3000002 3000002 Y 4
+1 3000002 3000002 Z 5
+1 3000004 3000004 X 6
+1 3000004 3000004 Y 7
+1 3000004 3000004 Z 8
b
diff -r 000000000000 -r 2fea169065ec test-data/fixploidy.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fixploidy.samples Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,3 @@
+A X
+B Y
+C Z
b
diff -r 000000000000 -r 2fea169065ec test-data/fixploidy.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fixploidy.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=FLAG,Number=0,Type=Flag,Description="Test type">
+##INFO=<ID=IINT,Number=1,Type=Integer,Description="Test type">
+##INFO=<ID=IFLT,Number=1,Type=Float,Description="Test type">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test type">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=FINT,Number=1,Type=Integer,Description="Test type">
+##FORMAT=<ID=FFLT,Number=1,Type=Float,Description="Test type">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test type">
+##FILTER=<ID=q11,Description="Quality below 10">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 3000001 xx C CT 11 PASS FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:xxx 0/0:11:1.1:x 0|0:11:1.1:x
+1 3000002 . C CTT . . . GT ./. ./. .|.
+1 3000003 xx C CTTT 11 q11 FLAG;IINT=.;IFLT=.;ISTR=. GT:FINT:FFLT:FSTR 0/0:.:.:. 0/0:.:.:. 0|0:.:.:.
+1 3000004 xx C CTTTT 11 q11 FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:x 0/0:11:1.1:xxx 0|0:11:1.1:xxx
b
diff -r 000000000000 -r 2fea169065ec test-data/gvcf.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gvcf.fa Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,9 @@
+>22
+NNNNNNNNCCTTGGCCAAGTCACTTCCTCCTTCAGGAACATTGCAGTGGGCCTAAGTGCC
+TCCTCTCGGGACTGGTATGGGGACGGTCATGCAATCTGGACAACATTCACCTTTAAAAGT
+TTATTGATCTTTTGTGACATGCACGTGGGTTCCCAGTAGCAAGAAACTAAAGGGTCGCAG
+GCCGGTTTCTGCTAATTTCTTTAATTCCAAGACAGTCTCAAATATTTTCTTATTAACTTC
+CTGGAGGGAGGCTTATCATTCTCTCTTTTGGATGATTCTAAGTACCAGCTAAAATACAGC
+TATCATTCATTTTCCTTGATTTGGGAGCCTAATTTCTTTAATTTAGTATGCAAGAAAACC
+AATTTGGAAATATCAACTGTTTTGGAAACCTTAGACCTAGGTCATCCTTAGTAAGATCTT
+CCCATTTATATAAATACTTGCAAGTAGTAGTGCCATAATT
b
diff -r 000000000000 -r 2fea169065ec test-data/isec.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/isec.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##test=<xx=A,yy=B,zz=C>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##readme=AAAAAA
+##readme=BBBBBB
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 3062915 . GTTT G 1806 q10 DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 3062915 . G T 1806 q10 DP=35;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20
+1 3106154 . CAAA C 1792 PASS DP=32;AN=2;AC=1 GT:GQ:DP 0/1:245:32
+1 3106154 . C T,CT 1792 PASS DP=32;AN=2;AC=1 GT:GQ:DP 0/1:245:32
+1 3157410 . GA G 628 q10 DP=21;AN=2;AC=2 GT:GQ:DP 1/1:21:21
+1 3162006 . GAA G 1016 PASS DP=22;AN=2;AC=1 GT:GQ:DP 0/1:212:22
+1 3177144 . GT G 727 PASS DP=30;AN=2;AC=1 GT:GQ:DP 0/1:150:30
+1 3184885 . TAAAA TA,T 246 PASS DP=10;AN=2;AC=1,1 GT:GQ:DP 1/2:12:10
+2 3199812 . G GTT,GT 481 PASS DP=26;AN=2;AC=1,1 GT:GQ:DP 1/2:322:26
+3 3212016 . CTT C,CT 565 PASS DP=26;AN=2;AC=1,1 GT:GQ:DP 1/2:91:26
+4 3212016 . TACACACAC T 325 PASS DP=31;AN=2;AC=1 GT:GQ:DP 0/1:325:31
+4 3258448 . TACACACAC T 325 PASS DP=31;AN=2;AC=1 GT:GQ:DP 0/1:325:31
b
diff -r 000000000000 -r 2fea169065ec test-data/isec.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/isec.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B
+1 3062915 . G A 376 q20 DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:376:14:-10,0,-10
+1 3062915 . GTTT GT 376 q20 DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:376:14:-10,0,-10
+1 3106154 . C T 677 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3106154 . CAAAA C 677 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3157410 . GA G 249 PASS DP=11;AN=2;AC=1 GT:GQ:DP 0/1:49:11
+1 3162006 . GAA G 663 PASS DP=19;AN=2;AC=1 GT:GQ:DP 0/1:589:19
+1 3177144 . GT G 460 PASS DP=24;AN=2;AC=1 GT:GQ:DP 0/1:236:24
+1 3184885 . TAAA T 598 PASS DP=16;AN=2;AC=1 GT:GQ:DP 0/1:435:16
+2 3188209 . GA G 162 . DP=15;AN=2;AC=1 GT:GQ:DP 0/1:162:15
+3 3199812 . G GTT,GT 353 PASS DP=19;AN=2;AC=1,1 GT:GQ:DP 1/2:188:19
+4 3212016 . CTT C 677 q20 DP=15;AN=2;AC=1 GT:GQ:DP 0/1:158:15
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.2.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.2.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,30 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000150 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C A 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3106154 . C CC 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C A 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200000 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200010 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200020 . C G,T 59.2 PASS AN=4;AC=2 GT:GL ./.:1,2,3,4,5,6 .:1,2,3
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.2.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.2.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,30 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C G 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000150 . C G 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C G 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3106154 . C CCC 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200000 . C T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200010 . c A,T 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3200020 . C T,G 59.2 PASS AN=4;AC=2 GT:GL ./.:1,4,6,2,5,3 .:1,3,2
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.3.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.3.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,26 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ 0/1:245 0/1:245
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.3.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.3.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,26 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 . C CG 59.2 PASS AN=4;AC=2;TR=10,20;TA=10;TG=10,20,30 GT:GQ 0/1:245 0/1:245
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.4.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.4.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=XR,Number=R,Type=Integer,Description="Some description">
+##FORMAT=<ID=XA,Number=A,Type=Integer,Description="Some description">
+##FORMAT=<ID=XG,Number=G,Type=Integer,Description="Some description">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 id1 C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XA:XG 0/1:245:0,1:1:0,1,2 0/1:245:1,2:2:0,1,2
+1 3000000 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ:XR:XA:XG 0/1:245:1,2:2:0,1,2 0/1:245:2,3:3:1,2,3
+1 3000002 . C CCG 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000002 id2 C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GL:XR:XA:XG 0/1:245:.:.:. 0/1:245:.:.:.
+1 3000002 id3 C CCG 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GL:XR:XA:XG 0/1:245:.:.:. 0/1:245:.
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.4.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.4.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=TR,Number=R,Type=Float,Description="Test tag">
+##INFO=<ID=TA,Number=A,Type=Float,Description="Test tag">
+##INFO=<ID=TG,Number=G,Type=Float,Description="Test tag">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FORMAT=<ID=XR,Number=R,Type=Integer,Description="Some description">
+##FORMAT=<ID=XA,Number=A,Type=Integer,Description="Some description">
+##FORMAT=<ID=XG,Number=G,Type=Integer,Description="Some description">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D
+1 3000000 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000000 id1 C A 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XG:XA 0/1:245:4,5:3,4,5:5 0/1:245:6,7:6,7,8:7
+1 3000002 id3 C A 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XG:XA 0/1:245:. 0/1:245:1,2:1,2,3:2
+1 3000002 id2 C A 59.2 PASS AN=4;AC=2;TR=1,2;TA=1;TG=1,2,3 GT:GQ:XR:XG:XA 0/1:245:. 0/1:245:.:.:.
+1 3000002 . C A 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T . PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=STR,Number=1,Type=String,Description="Testing string and Number=A in INFO">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##SAMPLE=<ID=NORMAL,SampleName=B,Description="Less-than (\"<\") and greater-than (\">\") quoting nonsense where double brackets would do just fine",softwareName=<Nonsense,Software>,softwareVer=<119,65>,softwareParam=<.>,MetadataResource=http://somewhere.com/path>
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FORMAT=<ID=STR,Number=1,Type=String,Description="Testing string in format">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B
+1 3062915 idSNP G A 24.6 q20 DP=14;DP4=1,2,3,4;AN=2;STR=.;AC=1 GT:GQ:DP:STR:GL 0/1:376:14:ABC:-10,0,-10
+1 3062915 id1D GTT GT 101 q20 DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL:STR 0/1:376:14:-10,0,-10:DEF
+1 3062915 id3D GTTT G 84.6 q20 TXT=AA;DP=14;DP4=1,2,3,4;AN=2;AC=1 GT:GQ:DP:GL 0/1:376:14:-10,0,-10
+1 3106154 . C T 999 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3106154 . CAAAA C 15.4 PASS DP=15;AN=2;AC=1 GT:GQ:DP:GL 0/1:277:15:-10,0,-10
+1 3157410 . GAC G 36.8 PASS DP=11;AN=2;AC=1 GT:GQ:DP 0/1:49:11
+1 3162006 . GAA G 238 PASS DP=19;AN=2;AC=1;XRF=1e6,2e6;XRI=1111,2222;XRS=AAA,BBB;XAF=1e6;XAI=1111;XAS=AAA;XGF=1e6,2e6,3e6;XGI=1111,2222,3333;XGS=A,B,C GT:GQ:DP 0/1:589:19
+1 3177144 . G T 999 PASS DP=24;AN=2;AC=1 GT:GQ:DP 0/1:236:24
+1 3177144 . GT G 999 PASS DP=24;AN=2;AC=1 GT:GQ:DP 0/1:236:24
+1 3184885 . TAAA T 25.8 PASS DP=16;AN=2;AC=1 GT:GQ:DP 0/1:435:16
+2 3188209 . GA G 41.5 . DP=15;AN=2;AC=1 GT:GQ:DP 0/1:162:15
+3 3199812 . GA GTT,GT 17.5 PASS DP=19;AN=2;AC=1,1 GT:GQ:DP 1/2:188:19
+4 3212016 . CTT C 999 q20 DP=15;AN=2;AC=1 GT:GQ:DP 0/1:158:15
b
diff -r 000000000000 -r 2fea169065ec test-data/merge.c.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merge.c.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,43 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=INTA,Number=A,Type=Integer,Description="Testing Number=A in INFO">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D
+1 3062915 id3D GTTT G 48.7 q10 TXT=BB;DP=999;DP4=4,3,2,1;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G C,T 419 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1;INTA=1,2 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:409:35:-20,-5,-20,-20,-5,-20
+1 3062915 id2D GTT G 999 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3106154 . CAAA C 72.6 PASS AN=0;AC=0 GT:GQ:DP .:245:32 ./.:245:32
+1 3106154 . C CT 459 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . G T 46.7 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA GA 206 PASS AN=4;AC=2;XRF=1e6,5e5;XRI=1111,5555;XRS=AAA,DDD;XAF=5e5;XAI=5555;XAS=DDD;XGF=1e6,5e5,9e9;XGI=1111,5555,9999;XGS=A,E,F GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G . 364 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 8.42 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 291 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 52.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T . PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.X.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.X.vcf Wed Jul 06 07:08:15 2016 -0400
b
b'@@ -0,0 +1,4127 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##samtoolsVersion=1.1-19-g6b249e2+htslib-1.1-74-g845c515\n+##samtoolsCommand=samtools mpileup -uvDV -b xxx//mpileup.bam.list -f xxx//mpileup.ref.fa.gz\n+##reference=file://xxx//mpileup.ref.fa.gz\n+##contig=<ID=X,length=81195210>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=I16,Number=16,Type=Float,Description="Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h">\n+##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality 
bases">\n+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00100\tHG00101\tHG00102\n+X\t1\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t2\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,439,17587,0,0,319,9251,0,0,226,5030,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t3\t.\tG\t<*>\t0\t.\tDP=11;I16=11,0,0,0,431,16971,0,0,319,9251,0,0,229,5111,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t4\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,423,16417,0,0,319,9251,0,0,232,5202,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,71:3:0\n+X\t5\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,450,18520,0,0,319,9251,0,0,234,5252,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t6\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,403,14847,0,0,319,9251,0,0,236,5310,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t7\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,446,18114,0,0,319,9251,0,0,237,5327,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t8\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,465,19677,0,0,319,9251,0,0,238,5354,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t9\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,447,18205,0,0,319,9251,0,0,239,5391,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t10\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,426,16756,0,0,319,9251,0,0,240,5438,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,69:3:0\n+X\t11\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,413,15603,0,0,319,9251,0,0,241,5495,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t12\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,438,17506,0,0,319,9251,0,0,242,5562,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t13\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,437,17463,0
,0,319,9251,0,0,243,5639,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t14\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,453,18715,0,0,319,9251,0,0,242,5628,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t15\t.\tG\t<*>\t0\t.\tDP=11;I1'..b'1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4072\t.\tG\t<*>\t0\t.\tDP=5;I16=2,2,0,0,138,4974,0,0,194,9938,0,0,55,987,0,0;QS=1,0;MQSB=0;MQ0F=0\tPL:DP:DV\t0,12,122:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4073\t.\tG\t<*>\t0\t.\tDP=5;I16=3,2,0,0,156,5082,0,0,254,13538,0,0,60,994,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,136:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4074\t.\tC\t<*>\t0\t.\tDP=5;I16=3,2,0,0,160,5602,0,0,254,13538,0,0,56,928,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4075\t.\tT\t<*>\t0\t.\tDP=5;I16=3,2,0,0,187,7069,0,0,254,13538,0,0,52,870,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,155:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4076\t.\tG\t<*>\t0\t.\tDP=5;I16=3,2,0,0,174,6298,0,0,254,13538,0,0,48,820,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,149:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4077\t.\tA\t<*>\t0\t.\tDP=4;I16=3,1,0,0,138,4810,0,0,194,9938,0,0,44,728,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,121:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4078\t.\tT\t<*>\t0\t.\tDP=4;I16=3,1,0,0,143,5173,0,0,194,9938,0,0,40,644,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,124:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4079\t.\tA\t<*>\t0\t.\tDP=4;I16=3,1,0,0,121,3847,0,0,194,9938,0,0,36,568,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,107:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4080\t.\tT\t<*>\t0\t.\tDP=4;I16=3,0,0,0,106,3778,0,0,134,6338,0,0,25,451,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,9,87:3:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4081\t.\tT\t<*>\t0\t.\tDP=4;I16=3,1,0,0,106,2934,0,0,194,9938,0,0,28,440,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,94:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4082\t.\tC\t<*>\t0\t.\tDP=3;I16=2,1,0,0,110,4042,0,0,134,6338,0,0,25,387,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,103:3:0\t0,0,0:0:0\t0,
0,0:0:0\n+X\t4083\t.\tC\t<*>\t0\t.\tDP=3;I16=2,1,0,0,104,3648,0,0,134,6338,0,0,22,340,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,98:3:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4084\t.\tA\t<*>\t0\t.\tDP=2;I16=1,1,0,0,78,3050,0,0,97,4969,0,0,20,298,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,74:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4085\t.\tC\t<*>\t0\t.\tDP=2;I16=1,1,0,0,62,1940,0,0,97,4969,0,0,18,260,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,62:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4086\t.\tG\t<*>\t0\t.\tDP=2;I16=1,1,0,0,56,1640,0,0,97,4969,0,0,16,226,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,56:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4087\t.\tC\t<*>\t0\t.\tDP=2;I16=1,1,0,0,69,2405,0,0,97,4969,0,0,14,196,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,68:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4088\t.\tA\t<*>\t0\t.\tDP=1;I16=1,0,0,0,39,1521,0,0,37,1369,0,0,13,169,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4089\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,12,144,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4090\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,33,1089,0,0,37,1369,0,0,11,121,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,33:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4091\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,10,100,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4092\t.\tG\t<*>\t0\t.\tDP=1;I16=1,0,0,0,37,1369,0,0,37,1369,0,0,9,81,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4093\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,8,64,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4094\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,40,1600,0,0,37,1369,0,0,7,49,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4095\t.\tA\t<*>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,6,36,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4096\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,5,25,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4097\t.\tA\t<*>\t0\t.\tDP=1;
I16=1,0,0,0,35,1225,0,0,37,1369,0,0,4,16,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4098\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,31,961,0,0,37,1369,0,0,3,9,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,31:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4099\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,2,4,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4100\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,27,729,0,0,37,1369,0,0,1,1,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,27:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4101\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,26,676,0,0,37,1369,0,0,0,0,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,26:1:0\t0,0,0:0:0\t0,0,0:0:0\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.c.X.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.c.X.vcf Wed Jul 06 07:08:15 2016 -0400
b
b'@@ -0,0 +1,4127 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##samtoolsVersion=1.1-19-g6b249e2+htslib-1.1-74-g845c515\n+##samtoolsCommand=samtools mpileup -uvDV -b xxx//mpileup.bam.list -f xxx//mpileup.ref.fa.gz\n+##reference=file://xxx//mpileup.ref.fa.gz\n+##contig=<ID=X,length=81195210>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=I16,Number=16,Type=Float,Description="Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h">\n+##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality 
bases">\n+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00100\tHG00101\tHG00102\n+X\t1\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t2\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,439,17587,0,0,319,9251,0,0,226,5030,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t3\t.\tG\t<*>\t0\t.\tDP=11;I16=11,0,0,0,431,16971,0,0,319,9251,0,0,229,5111,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t4\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,423,16417,0,0,319,9251,0,0,232,5202,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,71:3:0\n+X\t5\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,450,18520,0,0,319,9251,0,0,234,5252,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t6\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,403,14847,0,0,319,9251,0,0,236,5310,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t7\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,446,18114,0,0,319,9251,0,0,237,5327,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t8\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,465,19677,0,0,319,9251,0,0,238,5354,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t9\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,447,18205,0,0,319,9251,0,0,239,5391,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t10\t.\tA\t<*>\t0\t.\tDP=11;I16=11,0,0,0,426,16756,0,0,319,9251,0,0,240,5438,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,69:3:0\n+X\t11\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,413,15603,0,0,319,9251,0,0,241,5495,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t12\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,438,17506,0,0,319,9251,0,0,242,5562,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t13\t.\tC\t<*>\t0\t.\tDP=11;I16=11,0,0,0,437,17463,0
,0,319,9251,0,0,243,5639,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t14\t.\tT\t<*>\t0\t.\tDP=11;I16=11,0,0,0,453,18715,0,0,319,9251,0,0,242,5628,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+X\t15\t.\tG\t<*>\t0\t.\tDP=11;I1'..b'1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4072\t.\tG\t<*>\t0\t.\tDP=5;I16=2,2,0,0,138,4974,0,0,194,9938,0,0,55,987,0,0;QS=1,0;MQSB=0;MQ0F=0\tPL:DP:DV\t0,12,122:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4073\t.\tG\t<*>\t0\t.\tDP=5;I16=3,2,0,0,156,5082,0,0,254,13538,0,0,60,994,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,136:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4074\t.\tC\t<*>\t0\t.\tDP=5;I16=3,2,0,0,160,5602,0,0,254,13538,0,0,56,928,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4075\t.\tT\t<*>\t0\t.\tDP=5;I16=3,2,0,0,187,7069,0,0,254,13538,0,0,52,870,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,155:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4076\t.\tG\t<*>\t0\t.\tDP=5;I16=3,2,0,0,174,6298,0,0,254,13538,0,0,48,820,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,149:5:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4077\t.\tA\t<*>\t0\t.\tDP=4;I16=3,1,0,0,138,4810,0,0,194,9938,0,0,44,728,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,121:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4078\t.\tT\t<*>\t0\t.\tDP=4;I16=3,1,0,0,143,5173,0,0,194,9938,0,0,40,644,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,124:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4079\t.\tA\t<*>\t0\t.\tDP=4;I16=3,1,0,0,121,3847,0,0,194,9938,0,0,36,568,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,107:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4080\t.\tT\t<*>\t0\t.\tDP=4;I16=3,0,0,0,106,3778,0,0,134,6338,0,0,25,451,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,9,87:3:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4081\t.\tT\t<*>\t0\t.\tDP=4;I16=3,1,0,0,106,2934,0,0,194,9938,0,0,28,440,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,94:4:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4082\t.\tC\t<*>\t0\t.\tDP=3;I16=2,1,0,0,110,4042,0,0,134,6338,0,0,25,387,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,103:3:0\t0,0,0:0:0\t0,
0,0:0:0\n+X\t4083\t.\tC\t<*>\t0\t.\tDP=3;I16=2,1,0,0,104,3648,0,0,134,6338,0,0,22,340,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,98:3:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4084\t.\tA\t<*>\t0\t.\tDP=2;I16=1,1,0,0,78,3050,0,0,97,4969,0,0,20,298,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,74:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4085\t.\tC\t<*>\t0\t.\tDP=2;I16=1,1,0,0,62,1940,0,0,97,4969,0,0,18,260,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,62:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4086\t.\tG\t<*>\t0\t.\tDP=2;I16=1,1,0,0,56,1640,0,0,97,4969,0,0,16,226,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,56:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4087\t.\tC\t<*>\t0\t.\tDP=2;I16=1,1,0,0,69,2405,0,0,97,4969,0,0,14,196,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,68:2:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4088\t.\tA\t<*>\t0\t.\tDP=1;I16=1,0,0,0,39,1521,0,0,37,1369,0,0,13,169,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4089\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,12,144,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4090\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,33,1089,0,0,37,1369,0,0,11,121,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,33:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4091\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,10,100,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4092\t.\tG\t<*>\t0\t.\tDP=1;I16=1,0,0,0,37,1369,0,0,37,1369,0,0,9,81,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4093\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,8,64,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4094\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,40,1600,0,0,37,1369,0,0,7,49,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4095\t.\tA\t<*>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,6,36,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4096\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,5,25,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4097\t.\tA\t<*>\t0\t.\tDP=1;
I16=1,0,0,0,35,1225,0,0,37,1369,0,0,4,16,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4098\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,31,961,0,0,37,1369,0,0,3,9,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,31:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4099\t.\tT\t<*>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,2,4,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4100\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,27,729,0,0,37,1369,0,0,1,1,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,27:1:0\t0,0,0:0:0\t0,0,0:0:0\n+X\t4101\t.\tC\t<*>\t0\t.\tDP=1;I16=1,0,0,0,26,676,0,0,37,1369,0,0,0,0,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,26:1:0\t0,0,0:0:0\t0,0,0:0:0\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.c.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.c.vcf Wed Jul 06 07:08:15 2016 -0400
b
b'@@ -0,0 +1,4127 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##samtoolsVersion=1.1-19-g6b249e2+htslib-1.1-74-g845c515\n+##samtoolsCommand=samtools mpileup -uvDV -b xxx//mpileup.bam.list -f xxx//mpileup.ref.fa.gz\n+##reference=file://xxx//mpileup.ref.fa.gz\n+##contig=<ID=17,length=81195210>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=I16,Number=16,Type=Float,Description="Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h">\n+##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality 
bases">\n+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00100\tHG00101\tHG00102\n+17\t1\t.\tA\t<X>\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t2\t.\tA\t<X>\t0\t.\tDP=11;I16=11,0,0,0,439,17587,0,0,319,9251,0,0,226,5030,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t3\t.\tG\t<X>\t0\t.\tDP=11;I16=11,0,0,0,431,16971,0,0,319,9251,0,0,229,5111,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t4\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,423,16417,0,0,319,9251,0,0,232,5202,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,71:3:0\n+17\t5\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,450,18520,0,0,319,9251,0,0,234,5252,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t6\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,403,14847,0,0,319,9251,0,0,236,5310,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t7\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,446,18114,0,0,319,9251,0,0,237,5327,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t8\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,465,19677,0,0,319,9251,0,0,238,5354,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t9\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,447,18205,0,0,319,9251,0,0,239,5391,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t10\t.\tA\t<X>\t0\t.\tDP=11;I16=11,0,0,0,426,16756,0,0,319,9251,0,0,240,5438,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,69:3:0\n+17\t11\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,413,15603,0,0,319,9251,0,0,241,5495,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t12\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,438,17506,0,0,319,9251,0,0,242,5562,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t13\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,
0,437,17463,0,0,319,9251,0,0,243,5639,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t14\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,453,18715,0,0,319,9251,0,0,242,5628,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t15\t.\tG\t'..b':DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4072\t.\tG\t<X>\t0\t.\tDP=5;I16=2,2,0,0,138,4974,0,0,194,9938,0,0,55,987,0,0;QS=1,0;MQSB=0;MQ0F=0\tPL:DP:DV\t0,12,122:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4073\t.\tG\t<X>\t0\t.\tDP=5;I16=3,2,0,0,156,5082,0,0,254,13538,0,0,60,994,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,136:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4074\t.\tC\t<X>\t0\t.\tDP=5;I16=3,2,0,0,160,5602,0,0,254,13538,0,0,56,928,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4075\t.\tT\t<X>\t0\t.\tDP=5;I16=3,2,0,0,187,7069,0,0,254,13538,0,0,52,870,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,155:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4076\t.\tG\t<X>\t0\t.\tDP=5;I16=3,2,0,0,174,6298,0,0,254,13538,0,0,48,820,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,149:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4077\t.\tA\t<X>\t0\t.\tDP=4;I16=3,1,0,0,138,4810,0,0,194,9938,0,0,44,728,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,121:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4078\t.\tT\t<X>\t0\t.\tDP=4;I16=3,1,0,0,143,5173,0,0,194,9938,0,0,40,644,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,124:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4079\t.\tA\t<X>\t0\t.\tDP=4;I16=3,1,0,0,121,3847,0,0,194,9938,0,0,36,568,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,107:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4080\t.\tT\t<X>\t0\t.\tDP=4;I16=3,0,0,0,106,3778,0,0,134,6338,0,0,25,451,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,9,87:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4081\t.\tT\t<X>\t0\t.\tDP=4;I16=3,1,0,0,106,2934,0,0,194,9938,0,0,28,440,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,94:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4082\t.\tC\t<X>\t0\t.\tDP=3;I16=2,1,0,0,110,4042,0,0,134,6338,0,0,25,387,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,103:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4083\t.\tC
\t<X>\t0\t.\tDP=3;I16=2,1,0,0,104,3648,0,0,134,6338,0,0,22,340,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,98:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4084\t.\tA\t<X>\t0\t.\tDP=2;I16=1,1,0,0,78,3050,0,0,97,4969,0,0,20,298,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,74:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4085\t.\tC\t<X>\t0\t.\tDP=2;I16=1,1,0,0,62,1940,0,0,97,4969,0,0,18,260,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,62:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4086\t.\tG\t<X>\t0\t.\tDP=2;I16=1,1,0,0,56,1640,0,0,97,4969,0,0,16,226,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,56:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4087\t.\tC\t<X>\t0\t.\tDP=2;I16=1,1,0,0,69,2405,0,0,97,4969,0,0,14,196,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,68:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4088\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,39,1521,0,0,37,1369,0,0,13,169,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4089\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,12,144,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4090\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,33,1089,0,0,37,1369,0,0,11,121,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,33:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4091\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,10,100,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4092\t.\tG\t<X>\t0\t.\tDP=1;I16=1,0,0,0,37,1369,0,0,37,1369,0,0,9,81,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4093\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,8,64,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4094\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,40,1600,0,0,37,1369,0,0,7,49,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4095\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,6,36,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4096\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,5,25,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4097\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0
,0,35,1225,0,0,37,1369,0,0,4,16,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4098\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,31,961,0,0,37,1369,0,0,3,9,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,31:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4099\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,2,4,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4100\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,27,729,0,0,37,1369,0,0,1,1,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,27:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4101\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,26,676,0,0,37,1369,0,0,0,0,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,26:1:0\t0,0,0:0:0\t0,0,0:0:0\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.ploidy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.ploidy Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,4 @@
+X 1 1000 M 1
+X 3104 5000 M 1
+* * * M 2
+* * * F 2
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.samples Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,3 @@
+HG00100 F
+HG00101 M
+HG00102 F
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.tab Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,11 @@
+17 1 A,G,T
+17 2 A,T,G
+17 3 A,C
+17 4 A,C,T,G
+17 5 A,G,T
+17 6 A,T,G
+17 7 A,T,G,C
+17 828 T,C
+17 1665 T,C
+17 2220 G,C
+17 2564 A,AG
b
diff -r 000000000000 -r 2fea169065ec test-data/mpileup.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mpileup.vcf Wed Jul 06 07:08:15 2016 -0400
b
b'@@ -0,0 +1,4127 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##samtoolsVersion=1.1-19-g6b249e2+htslib-1.1-74-g845c515\n+##samtoolsCommand=samtools mpileup -uvDV -b xxx//mpileup.bam.list -f xxx//mpileup.ref.fa.gz\n+##reference=file://xxx//mpileup.ref.fa.gz\n+##contig=<ID=17,length=81195210>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=I16,Number=16,Type=Float,Description="Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h">\n+##INFO=<ID=QS,Number=R,Type=Float,Description="Auxiliary tag used for calling">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality 
bases">\n+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of high-quality non-reference bases">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00100\tHG00101\tHG00102\n+17\t1\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0,.,.,.,.,.,.,.,.,.:5:0\t.:3:0\t.:3:0\n+17\t2\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0,.,.,.,.,.,.,.,.,.:5:0\t.:3:0\t.:3:0\n+17\t3\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0,.,.,.,.,.,.,.,.,.:5:0\t.:3:0\t.:3:0\n+17\t4\t.\tA\tG,X,T,C\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,1,0,1,1;MQ0F=0\tPL:DP:DV\t1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:5:0\t.:3:0\t.:3:0\n+17\t5\t.\tA\tX,G\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,0,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0:5:0\t.:3:0\t.:3:0\n+17\t6\t.\tA\tX,G\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,0,1;MQ0F=0\tPL:DP:DV\t0,0,0,0,0,0:5:0\t.:3:0\t.:3:0\n+17\t7\t.\tA\tX,G\t0\t.\tDP=11;I16=11,0,0,0,452,18594,0,0,319,9251,0,0,223,4959,0,0;QS=1,0,1;MQ0F=0\tPL:DP:DV\t1,2,3,4,5,6:5:0\t.:3:0\t.:3:0\n+17\t8\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,465,19677,0,0,319,9251,0,0,238,5354,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t9\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,447,18205,0,0,319,9251,0,0,239,5391,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t10\t.\tA\t<X>\t0\t.\tDP=11;I16=11,0,0,0,426,16756,0,0,319,9251,0,0,240,5438,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,69:3:0\n+17\t11\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,413,15603,0,0,319,9251,0,0,241,5495,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t12\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,438,17506,0,0,319,9251,0,0,242,5562,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,1
00:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t13\t.\tC\t<X>\t0\t.\tDP=11;I16=11,0,0,0,437,17463,0,0,319,9251,0,0,243,5639,0,0;QS=3,0;MQ0F=0\tPL:DP:DV\t0,15,100:5:0\t0,9,72:3:0\t0,9,72:3:0\n+17\t14\t.\tT\t<X>\t0\t.\tDP=11;I16=11,0,0,0,453,18715,0,0,319,9251,0,0,242,562'..b':DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4072\t.\tG\t<X>\t0\t.\tDP=5;I16=2,2,0,0,138,4974,0,0,194,9938,0,0,55,987,0,0;QS=1,0;MQSB=0;MQ0F=0\tPL:DP:DV\t0,12,122:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4073\t.\tG\t<X>\t0\t.\tDP=5;I16=3,2,0,0,156,5082,0,0,254,13538,0,0,60,994,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,136:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4074\t.\tC\t<X>\t0\t.\tDP=5;I16=3,2,0,0,160,5602,0,0,254,13538,0,0,56,928,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,142:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4075\t.\tT\t<X>\t0\t.\tDP=5;I16=3,2,0,0,187,7069,0,0,254,13538,0,0,52,870,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,155:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4076\t.\tG\t<X>\t0\t.\tDP=5;I16=3,2,0,0,174,6298,0,0,254,13538,0,0,48,820,0,0;QS=1,0;MQSB=0.333333;MQ0F=0\tPL:DP:DV\t0,15,149:5:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4077\t.\tA\t<X>\t0\t.\tDP=4;I16=3,1,0,0,138,4810,0,0,194,9938,0,0,44,728,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,121:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4078\t.\tT\t<X>\t0\t.\tDP=4;I16=3,1,0,0,143,5173,0,0,194,9938,0,0,40,644,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,124:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4079\t.\tA\t<X>\t0\t.\tDP=4;I16=3,1,0,0,121,3847,0,0,194,9938,0,0,36,568,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,107:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4080\t.\tT\t<X>\t0\t.\tDP=4;I16=3,0,0,0,106,3778,0,0,134,6338,0,0,25,451,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,9,87:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4081\t.\tT\t<X>\t0\t.\tDP=4;I16=3,1,0,0,106,2934,0,0,194,9938,0,0,28,440,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,12,94:4:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4082\t.\tC\t<X>\t0\t.\tDP=3;I16=2,1,0,0,110,4042,0,0,134,6338,0,0,25,387,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,103:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4083\t.\tC\t<X>\t0\
t.\tDP=3;I16=2,1,0,0,104,3648,0,0,134,6338,0,0,22,340,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,9,98:3:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4084\t.\tA\t<X>\t0\t.\tDP=2;I16=1,1,0,0,78,3050,0,0,97,4969,0,0,20,298,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,74:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4085\t.\tC\t<X>\t0\t.\tDP=2;I16=1,1,0,0,62,1940,0,0,97,4969,0,0,18,260,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,62:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4086\t.\tG\t<X>\t0\t.\tDP=2;I16=1,1,0,0,56,1640,0,0,97,4969,0,0,16,226,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,56:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4087\t.\tC\t<X>\t0\t.\tDP=2;I16=1,1,0,0,69,2405,0,0,97,4969,0,0,14,196,0,0;QS=1,0;MQSB=1;MQ0F=0\tPL:DP:DV\t0,6,68:2:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4088\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,39,1521,0,0,37,1369,0,0,13,169,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4089\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,12,144,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4090\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,33,1089,0,0,37,1369,0,0,11,121,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,33:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4091\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,36,1296,0,0,37,1369,0,0,10,100,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,36:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4092\t.\tG\t<X>\t0\t.\tDP=1;I16=1,0,0,0,37,1369,0,0,37,1369,0,0,9,81,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4093\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,8,64,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4094\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,40,1600,0,0,37,1369,0,0,7,49,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,37:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4095\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,1225,0,0,37,1369,0,0,6,36,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4096\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,5,25,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4097\t.\tA\t<X>\t0\t.\tDP=1;I16=1,0,0,0,35,122
5,0,0,37,1369,0,0,4,16,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,35:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4098\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,31,961,0,0,37,1369,0,0,3,9,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,31:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4099\t.\tT\t<X>\t0\t.\tDP=1;I16=1,0,0,0,32,1024,0,0,37,1369,0,0,2,4,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,32:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4100\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,27,729,0,0,37,1369,0,0,1,1,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,27:1:0\t0,0,0:0:0\t0,0,0:0:0\n+17\t4101\t.\tC\t<X>\t0\t.\tDP=1;I16=1,0,0,0,26,676,0,0,37,1369,0,0,0,0,0,0;QS=1,0;MQ0F=0\tPL:DP:DV\t0,3,26:1:0\t0,0,0:0:0\t0,0,0:0:0\n'
b
diff -r 000000000000 -r 2fea169065ec test-data/norm.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.fa Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,26 @@
+>20 20:1339000-1339300
+AGGATGGGGCTCATTAATAGAGCTCCACTTGTCTCCAGAATCACTGGTGAGGAAGGGGAG
+TGTTGCCCCCACATTCGTGCACAGCAGGGATGGTTCACCGAACTCCACACCAGTCTCTGC
+AGAGCCTGTTGGGGAGAGGAGGGCTGTGGTTTCTTTGATGGTGTTCACCTGGAGTAGAGC
+AAGTATTGTCAAAAGGGTCATCCTCGGAGGTTGCAGTGAGCCGAGATCGCACCATTGCAC
+TGCAGCCTGGGAGACAGAGCAAGACTCCATCTCAAAAAAAAAAAAAAAAAAAAAGGCCAT
+C
+>1 1:10143-10443
+CTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACC
+CTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCCTA
+ACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCA
+ACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTA
+ACCCTAACCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC
+>2 1:1382388-1382602
+GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTA
+TTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTT
+GAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTG
+CAAGCTCCACCTCCTGGGTTCACGCCATTCTCCTG
+>3 madeup
+ACTGGACACGTGGACACACACACACACACACACACACACACAGTCAAACCACCTACCAGA
+>4 20:8917026-8917085
+TCCCCTCTTGACCTCTCTCTATTTTTTTTTTTTTTTCTGAGATGGATTTTTGCTCTTGTT
+>5 20:18724313-18724343
+GTCTCAAAAAAAAAAAAAAAAAAAAGAAAAG
+>21
+TTTATTATTATTATTATTAAATTGAATTTATTTAGTGTACATACATTCATGTGTATTGTG
b
diff -r 000000000000 -r 2fea169065ec test-data/norm.merge.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.merge.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,61 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+##FILTER=<ID=FAIL1,Description="Failed filter 1">
+##FILTER=<ID=FAIL2,Description="Failed filter 2">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAAACCCTAAA TAA 999 PASS INDEL;AN=4;AC=2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06;XRI=1111,2222;XRS=AAA,BBB;XAF=1e+06;XAI=1111;XAS=AAA;XGF=1e+06,2e+06,3e+06;XGI=1111,2222,3333;XGS=A,B,C GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/0:1,2,3:1:1e+06,2e+06:1111,2222:AAAA,BBB:1e+06:1111:A:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC 1/0:1,2,3:1:1e+06,2e+06:1111,2222:AAAA,BBB:1e+06:1111:A:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC
+1 105 . TAAACCCTAAA TAACCCTAAA 999 PASS INDEL;AN=4;AC=2;DP=19;ISTR=SomeString;XRF=1e+06,500000;XRI=1111,5555;XRS=AAA,DDD;XAF=500000;XAI=5555;XAS=DDD;XGF=1e+06,500000,9e+09;XGI=1111,5555,9999;XGS=A,E,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 0/1:1,4,6:1:1e+06,500000:1111,5555:AAAA,CC:500000:5555:BB:1e+06,500000,9e+09:1111,5555,9999:A,EEEE,FFFFF 0/1:1,4,6:1:1e+06,500000:1111,5555:AAAA,CC:500000:5555:BB:1e+06,500000,9e+09:1111,5555,9999:A,EEEE,FFFFF
+2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC 999 FAIL1 INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 114 . TC TTC 999 PASS INDEL;AN=4;AC=2 GT:DP 0/1:1 0/1:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC . PASS INDEL;AN=4;AC=2 GT:PL:DP 1/0:1,2,3:1 1/0:1,2,3:1
+20 5 id0001 TGGG TG . PASS INDEL;AN=4;AC=2 GT:PL:DP 0/1:1,4,6:1 0/1:1,4,6:1
+20 5 id0002 TGGG TGGGG . PASS INDEL;AN=4;AC=0 GT:PL:DP 0/0:1,7,10:1 0/0:1,7,10:1
+20 5 . TGGG AC . PASS INDEL;AN=4;AC=0 GT:PL:DP 0/0:1,11,15:1 0/0:1,11,15:1
+20 59 id0003 AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2 GT:PL:DP 1/0:0,3,5:1 1/0:0,3,5:1
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2 GT:PL:DP 0/1:0,3,5:1 0/1:0,3,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 275 . A C 0 FAIL1 INDEL;AN=2;AC=0 GT:PL:DP:FGF:FGI:FGS:FSTR 0:0,0:0:1e+06,2e+06:1111,2222:A,BB:WORD 0:0,0:0:1e+06,2e+06:1111,2222:A,BB:WORD
+20 275 . A G 0 FAIL2 INDEL;AN=2;AC=2 GT:PL:DP:FGF:FGI:FGS:FSTR 1:0,0:0:1e+06,3e+06:1111,3333:A,CCC:WORD 1:0,0:0:1e+06,3e+06:1111,3333:A,CCC:WORD
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 300 . A C 998 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 300 . A G 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
b
diff -r 000000000000 -r 2fea169065ec test-data/norm.setref.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.setref.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,45 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAACCCTAAA TAAACCCTAAA,TAA 999 PASS INDEL;AN=4;AC=2,2;DP=19 GT 1/2 1/2
+2 101 . . c 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 105 . n <DEL> 999 PASS END=112;AN=4;AC=3 GT:DP 0/1:1 1/1:1
+2 115 . t c 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . c g 999 PASS INDEL;AN=4;AC=1 GT 0/0 0/1
+20 3 . gact gatg 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 10 . . . 999 PASS INDEL;AN=4;AC=1 GT 1/0 0/0
+20 275 . g c,a,t,aaa 999 PASS INDEL;AN=2;AC=0,2,0,0 GT 2 2
b
diff -r 000000000000 -r 2fea169065ec test-data/norm.split.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.split.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,53 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##contig=<ID=21,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
+2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1
+20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 275 . A C,G 999 PASS INDEL;AN=2;AC=0,2 GT:PL:DP:FGF:FGI:FGS:FSTR 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 300 . T C,G 999 PASS INDEL;AN=0;AC=0,0 GT:PL:DP ./.:0,0,0,0,0,0:0 ./.:0,0,0,0,0,0:0
+21 1 id TTTA TTTATTATTA,TTTATTATTATTATTATTATTA,T,TTATTATTA 999 PASS INDEL;AN=0;AC=0,0,0,0 GT:DP ./.:0 ./.:0
b
diff -r 000000000000 -r 2fea169065ec test-data/norm.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/norm.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,57 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
+2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1
+20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 275 . A C,G 999 PASS INDEL;AN=2;AC=0,2 GT:PL:DP:FGF:FGI:FGS:FSTR 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD 2:0,0,0:0:1e+06,2e+06,3e+06:1111,2222,3333:A,BB,CCC:WORD
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+3 10 . GTGGAC GTGGACACAC,GTGGACAC,GTGGACACACAC,GTGG,GTGGACACACACAC,ATGGACACACAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+3 15 . CACA CAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+4 21 . ATTTTTTTTTTTTTTTC ATTTTTTTTTTTTTTC,ATTTTTTTTTTTTTTTT,ATTTTTTTTTTTTTTTTC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+5 22 . A AGA 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
b
diff -r 000000000000 -r 2fea169065ec test-data/plugin1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/plugin1.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##test=<ID=4,IE=5>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS . GT:GQ ./.:245 ./.:245
+1 3000151 . C T 59.2 PASS . GT:DP:GQ ./.:32:245 ./.:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS . GT:GQ:DP ./.:245:32 ./.:245:32
+1 3106154 . C CT 59.2 PASS . GT:GQ:DP ./.:245:32 ./.:245:32
+1 3157410 . GA G 90.6 q10 . GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS . GT:GQ:DP ./.:212:22 ./.:212:22
+1 3177144 . G T 45 PASS . GT:GQ:DP ./.:150:30 ./.:150:30
+1 3177144 . G . 45 PASS . GT:GQ:DP ./.:150:30 ./.:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS . GT:GQ:DP ./.:12:10 ./.:12:10
+2 3199812 . G GTT,GT 82.7 PASS . GT:GQ:DP ./.:322:26 ./.:322:26
+3 3212016 . CTT C,CT 79 PASS . GT:GQ:DP ./.:91:26 ./.:91:26
+4 3258448 . TACACACAC T 59.9 PASS . GT:GQ:DP ./.:325:31 ./.:325:31
b
diff -r 000000000000 -r 2fea169065ec test-data/query.filter.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.filter.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,20 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS . GT 0 0
+1 3000151 . C T 59.2 PASS . GT 1 0
+1 3000152 . C T 59.2 PASS . GT 0 1
+1 3000153 . C T 59.2 PASS . GT 1 1
b
diff -r 000000000000 -r 2fea169065ec test-data/query.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.out Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,13 @@
+1 3062915 GTTT G 1,2,3,4 4 0/1 GTTT/G 0/1 GTTT/G
+1 3062915 G C,T 1,2,3,4 4 0/1 G/C 0/2 G/T
+1 3062915 GTT G 1,2,3,4 4 0/1 GTT/G 0/1 GTT/G
+1 3106154 CAAA C . 0 . . ./. ./.
+1 3106154 C CT . 4 0/1 C/CT 0/1 C/CT
+1 3157410 G T . 4 1/1 T/T 1/1 T/T
+1 3162006 GAA G . 4 0/1 GAA/G 0/1 GAA/G
+1 3177144 G . . 4 0/0 G/G 0/0 G/G
+1 3184885 TAAAA TA,T . 4 1/2 TA/T 1/2 TA/T
+2 3199812 G GTT,GT . 4 1/2 GTT/GT 1/2 GTT/GT
+3 3212016 CTT C,CT . 4 1/2 C/CT 1/2 C/CT
+4 3258448 TACACACAC T . 4 0/1 TACACACAC/T 0/1 TACACACAC/T
+4 3258449 A C . 4 1/1 C/C 0/1 A/C
b
diff -r 000000000000 -r 2fea169065ec test-data/query.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/query.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,33 @@
+##fileformat=VCFv4.1
+##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
+##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=test,Description="Testing filter">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##contig=<ID=2,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D
+1 3062915 id3D GTTT G 48.7 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:109:25:-10,-5,-20
+1 3062915 idSNP G C,T 419 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:109:35:-10,-5,-20,-20,-5,-20
+1 3062915 id2D GTT G 999 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:109:25:-10,-5,-20
+1 3106154 . CAAA C 72.6 PASS AN=0;AC=0 GT:GQ:DP .:245:32 ./.:145:22
+1 3106154 . C CT 459 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:145:22
+1 3157410 . G T 46.7 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:11:11
+1 3162006 . GAA G 206 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:112:12
+1 3177144 . G . 364 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:20
+1 3184885 . TAAAA TA,T 8.42 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:22:20 1/2:12:10
+2 3199812 . G GTT,GT 291 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:122:16
+3 3212016 . CTT C,CT 52.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:11:16
+4 3258448 . TACACACAC T 123 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:125:11
+4 3258449 . A C 123 PASS AN=4;AC=3 GT:GQ:DP 1/1:325:31 0/1:125:11
b
diff -r 000000000000 -r 2fea169065ec test-data/reheader.hdr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.hdr Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,19 @@
+##fileformat=VCFv4.2
+##INFO=<ID=DP2,Number=2,Type=Integer,Description="Depth">
+##FILTER=<ID=Test,Description="Test filter">
+##FORMAT=<ID=DP2,Number=2,Type=Integer,Description="Depth">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=12,length=123456789>
+##contig=<ID=20,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA0001 BBB0002
b
diff -r 000000000000 -r 2fea169065ec test-data/reheader.samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.samples Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,2 @@
+AAA
+BBB
b
diff -r 000000000000 -r 2fea169065ec test-data/reheader.samples2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.samples2 Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,2 @@
+XY00002 BBB
+XY00001 AAA
b
diff -r 000000000000 -r 2fea169065ec test-data/reheader.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reheader.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,54 @@
+##fileformat=VCFv4.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FSTR,Number=1,Type=String,Description="Test String in FORMAT">
+##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
+##contig=<ID=1,length=2147483647>
+##contig=<ID=2,length=2147483647>
+##contig=<ID=3,length=2147483647>
+##contig=<ID=4,length=2147483647>
+##contig=<ID=5,length=2147483647>
+##contig=<ID=20,length=2147483647>
+##FILTER=<ID=Test,Description="Test Filter">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
+2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
+2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
+20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
+20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
+20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1
+20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4
+20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 95 . TCACCG AAAAAA 999 Test AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13
+20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1
+20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0
+3 10 . GTGGAC GTGGACACAC,GTGGACAC,GTGGACACACAC,GTGG,GTGGACACACACAC,ATGGACACACAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+3 15 . CACA CAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+4 21 . ATTTTTTTTTTTTTTTC ATTTTTTTTTTTTTTC,ATTTTTTTTTTTTTTTT,ATTTTTTTTTTTTTTTTC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
+5 22 . A AGA 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0
b
diff -r 000000000000 -r 2fea169065ec test-data/stats.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats.a.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 1000 . G A . PASS . GT 0/0 0/1 1/1
+1 1001 . G A . PASS . GT 0/0 0/1 1/1
+1 1002 . G A . PASS . GT 0/0 0/1 1/1
b
diff -r 000000000000 -r 2fea169065ec test-data/stats.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/stats.b.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=1,assembly=b37,length=249250621>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 1000 . G A . PASS . GT 0/1 0/0 0/0
+1 1001 . G A . PASS . GT 0/0 0/0 0/0
+1 1002 . G A . PASS . GT 0/0 0/1 0/0
b
diff -r 000000000000 -r 2fea169065ec test-data/vcf2sex.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcf2sex.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.2
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##contig=<ID=X,assembly=b37,length=249250621>
+##contig=<ID=Y,assembly=b37,length=249250621>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT MALE FEMALE
+X 1 . C A . PASS . GT 1 0/1
+X 12000 . C A . PASS . GT 0 0/1
+X 24000 . C A . PASS . GT 1 0/1
+X 36000 . C A . PASS . GT 0 0/1
+X 48000 . C A . PASS . GT 1 0/1
+X 60000 . C A . PASS . GT 0 0/1
+X 100000 . C A . PASS . GT 0/1 0/1
+X 2699521 . C A . PASS . GT 0 0/1
+X 33145825 . C A . PASS . GT 1 0/1
+X 63592129 . C A . PASS . GT 1 0/1
+X 94038433 . C A . PASS . GT 0 0/1
+X 124484737 . C A . PASS . GT 0 0/1
+X 154931043 . C A . PASS . GT 0/1 0/1
+Y 1 . C A . PASS . GT 0 .
+Y 11874713 . C A . PASS . GT 0 .
+Y 23749426 . C A . PASS . GT 0 .
+Y 35624139 . C A . PASS . GT 0 .
+Y 47498852 . C A . PASS . GT 0 .
+Y 59373565 . C A . PASS . GT 0 .
b
diff -r 000000000000 -r 2fea169065ec test-data/view.GL.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.GL.vcf Wed Jul 06 07:08:15 2016 -0400
[
@@ -0,0 +1,29 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+11 2343543 . A . 999 PASS . GL 0,-25.5,-25.5 0,-25.5,-25.5 0,-25.5,-25.5
+11 5464562 . C T 999 PASS . GL 0,0,0 0,0,0 0,0,0
+20 76962 rs6111385 T C 999 PASS . GL -25.5,0,-25.5 -25.5,-25.5,0 -25.5,-25.5,0
+20 126310 . ACC A 999 StrandBias;EndDistBias . GL -25.5,0,-13.2 -25.5,0,-13.9 -25.5,-21.3,0
+20 138125 rs2298108 G T 999 PASS . GL -13.5,0,-16.3 -14,0,-25.5 -25.5,-19.9,0
+20 138148 rs2298109 C T 999 PASS . GL -19.5,0,-25.5 -19.2,0,-25.5 -25.5,-23.5,0
+20 271225 . T TTTA,TA 999 StrandBias . GL -15.1,-5.3,-20.3,0,-5.2,-15.9 -25.5,0,-21.3,-25.5,-25.5,-25.5 -25.5,-25.5,-25.5,-25.5,0,-24.1
+20 304568 . C T 999 PASS . GL -9.5,0,-25.5 -19.2,0,-25.5 -25.5,-9.5,0
+20 326891 . A AC 999 PASS . GL -25.5,0,-13.2 -25.5,0,-13.9 .,.,.
+X 2928329 rs62584840 C T 999 PASS . GL 0,-5.6 0,-8.1 -7.3,0,-1.9
+X 2933066 rs61746890 G C 999 PASS . GL 0,-25.5 0,-25.5 -25.5,-25.5,-25.5
+X 2942109 rs5939407 T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,-15.7,0
+X 3048719 . T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,0,-15.7
+Y 8657215 . C A 999 PASS . GL 0,-25.5 -25.5,0 .
+Y 10011673 rs78249411 G A 999 MinAB . GL -12.6,-10.1 -9.5,0 .
b
diff -r 000000000000 -r 2fea169065ec test-data/view.bcf
b
Binary file test-data/view.bcf has changed
b
diff -r 000000000000 -r 2fea169065ec test-data/view.bcf.csi
b
Binary file test-data/view.bcf.csi has changed
b
diff -r 000000000000 -r 2fea169065ec test-data/view.filter.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.filter.vcf Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.2
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=STR,Number=A,Type=String,Description="Testing string and Number=A in INFO">
+##INFO=<ID=TXT0,Number=1,Type=String,Description="Testing in INFO">
+##INFO=<ID=TXT,Number=.,Type=String,Description="Testing in INFO">
+##INFO=<ID=XRF,Number=R,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAF,Number=A,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGF,Number=G,Type=Float,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRI,Number=R,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAI,Number=A,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGI,Number=G,Type=Integer,Description="Test Number=AGR in INFO">
+##INFO=<ID=XRS,Number=R,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XAS,Number=A,Type=String,Description="Test Number=AGR in INFO">
+##INFO=<ID=XGS,Number=G,Type=String,Description="Test Number=AGR in INFO">
+##SAMPLE=<ID=NORMAL,SampleName=B,Description="Less-than (\"<\") and greater-than (\">\") quoting nonsense where double brackets would do just fine",softwareName=<Nonsense,Software>,softwareVer=<119,65>,softwareParam=<.>,MetadataResource=http://somewhere.com/path>
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="test">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=FGS,Number=G,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGI,Number=G,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FGF,Number=G,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAS,Number=A,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAI,Number=A,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FAF,Number=A,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRS,Number=R,Type=String,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRI,Number=R,Type=Integer,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=FRF,Number=R,Type=Float,Description="Test Number=AGR in FORMAT">
+##FORMAT=<ID=STR,Number=1,Type=String,Description="Test">
+##FILTER=<ID=q20,Description="Mapping quality below 20">
+##contig=<ID=1,assembly=b37,length=249250621>
+##contig=<ID=2,assembly=b37,length=243199373>
+##contig=<ID=3,assembly=b37,length=198022430>
+##contig=<ID=4,assembly=b37,length=191154276>
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3162006 . GAA G,GA 238 PASS DP=19;AN=4;AC=1,1;XRF=1e6,2e6,3e6;XRI=1111,2222,3333;XRS=ABC,DEF,GHI;XAF=1e6,2e6;XAI=1111,2222;XAS=ABC,DEF;XGF=1e6,2e6,3e6,4e6,5e6,6e6;XGI=11,22,33,44,55,66;XGS=ABC,DEF,GHI,JKL,MNO,PQR;TXT=ABC,DEF,GHI GT:GQ:DP:STR 0/1:589:19:XX 0/2:1:1:YY
+1 3162007 . TAGGG CAGGG,CAGGT 238 PASS AO=52101,113;CIGAR=1X4M,1X3M1X;TXT0=text GT:FGS:FGI:FGF:FAS:FAI:FAF:FRS:FRI:FRF 0/1:AAAAAA,BBBBB,CCCC,DDD,EE,F:1,2,3,4,5,6:1e-1,2e-2,3e-3,4e-4,5e-5,6e-6:AAA,B:1,2:1e-1,2e-2:A,BB,CCC:1,2,3:1e-1,2e-2,3e-3 2:AAAAAA,BBB,C:1,2,3:1e-1,2e-2,3e-3:AAA,B:1,2:1e-1,2e-2:A,BB,CCC:1,2,3:1e-1,2e-2,3e-3
b
diff -r 000000000000 -r 2fea169065ec test-data/view.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/view.vcf Wed Jul 06 07:08:15 2016 -0400
[
@@ -0,0 +1,46 @@
+##fileformat=VCFv4.1
+##reference=file:///seq/references/1000Genomes-NCBI37.fasta
+##contig=<ID=11,length=135006516>
+##contig=<ID=20,length=63025520>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of reads containing spanning deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest contiguous homopolymer run of variant allele in either direction">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Hardy-Weinberg equilibrium test (PMID:15789306)">
+##INFO=<ID=ICF,Number=1,Type=Float,Description="Inbreeding coefficient F">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total mapping quality zero reads">
+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant confidence/quality by depth">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
+##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
+##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
+##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+11 2343543 . A . 999 PASS DP=100223 GT:PL:DP:GQ 0/0:0,255,255:193:99 0/0:0,255,255:211:99 0/0:0,255,255:182:99
+11 5464562 . C T 999 PASS DP=0 GT:PL:DP:GQ ./.:0,0,0:.:. ./.:0,0,0:.:. ./.:0,0,0:.:.
+20 76962 rs6111385 T C 999 PASS DP4=110138,70822,421911,262673;DP=911531;Dels=0;FS=21.447;HWE=0.491006;ICF=-0.01062;MQ0=1;MQ=46;PV4=2.5e-09,0,0,1;QD=22.31 GT:PL:DP:GQ 0/1:255,0,255:193:99 1/1:255,255,0:211:99 1/1:255,255,0:182:99
+20 126310 . ACC A 999 StrandBias;EndDistBias DP4=125718,95950,113812,80890;DP=461867;HWE=0.24036;ICF=0.01738;INDEL;IS=374,0.937343;MQ=49;PV4=9e-30,1,0,3.8e-13;QD=0.0172;AN=6;AC=4 GT:DP:GQ:PL 0/1:117:99:255,0,132 0/1:111:99:255,0,139 1/1:78:99:255,213,0
+20 138125 rs2298108 G T 999 PASS DP4=174391,20849,82080,4950;DP=286107;Dels=0;FS=3200;HWE=0.199462;ICF=0.01858;MQ0=0;MQ=46;PV4=0,0,0,1;QD=17.22;AN=6;AC=4 GT:PL:DP:GQ 0/1:135,0,163:66:99 0/1:140,0,255:71:99 1/1:255,199,0:66:99
+20 138148 rs2298109 C T 999 PASS DP4=194136,45753,94945,14367;DP=356657;Dels=0;FS=3200;HWE=0.177865;ICF=0.0198;MQ0=0;MQ=47;PV4=0,0,0,1;QD=14.57;AN=6;AC=4 GT:PL:DP:GQ 0/1:195,0,255:87:99 0/1:192,0,255:82:99 1/1:255,235,0:78:99
+20 271225 . T TTTA,TA 999 StrandBias DP4=29281,42401,27887,29245;DP=272732;INDEL;IS=95,0.748031;MQ=47;PV4=0,1,0,1;QD=0.0948;AN=6;AC=2,2 GT:DP:GQ:PL 0/2:33:49:151,53,203,0,52,159 0/1:51:99:255,0,213,255,255,255 1/2:47:99:255,255,255,255,0,241
+20 304568 . C T 999 PASS DP4=16413,4543,945,156;DP=43557;Dels=0;FS=3200;HWE=0.076855;ICF=0.0213;MQ0=0;MQ=50;PV4=0,0,0,1;QD=15.45;AN=6;AC=4 GT:PL:DP:GQ 0|1:95,0,255:90:99 0|1:192,0,255:13:99 1|1:255,95,0:60:99
+20 326891 . A AC 999 PASS DP4=125718,95950,113812,80890;DP=461867;HWE=0.24036;ICF=0.01738;INDEL;IS=374,0.937343;MQ=49;PV4=9e-30,1,0,3.8e-13;QD=0.0172;AN=4;AC=2 GT:DP:GQ:PL 0|1:117:99:255,0,132 0|1:111:99:255,0,139 ./.:.:.:.,.,.
+X 2928329 rs62584840 C T 999 PASS DP4=302,9137,32,1329;DP=11020;Dels=0;FS=13.38;HWE=0.284332;ICF=0.0253;MQ0=0;MQ=49;PV4=0.094,0,0,1;QD=18.61;AN=4;AC=1 GT:PL:DP:GQ 0:0,56:2:73 0:0,81:3:98 0/1:73,0,19:4:30
+X 2933066 rs61746890 G C 999 PASS DP4=69865,100561,461,783;DP=173729;Dels=0;FS=10.833;MQ0=0;MQ=50;PV4=0.005,3.6e-14,0,1;QD=15.33;AN=4;AC=1 GT:PL:DP:GQ 0:0,255:39:99 0:0,255:37:99 0/1:255,255,255:62:99
+X 2942109 rs5939407 T C 999 PASS DP4=23273,27816,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=4;AC=3 GT:PL:DP:GQ 0:0,255:20:99 1:255,0:33:99 1/1:255,157,0:52:99
+X 3048719 . T C 999 PASS DP4=13263,27466,40128,48208;DP=146673;Dels=0;FS=43.639;HWE=0.622715;ICF=-0.01176;MQ0=1;MQ=46;PV4=0.65,1,0,1;QD=14.81;AN=4;AC=3 GT:PL:DP:GQ 0:0,255:20:99 1:255,0:33:99 0|1:255,0,157:52:99
+Y 8657215 . C A 999 PASS DP4=74915,114274,1948,2955;DP=195469;Dels=0;FS=3.181;MQ0=0;MQ=50;PV4=0.86,1,0,1;QD=33.77;AN=2;AC=1 GT:PL:DP:GQ 0:0,255:47:99 1:255,0:64:99 .
+Y 10011673 rs78249411 G A 999 MinAB DP4=47351,30839,178796,279653;DP=550762;Dels=0;FS=41.028;MQ0=37362;MQ=26;PV4=0,0,0,1;QD=17.45;AN=2;AC=2 GT:PL:DP:GQ 1:126,101:146:37 1:95,0:130:99 .
b
diff -r 000000000000 -r 2fea169065ec tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Jul 06 07:08:15 2016 -0400
b
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bcftools" version="1.3">
+        <repository changeset_revision="43a9aebf3adb" name="package_bcftools_1_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="tabix" version="0.2.6">
+        <repository changeset_revision="389d2376b60b" name="package_tabix_0_2_6" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="1.2">
+        <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>