changeset 0:3123ce7acd0e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
author iuc
date Fri, 11 Jan 2019 17:50:55 -0500
parents
children 162a9d4a3bdc
files gemini_inheritance.xml gemini_macros.xml readme.rst repository_dependencies.xml static/images/gemini_mendel_errors.png test-data/anno.bed test-data/gemini_actionable_mutations_result.tabular test-data/gemini_amend.ped test-data/gemini_amend.vcf test-data/gemini_amend_input.db test-data/gemini_annotate_result.db test-data/gemini_auto_dom_input.db test-data/gemini_auto_rec_input.db test-data/gemini_burden_calpha_template.tabular test-data/gemini_burden_count_highimpact_result.tabular test-data/gemini_burden_count_nonsynonymous_result.tabular test-data/gemini_comphets_input.db test-data/gemini_de_novo_input.db test-data/gemini_fusions_result.tabular test-data/gemini_is_somatic_result.db test-data/gemini_load_input.vcf test-data/gemini_load_result1.db test-data/gemini_load_result2.db test-data/gemini_versioned_databases.loc test-data/gemini_windower_template.tabular test-data/test-cache/gemini-config.yaml test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv 
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/detailed_gene_table_v75 test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi test-data/test-cache/gemini/data/hg19.CpG.bed.gz test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi test-data/test-cache/gemini/data/hg19.cytoband.bed.gz test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi test-data/test-cache/gemini/data/hg19.dgv.bed.gz test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi test-data/test-cache/gemini/data/hg19.gwas.bed.gz test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi test-data/test-cache/gemini/data/hg19.rmsk.bed.gz test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi test-data/test-cache/gemini/data/hg19.segdup.bed.gz 
test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi test-data/test-cache/gemini/data/hprd_interaction_edges.gz test-data/test-cache/gemini/data/kegg_pathways_ensembl66 test-data/test-cache/gemini/data/kegg_pathways_ensembl67 test-data/test-cache/gemini/data/kegg_pathways_ensembl68 test-data/test-cache/gemini/data/kegg_pathways_ensembl69 test-data/test-cache/gemini/data/kegg_pathways_ensembl70 test-data/test-cache/gemini/data/kegg_pathways_ensembl71 test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi test-data/test-cache/gemini/data/summary_gene_table_v75 test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi test-data/util/README.rst test-data/util/build-data/anno.bed test-data/util/build-data/anno.bed.gz test-data/util/build-data/anno.bed.gz.tbi test-data/util/build-data/gemini_load_input.vcf test-data/util/build-data/test.auto_dom.ped test-data/util/build-data/test.auto_dom.vcf test-data/util/build-data/test.auto_rec.ped test-data/util/build-data/test.auto_rec.vcf test-data/util/build-data/test.comp_het.ped test-data/util/build-data/test.comp_het.vcf test-data/util/build-data/test.de_novo.ped test-data/util/build-data/test.de_novo.vcf test-data/util/build-gemini-testdata.sh test-data/util/shrink_simple_tab.py test-data/util/shrink_tabix.py tool-data/gemini_versioned_databases.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 106 files changed, 2343 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_inheritance.xml	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,486 @@
+<tool id="gemini_inheritance" name="GEMINI inheritance pattern" version="@VERSION@">
+    <description>based identification of candidate genes</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <xml name="name_X">
+            <param name="X" type="text" value=""
+            label="Alias to use for X chromosome"
+            help="The tool expects the X chromosome to be named 'X' or 'chrX'. If the reference genome used for variant calling had a different name for it, you will have to specify it here." />
+        </xml>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        gemini ${inheritance.pattern_type}
+            ## One option per non-empty constraint; comp_hets constraints flagged overwrite_default_filter go to --gene-where.
+            #for $cond in $inheritance.constraint:
+                #set $filter = str($cond.filter).strip()
+                #if str($filter):
+                    #if str($inheritance.pattern_type) == "comp_hets" and $cond.overwrite_default_filter:
+                        --gene-where '$filter'
+                    #else:
+                        --filter '$filter'
+                    #end if
+                #end if
+            #end for
+            ## by_pattern_only is only declared in the comp_hets and mendel_errors cases of the conditional.
+            #if str($inheritance.pattern_type) in ("comp_hets", "mendel_errors"):
+                ${inheritance.by_pattern_only}
+            #end if
+            ## Cases without a real lenient / allow_unaffected option declare an empty hidden placeholder instead.
+            ${inheritance.lenient}
+            ${inheritance.allow_unaffected}
+            ## Forward the X chromosome alias only when non-blank, with surrounding whitespace removed.
+            #if str($inheritance.pattern_type).startswith('x_linked_') and str($inheritance.X).strip():
+                -X "${str($inheritance.X).strip()}"
+            #end if
+            ## Numeric options below are omitted unless set above 0 (above -1 for the PL maximum).
+            #if int($family_wise.min_kindreds) > 0:
+                --min-kindreds ${family_wise.min_kindreds}
+            #end if
+
+            #if str($family_wise.families).strip():
+                #set $families = ','.join([f.strip() for f in $family_wise.families.split(',')])
+                --families "$families"
+            #end if
+
+            #if int($family_wise.per_variant_selection.min_dp) > 0:
+                -d ${family_wise.per_variant_selection.min_dp}
+            #end if
+
+            #if int($family_wise.per_variant_selection.min_gq) > 0:
+                --min-gq ${family_wise.per_variant_selection.min_gq}
+            #end if
+
+            #if int($family_wise.per_variant_selection.max_pl) > -1:
+                --gt-pl-max ${family_wise.per_variant_selection.max_pl}
+            #end if
+            ## NOTE(review): report is presumably read by the COLUMN_SELECT token expanded below (defined outside this view) - confirm.
+            #set $report = $oformat.report
+            @COLUMN_SELECT@
+
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <inputs>
+        <expand macro="infile" />
+        <conditional name="inheritance">
+            <param name="pattern_type" type="select"
+            label="Your assumption about the inheritance pattern of the phenotype of interest">
+                <option value="autosomal_recessive">Autosomal recessive</option>
+                <option value="autosomal_dominant">Autosomal dominant</option>
+                <option value="x_linked_recessive">X-linked recessive</option>
+                <option value="x_linked_dominant">X-linked dominant</option>
+                <option value="de_novo">Autosomal de-novo</option>
+                <option value="x_linked_de_novo">X-linked de-novo</option>
+                <option value="comp_hets">Compound heterozygous</option>
+                <option value="mendel_errors">Violation of mendelian laws (LOH, plausible and implausible de-novo, uniparental disomy)</option>
+            </param>
+            <when value="comp_hets">
+                <expand macro="insert_constraint">
+                    <expand macro="overwritable_where_default" default_where="exonic and high-impact variants (SQL clause: is_exonic = 1 or impact_severity != 'LOW')" />
+                </expand>
+                <param argument="--pattern-only" name="by_pattern_only" type="boolean" truevalue="--pattern-only" falsevalue="" checked="false"
+                label="Ignore sample phenotypes during variant identification"
+                help="When turned on, the identification of compound heterozygous variant pairs gets based on the family tree only, i.e., the tool looks for heterozygous allele pairs in any kid that weren't occurring together in the parents (see the tool help below for the exact criteria used to detect compound heterozygosity)." />
+                <expand macro="lenient" argument="--max-priority" truevalue="--max-priority 3"
+                help="When turned on, runs the tool with --max-priority 3 instead of the default value 1. This leads to inclusion of more ambiguous cases of compound heterozygosity." />
+                <expand macro="unaffected" />
+            </when>
+            <when value="mendel_errors">
+                <expand macro="insert_constraint" />
+                <param argument="--only-affected" name="by_pattern_only" type="boolean" truevalue="" falsevalue="--only-affected" checked="false"
+                label="Ignore sample phenotypes during variant identification"
+                help="When turned on, the identification of candidate variants gets based on the observed inheritance pattern only. The default is to report candidates only if there is evidence for them being phenotypically relevant, i.e., if they are observed in an affected sample." />
+                <expand macro="lenient" />
+                <param name="allow_unaffected" type="hidden" value="" />
+            </when>
+            <when value="autosomal_recessive">
+                <expand macro="insert_constraint" />
+                <expand macro="lenient" />
+                <expand macro="unaffected" />
+            </when>
+            <when value="autosomal_dominant">
+                <expand macro="insert_constraint" />
+                <expand macro="lenient" />
+                <expand macro="unaffected" />
+            </when>
+            <when value="x_linked_recessive">
+                <expand macro="insert_constraint" />
+                <param name="lenient" type="hidden" value="" />
+                <expand macro="unaffected" />
+                <expand macro="name_X" />
+            </when>
+            <when value="x_linked_dominant">
+                <expand macro="insert_constraint" />
+                <param name="lenient" type="hidden" value="" />
+                <expand macro="unaffected" />
+                <expand macro="name_X" />
+            </when>
+            <when value="de_novo">
+                <expand macro="insert_constraint" />
+                <expand macro="lenient" />
+                <expand macro="unaffected" />
+            </when>
+            <when value="x_linked_de_novo">
+                <expand macro="insert_constraint" />
+                <param name="lenient" type="hidden" value="" />
+                <expand macro="unaffected" />
+                <expand macro="name_X" />
+            </when>
+        </conditional>
+        <section name="family_wise" title="Family-wise criteria for variant selection" expanded="true">
+            <expand macro="min_kindreds" />
+            <param argument="--families" name="families" type="text" value=""
+            label="List of families to restrict the analysis to (comma-separated)"
+            help="Leave empty for an analysis including all families"/>
+            <conditional name="per_variant_selection">
+                <param name="enabled" type="select"
+                label="Specify additional criteria to exclude families on a per-variant basis">
+                    <option value="no">No, analyze all variants from all included families</option>
+                    <option value="yes">Yes, filter variants within families</option>
+                </param>
+                <when value="no">
+                    <param name="min_dp" type="hidden" value="0" />
+                    <param name="min_gq" type="hidden" value="0" />
+                    <param name="max_pl" type="hidden" value="-1" />
+                </when>
+                <when value="yes">
+                    <param argument="-d" name="min_dp" type="integer" value="0" min="0"
+                    label="Per-variant DP threshold for including a family"
+                    help="All samples from a family must have a sequencing depth of at least this value at a given variant site in order for the family to be included in the analysis of this particular variant. default: 0 (do not apply this filter)" />
+                    <param argument="--min-gq" name="min_gq" type="integer" value="0" min="0"
+                    label="per-variant GQ threshold for including a family"
+                    help="The genotypes of all samples from a family must be called with at least this quality at a given variant site in order for the family to be included in the analysis of this particular variant. default: 0 (do not apply this filter)">
+                    </param>
+                    <param argument="--gt-pl-max" name="max_pl" type="integer" value="-1" min="-1"
+                    label="per-variant PL threshold for including a family"
+                    help="The genotypes at a given variant site of all samples from a family must not have a higher (phred-scaled) likelihood to be wrong than this value in order for the family to be included in the analysis of this particular variant. default: -1 (do not apply this filter); if used the GEMINI documentation suggests 10 as a reasonable value" />
+                </when>
+            </conditional>
+        </section>
+        <section name="oformat" title="Output - included information" expanded="true">
+            <expand macro="column_filter" help="The tool reports key information about the inheritance pattern detection for each candidate variant found. It can precede each such row with additional columns, listing information about the variant taken from the variants table of the GEMINI database. Here, you can control which subset of the variants table columns should be added to the output."/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular"
+        label="GEMINI ${inheritance.pattern_type} pattern on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="autosomal_dominant" />
+                <param name="lenient" value="true" />
+            </conditional>
+            <conditional name="report">
+                <param name="report_selector" value="minimal" />
+            </conditional>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="variant_id&#009;gene&#009;.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="autosomal_dominant" />
+                <param name="lenient" value="true" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="report_selector" value="custom" />
+                    <param name="columns" value="gene,chrom,impact" />
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="gene&#009;chrom&#009;impact.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_auto_dom_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="autosomal_dominant" />
+                <param name="lenient" value="true" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="report_selector" value="custom" />
+                    <!-- test with empty multiselect list and columns specified
+                    via text field instead -->
+                    <param name="extra_cols" value="gene,chrom,impact" />
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="gene&#009;chrom&#009;impact.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_auto_rec_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="autosomal_recessive" />
+                <param name="lenient" value="true" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="report_selector" value="custom" />
+                    <param name="columns" value="gene,chrom,impact" />
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="gene&#009;chrom&#009;impact.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="de_novo" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="report_selector" value="custom" />
+                    <param name="columns" value="gene,ref,alt,impact" />
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="gene&#009;ref&#009;alt&#009;impact.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="comp_hets" />
+                <param name="lenient" value="true" />
+                <param name="allow_unaffected" value="true" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="report_selector" value="custom" />
+                    <param name="columns" value="chrom,start,end,ref,alt,gene,impact" />
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="chrom&#009;start&#009;end&#009;.*gene.*" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
+            <conditional name="inheritance">
+                <param name="pattern_type" value="mendel_errors" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="report_selector" value="custom" />
+                    <param name="columns" value="gene,ref,alt,impact" />
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="gene&#009;ref&#009;alt&#009;impact&#009;.*violation.*" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+**What it does**
+
+Assuming you have defined the familial relationships between samples when
+loading your VCF into GEMINI, you can use this tool to identify candidate genes
+and variants that explain the inheritance pattern of a phenotype of interest.
+
+**Inheritance pattern detection rules**
+
+*Autosomal recessive*
+
+Criteria:
+
+- all affected must be hom_alt
+- [affected] no unaffected can be hom_alt (can be unknown)
+- [default] if parents exist they must be unaffected and het for all affected kids
+- [default] if there are no affecteds that have a parent, a warning is issued.
+
+If ``--lenient`` is specified, the 2 criteria prefixed with “[default]” are not
+applied.
+
+If ``--allow-unaffected`` is specified, the criterion prefixed with
+“[affected]” is not enforced.
+
+----
+
+*Autosomal dominant*
+
+Criteria:
+
+- All affecteds must be het
+- [affected] No unaffected can be het or homalt (can be unknown)
+- de_novo mutations are not auto_dom (at least not in the first generation)
+- At least 1 affected must have 1 affected parent (or have no parents).
+- If no affected has a parent, a warning is issued.
+- [default] All affecteds must have parents with known phenotype.
+- [default] All affected kids must have at least 1 affected parent
+
+If ``--lenient`` is specified, the criteria prefixed with “[default]” are not
+enforced.
+
+If ``--allow-unaffected`` is specified, the criterion prefixed with
+“[affected]” is not enforced.
+
+Note that, for autosomal dominant, ``--lenient`` allows singleton affecteds to
+be used to meet the ``--min-kindreds`` requirement if they are HET.
+
+If there is incomplete penetrance in the kindred (unaffected obligate carriers),
+these individuals currently must be coded as having unknown phenotype or as
+being affected.
+
+----
+
+*X-linked recessive*
+
+Criteria:
+
+- Affected females must be HOM_ALT
+- Unaffected females are HET or HOM_REF
+- Affected males are not HOM_REF
+- Unaffected males are HOM_REF
+
+Note: Pseudo-autosomal regions are not accounted for by the tool.
+
+----
+
+*X-linked dominant*
+
+Criteria:
+
+- Affected males are HET or HOM_ALT
+- Affected females must be HET
+- Unaffecteds must be HOM_REF
+- girls of affected dad must be affected
+- boys of affected dad must be unaffected
+- mothers of affected males must be het (and affected)
+- at least 1 parent of affected females must be het (and affected).
+
+Note: Pseudo-autosomal regions are not accounted for by the tool.
+
+----
+
+*De-novo mutations*
+
+Criteria:
+
+- all affected must be het
+- [affected] all unaffected must be homref or homalt
+- at least 1 affected kid must have unaffected parents
+- [default] if an affected has affected parents, it’s not de_novo
+- [default] all affected kids must have unaffected (or no) parents
+- [default] warning if none of the affected samples have parents.
+
+The last 3 items, prefixed with [default], can be turned off with ``--lenient``.
+
+If ``--allow-unaffected`` is specified, then the criterion prefixed [affected]
+is not enforced.
+
+----
+
+*X-linked de-novo mutations*
+
+Criteria:
+
+- affected female child must be het
+- affected male child must be hom_alt (or het)
+- parents should be unaffected and hom_ref
+
+Note: Pseudo-autosomal regions are not accounted for by the tool.
+
+----
+
+*Compound heterozygosity*
+
+Unlike canonical recessive sites where the same recessive allele is inherited
+from both parents at the *same* site in the gene, compound heterozygosity
+occurs when the individual’s phenotype is caused by two heterozygous recessive
+alleles at *different* sites in a particular gene.
+
+To detect compound heterozygosity, the tool looks for two heterozygous variants
+impacting the same gene at different loci. The complicating factor is that this
+is a case of *recessive* inheritance and as such, we must also require that the
+consequential alleles at each heterozygous site were inherited on different
+chromosomes (one from each parent). Hence, where possible, the tool will phase
+by transmission.
+
+Criteria (default):
+
+- All affected individuals must be heterozygous at both sites.
+- No unaffected can be homozygous alternate at either site.
+- Neither parent of an affected sample can be homozygous reference at both
+  sites.
+- If any unphased-unaffected is het at both sites, the site will be given lower
+  priority.
+- No phased-unaffected can be heterozygous at both sites.
+
+  a. ``--allow-unaffected`` keeps sites where a phased unaffected shares the
+     het-pair
+  b. unphased, unaffected that share the het pair are counted and reported for
+     each candidate pair.
+- Candidates where an affected from the same family does NOT share the same het
+  pair are removed.
+- Sites are automatically phased by transmission when parents are present in
+  order to remove false positive candidates.
+  
+  If data from one or both parents are unavailable and the child’s data was not
+  phased prior to loading into GEMINI, all comp_het variant pairs will
+  automatically be given at most priority == 2. If there’s only a single parent
+  and both the parent and the affected are HET at both sites, the candidate
+  will have priority 3.
+  
+Criteria (``--pattern-only``):
+
+- Kid must be HET at both sites.
+- Kid must have alts on different chromosomes.
+- Neither parent can be HOM_ALT at either site.
+- If either parent is phased at both sites and matches the kid, it’s excluded.
+- When the above criteria are met, and both parents and kid are phased or
+  parents are HET at different sites, the priority is 1.
+- If either parent is HET at both sites, priority is reduced.
+- If both parents are not phased, the priority is 2.
+- For every parent that’s a het at both sites, the priority is incremented by 1.
+- The priority in a family is the minimum found among all kids.
+
+----
+
+*Violation of Mendelian laws*
+
+The tool can be used to detect the following kinds of non-Mendelian patterns:
+
+- loss of heterozygosity (LOH) events
+- de-novo mutations
+- implausible de-novo mutations
+- potential cases of uniparental disomy
+
+Criteria:
+
+- LOH: child and one parent are opposite homozygotes; other parent is HET
+- plausible de novo: kid is het. parents are same homozygotes
+- implausible de novo: kid is homozygote. parents are same homozygotes and opposite to kid.
+- uniparental disomy: parents are opposite homozygotes; kid is homozygote
+
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,254 @@
+<macros>
+    <!-- GEMINI release these wrappers target: reused as tool version, package requirement version and accepted GEMINI database file version -->
+    <token name="@VERSION@">0.20.1</token>
+    <!-- minimal annotation files version required by this version of GEMINI; matched against column 2 of the gemini_versioned_databases data table -->
+    <token name="@DB_VERSION@">200</token>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">gemini</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <version_command>gemini --version</version_command>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+            <yield />
+        </stdio>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>
+            <yield />
+        </citations>
+    </xml>
+
+    <xml name="annotation_dir">
+        <param name="annotation_databases" type="select" label="Choose a gemini annotation source">
+            <options from_data_table="gemini_versioned_databases">
+                <filter type="sort_by" column="0" />
+                <filter type="static_value" column="2" value="@DB_VERSION@" />
+            </options>
+        </param>
+    </xml>
+
+    <xml name="infile">
+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." >
+            <options options_filter_attribute="metadata.gemini_version" >
+                <filter type="add_value" value="@VERSION@" />
+            </options>
+        </param>
+    </xml>
+
+    <xml name="add_header_column">
+        <param argument="--header" name="header" type="boolean" truevalue="--header" falsevalue="" checked="True" 
+        label="Add a header of column names to the output" />
+    </xml>
+
+    <!-- Conditional letting the user pick the columns of the variant report:
+         "minimal" uses the preconfigured list from the minimalset token,
+         "full" reports all columns (and therefore defines no column params),
+         "custom" combines a checkbox list with a free-text list of extra column names.
+         The help token supplies the help text of the selector. -->
+    <xml name="column_filter" token_help="" token_minimalset="variant_id, gene">
+        <conditional name="report">
+            <param name="report_selector" type="select"
+            label="Set of columns to include in the variant report table"
+            help="@HELP@">
+                <option value="minimal">Minimal (report only a preconfigured minimal set of columns)</option>
+                <option value="full">Full (report all columns defined in the GEMINI database variants table)</option>
+                <option value="custom">Custom (report user-specified columns)</option>
+            </param>
+            <when value="full" />
+            <when value="minimal">
+                <!-- hidden params mirror the names used in the custom branch so downstream template code can read them uniformly -->
+                <param name="columns" type="hidden" value="@MINIMALSET@" />
+                <param name="extra_cols" type="hidden" value="" />
+            </when>
+            <when value="custom">
+                <param name="columns" type="select" display="checkboxes" multiple="true" optional="true"
+                label="Choose columns to include in the report" help="(--columns)">
+                    <option value="gene">gene</option>
+                    <option value="chrom">chrom</option>
+                    <option value="start">start</option>
+                    <option value="end">end</option>
+                    <option value="ref">ref</option>
+                    <option value="alt">alt</option>
+                    <option value="impact">impact</option>
+                    <option value="impact_severity">impact_severity</option>
+                    <option value="max_aaf_all">alternative allele frequency (max_aaf_all)</option>
+                </param>
+                <param name="extra_cols" type="text"
+                label="Additional columns (comma-separated)"
+                help="Columns must be specified by the exact names they have in the GEMINI database, e.g., is_exonic or num_hom_alt, but, for genotype columns, GEMINI wildcard syntax is supported. The order of columns in the list is maintained in the output.">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Free-text SQL constraint parameter. The command-line argument it documents comes from the argument token (the filter option by default); user input passes through the sanitize_query sanitizer. -->
+    <xml name="filter" token_argument="--filter">
+        <param name="filter" argument="@ARGUMENT@" type="text"
+        help="Constraints defined here will become the WHERE clause of the SQL query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'."
+        label="Additional constraints expressed in SQL syntax">
+            <expand macro="sanitize_query"/>
+        </param>
+    </xml>
+
+    <!-- Sanitizer for SQL-like text parameters: every printable character is valid except the single quote, which is mapped to '"'"' so that it survives inside a single-quoted shell argument. Other invalid characters are replaced by the empty string. -->
+    <xml name="sanitize_query">
+        <sanitizer invalid_char="">
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+
+    <!-- Boolean switch (off by default) for relaxed inheritance-pattern matching. The argument, truevalue and help tokens let each including tool override the option name, the emitted flag and the help text. -->
+    <xml name="lenient"
+    token_argument="--lenient"
+    token_truevalue="--lenient"
+    token_help="The exact consequence of this setting depends on the type of inheritance pattern you are looking for (see the tool help below).">
+        <param argument="@ARGUMENT@" name="lenient" type="boolean"
+        truevalue="@TRUEVALUE@" falsevalue="" checked="False"
+        label="Include hits with less convincing inheritance patterns"
+        help="@HELP@"/>
+    </xml>
+
+    <!-- Boolean switch (off by default) that allows candidate variants to be reported even when unaffected family members carry them. -->
+    <xml name="unaffected">
+        <param argument="--allow-unaffected" name="allow_unaffected" type="boolean"
+        truevalue="--allow-unaffected" falsevalue="" checked="False"
+        help="Activating this option will enable the reporting of variants as candidate causative even if they are shared by unaffected samples in the family tree. The default will only report variants that are unique to affected samples."
+        label="Report candidates shared by unaffected samples"/>
+    </xml>
+
+    <!-- Integer parameter (default 1, minimum 1) for the minimum number of families required; label and help can be overridden through the label and help tokens. -->
+    <xml name="min_kindreds"
+    token_label="Minimum number of families with a candidate variant for a gene to be reported"
+    token_help="This is the number of families required to have a variant fitting the inheritance model in the same gene in order for the gene and its variants to be reported. For example, we may only be interested in candidates where at least 4 families have a variant (with a fitting inheritance pattern) in that gene.">
+        <param argument="--min-kindreds" name="min_kindreds" type="integer"
+        value="1" min="1"
+        label="@LABEL@" help="@HELP@"/>
+    </xml>
+
+    <!-- Repeat (none by default) wrapping the "filter" macro. The max_repeat token caps the number of instances (default 1); the yield lets the including tool add further params to each instance. -->
+    <xml name="insert_constraint" token_max_repeat="1">
+        <repeat name="constraint" title="Additional constraints on variants" default="0" max="@MAX_REPEAT@">
+            <expand macro="filter" />
+            <yield />
+        </repeat>
+    </xml>
+
+    <!-- Boolean (off by default) letting a user-supplied constraint replace, rather than add to, the tool's default constraint. The default_where token supplies the description of that default shown in the help text. -->
+    <xml name="overwritable_where_default" token_default_where="">
+        <param name="overwrite_default_filter" type="boolean" checked="false"
+        label="Overwrite the default constraint of this tool"
+        help="By default, this tool restricts its analysis to @DEFAULT_WHERE@ and this constraint is applied on top of any constraint expressed above. With this option here selected, your custom constraint, if given, will overwrite the default instead." />
+    </xml>
+
+    <!-- Repeat holding a genotype filter expression. The default, minimum and maximum number of instances come from the default_repeat, min_repeat and max_repeat tokens; an instance, once added, must not be empty. The yield lets the including tool append further params. -->
+    <xml name="gt_filter" token_default_repeat="0" token_min_repeat="0" token_max_repeat="1">
+        <repeat name="filter_by_genotype" title="Genotype filter expression"
+        default="@DEFAULT_REPEAT@" min="@MIN_REPEAT@" max="@MAX_REPEAT@">
+            <param argument="--gt-filter" name="gt_filter" type="text"
+            value="" area="True" size="5x50"
+            label="Restrictions to apply to genotype values"
+            help="">
+                <expand macro="sanitize_query"/>
+                <validator type="expression" message="Genotype filter expression cannot be empty">value.strip()</validator>
+            </param>
+            <yield/>
+        </repeat>
+    </xml>
+
+    <!-- Optional (at most one) sample filter: an SQL expression on the sample table, a selector for how a variant must be distributed among the samples passing that expression, and an optional family-wise evaluation with a minimum family count. -->
+    <xml name="sample_filter">
+        <repeat name="filter_by_sample" title="Sample filter expression" default="0" max="1">
+            <param argument="--sample-filter" name="sample_filter" type="text" area="True" size="5x50"
+            label="SQL filter to use to filter the sample table" help="">
+                <expand macro="sanitize_query" />
+                <validator type="expression" message="Sample filter expression cannot be empty">value.strip()</validator>
+            </param>
+            <!-- option values already carry the complete command-line fragment; the empty value selects the default behavior -->
+            <param argument="--in" name="in" type="select"
+            label="A variant must be in either all, none or any samples passing the sample-query filter"
+            help="">
+                <option value="">Return a variant if it is found in any sample passing the sample filter. (default) </option>
+                <option value="--in all">Return a variant if it is found in ALL samples passing the sample filter. (all)</option>
+                <option value="--in none">Return a variant if it is found in NO sample passing the sample filter. (none)</option>
+                <option value="--in only">Return a variant if it is found in any sample passing the sample filter, and in NO sample NOT passing it. (only)</option>
+                <option value="--in only all">Return a variant if it is found in ALL samples passing the sample filter, and in NO sample NOT passing it. (only all)</option>
+            </param>
+            <expand macro="min_kindreds"
+            label="Minimum number of families in which a variant must pass the sample filter" help=""/>
+            <param argument="--family-wise" name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False"
+            label="Apply the sample-filter on a family-wise basis" help="If a variant passes the sample filter in at least the minimum number of families specified above it is retained." />
+        </repeat>
+    </xml>
+
+    <!-- Repeat of genomic region filters. Each instance requires a chromosome identifier; start and end are optional but, when given, must consist of digits only. -->
+    <xml name="region_filter">
+        <repeat name="regions" title="Region Filter" default="0" min="0"
+        help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
+            <param name="chrom" type="text" label="Chromosome">
+                <validator type="expression" message="A chromosome identifier is required when specifying a region filter">value.strip()</validator>
+            </param>
+            <param name="start" type="text" label="Region Start">
+                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
+            </param>
+            <param name="stop" type="text" label="Region End">
+                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
+            </param>
+        </repeat>
+    </xml>
+
+    <!-- Shell preamble: creates a local "gemini" directory, symlinks the gemini/data folder of the selected annotation source into it and exports GEMINI_CONFIG pointing at the annotation source path. It ends with a trailing "&&", so a command must follow the token. -->
+    <token name="@PROVIDE_ANNO_DATA@"><![CDATA[
+        mkdir gemini &&
+        ln -s '${annotation_databases.fields.path}/gemini/data' gemini/data &&
+        export GEMINI_CONFIG='${annotation_databases.fields.path}' &&
+    ]]></token>
+
+    <!-- Cheetah snippet turning a multi-line SQL expression into one single-quoted command-line argument. It reads $multiline_sql_expr and $cmdln_param, which are not defined here and must be set before the token is used. The expression is stripped, CRLF and CR line endings are normalized to LF, and backslash-newline continuations are replaced by a space. Nothing is emitted for an empty expression. -->
+    <token name="@MULTILN_SQL_EXPR_TO_CMDLN@">
+        #set $sql_expr = str($multiline_sql_expr).strip()
+        #if str($sql_expr):
+            #set $sql_expr = $sql_expr.replace('\r\n', '\n')
+            #set $sql_expr = $sql_expr.replace('\r', '\n')
+            #set $sql_expr = $sql_expr.replace('\\\n', ' ')
+            $cmdln_param '$sql_expr'
+        #end if
+    </token>
+
+    <!-- Cheetah snippet computing $cols from the "report" conditional of the column_filter macro: "*" for the full report, otherwise the selected columns followed by any extra columns, falling back to "variant_id, gene" when both are empty. Emits nothing itself. -->
+    <token name="@SET_COLS@">
+        #if str($report.report_selector) == 'full':
+            #set $cols = "*"
+        #else:
+            #set $cols = ''
+            #if $report.columns and str($report.columns) != '':
+                #set $cols = str($report.columns)
+            #end if
+            #if str($report.extra_cols).strip():
+                #if $cols:
+                    #set $cols = $cols + ', ' + str($report.extra_cols)
+                #else:
+                    #set $cols = str($report.extra_cols)
+                #end if
+            #end if
+            #if not $cols:
+                #set $cols = "variant_id, gene"
+            #end if
+        #end if
+    </token>
+
+    <!-- Expands the SET_COLS token and then emits the columns option with the computed, single-quoted list, unless all columns ("*") were requested. -->
+    <token name="@COLUMN_SELECT@">
+        @SET_COLS@
+        #if $cols != "*"
+            --columns '$cols'
+        #end if
+    </token>
+
+    <!-- Cheetah snippet building $region_elements: one parenthesised SQL condition per entry of the "regions" repeat, made of a chrom equality test plus lower/upper position bounds when start/stop are given, joined with AND. Emits nothing itself; how the list is combined is left to the including tool. -->
+    <token name="@PARSE_REGION_ELEMENTS@"><![CDATA[
+        #set $region_elements = []
+        #for $r in $regions:
+            ## The actual chromosome name needs to be single-quoted
+            ## in SQL, so we need to quote the single quotes like the
+            ## sanitize_query macro would if the whole was a parameter.
+            #set $r_elements = ["chrom = '\"'\"'%s'\"'\"'" % str($r.chrom).strip()]
+            #if str($r.start).strip():
+                #silent $r_elements.append("start >= %d" % int($r.start))
+            #end if
+            #if str($r.stop).strip():
+                #silent $r_elements.append("end <= %d" % int($r.stop))
+            #end if
+            #silent $region_elements.append("(%s)" % " AND ".join($r_elements))
+        #end for
+    ]]>
+    </token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,65 @@
+=========================
+Galaxy wrapper for GEMINI
+=========================
+
+
+GEMINI: a flexible framework for exploring genome variation
+
+GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of 
+the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, 
+and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very 
+powerful system for exploring genetic variation for disease and population genetics.
+
+Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically 
+annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, 
+OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows 
+one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an 
+enhanced SQL engine.
+
+Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_.
+
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`_.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
+
+
+=======
+History
+=======
+- 0.9.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,4 @@
+<?xml version="1.0" ?>
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+    <repository changeset_revision="f57426daa04d" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/>
+</repositories>
\ No newline at end of file
Binary file static/images/gemini_mendel_errors.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno.bed	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,3 @@
+chr3	187000000	187150000
+chr3	187150000	187300000
+chr3	187300000	187450000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_actionable_mutations_result.tabular	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,1 @@
+tum_name	chrom	start	end	ref	alt	gene	impact	is_somatic	in_cosmic_census	dgidb_info
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend.ped	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,10 @@
+#family_id	sample_id	paternal_id	maternal_id	sex	phenotype
+1	1_dad	0	0	-1	1
+1	1_mom	0	0	-1	1
+1	1_kid	1_dad	1_mom	-1	2
+2	2_dad	0	0	-1	1
+2	2_mom	0	0	-1	1
+2	2_kid	2_dad	2_mom	-1	2
+3	3_dad	0	0	-1	1
+3	3_mom	0	0	-1	1
+3	3_kid	3_dad	3_mom	-1	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,127 @@
+##fileformat=VCFv4.1
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false"
+##contig=<ID=chr1,length=249250621,assembly=hg19>
+##contig=<ID=chr10,length=135534747,assembly=hg19>
+##contig=<ID=chr11,length=135006516,assembly=hg19>
+##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19>
+##contig=<ID=chr12,length=133851895,assembly=hg19>
+##contig=<ID=chr13,length=115169878,assembly=hg19>
+##contig=<ID=chr14,length=107349540,assembly=hg19>
+##contig=<ID=chr15,length=102531392,assembly=hg19>
+##contig=<ID=chr16,length=90354753,assembly=hg19>
+##contig=<ID=chr17,length=81195210,assembly=hg19>
+##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19>
+##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19>
+##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19>
+##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19>
+##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19>
+##contig=<ID=chr18,length=78077248,assembly=hg19>
+##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19>
+##contig=<ID=chr19,length=59128983,assembly=hg19>
+##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19>
+##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19>
+##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19>
+##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19>
+##contig=<ID=chr2,length=243199373,assembly=hg19>
+##contig=<ID=chr20,length=63025520,assembly=hg19>
+##contig=<ID=chr21,length=48129895,assembly=hg19>
+##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19>
+##contig=<ID=chr22,length=51304566,assembly=hg19>
+##contig=<ID=chr3,length=198022430,assembly=hg19>
+##contig=<ID=chr4,length=191154276,assembly=hg19>
+##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19>
+##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19>
+##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19>
+##contig=<ID=chr5,length=180915260,assembly=hg19>
+##contig=<ID=chr6,length=171115067,assembly=hg19>
+##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19>
+##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19>
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19>
+##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19>
+##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19>
+##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19>
+##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19>
+##contig=<ID=chr7,length=159138663,assembly=hg19>
+##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19>
+##contig=<ID=chr8,length=146364022,assembly=hg19>
+##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19>
+##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19>
+##contig=<ID=chr9,length=141213431,assembly=hg19>
+##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19>
+##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19>
+##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19>
+##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19>
+##contig=<ID=chrM,length=16571,assembly=hg19>
+##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19>
+##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19>
+##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19>
+##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19>
+##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19>
+##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19>
+##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19>
+##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19>
+##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19>
+##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19>
+##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19>
+##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19>
+##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19>
+##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19>
+##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19>
+##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19>
+##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19>
+##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19>
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19>
+##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19>
+##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19>
+##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19>
+##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19>
+##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19>
+##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19>
+##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19>
+##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19>
+##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19>
+##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19>
+##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19>
+##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19>
+##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19>
+##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19>
+##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19>
+##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19>
+##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19>
+##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19>
+##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19>
+##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19>
+##contig=<ID=chrX,length=155270560,assembly=hg19>
+##contig=<ID=chrY,length=59373566,assembly=hg19>
+##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT">
+##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf GRCh37.66 test4.vep.vcf "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	1_dad	1_mom	1_kid	2_dad	2_mom	2_kid	3_dad	3_mom	3_kid
+chr10	1142208	.	T	C	3404.3	.	AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:59:87.16:940,87,0	0/1:0,29:49:78.20:899,78,0	1/1:0,24:64:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	48003992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	48004992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	135336656	.	G	A	38.34	.	AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	135369532	.	T	C	122.62	.	AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	
0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0
\ No newline at end of file
Binary file test-data/gemini_amend_input.db has changed
Binary file test-data/gemini_annotate_result.db has changed
Binary file test-data/gemini_auto_dom_input.db has changed
Binary file test-data/gemini_auto_rec_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_calpha_template.tabular	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,4 @@
+gene	T	c	Z	p_value
+SYCE1	.+	.+	.+	.+
+WDR37	.+	.+	.+	.+
+ASAH2C	.+	.+	.+	.+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_count_highimpact_result.tabular	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,2 @@
+gene	1_kid	3_kid
+WDR37	1	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_count_nonsynonymous_result.tabular	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,5 @@
+gene	1_dad	1_kid	1_mom	2_dad	2_kid	2_mom	3_dad	3_kid	3_mom
+SYCE1	0	1	0	0	1	0	0	1	0
+SPRN	0	1	0	0	1	0	1	1	1
+WDR37	0	1	0	0	0	0	0	2	0
+ASAH2C	2	3	2	1	3	1	1	2	1
Binary file test-data/gemini_comphets_input.db has changed
Binary file test-data/gemini_de_novo_input.db has changed
Binary file test-data/gemini_is_somatic_result.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_load_input.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,70 @@
+##fileformat=VCFv4.1
+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">
+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">
+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">
+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">
+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">
+##ALT=<ID=DEL,Description="Deletion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">
+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN">
+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">
+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">
+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">
+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">
+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">
+##reference=GRCh37
+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	10583	rs58108140	G	A	100.0	PASS	AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	10611	rs189107123	C	G	100.0	PASS	AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13302	rs180734498	C	T	100.0	PASS	THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13327	rs144762171	G	C	100.0	PASS	AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13957	.	TC	T	28.0	PASS	AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	13980	rs151276478	T	C	100.0	PASS	AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	30923	rs140337953	G	T	100.0	PASS	AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|)
+1	46402	.	C	CTGT	31.0	PASS	AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	47190	.	G	GA	192.0	PASS	AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51476	rs187298206	T	C	100.0	PASS	ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51479	rs116400033	T	A	100.0	PASS	RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51914	rs190452223	T	G	100.0	PASS	ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51935	rs181754315	C	T	100.0	PASS	THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51954	rs185832753	G	C	100.0	PASS	LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52058	rs62637813	G	C	100.0	PASS	AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52144	rs190291950	T	A	100.0	PASS	THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52185	.	TTAA	T	244.0	PASS	AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52238	rs150021059	T	G	100.0	PASS	THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||)
+1	53234	.	CAT	C	227.0	PASS	AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54353	rs140052487	C	A	100.0	PASS	THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54421	rs146477069	A	G	100.0	PASS	ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54490	rs141149254	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54676	rs2462492	C	T	100.0	PASS	LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54753	rs143174675	T	G	100.0	PASS	AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55164	rs3091274	C	A	100.0	PASS	AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55249	.	C	CTATGG	443.0	PASS	AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55299	rs10399749	C	T	100.0	PASS	RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55313	rs182462964	A	T	100.0	PASS	ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55326	rs3107975	T	C	100.0	PASS	AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55330	rs185215913	G	A	100.0	PASS	ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55367	rs190850374	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55388	rs182711216	C	T	100.0	PASS	THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55394	rs2949420	T	A	100.0	PASS	AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55416	rs193242050	G	A	100.0	PASS	AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55427	rs183189405	T	C	100.0	PASS	THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55816	rs187434873	G	A	100.0	PASS	AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55850	rs191890754	C	G	100.0	PASS	AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55852	rs184233019	G	C	100.0	PASS	THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
Binary file test-data/gemini_load_result1.db has changed
Binary file test-data/gemini_load_result2.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_versioned_databases.loc	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,3 @@
+## GEMINI versioned databases
+#DownloadDate	dbkey	DBversion	Description	Path
+1999-01-01	hg19	200	GEMINI annotations (test snapshot)	${__HERE__}/test-cache
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_windower_template.tabular	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,142 @@
+chr1	0	50000	.
+chr1	50000000	50050000	.
+chr1	100000000	100050000	.
+chr1	150000000	150050000	.
+chr1	200000000	200050000	.
+chr10	0	50000	.
+chr10	50000000	50050000	.
+chr10	100000000	100050000	.
+chr11	0	50000	.
+chr11	50000000	50050000	.
+chr11	100000000	100050000	.
+chr11_gl000202_random	0	40103	.
+chr12	0	50000	.
+chr12	50000000	50050000	.
+chr12	100000000	100050000	.
+chr13	0	50000	.
+chr13	50000000	50050000	.
+chr13	100000000	100050000	.
+chr14	0	50000	.
+chr14	50000000	50050000	.
+chr14	100000000	100050000	.
+chr15	0	50000	.
+chr15	50000000	50050000	.
+chr15	100000000	100050000	.
+chr16	0	50000	.
+chr16	50000000	50050000	.
+chr17	0	50000	.
+chr17	50000000	50050000	.
+chr17_ctg5_hap1	0	50000	.
+chr17_gl000203_random	0	37498	.
+chr17_gl000204_random	0	50000	.
+chr17_gl000205_random	0	50000	.
+chr17_gl000206_random	0	41001	.
+chr18	0	50000	.
+chr18	50000000	50050000	.
+chr18_gl000207_random	0	4262	.
+chr19	0	50000	.
+chr19	50000000	50050000	.
+chr19_gl000208_random	0	50000	.
+chr19_gl000209_random	0	50000	.
+chr1_gl000191_random	0	50000	.
+chr1_gl000192_random	0	50000	.
+chr2	0	50000	.
+chr2	50000000	50050000	.
+chr2	100000000	100050000	.
+chr2	150000000	150050000	.
+chr2	200000000	200050000	.
+chr20	0	50000	.
+chr20	50000000	50050000	.
+chr21	0	50000	.
+chr21_gl000210_random	0	27682	.
+chr22	0	50000	.
+chr22	50000000	50050000	.
+chr3	0	50000	.
+chr3	50000000	50050000	.
+chr3	100000000	100050000	.
+chr3	150000000	150050000	.
+chr4	0	50000	.
+chr4	50000000	50050000	.
+chr4	100000000	100050000	.
+chr4	150000000	150050000	.
+chr4_ctg9_hap1	0	50000	.
+chr4_gl000193_random	0	50000	.
+chr4_gl000194_random	0	50000	.
+chr5	0	50000	.
+chr5	50000000	50050000	.
+chr5	100000000	100050000	.
+chr5	150000000	150050000	.
+chr6	0	50000	.
+chr6	50000000	50050000	.
+chr6	100000000	100050000	.
+chr6	150000000	150050000	.
+chr6_apd_hap1	0	50000	.
+chr6_cox_hap2	0	50000	.
+chr6_dbb_hap3	0	50000	.
+chr6_mann_hap4	0	50000	.
+chr6_mcf_hap5	0	50000	.
+chr6_qbl_hap6	0	50000	.
+chr6_ssto_hap7	0	50000	.
+chr7	0	50000	.
+chr7	50000000	50050000	.
+chr7	100000000	100050000	.
+chr7	150000000	150050000	.
+chr7_gl000195_random	0	50000	.
+chr8	0	50000	.
+chr8	50000000	50050000	.
+chr8	100000000	100050000	.
+chr8_gl000196_random	0	38914	.
+chr8_gl000197_random	0	37175	.
+chr9	0	50000	.
+chr9	50000000	50050000	.
+chr9	100000000	100050000	.
+chr9_gl000198_random	0	50000	.
+chr9_gl000199_random	0	50000	.
+chr9_gl000200_random	0	50000	.
+chr9_gl000201_random	0	36148	.
+chrM	0	16571	.
+chrUn_gl000211	0	50000	.
+chrUn_gl000212	0	50000	.
+chrUn_gl000213	0	50000	.
+chrUn_gl000214	0	50000	.
+chrUn_gl000215	0	50000	.
+chrUn_gl000216	0	50000	.
+chrUn_gl000217	0	50000	.
+chrUn_gl000218	0	50000	.
+chrUn_gl000219	0	50000	.
+chrUn_gl000220	0	50000	.
+chrUn_gl000221	0	50000	.
+chrUn_gl000222	0	50000	.
+chrUn_gl000223	0	50000	.
+chrUn_gl000224	0	50000	.
+chrUn_gl000225	0	50000	.
+chrUn_gl000226	0	15008	.
+chrUn_gl000227	0	50000	.
+chrUn_gl000228	0	50000	.
+chrUn_gl000229	0	19913	.
+chrUn_gl000230	0	43691	.
+chrUn_gl000231	0	27386	.
+chrUn_gl000232	0	40652	.
+chrUn_gl000233	0	45941	.
+chrUn_gl000234	0	40531	.
+chrUn_gl000235	0	34474	.
+chrUn_gl000236	0	41934	.
+chrUn_gl000237	0	45867	.
+chrUn_gl000238	0	39939	.
+chrUn_gl000239	0	33824	.
+chrUn_gl000240	0	41933	.
+chrUn_gl000241	0	42152	.
+chrUn_gl000242	0	43523	.
+chrUn_gl000243	0	43341	.
+chrUn_gl000244	0	39929	.
+chrUn_gl000245	0	36651	.
+chrUn_gl000246	0	38154	.
+chrUn_gl000247	0	36422	.
+chrUn_gl000248	0	39786	.
+chrUn_gl000249	0	38502	.
+chrX	0	50000	.
+chrX	50000000	50050000	.
+chrX	100000000	100050000	.
+chrX	150000000	150050000	.
+chrY	0	50000	.
+chrY	50000000	50050000	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini-config.yaml	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,15 @@
+annotation_dir: gemini/data
+versions:
+  ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4
+  ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2
+  ExAC.r0.3.sites.vep.tidy.vcf.gz: 4
+  GRCh37-gms-mappability.vcf.gz: 2
+  clinvar_20170130.tidy.vcf.gz: 5
+  cosmic-v68-GRCh37.tidy.vcf.gz: 3
+  dbsnp.b147.20160601.tidy.vcf.gz: 1
+  detailed_gene_table_v75: 2
+  geno2mp.variants.tidy.vcf.gz: 1
+  gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2
+  hg19.rmsk.bed.gz: 2
+  summary_gene_table_v75: 2
+  whole_genome_SNVs.tsv.compressed.gz: 2
Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz has changed
Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz has changed
Binary file test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,19 @@
+ARHH	"RAS homolog gene family, member H (TTF)"	399	4	4p13 	yes		NHL			L	Dom	T	BCL6							
+BCL5	B-cell CLL/lymphoma 5	603	17	17q22 	yes		CLL			L	Dom	T	MYC							
+BCL6	B-cell CLL/lymphoma 6	604	3	3q27 	yes		"NHL, CLL"			L	Dom	"T, Mis"	"IG loci, ZNFN1A1, LCP1, PIM1, TFRC, CIITA, NACA, HSPCB, HSPCA, HIST1H4I, IL21R,  POU2AF1, ARHH, EIF4A2, SFRS3"							
+BCOR	BCL6 corepressor	54880	X	Xp11.4	yes		"retinoblastoma, AML, APL (translocation)"				Rec	"F, N, S, T"	RARA	yes	oculo-facio-cardio-dental genetic					
+CIITA	"class II, major histocompatibility complex, transactivator"	4261	16	16p13	yes		"PMBL, Hodgkin lymphoma"			L	Dom	T	"FLJ27352, CD274, CD273, RALGDS, RUNDC2A, C16orf75, BCL6"							
+EIF4A2	"eukaryotic translation initiation factor 4A, isoform 2"	1974	3	3q27.3	yes		NHL			L	Dom	T	BCL6							
+HIST1H4I	"histone 1, H4i (H4FM)"	8294	6	6p21.3 	yes		NHL			L	Dom	T	BCL6							
+HSPCA	"heat shock 90kDa protein 1, alpha"	3320	14	14q32.31	yes		NHL			L	Dom	T	BCL6							
+HSPCB	"heat shock 90kDa protein 1, beta"	3326	6	6p12	yes		NHL			L	Dom	T	BCL6							
+IGH@	immunoglobulin heavy locus	3492	14	14q32.33	yes		"MM, Burkitt lymphoma, NHL, CLL, B-ALL, MALT, MLCLS"			L	Dom	T	"MYC, FGFR3,PAX5, IRTA1, IRF4, CCND1, BCL9, BCL8, BCL6, BCL2, BCL3, BCL10, BCL11A. LHX4, DDX6, NFKB2, PAFAH1B2, PCSK7, CRLF2"							
+IKZF1	IKAROS family zinc finger 1	10320	7	7p12.2	yes		"ALL, DLBCL"			L	"Rec,Dom"	"D,T"	BCL6							
+IL21R	interleukin 21 receptor	50615	16	16p11 	yes		NHL			L	Dom	T	BCL6							
+LCP1	lymphocyte cytosolic protein 1 (L-plastin)	3936	13	 13q14.1-q14.3 	yes		NHL 			L	Dom	T	BCL6							
+MYC	v-myc myelocytomatosis viral oncogene homolog (avian)	4609	8	8q24.12-q24.13 	yes		"Burkitt lymphoma, amplified in other cancers, B-CLL"			"L, E"	Dom	"A, T"	"IGK@, BCL5, BCL7A , BTG1, TRA@, IGH@"							
+NACA	nascent-polypeptide-associated complex alpha polypeptide	4666	12	12q23-q24.1 	yes		NHL			L	Dom	T	BCL6							
+PIM1	pim-1 oncogene	5292	6	6p21.2 	yes		NHL			L	Dom	T	BCL6							
+POU2AF1	"POU domain, class 2, associating factor 1 (OBF1)"	5450	11	11q23.1 	yes		NHL			L	Dom	T	BCL6							
+SFRS3	"splicing factor, arginine/serine-rich 3"	6428	6	6p21	yes		follicular lymphoma			L	Dom	T	BCL6							
+TFRC	"transferrin receptor (p90, CD71)"	7037	3	3q29 	yes		NHL			L	Dom	T	BCL6							
Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz has changed
Binary file test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/detailed_gene_table_v75	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,114 @@
+Chromosome	Gene_name	Is_hgnc	Ensembl_gene_id	Ensembl_transcript_id	Biotype	Transcript_status	CCDS_id	HGNC_id	CDS_length	Protein_length	Transcript_start	Transcript_end	strand	Synonyms	Rvis_pct	entrez_gene_id	mammalian_phenotype_id
+chr3	None	0	ENSG00000239093	ENST00000459452	snoRNA	KNOWN	None	None	None	None	187141103	187141207	1	None	None	None	None
+chr3	None	0	ENSG00000228952	ENST00000440726	lincRNA	KNOWN	None	None	None	None	187166633	187167238	1	None	None	None	None
+chr3	None	0	ENSG00000223401	ENST00000450760	lincRNA	KNOWN	None	None	None	None	187461474	187463208	1	None	None	None	None
+chr3	MASP	0	ENSG00000127241	ENST00000337774	protein_coding	KNOWN	CCDS33907	None	2100	699	186935942	187009810	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000337774	protein_coding	KNOWN	CCDS33907	None	2100	699	186935942	187009810	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000337774	protein_coding	KNOWN	CCDS33907	6901	2100	699	186935942	187009810	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000337774	protein_coding	KNOWN	CCDS33907	None	2100	699	186935942	187009810	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000296280	protein_coding	KNOWN	CCDS33908	None	2187	728	186951870	187009646	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000296280	protein_coding	KNOWN	CCDS33908	None	2187	728	186951870	187009646	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000296280	protein_coding	KNOWN	CCDS33908	6901	2187	728	186951870	187009646	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000296280	protein_coding	KNOWN	CCDS33908	None	2187	728	186951870	187009646	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000392472	protein_coding	PUTATIVE	None	None	1848	615	186951872	187009765	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000392472	protein_coding	PUTATIVE	None	None	1848	615	186951872	187009765	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000392472	protein_coding	PUTATIVE	None	6901	1848	615	186951872	187009765	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000392472	protein_coding	PUTATIVE	None	None	1848	615	186951872	187009765	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000495249	processed_transcript	PUTATIVE	None	None	None	None	186953655	187009542	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000495249	processed_transcript	PUTATIVE	None	None	None	None	186953655	187009542	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000495249	processed_transcript	PUTATIVE	None	6901	None	None	186953655	187009542	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000495249	processed_transcript	PUTATIVE	None	None	None	None	186953655	187009542	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000169293	protein_coding	KNOWN	CCDS33909	None	1143	380	186964149	187009745	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000169293	protein_coding	KNOWN	CCDS33909	None	1143	380	186964149	187009745	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000169293	protein_coding	KNOWN	CCDS33909	6901	1143	380	186964149	187009745	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000169293	protein_coding	KNOWN	CCDS33909	None	1143	380	186964149	187009745	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000392470	protein_coding	PUTATIVE	None	None	1065	354	186964947	187009670	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000392470	protein_coding	PUTATIVE	None	None	1065	354	186964947	187009670	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000392470	protein_coding	PUTATIVE	None	6901	1065	354	186964947	187009670	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000392470	protein_coding	PUTATIVE	None	None	1065	354	186964947	187009670	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000460839	retained_intron	KNOWN	None	None	None	None	186974373	187003796	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000460839	retained_intron	KNOWN	None	None	None	None	186974373	187003796	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000460839	retained_intron	KNOWN	None	6901	None	None	186974373	187003796	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000460839	retained_intron	KNOWN	None	None	None	None	186974373	187003796	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000392475	protein_coding	NOVEL	None	None	614	203	186974603	187009768	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000392475	protein_coding	NOVEL	None	None	614	203	186974603	187009768	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000392475	protein_coding	NOVEL	None	6901	614	203	186974603	187009768	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000392475	protein_coding	NOVEL	None	None	614	203	186974603	187009768	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000439271	protein_coding	PUTATIVE	None	None	355	117	186980469	187009746	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000439271	protein_coding	PUTATIVE	None	None	355	117	186980469	187009746	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000439271	protein_coding	PUTATIVE	None	6901	355	117	186980469	187009746	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000439271	protein_coding	PUTATIVE	None	None	355	117	186980469	187009746	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	MASP	0	ENSG00000127241	ENST00000425937	protein_coding	PUTATIVE	None	None	166	54	186980502	187009485	-1	PRSS5,MASP1,CRARF	16.8141071	5648	None
+chr3	PRSS5	0	ENSG00000127241	ENST00000425937	protein_coding	PUTATIVE	None	None	166	54	186980502	187009485	-1	MASP1,CRARF,MASP	16.8141071	5648	None
+chr3	MASP1	1	ENSG00000127241	ENST00000425937	protein_coding	PUTATIVE	None	6901	166	54	186980502	187009485	-1	PRSS5,CRARF,MASP	16.8141071	5648	None
+chr3	CRARF	0	ENSG00000127241	ENST00000425937	protein_coding	PUTATIVE	None	None	166	54	186980502	187009485	-1	PRSS5,MASP1,MASP	16.8141071	5648	None
+chr3	IFRG28	0	ENSG00000136514	ENST00000259030	protein_coding	KNOWN	CCDS33910	None	741	246	187086120	187089864	1	RTP4,Z3CXXC4	94.35008257	64108	None
+chr3	RTP4	1	ENSG00000136514	ENST00000259030	protein_coding	KNOWN	CCDS33910	23992	741	246	187086120	187089864	1	IFRG28,Z3CXXC4	94.35008257	64108	None
+chr3	Z3CXXC4	0	ENSG00000136514	ENST00000259030	protein_coding	KNOWN	CCDS33910	None	741	246	187086120	187089864	1	IFRG28,RTP4	94.35008257	64108	None
+chr3	SST	1	ENSG00000157005	ENST00000287641	protein_coding	KNOWN	CCDS3288	11329	351	116	187386694	187388187	-1	SMST	78.16112291	6750	MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378
+chr3	SMST	0	ENSG00000157005	ENST00000287641	protein_coding	KNOWN	CCDS3288	None	351	116	187386694	187388187	-1	SST	78.16112291	6750	MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378
+chr3	Z3CXXC2	0	ENSG00000198471	ENST00000358241	protein_coding	KNOWN	CCDS33911	None	678	225	187416047	187420345	-1	RTP2,MGC78665	69.20853975	344892	MP:0005389
+chr3	RTP2	1	ENSG00000198471	ENST00000358241	protein_coding	KNOWN	CCDS33911	32486	678	225	187416047	187420345	-1	Z3CXXC2,MGC78665	69.20853975	344892	MP:0005389
+chr3	MGC78665	0	ENSG00000198471	ENST00000358241	protein_coding	KNOWN	CCDS33911	None	678	225	187416047	187420345	-1	Z3CXXC2,RTP2	69.20853975	344892	MP:0005389
+chr3	None	0	ENSG00000228804	ENST00000449623	protein_coding	PUTATIVE	None	None	390	129	187420101	187451637	1	None	None	None	None
+chr3	None	0	ENSG00000228804	ENST00000437407	protein_coding	PUTATIVE	None	None	153	50	187420154	187450203	1	None	None	None	None
+chr3	ZNF51	0	ENSG00000113916	ENST00000406870	protein_coding	KNOWN	CCDS3289	None	2121	706	187439165	187463515	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000406870	protein_coding	KNOWN	CCDS3289	None	2121	706	187439165	187463515	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000406870	protein_coding	KNOWN	CCDS3289	None	2121	706	187439165	187463515	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000406870	protein_coding	KNOWN	CCDS3289	None	2121	706	187439165	187463515	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000406870	protein_coding	KNOWN	CCDS3289	1001	2121	706	187439165	187463515	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000406870	protein_coding	KNOWN	CCDS3289	None	2121	706	187439165	187463515	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000419510	nonsense_mediated_decay	KNOWN	None	None	168	55	187439175	187454876	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000419510	nonsense_mediated_decay	KNOWN	None	None	168	55	187439175	187454876	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000419510	nonsense_mediated_decay	KNOWN	None	None	168	55	187439175	187454876	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000419510	nonsense_mediated_decay	KNOWN	None	None	168	55	187439175	187454876	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000419510	nonsense_mediated_decay	KNOWN	None	1001	168	55	187439175	187454876	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000419510	nonsense_mediated_decay	KNOWN	None	None	168	55	187439175	187454876	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000232014	protein_coding	KNOWN	CCDS3289	None	2121	706	187440186	187454357	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000232014	protein_coding	KNOWN	CCDS3289	None	2121	706	187440186	187454357	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000232014	protein_coding	KNOWN	CCDS3289	None	2121	706	187440186	187454357	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000232014	protein_coding	KNOWN	CCDS3289	None	2121	706	187440186	187454357	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000232014	protein_coding	KNOWN	CCDS3289	1001	2121	706	187440186	187454357	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000232014	protein_coding	KNOWN	CCDS3289	None	2121	706	187440186	187454357	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000450123	protein_coding	NOVEL	CCDS46975	None	1953	650	187440220	187452670	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000450123	protein_coding	NOVEL	CCDS46975	None	1953	650	187440220	187452670	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000450123	protein_coding	NOVEL	CCDS46975	None	1953	650	187440220	187452670	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000450123	protein_coding	NOVEL	CCDS46975	None	1953	650	187440220	187452670	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000450123	protein_coding	NOVEL	CCDS46975	1001	1953	650	187440220	187452670	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000450123	protein_coding	NOVEL	CCDS46975	None	1953	650	187440220	187452670	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000479110	retained_intron	KNOWN	None	None	None	None	187442357	187443411	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000479110	retained_intron	KNOWN	None	None	None	None	187442357	187443411	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000479110	retained_intron	KNOWN	None	None	None	None	187442357	187443411	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000479110	retained_intron	KNOWN	None	None	None	None	187442357	187443411	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000479110	retained_intron	KNOWN	None	1001	None	None	187442357	187443411	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000479110	retained_intron	KNOWN	None	None	None	None	187442357	187443411	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000430339	protein_coding	KNOWN	None	None	365	120	187449515	187452735	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000430339	protein_coding	KNOWN	None	None	365	120	187449515	187452735	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000430339	protein_coding	KNOWN	None	None	365	120	187449515	187452735	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000430339	protein_coding	KNOWN	None	None	365	120	187449515	187452735	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000430339	protein_coding	KNOWN	None	1001	365	120	187449515	187452735	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000430339	protein_coding	KNOWN	None	None	365	120	187449515	187452735	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000480458	processed_transcript	KNOWN	None	None	None	None	187449553	187463225	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000480458	processed_transcript	KNOWN	None	None	None	None	187449553	187463225	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000480458	processed_transcript	KNOWN	None	None	None	None	187449553	187463225	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000480458	processed_transcript	KNOWN	None	None	None	None	187449553	187463225	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000480458	processed_transcript	KNOWN	None	1001	None	None	187449553	187463225	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000480458	processed_transcript	KNOWN	None	None	None	None	187449553	187463225	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000438077	protein_coding	KNOWN	None	None	312	103	187449568	187455732	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000438077	protein_coding	KNOWN	None	None	312	103	187449568	187455732	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000438077	protein_coding	KNOWN	None	None	312	103	187449568	187455732	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000438077	protein_coding	KNOWN	None	None	312	103	187449568	187455732	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000438077	protein_coding	KNOWN	None	1001	312	103	187449568	187455732	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000438077	protein_coding	KNOWN	None	None	312	103	187449568	187455732	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000470319	retained_intron	KNOWN	None	None	None	None	187452233	187463260	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000470319	retained_intron	KNOWN	None	None	None	None	187452233	187463260	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000470319	retained_intron	KNOWN	None	None	None	None	187452233	187463260	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000470319	retained_intron	KNOWN	None	None	None	None	187452233	187463260	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000470319	retained_intron	KNOWN	None	1001	None	None	187452233	187463260	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000470319	retained_intron	KNOWN	None	None	None	None	187452233	187463260	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZNF51	0	ENSG00000113916	ENST00000496823	processed_transcript	PUTATIVE	None	None	None	None	187453975	187463247	-1	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	ENST00000496823	processed_transcript	PUTATIVE	None	None	None	None	187453975	187463247	-1	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	ENST00000496823	processed_transcript	PUTATIVE	None	None	None	None	187453975	187463247	-1	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	ENST00000496823	processed_transcript	PUTATIVE	None	None	None	None	187453975	187463247	-1	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	ENST00000496823	processed_transcript	PUTATIVE	None	1001	None	None	187453975	187463247	-1	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	ENST00000496823	processed_transcript	PUTATIVE	None	None	None	None	187453975	187463247	-1	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	604	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
Binary file test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz has changed
Binary file test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz has changed
Binary file test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.CpG.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.cytoband.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.dgv.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.gwas.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.rmsk.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.segdup.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz has changed
Binary file test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi has changed
Binary file test-data/test-cache/gemini/data/hprd_interaction_edges.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl66	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,30 @@
+B8PSA7	BCL6	BCL6	ENSG00000113916	ENST00000450123	None	None
+C9J1C7	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000450123	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000438077	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000450123	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000169293	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000296280	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000425937	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JMA2	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+F5H2J0	MASP1	MASP1	ENSG00000127241	ENST00000541896	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000169293	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000232014	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000406870	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04971;Gastric_acid_secretion
+Q5HYM1	MASP1	MASP1	ENSG00000127241	ENST00000541896	None	None
+Q5QGT7	RTP2	RTP2	ENSG00000198471	ENST00000358241	hsa:344892	None
+Q96DX8	RTP4	RTP4	ENSG00000136514	ENST00000259030	hsa:64108	None
+Q9NSY8	MASP1	MASP1	ENSG00000127241	ENST00000296280	None	None
+Q9NSY8	MASP1	MASP1	ENSG00000127241	ENST00000392472	None	None
+Q9NSY8	MASP1	MASP1	ENSG00000127241	ENST00000541811	None	None
+none	BCL6	BCL6	ENSG00000113916	ENST00000419510	None	None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl67	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,30 @@
+B8PSA7	BCL6	BCL6	ENSG00000113916	ENST00000450123	None	None
+C9J1C7	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000450123	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000438077	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000450123	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000169293	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000296280	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000425937	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JMA2	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+F5H2J0	MASP1	MASP1	ENSG00000127241	ENST00000541896	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000169293	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000232014	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000406870	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04971;Gastric_acid_secretion
+Q5HYM1	MASP1	MASP1	ENSG00000127241	ENST00000541896	None	None
+Q5QGT7	RTP2	RTP2	ENSG00000198471	ENST00000358241	hsa:344892	None
+Q96DX8	RTP4	RTP4	ENSG00000136514	ENST00000259030	hsa:64108	None
+Q9NSY8	MASP1	MASP1	ENSG00000127241	ENST00000296280	None	None
+Q9NSY8	MASP1	MASP1	ENSG00000127241	ENST00000392472	None	None
+Q9NSY8	MASP1	MASP1	ENSG00000127241	ENST00000541811	None	None
+none	BCL6	BCL6	ENSG00000113916	ENST00000419510	None	None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl68	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,26 @@
+C9J1C7	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000438077	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000425937	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JMA2	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000232014	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000406870	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000450123	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04971;Gastric_acid_secretion
+Q5QGT7	RTP2	RTP2	ENSG00000198471	ENST00000358241	hsa:344892	None
+Q96DX8	RTP4	RTP4	ENSG00000136514	ENST00000259030	hsa:64108	None
+none	BCL6	BCL6	ENSG00000113916	ENST00000419510	None	None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl69	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,26 @@
+C9J1C7	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000438077	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000425937	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JMA2	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000232014	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000406870	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000450123	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04971;Gastric_acid_secretion
+Q5QGT7	RTP2	RTP2	ENSG00000198471	ENST00000358241	hsa:344892	None
+Q96DX8	RTP4	RTP4	ENSG00000136514	ENST00000259030	hsa:64108	None
+none	BCL6	BCL6	ENSG00000113916	ENST00000419510	None	None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl70	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,25 @@
+C9J1C7	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000438077	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000425937	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JMA2	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000232014	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000406870	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000450123	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04971;Gastric_acid_secretion
+Q5QGT7	RTP2	RTP2	ENSG00000198471	ENST00000358241	hsa:344892	None
+Q96DX8	RTP4	RTP4	ENSG00000136514	ENST00000259030	hsa:64108	None
+none	BCL6	BCL6	ENSG00000113916	ENST00000419510	None	None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl71	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,26 @@
+C9J1C7	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JCS5	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000430339	None	None
+C9JL16	BCL6	BCL6	ENSG00000113916	ENST00000438077	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000425937	None	None
+C9JLU5	MASP1	MASP1	ENSG00000127241	ENST00000439271	None	None
+C9JMA2	MASP1	MASP1	ENSG00000127241	ENST00000392475	None	None
+F8W876	MASP1	MASP1	ENSG00000127241	ENST00000392470	None	None
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000232014	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000406870	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182	BCL6	BCL6	ENSG00000113916	ENST00000450123	hsa:604	path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000169293	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000296280	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000337774	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa04610;Complement_and_coagulation_cascades
+P48740	MASP1	MASP1	ENSG00000127241	ENST00000392472	hsa:5648	path:hsa05150;Staphylococcus_aureus_infection
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278	SST	SST	ENSG00000157005	ENST00000287641	hsa:6750	path:hsa04971;Gastric_acid_secretion
+Q5QGT7	RTP2	RTP2	ENSG00000198471	ENST00000358241	hsa:344892	None
+Q96DX8	RTP4	RTP4	ENSG00000136514	ENST00000259030	hsa:64108	None
+none	BCL6	BCL6	ENSG00000113916	ENST00000419510	None	None
Binary file test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz has changed
Binary file test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/summary_gene_table_v75	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,23 @@
+Chromosome	Gene_name	Is_hgnc	Ensembl_gene_id	HGNC_id	Synonyms	Rvis_pct	Strand	Transcript_min_start	Transcript_max_end	Mammalian_phenotype_id
+chr3	None	0	ENSG00000239093	None	None	None	1	187141103	187141207	None
+chr3	None	0	ENSG00000228952	None	None	None	1	187166633	187167238	None
+chr3	None	0	ENSG00000223401	None	None	None	1	187461474	187463208	None
+chr3	MASP	0	ENSG00000127241	None	PRSS5,MASP1,CRARF	16.8141071	-1	186935942	187009810	None
+chr3	PRSS5	0	ENSG00000127241	None	MASP1,CRARF,MASP	16.8141071	-1	186935942	187009810	None
+chr3	MASP1	1	ENSG00000127241	6901	PRSS5,CRARF,MASP	16.8141071	-1	186935942	187009810	None
+chr3	CRARF	0	ENSG00000127241	None	PRSS5,MASP1,MASP	16.8141071	-1	186935942	187009810	None
+chr3	IFRG28	0	ENSG00000136514	None	RTP4,Z3CXXC4	94.35008257	1	187086120	187089864	None
+chr3	RTP4	1	ENSG00000136514	23992	IFRG28,Z3CXXC4	94.35008257	1	187086120	187089864	None
+chr3	Z3CXXC4	0	ENSG00000136514	None	IFRG28,RTP4	94.35008257	1	187086120	187089864	None
+chr3	SST	1	ENSG00000157005	11329	SMST	78.16112291	-1	187386694	187388187	MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378
+chr3	SMST	0	ENSG00000157005	None	SST	78.16112291	-1	187386694	187388187	MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378
+chr3	Z3CXXC2	0	ENSG00000198471	None	RTP2,MGC78665	69.20853975	-1	187416047	187420345	MP:0005389
+chr3	RTP2	1	ENSG00000198471	32486	Z3CXXC2,MGC78665	69.20853975	-1	187416047	187420345	MP:0005389
+chr3	MGC78665	0	ENSG00000198471	None	Z3CXXC2,RTP2	69.20853975	-1	187416047	187420345	MP:0005389
+chr3	None	0	ENSG00000228804	None	None	None	1	187420101	187451637	None
+chr3	ZNF51	0	ENSG00000113916	None	LAZ3,BCL5,BCL6,ZBTB27,BCL6A	23.5727766	-1	187439165	187463515	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	LAZ3	0	ENSG00000113916	None	BCL5,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	-1	187439165	187463515	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL5	0	ENSG00000113916	None	LAZ3,ZNF51,BCL6,ZBTB27,BCL6A	23.5727766	-1	187439165	187463515	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6A	0	ENSG00000113916	None	LAZ3,ZNF51,BCL6,BCL5,ZBTB27	23.5727766	-1	187439165	187463515	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	BCL6	1	ENSG00000113916	1001	LAZ3,ZNF51,ZBTB27,BCL5,BCL6A	23.5727766	-1	187439165	187463515	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3	ZBTB27	0	ENSG00000113916	None	LAZ3,ZNF51,BCL6,BCL5,BCL6A	23.5727766	-1	187439165	187463515	MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz has changed
Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/README.rst	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,25 @@
+Prepare Gemini annotation files and test databases for tool tests
+=================================================================
+
+Each version of GEMINI is tied to a particular set of annotation files and
+a particular database version.
+
+The ``build-gemini-testdata.sh`` script in this folder should be used to
+regenerate the annotation files and the test databases whenever the GEMINI
+version required by the tool wrappers gets upgraded.
+
+The script requires a working GEMINI installation at the targeted version and
+a folder with GEMINI's original annotation files, and can be executed with::
+
+  sh build-gemini-testdata.sh path/to/gemini/annotation/files
+  
+It will regenerate the annotation files inside test-data/test-cache/gemini/data
+and rebuild the ``*.db`` files in test-data.
+
+.. Note::
+
+   If the version of GEMINI that you are upgrading to uses a gemini-config.yaml
+   file that is different from the one found in test-data/test-cache, you will
+   have to update this file manually (make sure you leave the line
+   ``annotation_dir: gemini/data`` unchanged in the process).
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/anno.bed	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,3 @@
+chr3	187000000	187150000
+chr3	187150000	187300000
+chr3	187300000	187450000
Binary file test-data/util/build-data/anno.bed.gz has changed
Binary file test-data/util/build-data/anno.bed.gz.tbi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/gemini_load_input.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,70 @@
+##fileformat=VCFv4.1
+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">
+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">
+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">
+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">
+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">
+##ALT=<ID=DEL,Description="Deletion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">
+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN">
+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">
+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">
+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">
+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">
+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">
+##reference=GRCh37
+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	10583	rs58108140	G	A	100.0	PASS	AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	10611	rs189107123	C	G	100.0	PASS	AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13302	rs180734498	C	T	100.0	PASS	THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13327	rs144762171	G	C	100.0	PASS	AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13957	.	TC	T	28.0	PASS	AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	13980	rs151276478	T	C	100.0	PASS	AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	30923	rs140337953	G	T	100.0	PASS	AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|)
+1	46402	.	C	CTGT	31.0	PASS	AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	47190	.	G	GA	192.0	PASS	AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51476	rs187298206	T	C	100.0	PASS	ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51479	rs116400033	T	A	100.0	PASS	RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51914	rs190452223	T	G	100.0	PASS	ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51935	rs181754315	C	T	100.0	PASS	THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51954	rs185832753	G	C	100.0	PASS	LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52058	rs62637813	G	C	100.0	PASS	AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52144	rs190291950	T	A	100.0	PASS	THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52185	.	TTAA	T	244.0	PASS	AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52238	rs150021059	T	G	100.0	PASS	THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||)
+1	53234	.	CAT	C	227.0	PASS	AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54353	rs140052487	C	A	100.0	PASS	THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54421	rs146477069	A	G	100.0	PASS	ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54490	rs141149254	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54676	rs2462492	C	T	100.0	PASS	LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54753	rs143174675	T	G	100.0	PASS	AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55164	rs3091274	C	A	100.0	PASS	AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55249	.	C	CTATGG	443.0	PASS	AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55299	rs10399749	C	T	100.0	PASS	RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55313	rs182462964	A	T	100.0	PASS	ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55326	rs3107975	T	C	100.0	PASS	AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55330	rs185215913	G	A	100.0	PASS	ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55367	rs190850374	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55388	rs182711216	C	T	100.0	PASS	THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55394	rs2949420	T	A	100.0	PASS	AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55416	rs193242050	G	A	100.0	PASS	AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55427	rs183189405	T	C	100.0	PASS	THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55816	rs187434873	G	A	100.0	PASS	AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55850	rs191890754	C	G	100.0	PASS	AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55852	rs184233019	G	C	100.0	PASS	THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_dom.ped	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,9 @@
+1	1_dad	0	0	-1	1
+1	1_mom	0	0	-1	1
+1	1_kid	1_dad	1_mom	-1	2
+2	2_dad	0	0	-1	1
+2	2_mom	0	0	-1	2
+2	2_kid	2_dad	2_mom	-1	2
+3	3_dad	0	0	-1	2
+3	3_mom	0	0	-1	-9
+3	3_kid	3_dad	3_mom	-1	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_dom.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,127 @@
+##fileformat=VCFv4.1
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false"
+##contig=<ID=chr1,length=249250621,assembly=hg19>
+##contig=<ID=chr10,length=135534747,assembly=hg19>
+##contig=<ID=chr11,length=135006516,assembly=hg19>
+##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19>
+##contig=<ID=chr12,length=133851895,assembly=hg19>
+##contig=<ID=chr13,length=115169878,assembly=hg19>
+##contig=<ID=chr14,length=107349540,assembly=hg19>
+##contig=<ID=chr15,length=102531392,assembly=hg19>
+##contig=<ID=chr16,length=90354753,assembly=hg19>
+##contig=<ID=chr17,length=81195210,assembly=hg19>
+##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19>
+##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19>
+##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19>
+##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19>
+##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19>
+##contig=<ID=chr18,length=78077248,assembly=hg19>
+##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19>
+##contig=<ID=chr19,length=59128983,assembly=hg19>
+##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19>
+##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19>
+##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19>
+##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19>
+##contig=<ID=chr2,length=243199373,assembly=hg19>
+##contig=<ID=chr20,length=63025520,assembly=hg19>
+##contig=<ID=chr21,length=48129895,assembly=hg19>
+##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19>
+##contig=<ID=chr22,length=51304566,assembly=hg19>
+##contig=<ID=chr3,length=198022430,assembly=hg19>
+##contig=<ID=chr4,length=191154276,assembly=hg19>
+##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19>
+##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19>
+##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19>
+##contig=<ID=chr5,length=180915260,assembly=hg19>
+##contig=<ID=chr6,length=171115067,assembly=hg19>
+##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19>
+##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19>
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19>
+##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19>
+##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19>
+##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19>
+##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19>
+##contig=<ID=chr7,length=159138663,assembly=hg19>
+##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19>
+##contig=<ID=chr8,length=146364022,assembly=hg19>
+##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19>
+##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19>
+##contig=<ID=chr9,length=141213431,assembly=hg19>
+##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19>
+##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19>
+##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19>
+##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19>
+##contig=<ID=chrM,length=16571,assembly=hg19>
+##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19>
+##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19>
+##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19>
+##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19>
+##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19>
+##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19>
+##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19>
+##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19>
+##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19>
+##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19>
+##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19>
+##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19>
+##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19>
+##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19>
+##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19>
+##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19>
+##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19>
+##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19>
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19>
+##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19>
+##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19>
+##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19>
+##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19>
+##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19>
+##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19>
+##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19>
+##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19>
+##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19>
+##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19>
+##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19>
+##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19>
+##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19>
+##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19>
+##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19>
+##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19>
+##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19>
+##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19>
+##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19>
+##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19>
+##contig=<ID=chrX,length=155270560,assembly=hg19>
+##contig=<ID=chrY,length=59373566,assembly=hg19>
+##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT">
+##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf GRCh37.66 test4.vep.vcf "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	1_dad	1_mom	1_kid	2_dad	2_mom	2_kid	3_dad	3_mom	3_kid
+chr10	1142208	.	T	C	3404.3	.	AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566)	GT:AD:DP:GQ:PL	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	1142209	.	T	C	3404.3	.	AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:59:87.16:940,87,0	0/1:0,29:49:78.20:899,78,0	1/1:0,24:64:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	48003992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:97.16:940,87,0	0/0:0,29:29:98.20:899,78,0	0/1:0,24:24:96.14:729,66,0
+chr10	48004992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	135336656	.	G	A	38.34	.	AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_rec.ped	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,10 @@
+#family_id	sample_id	paternal_id	maternal_id	sex	phenotype
+1	1_dad	0	0	-1	1
+1	1_mom	0	0	-1	1
+1	1_kid	1_dad	1_mom	-1	2
+2	2_dad	0	0	-1	1
+2	2_mom	0	0	-1	1
+2	2_kid	2_dad	2_mom	-1	2
+3	3_dad	0	0	-1	1
+3	3_mom	0	0	-1	1
+3	3_kid	3_dad	3_mom	-1	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_rec.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,127 @@
+##fileformat=VCFv4.1
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false"
+##contig=<ID=chr1,length=249250621,assembly=hg19>
+##contig=<ID=chr10,length=135534747,assembly=hg19>
+##contig=<ID=chr11,length=135006516,assembly=hg19>
+##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19>
+##contig=<ID=chr12,length=133851895,assembly=hg19>
+##contig=<ID=chr13,length=115169878,assembly=hg19>
+##contig=<ID=chr14,length=107349540,assembly=hg19>
+##contig=<ID=chr15,length=102531392,assembly=hg19>
+##contig=<ID=chr16,length=90354753,assembly=hg19>
+##contig=<ID=chr17,length=81195210,assembly=hg19>
+##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19>
+##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19>
+##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19>
+##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19>
+##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19>
+##contig=<ID=chr18,length=78077248,assembly=hg19>
+##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19>
+##contig=<ID=chr19,length=59128983,assembly=hg19>
+##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19>
+##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19>
+##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19>
+##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19>
+##contig=<ID=chr2,length=243199373,assembly=hg19>
+##contig=<ID=chr20,length=63025520,assembly=hg19>
+##contig=<ID=chr21,length=48129895,assembly=hg19>
+##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19>
+##contig=<ID=chr22,length=51304566,assembly=hg19>
+##contig=<ID=chr3,length=198022430,assembly=hg19>
+##contig=<ID=chr4,length=191154276,assembly=hg19>
+##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19>
+##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19>
+##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19>
+##contig=<ID=chr5,length=180915260,assembly=hg19>
+##contig=<ID=chr6,length=171115067,assembly=hg19>
+##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19>
+##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19>
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19>
+##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19>
+##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19>
+##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19>
+##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19>
+##contig=<ID=chr7,length=159138663,assembly=hg19>
+##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19>
+##contig=<ID=chr8,length=146364022,assembly=hg19>
+##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19>
+##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19>
+##contig=<ID=chr9,length=141213431,assembly=hg19>
+##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19>
+##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19>
+##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19>
+##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19>
+##contig=<ID=chrM,length=16571,assembly=hg19>
+##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19>
+##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19>
+##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19>
+##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19>
+##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19>
+##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19>
+##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19>
+##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19>
+##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19>
+##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19>
+##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19>
+##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19>
+##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19>
+##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19>
+##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19>
+##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19>
+##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19>
+##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19>
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19>
+##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19>
+##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19>
+##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19>
+##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19>
+##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19>
+##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19>
+##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19>
+##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19>
+##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19>
+##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19>
+##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19>
+##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19>
+##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19>
+##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19>
+##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19>
+##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19>
+##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19>
+##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19>
+##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19>
+##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19>
+##contig=<ID=chrX,length=155270560,assembly=hg19>
+##contig=<ID=chrY,length=59373566,assembly=hg19>
+##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT">
+##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf GRCh37.66 test4.vep.vcf "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	1_dad	1_mom	1_kid	2_dad	2_mom	2_kid	3_dad	3_mom	3_kid
+chr10	1142208	.	T	C	3404.3	.	AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:59:87.16:940,87,0	0/1:0,29:49:78.20:899,78,0	1/1:0,24:64:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	48003992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	48004992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	135336656	.	G	A	38.34	.	AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	135369532	.	T	C	122.62	.	AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	
0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.comp_het.ped	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,13 @@
+#Family_ID	Individual_ID	Paternal_ID	Maternal_ID	Sex	Phenotype	ethnicity
+1	child_1	dad_1	mom_1	1	2	caucasian
+2	child_2	dad_2	mom_2	1	2	caucasian
+2	dad_2	-9	-9	1	1	caucasian
+2	mom_2	-9	-9	2	1	caucasian
+1	dad_1	-9	-9	1	1	caucasian
+1	mom_1	-9	-9	2	1	caucasian
+3	child_3	dad_3	mom_3	1	2	caucasian
+3	dad_3	-9	-9	1	1	caucasian
+3	mom_3	-9	-9	2	1	caucasian
+4	child_4	dad_4	mom_4	1	2	caucasianNEuropean
+4	dad_4	-9	-9	1	1	caucasianNEuropean
+4	mom_4	-9	-9	2	1	caucasianNEuropean
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.comp_het.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,120 @@
+##fileformat=VCFv4.1
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/all.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=20 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false"
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=GL000191.1,length=106433,assembly=b37>
+##contig=<ID=GL000192.1,length=547496,assembly=b37>
+##contig=<ID=GL000193.1,length=189789,assembly=b37>
+##contig=<ID=GL000194.1,length=191469,assembly=b37>
+##contig=<ID=GL000195.1,length=182896,assembly=b37>
+##contig=<ID=GL000196.1,length=38914,assembly=b37>
+##contig=<ID=GL000197.1,length=37175,assembly=b37>
+##contig=<ID=GL000198.1,length=90085,assembly=b37>
+##contig=<ID=GL000199.1,length=169874,assembly=b37>
+##contig=<ID=GL000200.1,length=187035,assembly=b37>
+##contig=<ID=GL000201.1,length=36148,assembly=b37>
+##contig=<ID=GL000202.1,length=40103,assembly=b37>
+##contig=<ID=GL000203.1,length=37498,assembly=b37>
+##contig=<ID=GL000204.1,length=81310,assembly=b37>
+##contig=<ID=GL000205.1,length=174588,assembly=b37>
+##contig=<ID=GL000206.1,length=41001,assembly=b37>
+##contig=<ID=GL000207.1,length=4262,assembly=b37>
+##contig=<ID=GL000208.1,length=92689,assembly=b37>
+##contig=<ID=GL000209.1,length=159169,assembly=b37>
+##contig=<ID=GL000210.1,length=27682,assembly=b37>
+##contig=<ID=GL000211.1,length=166566,assembly=b37>
+##contig=<ID=GL000212.1,length=186858,assembly=b37>
+##contig=<ID=GL000213.1,length=164239,assembly=b37>
+##contig=<ID=GL000214.1,length=137718,assembly=b37>
+##contig=<ID=GL000215.1,length=172545,assembly=b37>
+##contig=<ID=GL000216.1,length=172294,assembly=b37>
+##contig=<ID=GL000217.1,length=172149,assembly=b37>
+##contig=<ID=GL000218.1,length=161147,assembly=b37>
+##contig=<ID=GL000219.1,length=179198,assembly=b37>
+##contig=<ID=GL000220.1,length=161802,assembly=b37>
+##contig=<ID=GL000221.1,length=155397,assembly=b37>
+##contig=<ID=GL000222.1,length=186861,assembly=b37>
+##contig=<ID=GL000223.1,length=180455,assembly=b37>
+##contig=<ID=GL000224.1,length=179693,assembly=b37>
+##contig=<ID=GL000225.1,length=211173,assembly=b37>
+##contig=<ID=GL000226.1,length=15008,assembly=b37>
+##contig=<ID=GL000227.1,length=128374,assembly=b37>
+##contig=<ID=GL000228.1,length=129120,assembly=b37>
+##contig=<ID=GL000229.1,length=19913,assembly=b37>
+##contig=<ID=GL000230.1,length=43691,assembly=b37>
+##contig=<ID=GL000231.1,length=27386,assembly=b37>
+##contig=<ID=GL000232.1,length=40652,assembly=b37>
+##contig=<ID=GL000233.1,length=45941,assembly=b37>
+##contig=<ID=GL000234.1,length=40531,assembly=b37>
+##contig=<ID=GL000235.1,length=34474,assembly=b37>
+##contig=<ID=GL000236.1,length=41934,assembly=b37>
+##contig=<ID=GL000237.1,length=45867,assembly=b37>
+##contig=<ID=GL000238.1,length=39939,assembly=b37>
+##contig=<ID=GL000239.1,length=33824,assembly=b37>
+##contig=<ID=GL000240.1,length=41933,assembly=b37>
+##contig=<ID=GL000241.1,length=42152,assembly=b37>
+##contig=<ID=GL000242.1,length=43523,assembly=b37>
+##contig=<ID=GL000243.1,length=43341,assembly=b37>
+##contig=<ID=GL000244.1,length=39929,assembly=b37>
+##contig=<ID=GL000245.1,length=36651,assembly=b37>
+##contig=<ID=GL000246.1,length=38154,assembly=b37>
+##contig=<ID=GL000247.1,length=36422,assembly=b37>
+##contig=<ID=GL000248.1,length=39786,assembly=b37>
+##contig=<ID=GL000249.1,length=38502,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##reference=file:///home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta
+##SnpEffVersion="SnpEff 3.2 (build 2013-03-14), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf GRCh37.66 /if2/arq5x/cphg-quinlan/projects/sms-elsea/varCalling/all.vcf "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon  | GenotypeNum [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	child_1	child_2	dad_2	mom_2	dad_1	mom_1	child_3	dad_3	mom_3	child_4	dad_4	mom_4
+1	16977	.	G	A	2022.88	.	AC=9;AF=0.375;AN=24;BaseQRankSum=-25.424;DP=2999;DS;Dels=0.00;FS=4.077;HRun=0;HaplotypeScore=1.6017;InbreedingCoeff=-0.5953;MQ=10.20;MQ0=124;MQRankSum=0.084;QD=0.90;ReadPosRankSum=0.727;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|7|1)	GT:AD:DP:GQ:PL	0/1:181,69:250:4.24:4,0,401	0/0:190,60:250:5.21:0,5,708	0/1:167,83:250:72.66:73,0,721	0/0:187,63:250:63.12:0,63,633	0/0:221,29:250:96.57:0,97,1729	0/1:160,90:250:99:175,0,537	0/1:177,73:250:99:412,0,723	0/1:183,67:250:99:209,0,838	0/1:174,76:250:99:284,0,844	0/1:194,53:248:26.15:26,0,756	0/1:149,101:250:99:681,0,496	0/1:184,66:250:99:229,0,561
+1	17222	.	A	G	225.47	.	AC=4;AF=0.167;AN=24;BaseQRankSum=-1.725;DP=2985;DS;Dels=0.00;FS=6.073;HRun=0;HaplotypeScore=1.1157;InbreedingCoeff=-0.2212;MQ=14.57;MQ0=605;MQRankSum=-3.790;QD=0.23;ReadPosRankSum=1.860;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|8|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1)	GT:AD:DP:GQ:PL	0/0:224,25:250:44.94:0,45,987	0/1:190,44:241:55.80:56,0,776	0/0:230,17:250:90.16:0,90,940	0/1:212,34:250:36.11:36,0,754	0/0:191,55:248:18.04:0,18,1324	0/0:232,17:250:84.90:0,85,1270	0/0:223,27:250:99:0,129,1563	0/0:237,13:250:99:0,107,1537	0/1:207,42:249:99:106,0,1096	0/1:204,43:247:91.42:91,0,1280	0/0:229,20:249:99:0,113,1404	0/0:221,29:250:3.04:0,3,1183
+1	17363	.	TTCT	T	628.85	.	AC=2;AF=0.083;AN=24;BaseQRankSum=4.577;DP=2951;DS;FS=10.112;HRun=0;HaplotypeScore=316.7300;InbreedingCoeff=-0.0909;MQ=22.73;MQ0=27;MQRankSum=1.681;QD=1.33;ReadPosRankSum=0.329;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|8|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|5|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000438504|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1)	GT:AD:DP:GQ:PL	0/0:250,0:70:99:0,169,3371	0/0:250,0:78:99:0,199,4105	0/0:250,0:70:99:0,187,3984	0/0:250,0:63:99:0,166,3406	0/0:243,0:89:99:0,262,5364	0/0:250,0:62:99:0,172,3575	0/0:250,0:79:99:0,223,4726	0/0:249,1:80:99:0,181,4749	0/0:234,1:85:99:0,241,5015	0/1:205,27:73:99:371,0,3406	0/1:225,16:74:99:318,0,3528	0/0:250,0:64:99:0,193,3988
+1	17563	.	G	A	79.88	.	AC=1;AF=0.042;AN=24;BaseQRankSum=3.375;DP=3000;DS;Dels=0.00;FS=9.603;HRun=0;HaplotypeScore=0.9909;InbreedingCoeff=-0.0518;MQ=30.15;MQ0=97;MQRankSum=-1.386;QD=0.32;ReadPosRankSum=-0.446;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1)	GT:AD:DP:GQ:PL	0/0:243,7:250:99:0,202,2559	0/0:246,4:250:99:0,187,2479	0/0:250,0:250:99:0,256,3232	0/0:249,1:250:99:0,239,3309	0/0:231,18:250:21.61:0,22,2802	0/0:248,2:250:99:0,214,2657	0/1:232,18:250:99:121,0,1801	0/0:249,1:250:99:0,263,3184	0/0:238,12:250:6.72:0,7,2591	0/0:250,0:250:99:0,343,3875	0/0:247,3:250:99:0,280,3428	0/0:250,0:250:99:0,301,3416
+1	17697	.	G	C	255.3	.	AC=4;AF=0.167;AN=24;BaseQRankSum=-1.815;DP=2999;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=1.7379;InbreedingCoeff=-0.2256;MQ=15.66;MQ0=86;MQRankSum=2.999;QD=0.26;ReadPosRankSum=-2.160;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1)	GT:AD:DP:GQ:PL	0/1:214,36:250:99:132,0,595	0/0:249,1:250:78.22:0,78,967	0/0:248,1:250:81.24:0,81,1022	0/1:216,34:250:91.94:92,0,404	0/0:226,24:250:56.86:0,57,1099	0/0:245,4:249:23.87:0,24,704	0/0:225,25:250:2.97:0,3,746	0/1:216,33:250:80.80:81,0,810	0/0:245,4:250:73.97:0,74,1201	0/1:222,28:250:12.96:13,0,920	0/0:249,1:250:87.24:0,87,1000	0/0:182,66:250:8.40:0,8,703
+1	17722	.	A	G	32.03	.	AC=3;AF=0.125;AN=24;BaseQRankSum=0.923;DP=2937;DS;Dels=0.00;FS=0.000;HRun=2;HaplotypeScore=1.9343;InbreedingCoeff=-0.1033;MQ=14.33;MQ0=62;MQRankSum=-4.474;QD=0.04;ReadPosRankSum=0.750;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1)	GT:AD:DP:GQ:PL	0/0:247,3:250:51.15:0,51,628	0/0:250,0:250:60.18:0,60,746	0/0:250,0:250:54.17:0,54,685	0/0:249,1:250:54.15:0,54,644	0/0:233,0:233:81.25:0,81,1014	0/0:235,4:239:45.13:0,45,549	0/0:249,0:250:45.13:0,45,514	0/0:249,1:250:78.20:0,78,853	0/0:247,0:247:90.24:0,90,1012	0/1:227,17:244:0.06:0,0,670	0/1:214,11:225:4.62:5,0,542	0/1:236,13:249:71.41:71,0,448
+1	17730	.	C	A	102.87	.	AC=5;AF=0.208;AN=24;BaseQRankSum=-11.508;DP=2968;DS;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=1.0610;InbreedingCoeff=-0.2498;MQ=13.12;MQ0=24;MQRankSum=-4.433;QD=0.08;ReadPosRankSum=1.952;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000430492|7|1)	GT:AD:DP:GQ:PL	0/0:246,4:250:36.11:0,36,464	0/0:244,5:249:57.17:0,57,727	0/0:244,6:250:48.15:0,48,628	0/0:247,3:250:51.13:0,51,621	0/1:242,8:250:26.26:26,0,830	0/0:246,4:250:48.14:0,48,601	0/1:238,11:250:48.54:49,0,386	0/0:244,5:249:42.12:0,42,491	0/0:239,3:243:23.74:0,24,609	0/1:221,13:234:13.85:14,0,482	0/1:232,11:243:7:7,0,501	0/1:238,12:250:73.18:73,0,368
+1	17746	.	A	G	607.7	.	AC=8;AF=0.333;AN=24;BaseQRankSum=13.191;DP=2993;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=0.4155;InbreedingCoeff=-0.5280;MQ=11.87;MQ0=5;MQRankSum=-4.672;QD=0.30;ReadPosRankSum=3.574;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|3|1)	GT:AD:DP:GQ:PL	0/0:206,44:250:7.45:0,7,373	0/0:221,29:250:5.12:0,5,468	0/0:224,26:250:3.61:0,4,382	0/0:216,34:250:9.49:0,9,405	0/1:243,7:250:43.06:43,0,781	0/1:195,55:250:99:113,0,232	0/1:218,32:250:44.30:44,0,212	0/1:219,31:250:0.03:0,0,330	0/1:211,39:250:74.42:74,0,236	0/1:209,34:243:68.40:68,0,298	0/1:172,77:249:99:193,0,137	0/1:219,30:250:99:137,0,197
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.de_novo.ped	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,9 @@
+1	1_dad	0	0	-1	1
+1	1_mom	0	0	-1	1
+1	1_kid	1_dad	1_mom	-1	2
+2	2_dad	0	0	-1	1
+2	2_mom	0	0	-1	1
+2	2_kid	2_dad	2_mom	-1	2
+3	3_dad	0	0	-1	1
+3	3_mom	0	0	-1	1
+3	3_kid	3_dad	3_mom	-1	2
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.de_novo.vcf	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,127 @@
+##fileformat=VCFv4.1
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false"
+##contig=<ID=chr1,length=249250621,assembly=hg19>
+##contig=<ID=chr10,length=135534747,assembly=hg19>
+##contig=<ID=chr11,length=135006516,assembly=hg19>
+##contig=<ID=chr11_gl000202_random,length=40103,assembly=hg19>
+##contig=<ID=chr12,length=133851895,assembly=hg19>
+##contig=<ID=chr13,length=115169878,assembly=hg19>
+##contig=<ID=chr14,length=107349540,assembly=hg19>
+##contig=<ID=chr15,length=102531392,assembly=hg19>
+##contig=<ID=chr16,length=90354753,assembly=hg19>
+##contig=<ID=chr17,length=81195210,assembly=hg19>
+##contig=<ID=chr17_ctg5_hap1,length=1680828,assembly=hg19>
+##contig=<ID=chr17_gl000203_random,length=37498,assembly=hg19>
+##contig=<ID=chr17_gl000204_random,length=81310,assembly=hg19>
+##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19>
+##contig=<ID=chr17_gl000206_random,length=41001,assembly=hg19>
+##contig=<ID=chr18,length=78077248,assembly=hg19>
+##contig=<ID=chr18_gl000207_random,length=4262,assembly=hg19>
+##contig=<ID=chr19,length=59128983,assembly=hg19>
+##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19>
+##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19>
+##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19>
+##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19>
+##contig=<ID=chr2,length=243199373,assembly=hg19>
+##contig=<ID=chr20,length=63025520,assembly=hg19>
+##contig=<ID=chr21,length=48129895,assembly=hg19>
+##contig=<ID=chr21_gl000210_random,length=27682,assembly=hg19>
+##contig=<ID=chr22,length=51304566,assembly=hg19>
+##contig=<ID=chr3,length=198022430,assembly=hg19>
+##contig=<ID=chr4,length=191154276,assembly=hg19>
+##contig=<ID=chr4_ctg9_hap1,length=590426,assembly=hg19>
+##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19>
+##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19>
+##contig=<ID=chr5,length=180915260,assembly=hg19>
+##contig=<ID=chr6,length=171115067,assembly=hg19>
+##contig=<ID=chr6_apd_hap1,length=4622290,assembly=hg19>
+##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19>
+##contig=<ID=chr6_dbb_hap3,length=4610396,assembly=hg19>
+##contig=<ID=chr6_mann_hap4,length=4683263,assembly=hg19>
+##contig=<ID=chr6_mcf_hap5,length=4833398,assembly=hg19>
+##contig=<ID=chr6_qbl_hap6,length=4611984,assembly=hg19>
+##contig=<ID=chr6_ssto_hap7,length=4928567,assembly=hg19>
+##contig=<ID=chr7,length=159138663,assembly=hg19>
+##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19>
+##contig=<ID=chr8,length=146364022,assembly=hg19>
+##contig=<ID=chr8_gl000196_random,length=38914,assembly=hg19>
+##contig=<ID=chr8_gl000197_random,length=37175,assembly=hg19>
+##contig=<ID=chr9,length=141213431,assembly=hg19>
+##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19>
+##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19>
+##contig=<ID=chr9_gl000200_random,length=187035,assembly=hg19>
+##contig=<ID=chr9_gl000201_random,length=36148,assembly=hg19>
+##contig=<ID=chrM,length=16571,assembly=hg19>
+##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19>
+##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19>
+##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19>
+##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19>
+##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19>
+##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19>
+##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19>
+##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19>
+##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19>
+##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19>
+##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19>
+##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19>
+##contig=<ID=chrUn_gl000223,length=180455,assembly=hg19>
+##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19>
+##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19>
+##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19>
+##contig=<ID=chrUn_gl000227,length=128374,assembly=hg19>
+##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19>
+##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19>
+##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19>
+##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19>
+##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19>
+##contig=<ID=chrUn_gl000233,length=45941,assembly=hg19>
+##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19>
+##contig=<ID=chrUn_gl000235,length=34474,assembly=hg19>
+##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19>
+##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19>
+##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19>
+##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19>
+##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19>
+##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19>
+##contig=<ID=chrUn_gl000242,length=43523,assembly=hg19>
+##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19>
+##contig=<ID=chrUn_gl000244,length=39929,assembly=hg19>
+##contig=<ID=chrUn_gl000245,length=36651,assembly=hg19>
+##contig=<ID=chrUn_gl000246,length=38154,assembly=hg19>
+##contig=<ID=chrUn_gl000247,length=36422,assembly=hg19>
+##contig=<ID=chrUn_gl000248,length=39786,assembly=hg19>
+##contig=<ID=chrUn_gl000249,length=38502,assembly=hg19>
+##contig=<ID=chrX,length=155270560,assembly=hg19>
+##contig=<ID=chrY,length=59373566,assembly=hg19>
+##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Consequence|Codons|Amino_acids|Gene|HGNC|Feature|EXON|PolyPhen|SIFT">
+##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  -i vcf -o vcf GRCh37.66 test4.vep.vcf "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	1_dad	1_mom	1_kid	2_dad	2_mom	2_kid	3_dad	3_mom	3_kid
+chr10	1142208	.	T	C	3404.3	.	AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566)	GT:AD:DP:GQ:PL	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,23:24:66.14:729,66,0	0/0:1,37:59:87.16:940,87,0	0/0:0,29:49:78.20:899,78,0	0/0:0,22:64:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	1/1:0,24:24:66.14:729,66,0
+chr10	48003992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,23:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,23:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,23:24:66.14:729,66,0
+chr10	48004992	.	C	T	1047.87	.	AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)	GT:AD:DP:GQ:PL	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,23:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	1/1:0,23:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,23:24:66.14:729,66,0
+chr10	135336656	.	G	A	38.34	.	AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)	GT:AD:DP:GQ:PL	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0	0/1:1,37:39:87.16:940,87,0	0/1:0,29:29:78.20:899,78,0	0/1:0,24:24:66.14:729,66,0
+chr10	135369532	.	T	C	122.62	.	AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551)	GT:AD:DP:GQ:PL	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	0/1:0,22:24:66.14:729,66,0	0/0:1,37:39:87.16:940,87,0	0/0:0,29:29:78.20:899,78,0	
0/1:0,21:24:66.14:729,66,0	0/0:1,37:50:87.16:940,87,0	0/0:0,29:50:78.20:899,78,0	0/1:0,24:50:66.14:729,66,0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-gemini-testdata.sh	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,81 @@
+# Build the downsampled annotation files and the test databases used by the
+# GEMINI tool tests.
+#
+# Usage: build-gemini-testdata.sh [PATH_TO_FULL_GEMINI_ANNOTATION_FILES]
+#
+# With a path argument, the annotation files found there are first reduced to
+# $GENOMIC_REGION (or to the genes located in it) and written to $OUT_PTH;
+# without one, only the test databases are rebuilt.
+
+cd "$(dirname "$0")"
+
+export GEMINI_CONFIG=../test-cache
+OUT_PTH=$GEMINI_CONFIG/gemini/data
+GENOMIC_REGION=3:187000000-187500000
+
+
+if [ -n "$1" ]; then
+
+IN_PTH="$1"
+# downsample all vcf and bed annotation files to the region of interest and reindex
+for vcf in `ls $IN_PTH/*.gz | grep -v hprd_interaction_edges.gz -`
+do
+    python ./shrink_tabix.py $vcf -r $GENOMIC_REGION -o $OUT_PTH/`basename $vcf`
+done
+
+# downsample gene_table files to the region of interest
+echo "$IN_PTH/summary_gene_table_v75 -> $OUT_PTH/summary_gene_table_v75"
+python ./shrink_simple_tab.py $IN_PTH/summary_gene_table_v75 -r chr$GENOMIC_REGION -c 0 8 9 -n 1 -o $OUT_PTH/summary_gene_table_v75
+
+echo "$IN_PTH/detailed_gene_table_v75 -> $OUT_PTH/detailed_gene_table_v75"
+python ./shrink_simple_tab.py $IN_PTH/detailed_gene_table_v75 -r chr$GENOMIC_REGION -c 0 11 12 -n 1 -o $OUT_PTH/detailed_gene_table_v75
+
+# filter kegg_pathway files to retain only records of the genes listed
+# in the downsampled summary_gene_table
+for kegg in `ls $IN_PTH/kegg_pathways_*`
+do
+    echo "$kegg -> $OUT_PTH/`basename $kegg`"
+    cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $kegg > $OUT_PTH/`basename $kegg`
+done
+
+# filter hprd_interaction file to retain only records of the genes listed
+# in the downsampled summary_gene_table
+echo "$IN_PTH/hprd_interaction_edges.gz -> $OUT_PTH/hprd_interaction_edges.gz"
+bgzip -dc $IN_PTH/hprd_interaction_edges.gz > $OUT_PTH/hprd_interaction_edges
+cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Ff - $OUT_PTH/hprd_interaction_edges | bgzip > $OUT_PTH/hprd_interaction_edges.gz
+rm $OUT_PTH/hprd_interaction_edges
+
+# filter cancer_gene_census file to retain only records of the genes listed
+# in the downsampled summary_gene_table;
+# TO DO: make the filter stricter by looking for matches only in the first
+# column of the cancer_gene_census file (but the file is relatively small anyway)
+echo "$IN_PTH/cancer_gene_census.20140120.tsv -> $OUT_PTH/cancer_gene_census.20140120.tsv"
+cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $IN_PTH/cancer_gene_census.20140120.tsv > $OUT_PTH/cancer_gene_census.20140120.tsv
+
+else
+    echo "no path to gemini annotation files provided - only building test databases"
+fi
+
+
+# now use gemini load to build the test databases
+echo "Building gemini test databases"
+echo "Test databases for gemini_load"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff ../gemini_load_result1.db
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff --skip-gene-tables --no-load-genotypes ../gemini_load_result2.db
+echo "Test database for gemini_amend"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -t snpEff ../gemini_amend_input.db
+echo "Test database for gemini_annotate"
+bgzip -c build-data/anno.bed > build-data/anno.bed.gz
+tabix --force -p bed build-data/anno.bed.gz
+cp ../gemini_load_result1.db ../gemini_annotate_result.db
+gemini --annotation-dir $OUT_PTH annotate -f build-data/anno.bed.gz -c anno5 -a count ../gemini_annotate_result.db
+echo "Test database for gemini_set_somatic"
+cp ../gemini_load_result1.db ../gemini_is_somatic_result.db
+gemini set_somatic --min-somatic-score 5.65 ../gemini_is_somatic_result.db
+echo "Test database for gemini_de_novo and gemini_mendel_errors"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.de_novo.vcf -p build-data/test.de_novo.ped -t snpEff ../gemini_de_novo_input.db
+echo "Test database for gemini_comp_hets"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.comp_het.vcf -p build-data/test.comp_het.ped -t snpEff ../gemini_comphets_input.db
+echo "Test databases for gemini_autosomal"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -p build-data/test.auto_rec.ped -t snpEff ../gemini_auto_rec_input.db
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_dom.vcf -p build-data/test.auto_dom.ped -t snpEff ../gemini_auto_dom_input.db
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/shrink_simple_tab.py	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,95 @@
+"""Reduce a tab-separated file to the records located in a genomic region.
+
+A record is kept when its start or its stop coordinate lies inside the
+region given as CHROM:START-STOP; leading header lines are copied verbatim.
+"""
+
+from __future__ import print_function
+
+import argparse
+from functools import partial
+from itertools import islice
+
+
+def keep_line(line, pos_cols, region):
+    """Tell whether a record starts or ends inside a region.
+
+    line is one raw (bytes) tab-separated record, pos_cols holds the indices
+    of its chromosome, start and stop columns, and region is a
+    [chrom (bytes), start (int), stop (int)] sequence.
+
+    Returns True if the record is on the region's chromosome and its start
+    or its stop lies strictly between the region's boundaries, else False.
+    NOTE: a record that spans the whole region (starting before and ending
+    after it) has neither coordinate inside and is therefore not kept.
+    """
+    fields = line.rstrip().split(b'\t')
+    if fields[pos_cols[0]] != region[0]:  # different chromosome
+        return False
+    lower, upper = region[1], region[2]
+    return (
+        lower < int(fields[pos_cols[1]]) < upper
+        or lower < int(fields[pos_cols[2]]) < upper
+    )
+
+
+def main(infile, ofile, num_header_lines, line_filter=None):
+    """Copy the header and all records accepted by a filter to a new file.
+
+    infile and ofile are the paths of the input and output files,
+    num_header_lines is the number of leading lines to copy unconditionally,
+    and line_filter is a callable taking one raw line and returning a truthy
+    value for the lines to keep. If line_filter is omitted, the module-level
+    keep_line is called with the line as its only argument, so that name has
+    to be bound to a ready-to-use filter by then.
+    """
+    if line_filter is None:
+        line_filter = keep_line
+    print(infile, '->', ofile)
+    with open(infile, 'rb') as i:
+        with open(ofile, 'wb') as o:
+            # copy header lines; islice simply stops if the input has fewer
+            # lines than announced instead of raising StopIteration
+            o.writelines(islice(i, num_header_lines))
+            for line in i:
+                if line_filter(line):
+                    o.write(line)
+
+
+if __name__ == '__main__':
+    p = argparse.ArgumentParser()
+    p.add_argument('infile')
+    p.add_argument(
+        '-r', '--region',
+        required=True,
+        help='the region of the input file to rewrite'
+    )
+    p.add_argument(
+        '-o', '--ofile',
+        required=True,
+        help="the name of the output file"
+    )
+    p.add_argument(
+        '-c', '--cols',
+        nargs=3, type=int, required=True,
+        help="the columns of the input file specifying chrom, start and stop, "
+             "respectively"
+    )
+    p.add_argument(
+        '-n', '--num-header-lines',
+        type=int, default=0,
+        help='the number of header lines present in the input; These will '
+             'always be copied over to the new file.'
+    )
+    args = vars(p.parse_args())
+
+    chrom, reg = args['region'].split(':')
+    region = [chrom.encode()] + [int(x) for x in reg.split('-')]
+    # bind column layout and region here and pass the resulting filter to
+    # main() explicitly instead of rebinding the module-level keep_line
+    line_filter = partial(keep_line, pos_cols=args['cols'], region=region)
+
+    main(
+        args['infile'], args['ofile'], args['num_header_lines'],
+        line_filter=line_filter
+    )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/shrink_tabix.py	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,50 @@
+"""Cut a tabix-indexed file down to one genomic region and index the result."""
+
+from __future__ import print_function
+
+import argparse
+
+import pysam
+
+
+def main(infile, ofile, region):
+    """Write the header and the records of region from infile to ofile.
+
+    The output is first written as plain text (a trailing '.gz' is dropped
+    from ofile for that), then handed to pysam.tabix_index together with the
+    format reported for the input file.
+    """
+    print(infile, '->', ofile)
+    with pysam.Tabixfile(infile) as source:
+        preset = source.format.lower()
+        if preset == 'sam':
+            # input reported as 'sam' is indexed with the 'bed' preset
+            preset = 'bed'
+        if ofile.endswith('.gz'):
+            ofile = ofile[:-3]
+        with open(ofile, 'w') as sink:
+            try:
+                records = source.fetch(region=region)
+            except ValueError:
+                # retry with a 'chr'-prefixed region string
+                records = source.fetch(region='chr' + region)
+            for header_line in source.header:
+                sink.write(header_line + '\n')
+            for record in records:
+                sink.write(str(record) + '\n')
+    pysam.tabix_index(ofile, preset=preset, force=True)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('infile')
+    parser.add_argument(
+        '-r', '--region', required=True,
+        help='the region of the input file to rewrite'
+    )
+    parser.add_argument(
+        '-o', '--ofile', required=True,
+        help="the name of the output file"
+    )
+    options = parser.parse_args()
+    main(options.infile, options.ofile, options.region)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_versioned_databases.loc.sample	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,3 @@
+## GEMINI versioned databases
+#DownloadDate	dbkey	DBversion	Description	Path
+#2018-07-08	hg19	181	GEMINI annotations (2018-07-08 snapshot)	/path/to/data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="tool-data/gemini_versioned_databases.loc" />
+    </table>
+</tables>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Jan 11 17:50:55 2019 -0500
@@ -0,0 +1,8 @@
+<tables>
+    <!-- Location of gemini annotation files for testing -->
+    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="${__HERE__}/test-data/gemini_versioned_databases.loc" />
+    </table>
+</tables>
+