changeset 0:685b3408c181 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
author iuc
date Thu, 18 Feb 2016 08:56:22 -0500
parents
children 3859552a63d8
files gemini_annotate.xml gemini_macros.xml readme.rst repository_dependencies.xml static/images/gemini_mendel_errors.png test-data/anno.bed test-data/gemini_actionable_mutations_result.tabular test-data/gemini_amend_input.db test-data/gemini_amend_input.ped test-data/gemini_amend_result.db test-data/gemini_annotate_input.db test-data/gemini_annotate_result.tabular test-data/gemini_autosomal_dominant_result.tabular test-data/gemini_autosomal_input.db test-data/gemini_autosomal_recessive.tabular test-data/gemini_burden_input.db test-data/gemini_burden_result.tabular test-data/gemini_comphets_input.db test-data/gemini_comphets_result.tabular test-data/gemini_dbinfo_result.tabular test-data/gemini_de_novo_input.db test-data/gemini_de_novo_result.tabular test-data/gemini_dump_result.tabular test-data/gemini_fusions_result.tabular test-data/gemini_gene_wise_result.tabular test-data/gemini_interactions_result.tabular test-data/gemini_is_somatic_result.db test-data/gemini_load_input.vcf test-data/gemini_load_result.db test-data/gemini_lofsieve_result.tabular test-data/gemini_mendel_errors_result.tabular test-data/gemini_pathways_result.tabular test-data/gemini_qc_result.tabular test-data/gemini_query_result.tabular test-data/gemini_region_result.tabular test-data/gemini_roh_result.tabular test-data/gemini_stats_result.tabular test-data/gemini_windower_input.db tool-data/gemini_databases.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 40 files changed, 958 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_annotate.xml	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,215 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>adding your own custom annotations</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">annotate</token>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+    ## The BED annotation is bgzip-compressed and tabix-indexed first; gemini then reads tabixed.gz via -f.
+    bgzip -c "$annotate_source" > tabixed.gz &&
+    tabix -p bed tabixed.gz &&
+    ## The column name is quoted because its sanitizer replaces invalid characters with spaces.
+        gemini @BINARY@
+            -f tabixed.gz
+            -c "$column_name"
+            -a $a.a_selector
+            #if $a.a_selector == 'extract':
+                -t $a.column_type
+                -e $a.column_extracts
+                -o $a.operation
+            #end if
+            $region_only
+            "${ infile }"
+            > "${ outfile }"
+]]>
+
+    </command>
+    <inputs>
+        <expand macro="infile" />
+        <param name="annotate_source" type="data" format="bed" label="File containing the annotations in BED format" help="(-f)"/>
+
+        <param name="column_name" type="text" value=""
+            label="The name of the column to be added to the variant table" 
+            help=" If the input file is a VCF, then this is the name of the info field to pull. (-c)">
+            <sanitizer invalid_char=" ">
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+        </param>
+        <conditional name="a">
+            <param name="a_selector" type="select" label="How should the annotation file be used?" help="(-a)">
+                <option value="boolean">Did a variant overlap a region or not? (boolean)</option>
+                <option value="count">How many regions did a variant overlap? (count)</option>
+                <option value="extract" selected="True">Extract specific values from a BED file. (extract)</option>
+            </param>
+            <when value="extract">
+
+                <param name="column_extracts" label="Column to extract information from for list annotations"
+                    type="data_column" data_ref="annotate_source" force_select="true" help="(-e)"/>
+
+
+                <param name="column_type" type="select" label="What data type(s) should be used to represent the new values in the database?"
+                    help="(-t)">
+                    <option value="float">Decimal precision number (float)</option>
+                    <option value="integer">Integer number (integer)</option>
+                    <option value="text">Text columns such as “valid”, “yes” (text)</option>
+                </param>
+
+                <param name="operation" type="select" label="Operation to apply to the extract column values ..."
+                    help="in the event that a variant overlaps multiple annotations in your annotation file. (-o)"> <!-- the selected value is passed to gemini via the -o flag -->
+                    <option value="mean">Compute the average of the (numeric) values</option>
+                    <option value="sum">Compute the sum of the (numeric) values</option>
+                    <option value="median">Compute the median of the (numeric) values</option>
+                    <option value="min">Compute the minimum of the (numeric) values</option>
+                    <option value="max">Compute the maximum of the (numeric) values</option>
+                    <option value="mode">Compute the mode (most frequent value) of the (numeric) values</option>
+                    <option value="first">Use the value from the first record in the annotation file</option>
+                    <option value="last">Use the value from the last record in the annotation file</option>
+                    <option value="list">Create a comma-separated list of the observed (text) values</option>
+                    <option value="uniq_list">Create a comma-separated list of non-redundant observed (text) values</option>
+                </param>
+
+            </when>
+            <when value="boolean"/>
+            <when value="count"/>
+        </conditional>
+        <param name="region_only" argument="--region-only" type="boolean" checked="false"
+            truevalue="--region-only" falsevalue=""
+            label="If set, only region coordinates will be considered when annotating variants."
+            help="The default is to annotate using region coordinates as well as REF and ALT
+                variant values. This option is only valid if annotation is a VCF file"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="gemini_annotate_input.db" ftype="gemini.sqlite" />
+            <param name="annotate_source" value="anno.bed" />
+            <param name="a_selector" value="count" />
+            <param name="column_name" value="anno5" />
+            <output name="outfile" file="gemini_annotate_result.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+It is inevitable that researchers will want to enhance the GEMINI framework with their own, custom annotations. GEMINI provides a sub-command called annotate for exactly this purpose.
+
+**Details**
+
+It is inevitable that researchers will want to enhance the GEMINI framework with their own, custom annotations. GEMINI provides a sub-command called annotate for exactly this purpose. As long as you provide a tabix’ed annotation file in BED or VCF format, the annotate tool will, for each variant in the variants table, screen for overlaps in your annotation file and update one or more new columns in the variants table that you may specify on the command line. This is best illustrated by the following **example**.
+
+**Input files**
+
+Let’s assume you have already created a GEMINI database of a **VCF file** using the *load module*.
+
+Now, let’s imagine you have an annotated file in **BED format** (important.bed) that describes regions of the genome that are particularly relevant to your lab’s research. You would like to annotate in the GEMINI database which variants overlap these crucial regions. We want to store this knowledge in a new column in the variants table called important_variant that tracks whether a given variant overlapped (1) or did not overlap (0) intervals in your annotation file.
+
+  *To do this, you must first TABIX your BED file*
+
+**-a boolean - Did a variant overlap a region or not?**
+
+Now, you can use this *TABIX*’ed file to annotate which variants overlap your important regions. In the example below, the results will be stored in a new column called “important”. The **-a boolean** option says that you just want to track whether (1) or not (0) the variant overlapped one or more of your regions.
+
+Since a new column has been created in the database, we can now directly query the new column. In the example results below, the first and third variants overlapped a crucial region while the second did not::
+
+    chr22   100    101    1   1
+    chr22   200    201    2   0
+    chr22   300    500    3   1
+
+**-a count - How many regions did a variant overlap?**
+
+Instead of a simple yes or no, we can use the **-a count** option to count how many important regions a variant overlapped. It turns out that the 3rd variant actually overlapped two important regions::
+
+    chr22   100    101    1   1
+    chr22   200    201    2   0
+    chr22   300    500    3   2
+
+**-a extract - Extract specific values from a BED file**
+
+Lastly, we may also extract values from specific fields in a BED file (or from the INFO field in a VCF) and populate one or more new columns in the database based on overlaps with the annotation file and the values of the fields therein. To do this, we use the **-a extract** option.
+
+This is best described with an example. To set this up, let’s imagine that we have a VCF file from a different experiment and we want to annotate the variants in our GEMINI database with the allele frequency and depth tags from the INFO fields for the same variants in this other VCF file.
+
+Now that we have a proper *TABIX*’ed VCF file, we can use the **-a extract** option to populate new columns in the GEMINI database. In order to do so, we must specify:
+
+ 1) its type (e.g., text, integer, float) (**-t**)
+ 2) the field in the INFO column of the VCF file that we should use to extract data with which to populate the new column (**-e**)
+ 3) what operation should be used to summarize the data in the event of multiple overlaps in the annotation file (**-o**)
+ 4) (optionally) the name of the column we want to add (**-c**), if this is not specified, it will use the value from **-e**.
+
+For example, let’s imagine we want to create a new column called “other_allele_freq” (**-c**) using the AF field in our VCF file to populate it.
+
+This creates a new column in my.db called other_allele_freq and this new column will be a FLOAT (**-t float**). In the event of multiple records in the VCF file overlapping a variant in the database, the average (**-o mean**) of the allele frequency values from the VCF file will be used.
+
+At this point, one can query the database based on the values of the new other_allele_freq column (using **GEMINI query**).
+
+**-t TYPE - Specifying the column type(s) when using -a extract**
+
+The annotate tool will create three different types of columns via the **-t** option:
+
+ 1) Floating point columns for annotations with decimal precision as above (-t float)
+ 2) Integer columns for integral annotations (-t integer)
+ 3) Text columns for string columns such as “valid”, “yes”, etc. (-t text)
+
+  *The -t option is only valid when using the -a extract option.*
+
+**-o OPERATION - Specifying the summary operations when using -a extract**
+
+In the event of multiple overlaps between a variant and records in the annotation file, the annotate tool can summarize the values observed with multiple options:
+
+  - -o mean       Compute the average of the values. They must be numeric.
+  - -o median     Compute the median of the values. They must be numeric.
+  - -o min        Compute the minimum of the values. They must be numeric.
+  - -o max        Compute the maximum of the values. They must be numeric.
+  - -o mode       Compute the mode (most frequent value) of the values. They must be numeric.
+  - -o first      Use the value from the first record in the annotation file.
+  - -o last       Use the value from the last record in the annotation file.
+  - -o list       Create a comma-separated list of the observed values.
+  - -o uniq_list  Create a comma-separated list of the distinct observed values.
+  - -o sum        Compute the sum of the values. They must be numeric.
+
+The -o option is only valid when using the -a extract option.
+
+**Annotating with VCF**
+
+Most of the examples to this point have pulled a column from a tabix indexed BED file. It is likewise possible to pull from the INFO field of a tabix indexed VCF. The syntax is identical but the **-e** operation will specify the names of fields in the INFO column to pull. By default, those names will be used, but that can still be specified with the **-c column**.
+
+To put a DP column in the db, set:
+
+  -o list, -e DP, -t integer
+
+... and name it 'depth', set:
+
+  -o list, -e DP, -c depth, -t integer
+
+
+Missing values are allowed since we expect that in some cases an annotation VCF will not have all INFO fields specified for all variants.
+
+*We recommend decomposing and normalizing variants before annotating. See Step 1. split, left-align, and trim variants for a detailed explanation of how to do this. To do that see the GEMINI* preprocessing_ *website.*
+
+**Extracting and populating multiple columns at once**
+
+One can also extract and populate multiple columns at once by providing comma-separated lists (no spaces) of column names (**-c**), types (**-t**), numbers (**-e**), and summary operations (**-o**). For example, recall that in the VCF example above, we created a *TABIX*’ed BED file containing the allele frequency and depth values from the INFO field as the 4th and 5th columns in the BED, respectively.
+
+Instead of running the annotate tool twice (once for each column), we can run the tool once and load both columns in the same run. For example with settings:
+
+  - -a extract
+  - -c other_allele_freq,other_depth
+  - -t float,integer
+  - -e 4,5
+  - -o mean,max
+
+We can then use each of the new columns to filter variants with a *GEMINI query*:
+
+.. _preprocessing: https://gemini.readthedocs.org/en/latest/content/preprocessing.html#preprocess
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,146 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.18.1">gemini</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <version_command>gemini --version</version_command>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+
+    <xml name="annotation_dir">
+        <param name="annotation_databases" type="select" optional="True" label="Choose a gemini annotation database">
+            <options from_data_table="gemini_databases">
+                <filter type="sort_by" column="0" />
+            </options>
+        </param>
+    </xml>
+
+    <xml name="add_header_column">
+        <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" 
+            label="Add a header of column names to the output" help="(--header)"/>
+    </xml>
+
+    <xml name="radius">
+        <param name="radius" type="integer" value="3" label="Set filter for Breadth-first search (BFS) in the Protein-Protein Interaction network" help="(-r)" >
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+    <xml name="variant_mode">
+        <param name="variant_mode" type="boolean" truevalue="--var" falsevalue="" checked="False" 
+            label="Returns variant info (e.g. impact, biotype) for interacting genes" help="(--var)"/>
+    </xml>
+
+    <xml name="column_filter">
+        <conditional name="report">
+            <param name="report_selector" type="select" label="Columns to include in the report"
+                help="By default, this tool reports all columns in the variants table. One may choose to report only a subset of the columns.">
+                <option value="all" selected="True">all</option>
+                <option value="column_filter">User given columns</option>
+            </param>
+            <when value="all"/>
+            <when value="column_filter">
+                <param name="columns" type="select" display="checkboxes" multiple="True" label="Choose columns to include in the report" help="(--columns)">
+                    <option value="gene">gene</option>
+                    <option value="chrom">chrom</option>
+                    <option value="start">start</option>
+                    <option value="end">end</option>
+                    <option value="ref">ref</option>
+                    <option value="alt">alt</option>
+                    <option value="impact">impact</option>
+                    <option value="impact_severity">impact_severity</option>
+                    <option value="max_aaf_all">alternative allele frequency</option>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="filter"> <!-- optional SQL constraints; the values are read by the CMDLN_SQL_FILTER_FILTER_OPTION token -->
+        <conditional name="filter">
+            <param name="filter_selector" type="select" label="Apply additional constraints"
+                help="By default, this tool will report all variants regardless of their putative functional impact. In order to apply additional constraints on the variants returned, you can use this optional filter.">
+                <option value="no">No additional constraints</option>
+                <option value="yes">Apply additional constraints</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="filter" type="text" label="Constraints in SQL syntax" help="Conditions applied here will become WHERE clauses in the query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'. (--filter)">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="sanitize_query">
+        <sanitizer invalid_char="">
+            <valid initial="string.printable"/>
+       </sanitizer>
+    </xml>
+
+    <token name="@CMDLN_SQL_FILTER_FILTER_OPTION@">
+        #if str($filter.filter_selector) == 'yes' and $filter.filter:
+            #import pipes
+            --filter ${ pipes.quote( str( $filter.filter ) ) or "''" }
+        #end if
+    </token>
+
+    <xml name="family"> <!-- free-text parameter; empty by default -->
+        <param name="families" type="text" value="" label="Comma separated list of families to restrict the analysis to." help="e.g. Family1,Family3 (--families)"/>
+    </xml>
+
+    <xml name="lenient">
+        <param name="lenient" type="boolean" truevalue="--lenient" falsevalue="" checked="False" label="Loosen the restrictions on family structure"/>
+    </xml>
+
+    <xml name="unaffected">
+        <param name="allow_unaffected" type="boolean" truevalue="--allow-unaffected" falsevalue="" checked="False" label="Report candidates that also impact samples labeled as unaffected." help="(--allow-unaffected)"/>
+    </xml>
+
+    <xml name="min_kindreds">
+        <param name="min_kindreds" type="integer" value="1" label="The min. number of kindreds that must have a candidate variant in a gene" help="default: 1 (--min-kindreds)" />
+    </xml>
+
+    <xml name="min_sequence_depth">
+        <param name="d" type="integer" value="0" min="0" label="The minimum aligned sequence depth (genotype DP) required for each sample"
+                help="default: 0 (-d)" />
+    </xml>
+
+    <xml name="min_gq">
+        <param name="min_gq" type="integer" value="0" label="the minimum genotype quality required for each sample in a family" help="default: 0 (--min-gq)">
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+
+    <xml name="gt_pl_max"> <!-- the default of -1 is described in the help as "not set" -->
+        <param name="gt_pl_max" type="integer" value="-1" min="-1" label="The maximum phred-scaled genotype likelihood (PL) allowed for each sample in a family" help="default: -1 (not set) (--gt-pl-max)" />
+    </xml>
+    <token name="@VERSION@">0.18.1</token>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>
+            <yield />
+        </citations>
+    </xml>
+
+    <xml name="infile">
+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." >
+            <options options_filter_attribute="metadata.gemini_version" >
+                <filter type="add_value" value="@VERSION@" />
+            </options>
+            <validator type="expression" message="This version of Gemini will only work with Gemini files that are for version @VERSION@.">value is not None and value.metadata.gemini_version == "@VERSION@"</validator>
+        </param>
+    </xml>
+
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,65 @@
+=========================
+Galaxy wrapper for GEMINI
+=========================
+
+
+GEMINI: a flexible framework for exploring genome variation
+
+GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of 
+the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, 
+and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very 
+powerful system for exploring genetic variation for disease and population genetics.
+
+Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically 
+annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, 
+OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows 
+one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an 
+enhanced SQL engine.
+
+Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_.
+
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`_.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
+
+
+=======
+History
+=======
+- 0.9.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+    <repository changeset_revision="345412d58d75" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
Binary file static/images/gemini_mendel_errors.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno.bed	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,3 @@
+chr1	30547	30548
+chr1	30920	30925
+chr1	30922	30923
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_actionable_mutations_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,1 @@
+tum_name	chrom	start	end	ref	alt	gene	impact	is_somatic	in_cosmic_census	dgidb_info
Binary file test-data/gemini_amend_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend_input.ped	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,5 @@
+#family_id name paternal_id maternal_id sex phenotype ethnicity hair_color
+1 M10475    None None  1    1    None	brown
+1 M10478     M10475  M10500    2    2    None	red
+1 M10500     None    None    2    2    None	
+1 M128215    M10475  M10500    1    1    None	green
Binary file test-data/gemini_amend_result.db has changed
Binary file test-data/gemini_annotate_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_annotate_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,1 @@
+updated 10 variants
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_autosomal_dominant_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,9 @@
+gene	chrom	impact	variant_id	family_id	family_members	family_genotypes	samples	family_count
+ASAH2C	chr10	missense_variant	3	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	C/T,C/C,C/T	3_dad,3_kid	2
+ASAH2C	chr10	missense_variant	3	2	2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected)	C/C,C/T,C/T	2_mom,2_kid	2
+ASAH2C	chr10	missense_variant	4	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	C/T,C/C,C/T	3_dad,3_kid	2
+ASAH2C	chr10	missense_variant	4	2	2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected)	C/C,C/T,C/T	2_mom,2_kid	2
+SPRN	chr10	intron_variant	5	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	G/A,G/G,G/A	3_dad,3_kid	1
+WDR37	chr10	stop_lost	1	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	T/C,T/T,T/C	3_dad,3_kid	2
+WDR37	chr10	stop_lost	1	2	2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected)	T/T,T/C,T/C	2_mom,2_kid	2
+WDR37	chr10	stop_lost	2	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	T/C,T/C,T/C	3_dad,3_kid	2
Binary file test-data/gemini_autosomal_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_autosomal_recessive.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,2 @@
+gene	chrom	impact	variant_id	family_id	family_members	family_genotypes	samples	family_count
+WDR37	chr10	stop_lost	2	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/C,T/C,C/C	1_kid	1
Binary file test-data/gemini_burden_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,6 @@
+gene	T	c	Z	p_value
+SYCE1	-0.5	0.25	-1.0	0.00699300699301
+DHODH	0.0	0.0	nan	nan
+WDR37	-1.0	1.5	-0.816496580928	0.00699300699301
+ASAH2C	-0.5	0.75	-0.57735026919	0.00699300699301
+CTBP2	0.0	0.0	nan	nan
Binary file test-data/gemini_comphets_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_comphets_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,3 @@
+chrom	start	end	ref	alt	gene	impact	variant_id	family_id	family_members	family_genotypes	samples	family_count	comp_het_id	priority
+chr1	17362	17366	TTCT	T	WASH7P	splice_acceptor_variant	3	4	child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female)	TTCT|T,TTCT/T,TTCT/TTCT	child_4	1	1_3_7	3
+chr1	17729	17730	C	A	WASH7P	splice_acceptor_variant	7	4	child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female)	C/A,C/A,C/A	child_4	1	1_3_7	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_dbinfo_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,209 @@
+table_name	column_name	type	
+variants	chrom	text	
+variants	start	integer	
+variants	end	integer	
+variants	vcf_id	text	
+variants	variant_id	integer	
+variants	anno_id	integer	
+variants	ref	text	
+variants	alt	text	
+variants	qual	float	
+variants	filter	text	
+variants	type	text	
+variants	sub_type	text	
+variants	gts	blob	
+variants	gt_types	blob	
+variants	gt_phases	blob	
+variants	gt_depths	blob	
+variants	gt_ref_depths	blob	
+variants	gt_alt_depths	blob	
+variants	gt_quals	blob	
+variants	gt_copy_numbers	blob	
+variants	gt_phred_ll_homref	blob	
+variants	gt_phred_ll_het	blob	
+variants	gt_phred_ll_homalt	blob	
+variants	call_rate	float	
+variants	in_dbsnp	bool	
+variants	rs_ids	text	
+variants	sv_cipos_start_left	integer	
+variants	sv_cipos_end_left	integer	
+variants	sv_cipos_start_right	integer	
+variants	sv_cipos_end_right	integer	
+variants	sv_length	integer	
+variants	sv_is_precise	bool	
+variants	sv_tool	text	
+variants	sv_evidence_type	text	
+variants	sv_event_id	text	
+variants	sv_mate_id	text	
+variants	sv_strand	text	
+variants	in_omim	bool	
+variants	clinvar_sig	text	
+variants	clinvar_disease_name	text	
+variants	clinvar_dbsource	text	
+variants	clinvar_dbsource_id	text	
+variants	clinvar_origin	text	
+variants	clinvar_dsdb	text	
+variants	clinvar_dsdbid	text	
+variants	clinvar_disease_acc	text	
+variants	clinvar_in_locus_spec_db	bool	
+variants	clinvar_on_diag_assay	bool	
+variants	clinvar_causal_allele	text	
+variants	pfam_domain	text	
+variants	cyto_band	text	
+variants	rmsk	text	
+variants	in_cpg_island	bool	
+variants	in_segdup	bool	
+variants	is_conserved	bool	
+variants	gerp_bp_score	float	
+variants	gerp_element_pval	float	
+variants	num_hom_ref	integer	
+variants	num_het	integer	
+variants	num_hom_alt	integer	
+variants	num_unknown	integer	
+variants	aaf	real	
+variants	hwe	decimal(2,7)
+variants	inbreeding_coeff	decimal(2,7)
+variants	pi	decimal(2,7)
+variants	recomb_rate	decimal(2,7)
+variants	gene	text	
+variants	transcript	text	
+variants	is_exonic	bool	
+variants	is_coding	bool	
+variants	is_splicing	bool	
+variants	is_lof	bool	
+variants	exon	text	
+variants	codon_change	text	
+variants	aa_change	text	
+variants	aa_length	text	
+variants	biotype	text	
+variants	impact	text	
+variants	impact_so	text	
+variants	impact_severity	text	
+variants	polyphen_pred	text	
+variants	polyphen_score	float	
+variants	sift_pred	text	
+variants	sift_score	float	
+variants	anc_allele	text	
+variants	rms_bq	float	
+variants	cigar	text	
+variants	depth	integer	
+variants	strand_bias	float	
+variants	rms_map_qual	float	
+variants	in_hom_run	integer	
+variants	num_mapq_zero	integer	
+variants	num_alleles	integer	
+variants	num_reads_w_dels	float	
+variants	haplotype_score	float	
+variants	qual_depth	float	
+variants	allele_count	integer	
+variants	allele_bal	float	
+variants	in_hm2	bool	
+variants	in_hm3	bool	
+variants	is_somatic	bool	
+variants	somatic_score	float	
+variants	in_esp	bool	
+variants	aaf_esp_ea	decimal(2,7)
+variants	aaf_esp_aa	decimal(2,7)
+variants	aaf_esp_all	decimal(2,7)
+variants	exome_chip	bool	
+variants	in_1kg	bool	
+variants	aaf_1kg_amr	decimal(2,7)
+variants	aaf_1kg_eas	decimal(2,7)
+variants	aaf_1kg_sas	decimal(2,7)
+variants	aaf_1kg_afr	decimal(2,7)
+variants	aaf_1kg_eur	decimal(2,7)
+variants	aaf_1kg_all	decimal(2,7)
+variants	grc	text	
+variants	gms_illumina	float	
+variants	gms_solid	float	
+variants	gms_iontorrent	float	
+variants	in_cse	bool	
+variants	encode_tfbs	text	
+variants	encode_dnaseI_cell_count	integer	
+variants	encode_dnaseI_cell_list	text	
+variants	encode_consensus_gm12878	text	
+variants	encode_consensus_h1hesc	text	
+variants	encode_consensus_helas3	text	
+variants	encode_consensus_hepg2	text	
+variants	encode_consensus_huvec	text	
+variants	encode_consensus_k562	text	
+variants	vista_enhancers	text	
+variants	cosmic_ids	text	
+variants	info	blob	
+variants	cadd_raw	float	
+variants	cadd_scaled	float	
+variants	fitcons	float	
+variants	in_exac	bool	
+variants	aaf_exac_all	decimal(2,7)
+variants	aaf_adj_exac_all	decimal(2,7)
+variants	aaf_adj_exac_afr	decimal(2,7)
+variants	aaf_adj_exac_amr	decimal(2,7)
+variants	aaf_adj_exac_eas	decimal(2,7)
+variants	aaf_adj_exac_fin	decimal(2,7)
+variants	aaf_adj_exac_nfe	decimal(2,7)
+variants	aaf_adj_exac_oth	decimal(2,7)
+variants	aaf_adj_exac_sas	decimal(2,7)
+variants	exac_num_het	int	
+variants	exac_num_hom_alt	int	
+variants	exac_num_chroms	int	
+variants	max_aaf_all	REAL	
+variant_impacts	variant_id	integer	
+variant_impacts	anno_id	integer	
+variant_impacts	gene	text	
+variant_impacts	transcript	text	
+variant_impacts	is_exonic	bool	
+variant_impacts	is_coding	bool	
+variant_impacts	is_splicing	bool	
+variant_impacts	is_lof	bool	
+variant_impacts	exon	text	
+variant_impacts	codon_change	text	
+variant_impacts	aa_change	text	
+variant_impacts	aa_length	text	
+variant_impacts	biotype	text	
+variant_impacts	impact	text	
+variant_impacts	impact_so	text	
+variant_impacts	impact_severity	text	
+variant_impacts	polyphen_pred	text	
+variant_impacts	polyphen_score	float	
+variant_impacts	sift_pred	text	
+variant_impacts	sift_score	float	
+samples	sample_id	integer	
+samples	family_id	text	
+samples	name	text	
+samples	paternal_id	text	
+samples	maternal_id	text	
+samples	sex	text	
+samples	phenotype	text	
+samples	ethnicity	text	
+gene_detailed	uid	integer	
+gene_detailed	chrom	text	
+gene_detailed	gene	text	
+gene_detailed	is_hgnc	bool	
+gene_detailed	ensembl_gene_id	text	
+gene_detailed	transcript	text	
+gene_detailed	biotype	text	
+gene_detailed	transcript_status	text	
+gene_detailed	ccds_id	text	
+gene_detailed	hgnc_id	text	
+gene_detailed	entrez_id	text	
+gene_detailed	cds_length	text	
+gene_detailed	protein_length	text	
+gene_detailed	transcript_start	text	
+gene_detailed	transcript_end	text	
+gene_detailed	strand	text	
+gene_detailed	synonym	text	
+gene_detailed	rvis_pct	float	
+gene_detailed	mam_phenotype_id	text	
+gene_summary	uid	integer	
+gene_summary	chrom	text	
+gene_summary	gene	text	
+gene_summary	is_hgnc	bool	
+gene_summary	ensembl_gene_id	text	
+gene_summary	hgnc_id	text	
+gene_summary	transcript_min_start	text	
+gene_summary	transcript_max_end	text	
+gene_summary	strand	text	
+gene_summary	synonym	text	
+gene_summary	rvis_pct	float	
+gene_summary	mam_phenotype_id	text	
+gene_summary	in_cosmic_census	bool	
Binary file test-data/gemini_de_novo_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_de_novo_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,9 @@
+chrom	start	end	vcf_id	variant_id	anno_id	ref	alt	qual	filter	type	sub_type	call_rate	in_dbsnp	rs_ids	sv_cipos_start_left	sv_cipos_end_left	sv_cipos_start_right	sv_cipos_end_right	sv_length	sv_is_precise	sv_tool	sv_evidence_type	sv_event_id	sv_mate_id	sv_strand	in_omim	clinvar_sig	clinvar_disease_name	clinvar_dbsource	clinvar_dbsource_id	clinvar_origin	clinvar_dsdb	clinvar_dsdbid	clinvar_disease_acc	clinvar_in_locus_spec_db	clinvar_on_diag_assay	clinvar_causal_allele	pfam_domain	cyto_band	rmsk	in_cpg_island	in_segdup	is_conserved	gerp_bp_score	gerp_element_pval	num_hom_ref	num_het	num_hom_alt	num_unknown	aaf	hwe	inbreeding_coeff	pi	recomb_rate	gene	transcript	is_exonic	is_coding	is_splicing	is_lof	exon	codon_change	aa_change	aa_length	biotype	impact	impact_so	impact_severity	polyphen_pred	polyphen_score	sift_pred	sift_score	anc_allele	rms_bq	cigar	depth	strand_bias	rms_map_qual	in_hom_run	num_mapq_zero	num_alleles	num_reads_w_dels	haplotype_score	qual_depth	allele_count	allele_bal	in_hm2	in_hm3	is_somatic	somatic_score	in_esp	aaf_esp_ea	aaf_esp_aa	aaf_esp_all	exome_chip	in_1kg	aaf_1kg_amr	aaf_1kg_eas	aaf_1kg_sas	aaf_1kg_afr	aaf_1kg_eur	aaf_1kg_all	grc	gms_illumina	gms_solid	gms_iontorrent	in_cse	encode_tfbs	encode_dnaseI_cell_count	encode_dnaseI_cell_list	encode_consensus_gm12878	encode_consensus_h1hesc	encode_consensus_helas3	encode_consensus_hepg2	encode_consensus_huvec	encode_consensus_k562	vista_enhancers	cosmic_ids	info	cadd_raw	cadd_scaled	fitcons	in_exac	aaf_exac_all	aaf_adj_exac_all	aaf_adj_exac_afr	aaf_adj_exac_amr	aaf_adj_exac_eas	aaf_adj_exac_fin	aaf_adj_exac_nfe	aaf_adj_exac_oth	aaf_adj_exac_sas	exac_num_het	exac_num_hom_alt	exac_num_chroms	max_aaf_all	gts	gt_types	gt_phases	gt_depths	gt_ref_depths	gt_alt_depths	gt_quals	gt_copy_numbers	gt_phred_ll_homref	gt_phred_ll_het	gt_phred_ll_homalt	family_id	family_members	family_genotypes	samples	family_count
+chr10	48003991	48003992	None	2	1	C	T	1047.86999512	None	snp	ts	1.0	1	rs142685947,rs3739968	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	1	None	3.10871e-42	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	1	0.3112	0.4573	0.3855	0.1241	0.5149	0.346645	grc_fix	73.3	40.3	92.8	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.553676	1	0.443	0.448537771896	0.288974151858	0.281426746944	0.543088975937	0.524984286612	0.478147713207	0.463529411765	0.418641164716	17495	15317	107302	0.543088975937	['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T']	[1 1 3 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	C/C,C/C,C/T	2_kid	2
+chr10	48004991	48004992	None	3	1	C	T	1047.86999512	None	snp	ts	1.0	0	None	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	0	None	None	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	0	None	None	None	None	None	None	grc_fix	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.061011	0	None	None	None	None	None	None	None	None	None	None	None	None	-1.0	['C/T' 'C/T' 'C/T' 'C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T']	[1 1 1 1 1 3 0 0 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	C/C,C/C,C/T	3_kid	2
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	G/G,G/G,G/A	1_kid	2
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	G/G,G/G,G/A	2_kid	2
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	3
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	T/T,T/T,T/C	3_kid	3
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	T/T,T/T,T/C	2_kid	3
+chr10	1142207	1142208	None	1	4	T	C	3404.30004883	None	snp	ts	1.0	1	rs10794716	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10p15.3	None	0	0	0	None	None	7	1	1	0	0.166666666667	0.0718606383197	0.6	0.294117647059	0.200924	WDR37	ENST00000381329	1	1	0	1	exon_10_1142110_1142566	Tga/Cga	*250R	249	protein_coding	stop_lost	stop_lost	HIGH	None	None	None	None	None	None	None	122	None	36.0	0	0	8	0.0	2.67470002174	27.8999996185	8	None	None	None	None	None	1	0.999534883721	0.975034044485	0.991234814701	0	1	0.9942	1	1	0.9561	1	0.98762	None	None	None	None	0	None	2	Osteobl;Progfib	T	T	T	T	T	T	None	None	None	None	None	0.156188	1	0.997	0.997067786838	0.970305592927	0.998358956642	1	1	0.999595432887	0.998898678414	1	346	60354	121410	1.0	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']	[0 0 1 0 0 0 0 0 3]	[False False False False False False False False False]	[38 29 23 38 29 22 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 22 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_dump_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,97 @@
+chrom	start	end	ref	alt	type	sub_type	aaf	in_dbsnp	gene	sample	genotype
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_1	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_2	G/G
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_2	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_2	G/G
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_1	G/G
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_1	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_3	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_3	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_3	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_4	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_4	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_4	G/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_1	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_2	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_2	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_2	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_1	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_1	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_3	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_3	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_3	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_4	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_4	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_4	A/A
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_1	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_2	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_2	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_2	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_1	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_1	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_3	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_3	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_3	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_4	TTCT/T
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_4	TTCT/T
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_4	TTCT/TTCT
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_1	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_2	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_2	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_2	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_1	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_1	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_3	G/A
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_3	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_3	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_4	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_4	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_4	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_1	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_2	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_2	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_2	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_1	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_1	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_3	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_3	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_3	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_4	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_4	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_4	G/G
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_1	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_2	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_2	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_2	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_1	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_1	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_3	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_3	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_3	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_4	A/G
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_4	A/G
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_4	A/G
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_1	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_2	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_2	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_2	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_1	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_1	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_3	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_3	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_3	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_4	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_4	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_4	C/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_1	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_2	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_2	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_2	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_1	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_1	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_3	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_3	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_3	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_4	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_4	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_4	A/G
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_gene_wise_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,6 @@
+chrom	start	end	gene	impact	impact_severity	max_aaf_all	variant_filters	n_gene_variants	gene_filters
+chr10	48003991	48003992	ASAH2C	missense_variant	MED	0.543088975937	1	1	1
+chr10	126678091	126678092	CTBP2	stop_gained	HIGH	0.0904917363803	1	1	1
+chr10	135369531	135369532	SYCE1	missense_variant	MED	0.2698	1	1	1
+chr10	1142207	1142208	WDR37	stop_lost	HIGH	1.0	1	1	1
+chr16	72057434	72057435	DHODH	missense_variant	MED	0.000432002764818	1	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_interactions_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,25 @@
+sample	gene	order_of_interaction	interacting_gene
+M10475	CTBP2	0_order:	none
+M10475	CTBP2	1_order:	none
+M10475	CTBP2	2_order:	none
+M10475	CTBP2	3_order:	none
+M10475	CTBP2	4_order:	WDR37
+M10475	CTBP2	5_order:	none
+M128215	CTBP2	0_order:	CTBP2
+M128215	CTBP2	1_order:	none
+M128215	CTBP2	2_order:	none
+M128215	CTBP2	3_order:	none
+M128215	CTBP2	4_order:	WDR37
+M128215	CTBP2	5_order:	none
+M10478	CTBP2	0_order:	none
+M10478	CTBP2	1_order:	none
+M10478	CTBP2	2_order:	none
+M10478	CTBP2	3_order:	none
+M10478	CTBP2	4_order:	WDR37
+M10478	CTBP2	5_order:	MTG1
+M10500	CTBP2	0_order:	none
+M10500	CTBP2	1_order:	none
+M10500	CTBP2	2_order:	none
+M10500	CTBP2	3_order:	none
+M10500	CTBP2	4_order:	WDR37
+M10500	CTBP2	5_order:	MTG1
Binary file test-data/gemini_is_somatic_result.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_load_input.vcf	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,70 @@
+##fileformat=VCFv4.1
+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">
+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">
+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">
+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">
+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">
+##ALT=<ID=DEL,Description="Deletion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">
+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN">
+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">
+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">
+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">
+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">
+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">
+##reference=GRCh37
+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	10583	rs58108140	G	A	100.0	PASS	AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	10611	rs189107123	C	G	100.0	PASS	AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13302	rs180734498	C	T	100.0	PASS	THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13327	rs144762171	G	C	100.0	PASS	AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13957	.	TC	T	28.0	PASS	AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	13980	rs151276478	T	C	100.0	PASS	AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	30923	rs140337953	G	T	100.0	PASS	AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|)
+1	46402	.	C	CTGT	31.0	PASS	AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	47190	.	G	GA	192.0	PASS	AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51476	rs187298206	T	C	100.0	PASS	ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51479	rs116400033	T	A	100.0	PASS	RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51914	rs190452223	T	G	100.0	PASS	ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51935	rs181754315	C	T	100.0	PASS	THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51954	rs185832753	G	C	100.0	PASS	LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52058	rs62637813	G	C	100.0	PASS	AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52144	rs190291950	T	A	100.0	PASS	THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52185	.	TTAA	T	244.0	PASS	AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52238	rs150021059	T	G	100.0	PASS	THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||)
+1	53234	.	CAT	C	227.0	PASS	AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54353	rs140052487	C	A	100.0	PASS	THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54421	rs146477069	A	G	100.0	PASS	ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54490	rs141149254	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54676	rs2462492	C	T	100.0	PASS	LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54753	rs143174675	T	G	100.0	PASS	AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55164	rs3091274	C	A	100.0	PASS	AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55249	.	C	CTATGG	443.0	PASS	AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55299	rs10399749	C	T	100.0	PASS	RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55313	rs182462964	A	T	100.0	PASS	ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55326	rs3107975	T	C	100.0	PASS	AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55330	rs185215913	G	A	100.0	PASS	ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55367	rs190850374	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55388	rs182711216	C	T	100.0	PASS	THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55394	rs2949420	T	A	100.0	PASS	AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55416	rs193242050	G	A	100.0	PASS	AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55427	rs183189405	T	C	100.0	PASS	THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55816	rs187434873	G	A	100.0	PASS	AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55850	rs191890754	C	G	100.0	PASS	AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55852	rs184233019	G	C	100.0	PASS	THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
Binary file test-data/gemini_load_result.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_lofsieve_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,11 @@
+chrom	start	end	ref	alt	highest_impact	aa_change	var_trans_pos	trans_aa_length	var_trans_pct	sample	genotype	gene	transcript	trans_type
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M10475	C/C	WDR37	ENST00000381329	protein_coding
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M10478	C/C	WDR37	ENST00000381329	protein_coding
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M10500	C/C	WDR37	ENST00000381329	protein_coding
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M128215	C/C	WDR37	ENST00000381329	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000531469	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000309035	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000494626	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000337195	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000334808	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000411419	protein_coding
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_mendel_errors_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,10 @@
+chrom	start	end	vcf_id	variant_id	anno_id	ref	alt	qual	filter	type	sub_type	call_rate	in_dbsnp	rs_ids	sv_cipos_start_left	sv_cipos_end_left	sv_cipos_start_right	sv_cipos_end_right	sv_length	sv_is_precise	sv_tool	sv_evidence_type	sv_event_id	sv_mate_id	sv_strand	in_omim	clinvar_sig	clinvar_disease_name	clinvar_dbsource	clinvar_dbsource_id	clinvar_origin	clinvar_dsdb	clinvar_dsdbid	clinvar_disease_acc	clinvar_in_locus_spec_db	clinvar_on_diag_assay	clinvar_causal_allele	pfam_domain	cyto_band	rmsk	in_cpg_island	in_segdup	is_conserved	gerp_bp_score	gerp_element_pval	num_hom_ref	num_het	num_hom_alt	num_unknown	aaf	hwe	inbreeding_coeff	pi	recomb_rate	gene	transcript	is_exonic	is_coding	is_splicing	is_lof	exon	codon_change	aa_change	aa_length	biotype	impact	impact_so	impact_severity	polyphen_pred	polyphen_score	sift_pred	sift_score	anc_allele	rms_bq	cigar	depth	strand_bias	rms_map_qual	in_hom_run	num_mapq_zero	num_alleles	num_reads_w_dels	haplotype_score	qual_depth	allele_count	allele_bal	in_hm2	in_hm3	is_somatic	somatic_score	in_esp	aaf_esp_ea	aaf_esp_aa	aaf_esp_all	exome_chip	in_1kg	aaf_1kg_amr	aaf_1kg_eas	aaf_1kg_sas	aaf_1kg_afr	aaf_1kg_eur	aaf_1kg_all	grc	gms_illumina	gms_solid	gms_iontorrent	in_cse	encode_tfbs	encode_dnaseI_cell_count	encode_dnaseI_cell_list	encode_consensus_gm12878	encode_consensus_h1hesc	encode_consensus_helas3	encode_consensus_hepg2	encode_consensus_huvec	encode_consensus_k562	vista_enhancers	cosmic_ids	info	cadd_raw	cadd_scaled	fitcons	in_exac	aaf_exac_all	aaf_adj_exac_all	aaf_adj_exac_afr	aaf_adj_exac_amr	aaf_adj_exac_eas	aaf_adj_exac_fin	aaf_adj_exac_nfe	aaf_adj_exac_oth	aaf_adj_exac_sas	exac_num_het	exac_num_hom_alt	exac_num_chroms	max_aaf_all	gts	gt_types	gt_phases	gt_depths	gt_ref_depths	gt_alt_depths	gt_quals	gt_copy_numbers	gt_phred_ll_homref	gt_phred_ll_het	gt_phred_ll_homalt	family_id	family_members	family_genotypes	samples	family_count	violation	violation_prob
+chr10	1142207	1142208	None	1	4	T	C	3404.30004883	None	snp	ts	1.0	1	rs10794716	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10p15.3	None	0	0	0	None	None	7	1	1	0	0.166666666667	0.0718606383197	0.6	0.294117647059	0.200924	WDR37	ENST00000381329	1	1	0	1	exon_10_1142110_1142566	Tga/Cga	*250R	249	protein_coding	stop_lost	stop_lost	HIGH	None	None	None	None	None	None	None	122	None	36.0	0	0	8	0.0	2.67470002174	27.8999996185	8	None	None	None	None	None	1	0.999534883721	0.975034044485	0.991234814701	0	1	0.9942	1	1	0.9561	1	0.98762	None	None	None	None	0	None	2	Osteobl;Progfib	T	T	T	T	T	T	None	None	None	None	None	0.156188	1	0.997	0.997067786838	0.970305592927	0.998358956642	1	1	0.999595432887	0.998898678414	1	346	60354	121410	1.0	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']	[0 0 1 0 0 0 0 0 3]	[False False False False False False False False False]	[38 29 23 38 29 22 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 22 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	2	plausible de novo;implausible de novo	0.00000
+chr10	1142207	1142208	None	1	4	T	C	3404.30004883	None	snp	ts	1.0	1	rs10794716	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10p15.3	None	0	0	0	None	None	7	1	1	0	0.166666666667	0.0718606383197	0.6	0.294117647059	0.200924	WDR37	ENST00000381329	1	1	0	1	exon_10_1142110_1142566	Tga/Cga	*250R	249	protein_coding	stop_lost	stop_lost	HIGH	None	None	None	None	None	None	None	122	None	36.0	0	0	8	0.0	2.67470002174	27.8999996185	8	None	None	None	None	None	1	0.999534883721	0.975034044485	0.991234814701	0	1	0.9942	1	1	0.9561	1	0.98762	None	None	None	None	0	None	2	Osteobl;Progfib	T	T	T	T	T	T	None	None	None	None	None	0.156188	1	0.997	0.997067786838	0.970305592927	0.998358956642	1	1	0.999595432887	0.998898678414	1	346	60354	121410	1.0	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']	[0 0 1 0 0 0 0 0 3]	[False False False False False False False False False]	[38 29 23 38 29 22 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 22 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	T/T,T/T,C/C	3_kid	2	plausible de novo;implausible de novo	0.00000
+chr10	48003991	48003992	None	2	1	C	T	1047.86999512	None	snp	ts	1.0	1	rs142685947,rs3739968	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	1	None	3.10871e-42	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	1	0.3112	0.4573	0.3855	0.1241	0.5149	0.346645	grc_fix	73.3	40.3	92.8	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.553676	1	0.443	0.448537771896	0.288974151858	0.281426746944	0.543088975937	0.524984286612	0.478147713207	0.463529411765	0.418641164716	17495	15317	107302	0.543088975937	['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T']	[1 1 3 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	C/C,C/C,C/T	2_kid	1	plausible de novo	0.00000
+chr10	48004991	48004992	None	3	1	C	T	1047.86999512	None	snp	ts	1.0	0	None	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	0	None	None	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	0	None	None	None	None	None	None	grc_fix	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.061011	0	None	None	None	None	None	None	None	None	None	None	None	None	-1.0	['C/T' 'C/T' 'C/T' 'C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T']	[1 1 1 1 1 3 0 0 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	C/C,C/C,C/T	3_kid	1	plausible de novo	0.00000
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	G/G,G/G,G/A	1_kid	2	plausible de novo;plausible de novo	0.00000
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	G/G,G/G,G/A	2_kid	2	plausible de novo;plausible de novo	0.00000
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	3	plausible de novo;plausible de novo;plausible de novo	0.00000
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	T/T,T/T,T/C	3_kid	3	plausible de novo;plausible de novo;plausible de novo	0.00000
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	T/T,T/T,T/C	2_kid	3	plausible de novo;plausible de novo;plausible de novo	0.00000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_pathways_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,17 @@
+chrom	start	end	ref	alt	impact	sample	genotype	gene	transcript	pathway
+chr10	52004314	52004315	T	C	intron_variant	M10500	C/C	ASAH2	ENST00000329428	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M128215	C/C	ASAH2	ENST00000329428	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M10500	C/C	ASAH2	ENST00000447815	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M128215	C/C	ASAH2	ENST00000447815	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M10500	C/C	ASAH2	ENST00000395526	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M128215	C/C	ASAH2	ENST00000395526	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000531469	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000309035	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000494626	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000337195	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000411419	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	135336655	135336656	G	A	intron_variant	M10478	A/A	CYP2E1	ENST00000463117	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10	135336655	135336656	G	A	intron_variant	M128215	A/A	CYP2E1	ENST00000463117	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10	135336655	135336656	G	A	upstream_gene_variant	M10478	A/A	CYP2E1	ENST00000252945	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10	135336655	135336656	G	A	upstream_gene_variant	M128215	A/A	CYP2E1	ENST00000252945	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr16	72057434	72057435	C	T	missense_variant	M10475	C/T	DHODH	ENST00000219240	hsa01100:Metabolic_pathways,hsa00240:Pyrimidine_metabolism
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_qc_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,5 @@
+sample	sex	chrX_homref	chrX_het	chrX_homalt	chrX_unknown
+M10475	male	0	0	0	0
+M10478	female	0	0	0	0
+M10500	female	0	0	0	0
+M128215	male	0	0	0	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_query_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,10 @@
+10582
+10610
+13301
+13326
+13956
+13979
+30922
+46401
+47189
+51475
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_region_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,1 @@
+chr10	48003991	48003992	None	2	1	C	T	1047.86999512	None	snp	ts	1.0	1	rs142685947,rs3739968	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	1	None	3.10871e-42	1	2	1	0	0.5	1	0	0.571428571429	1.718591	ASAH2C	ENST00000420079	1	1	0	0	16/17	tGt/tAt	C/Y	542/612	protein_coding	missense_variant	missense_variant	MED	benign	0.0	tolerated	1.0	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	1	0.3112	0.4573	0.3855	0.1241	0.5149	0.346645	grc_fix	73.3	40.3	92.8	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.553676	1	0.443	0.448537771896	0.288974151858	0.281426746944	0.543088975937	0.524984286612	0.478147713207	0.463529411765	0.418641164716	17495	15317	107302	1	0.543088975937
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_roh_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,7 @@
+chrom	start	end	sample	num_of_snps	density_per_kb	run_length_in_bp
+chr10	1142208	135369532	M10475	5	0.0	134227324
+chr10	1142208	135210791	M10500	5	0.0001	134068583
+chr10	1142208	135210791	M10478	5	0.0001	134068583
+chr10	1142208	135336656	M10478	4	0.0	134194448
+chr10	1142208	135336656	M128215	6	0.0001	134194448
+chr10	1142208	135369532	M128215	5	0.0	134227324
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_stats_result.tabular	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,5 @@
+sample	total
+M10475	3
+M10478	6
+M10500	6
+M128215	4
Binary file test-data/gemini_windower_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_databases.loc.sample	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,3 @@
+## GEMINI databases
+#Version	dbkey	Description	Path
+#08_08_2014	hg19	Database (08-08-2014)	/path/to/data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <table name="gemini_databases" comment_char="#"> <!-- Data table "gemini_databases"; lines in the backing file that start with the comment_char "#" are treated as comments. -->
+        <columns>value, dbkey, name, path</columns> <!-- Column names in file order; the sample .loc header labels the same four fields Version, dbkey, Description, Path. -->
+        <file path="tool-data/gemini_databases.loc" /> <!-- Backing file that supplies the rows of this table. -->
+    </table>
+</tables>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Declares the external package dependency of the GEMINI tool wrappers in this repository. -->
+    <package name="gemini" version="0.18.1"> <!-- Package name and version being required. -->
+        <repository changeset_revision="be869e11582f" name="package_gemini_0_18_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> <!-- Points at the repository (name, owner, tool shed URL) that provides the package, at the stated changeset revision. -->
+    </package>
+</tool_dependency>