changeset 0:ac761838cdaf draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
author iuc
date Thu, 18 Feb 2016 08:55:47 -0500
parents
children a0ee6874a23b
files gemini_macros.xml gemini_stats.xml readme.rst repository_dependencies.xml static/images/gemini_mendel_errors.png test-data/anno.bed test-data/gemini_actionable_mutations_result.tabular test-data/gemini_amend_input.db test-data/gemini_amend_input.ped test-data/gemini_amend_result.db test-data/gemini_annotate_input.db test-data/gemini_annotate_result.tabular test-data/gemini_autosomal_dominant_result.tabular test-data/gemini_autosomal_input.db test-data/gemini_autosomal_recessive.tabular test-data/gemini_burden_input.db test-data/gemini_burden_result.tabular test-data/gemini_comphets_input.db test-data/gemini_comphets_result.tabular test-data/gemini_dbinfo_result.tabular test-data/gemini_de_novo_input.db test-data/gemini_de_novo_result.tabular test-data/gemini_dump_result.tabular test-data/gemini_fusions_result.tabular test-data/gemini_gene_wise_result.tabular test-data/gemini_interactions_result.tabular test-data/gemini_is_somatic_result.db test-data/gemini_load_input.vcf test-data/gemini_load_result.db test-data/gemini_lofsieve_result.tabular test-data/gemini_mendel_errors_result.tabular test-data/gemini_pathways_result.tabular test-data/gemini_qc_result.tabular test-data/gemini_query_result.tabular test-data/gemini_region_result.tabular test-data/gemini_roh_result.tabular test-data/gemini_stats_result.tabular test-data/gemini_windower_input.db tool-data/gemini_databases.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 40 files changed, 868 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,146 @@
+<macros>
+    <xml name="requirements"> <!-- Shared dependency list: the gemini 0.18.1 package, plus anything the expanding tool supplies at the yield point. -->
+        <requirements>
+            <requirement type="package" version="0.18.1">gemini</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <xml name="version_command"> <!-- Shared version probe: runs gemini with its version flag. -->
+        <version_command>gemini --version</version_command>
+    </xml>
+
+    <xml name="stdio"> <!-- Shared failure detection: exit codes of 1 and above or -1 and below, plus tool output matching Error: or Exception:. No level is set here, so Galaxy's default applies. -->
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+
+    <xml name="annotation_dir"> <!-- Optional select whose choices come from the gemini_databases data table, sorted on column 0. -->
+        <param name="annotation_databases" type="select" optional="True" label="Choose a gemini annotation database">
+            <options from_data_table="gemini_databases">
+                <filter type="sort_by" column="0" />
+            </options>
+        </param>
+    </xml>
+
+    <xml name="add_header_column"> <!-- Checkbox, off by default: yields the header flag when ticked and an empty string otherwise. -->
+        <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" 
+            label="Add a header of column names to the output" help="(--header)"/>
+    </xml>
+
+    <xml name="radius"> <!-- Integer search radius, default 3, validated as >= 0; the help text maps it to the short r option. -->
+        <param name="radius" type="integer" value="3" label="Set filter for Breadth-first search (BFS) in the Protein-Protein Interaction network" help="(-r)" >
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+    <xml name="variant_mode"> <!-- Checkbox, off by default: yields the var flag when ticked and an empty string otherwise. -->
+        <param name="variant_mode" type="boolean" truevalue="--var" falsevalue="" checked="False" 
+            label="Returns variant info (e.g. impact, biotype) for interacting genes" help="(--var)"/>
+    </xml>
+
+    <xml name="column_filter"> <!-- Conditional: report every column (the default), or only a user-chosen subset picked from the checkbox list below. -->
+        <conditional name="report">
+            <param name="report_selector" type="select" label="Columns to include in the report"
+                help="By default, this tool reports all columns in the variants table. One may choose to report only a subset of the columns.">
+                <option value="all" selected="True">all</option>
+                <option value="column_filter">User given columns</option>
+            </param>
+            <when value="all"/>
+            <when value="column_filter">
+                <param name="columns" type="select" display="checkboxes" multiple="True" label="Choose columns to include in the report" help="(--columns)">
+                    <option value="gene">gene</option>
+                    <option value="chrom">chrom</option>
+                    <option value="start">start</option>
+                    <option value="end">end</option>
+                    <option value="ref">ref</option>
+                    <option value="alt">alt</option>
+                    <option value="impact">impact</option>
+                    <option value="impact_severity">impact_severity</option>
+                    <option value="max_aaf_all">alternative allele frequency</option>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="filter"> <!-- Optional SQL constraint: the free-text box is only offered when additional constraints are requested. -->
+        <conditional name="filter">
+            <param name="filter_selector" type="select" label="Apply additional constraints"
+                help="By default, this tool will report all variants regardless of their putative functional impact. In order to apply additional constraints on the variants returned, you can use this optional filter.">
+                <option value="no">No additional constraints</option>
+                <option value="yes">Apply additional constraints</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="filter" type="text" label="Constraints in SQL syntax" help="Conditions applied here will become WHERE clauses in the query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'. (--filter)">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="sanitize_query"> <!-- Sanitizer for free-text queries: every character in string.printable is accepted; anything else is replaced by the empty invalid_char. -->
+        <sanitizer invalid_char="">
+            <valid initial="string.printable"/>
+       </sanitizer>
+    </xml>
+
+    <token name="@CMDLN_SQL_FILTER_FILTER_OPTION@">
+        #if str($filter.filter_selector) == 'yes' and $filter.filter:
+            #import pipes
+            --filter ${ pipes.quote( str( $filter.filter ) ) or "''" }
+        #end if
+    </token> <!-- Command-line fragment: emits the filter option with the shell-quoted constraint text, only when the filter conditional is set to yes and the text is non-empty. -->
+
+    <xml name="family"> <!-- Free-text list of family IDs; the default value is empty. -->
+        <param name="families" type="text" value="" label="Comma separated list of families to restrict the analysis to." help="e.g. Family1,Family3 (--families)"/>
+    </xml>
+
+    <xml name="lenient"> <!-- Checkbox, off by default: yields the lenient flag when ticked and an empty string otherwise. -->
+        <param name="lenient" type="boolean" truevalue="--lenient" falsevalue="" checked="False" label="Loosen the restrictions on family structure"/>
+    </xml>
+
+    <xml name="unaffected"> <!-- Checkbox, off by default: yields the allow-unaffected flag when ticked and an empty string otherwise. -->
+        <param name="allow_unaffected" type="boolean" truevalue="--allow-unaffected" falsevalue="" checked="False" label="Report candidates that also impact samples labeled as unaffected." help="(--allow-unaffected)"/>
+    </xml>
+
+    <xml name="min_kindreds"> <!-- Integer, default 1; no range restriction is declared here. -->
+        <param name="min_kindreds" type="integer" value="1" label="The min. number of kindreds that must have a candidate variant in a gene" help="default: 1 (--min-kindreds)" />
+    </xml>
+
+    <xml name="min_sequence_depth"> <!-- Integer parameter named d, default 0, lower bound 0 via the min attribute. -->
+        <param name="d" type="integer" value="0" min="0" label="The minimum aligned sequence depth (genotype DP) required for each sample"
+                help="default: 0 (-d)" />
+    </xml>
+
+    <xml name="min_gq"> <!-- Integer, default 0; the in_range validator rejects values below 0. -->
+        <param name="min_gq" type="integer" value="0" label="the minimum genotype quality required for each sample in a family" help="default: 0 (--min-gq)">
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+
+    <xml name="gt_pl_max"> <!-- Integer, default -1 (documented in the help text as "not set"), lower bound -1. -->
+        <param name="gt_pl_max" type="integer" value="-1" min="-1" label="The maximum phred-scaled genotype likelihood (PL) allowed for each sample in a family" help="default: -1 (not set) (--gt-pl-max)" />
+    </xml>
+    <token name="@VERSION@">0.18.1</token> <!-- GEMINI version token; same value as the package version in the requirements macro, and referenced by the infile macro. -->
+
+    <xml name="citations"> <!-- Shared citation block with one DOI; the expanding tool can append more at the yield point. -->
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>
+            <yield />
+        </citations>
+    </xml>
+
+    <xml name="infile"> <!-- GEMINI database input: the options filter and the expression validator both compare the dataset's gemini_version metadata with the wrapper version token; the validator also rejects a missing value. -->
+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." >
+            <options options_filter_attribute="metadata.gemini_version" >
+                <filter type="add_value" value="@VERSION@" />
+            </options>
+            <validator type="expression" message="This version of Gemini will only work with Gemini files that are for version @VERSION@.">value is not None and value.metadata.gemini_version == "@VERSION@"</validator>
+        </param>
+    </xml>
+
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_stats.xml	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,125 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Compute useful variant statistics</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">stats</token>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        ## Free-text values are shell-quoted (as in the SQL filter token of gemini_macros.xml).
+        #import pipes
+        gemini @BINARY@
+            $stats_type
+            #if $gt_filter.strip():
+                --gt-filter ${ pipes.quote( str( $gt_filter ) ) }
+            #end if
+            ## The summarize query is sent with its own option, not as a second gt-filter.
+            #if $summarize.strip():
+                --summarize ${ pipes.quote( str( $summarize ) ) }
+            #end if
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <inputs>
+        <expand macro="infile" />
+
+        <param name="stats_type" type="select" label="Studying ..." help=""> <!-- Each option value is the literal gemini stats flag that the command section inserts via $stats_type. -->
+            <option value="--tstv">Compute the transition and transversion ratios for the snps (--tstv)</option>
+            <option value="--tstv-coding">Compute the transition/transversion ratios for the snps in the coding regions (--tstv-coding)</option>
+            <option value="--tstv-noncoding">Compute the transition/transversion ratios for the snps in the non-coding regions (--tstv-noncoding)</option>
+            <option value="--snp-counts">Compute the type and count of the snps (--snp-counts)</option>
+            <option value="--sfs">Calculate the site frequency spectrum of the variants (--sfs)</option>
+            <option value="--mds">Compute the pair-wise genetic distance between each sample (--mds)</option>
+            <option value="--vars-by-sample">Return the total variants per sample, sum of homozygous and heterozygous variants (--vars-by-sample)</option>
+            <option value="--gts-by-sample">Return the count of each genotype class observed per sample (--gts-by-sample)</option>
+        </param>
+
+        <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filter)"> <!-- Free-text genotype filter, sanitized by the shared sanitize_query macro. -->
+            <expand macro="sanitize_query" />
+        </param>
+
+        <param name="summarize" type="text" area="True" size="5x50" label="The query to be issued to the database to summarize" help="(--summarize)"> <!-- Free-text query meant for the summarize option named in the help attribute; sanitized by the shared sanitize_query macro. -->
+            <expand macro="sanitize_query" />
+        </param>
+
+    </inputs>
+    <outputs> <!-- Single tabular dataset; the command section redirects gemini's standard output into it. -->
+        <data name="outfile" format="tabular" />
+    </outputs>
+    <tests> <!-- One regression test: the vars-by-sample statistic on the burden test database, compared with the stored tabular result. -->
+        <test>
+            <param name="infile" value="gemini_burden_input.db" ftype="gemini.sqlite" />
+            <param name="stats_type" value="--vars-by-sample" />
+            <output name="outfile" file="gemini_stats_result.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+The stats tool computes some useful variant statistics for a GEMINI database,
+such as the transition and transversion ratios for the SNPs.
+
+**Settings and examples**
+
+--tstv-coding:
+ Compute the transition/transversion ratios for the snps in the coding regions.
+
+--tstv-noncoding:
+ Compute the transition/transversion ratios for the snps in the non-coding regions.
+
+EXAMPLE Compute the type and count of the snps; --snp-counts::
+
+ type    count
+ A->G    2
+ C->T    1
+ G->A    1
+
+EXAMPLE Calculate the site frequency spectrum of the variants; --sfs::
+
+ aaf     count
+ 0.125   2
+ 0.375   1
+
+EXAMPLE Compute the pair-wise genetic distance between each sample; --mds::
+
+ sample1  sample2  distance
+ M10500   M10500   0.0
+ M10475   M10478   1.25
+ M10500   M10475   2.0
+ M10500   M10478   0.5714
+
+EXAMPLE Return a count of the types of genotypes per sample; --gts-by-sample::
+
+ sample   num_hom_ref   num_het   num_hom_alt   num_unknown   total
+ M10475   4             1         3             1             9
+ M10478   2             2         4             1             9
+
+
+
+EXAMPLE Return the total variants per sample (sum of homozygous and heterozygous variants); --vars-by-sample::
+
+ sample  total
+ M10475  4
+ M10478  6
+
+**Final solution**
+
+--summarize:
+ If none of these tools are exactly what you want, you can summarize the variants per sample of an arbitrary query using the --summarize flag.
+
+EXAMPLE If you wanted to know, for each sample, how many variants are on chromosome 1 that are also in dbSNP; --summarize "select * from variants where in_dbsnp=1 and chrom='chr1'"::
+
+ sample   total  num_het  num_hom_alt
+ M10475   1      1        0
+ M128215  1      1        0
+ M10478   2      2        0
+ M10500   2      1        1
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,65 @@
+=========================
+Galaxy wrapper for GEMINI
+=========================
+
+
+GEMINI: a flexible framework for exploring genome variation
+
+GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of 
+the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, 
+and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very 
+powerful system for exploring genetic variation for disease and population genetics.
+
+Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically
+annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks,
+OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows
+one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an
+enhanced SQL engine.
+
+Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_.
+
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
+
+
+=======
+History
+=======
+- 0.9.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+    <repository changeset_revision="345412d58d75" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
Binary file static/images/gemini_mendel_errors.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno.bed	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,3 @@
+chr1	30547	30548
+chr1	30920	30925
+chr1	30922	30923
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_actionable_mutations_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,1 @@
+tum_name	chrom	start	end	ref	alt	gene	impact	is_somatic	in_cosmic_census	dgidb_info
Binary file test-data/gemini_amend_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend_input.ped	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,5 @@
+#family_id name paternal_id maternal_id sex phenotype ethnicity hair_color
+1 M10475    None None  1    1    None	brown
+1 M10478     M10475  M10500    2    2    None	red
+1 M10500     None    None    2    2    None	
+1 M128215    M10475  M10500    1    1    None	green
Binary file test-data/gemini_amend_result.db has changed
Binary file test-data/gemini_annotate_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_annotate_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,1 @@
+updated 10 variants
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_autosomal_dominant_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,9 @@
+gene	chrom	impact	variant_id	family_id	family_members	family_genotypes	samples	family_count
+ASAH2C	chr10	missense_variant	3	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	C/T,C/C,C/T	3_dad,3_kid	2
+ASAH2C	chr10	missense_variant	3	2	2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected)	C/C,C/T,C/T	2_mom,2_kid	2
+ASAH2C	chr10	missense_variant	4	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	C/T,C/C,C/T	3_dad,3_kid	2
+ASAH2C	chr10	missense_variant	4	2	2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected)	C/C,C/T,C/T	2_mom,2_kid	2
+SPRN	chr10	intron_variant	5	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	G/A,G/G,G/A	3_dad,3_kid	1
+WDR37	chr10	stop_lost	1	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	T/C,T/T,T/C	3_dad,3_kid	2
+WDR37	chr10	stop_lost	1	2	2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected)	T/T,T/C,T/C	2_mom,2_kid	2
+WDR37	chr10	stop_lost	2	3	3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected)	T/C,T/C,T/C	3_dad,3_kid	2
Binary file test-data/gemini_autosomal_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_autosomal_recessive.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,2 @@
+gene	chrom	impact	variant_id	family_id	family_members	family_genotypes	samples	family_count
+WDR37	chr10	stop_lost	2	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/C,T/C,C/C	1_kid	1
Binary file test-data/gemini_burden_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,6 @@
+gene	T	c	Z	p_value
+SYCE1	-0.5	0.25	-1.0	0.00699300699301
+DHODH	0.0	0.0	nan	nan
+WDR37	-1.0	1.5	-0.816496580928	0.00699300699301
+ASAH2C	-0.5	0.75	-0.57735026919	0.00699300699301
+CTBP2	0.0	0.0	nan	nan
Binary file test-data/gemini_comphets_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_comphets_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,3 @@
+chrom	start	end	ref	alt	gene	impact	variant_id	family_id	family_members	family_genotypes	samples	family_count	comp_het_id	priority
+chr1	17362	17366	TTCT	T	WASH7P	splice_acceptor_variant	3	4	child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female)	TTCT|T,TTCT/T,TTCT/TTCT	child_4	1	1_3_7	3
+chr1	17729	17730	C	A	WASH7P	splice_acceptor_variant	7	4	child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female)	C/A,C/A,C/A	child_4	1	1_3_7	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_dbinfo_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,209 @@
+table_name	column_name	type	
+variants	chrom	text	
+variants	start	integer	
+variants	end	integer	
+variants	vcf_id	text	
+variants	variant_id	integer	
+variants	anno_id	integer	
+variants	ref	text	
+variants	alt	text	
+variants	qual	float	
+variants	filter	text	
+variants	type	text	
+variants	sub_type	text	
+variants	gts	blob	
+variants	gt_types	blob	
+variants	gt_phases	blob	
+variants	gt_depths	blob	
+variants	gt_ref_depths	blob	
+variants	gt_alt_depths	blob	
+variants	gt_quals	blob	
+variants	gt_copy_numbers	blob	
+variants	gt_phred_ll_homref	blob	
+variants	gt_phred_ll_het	blob	
+variants	gt_phred_ll_homalt	blob	
+variants	call_rate	float	
+variants	in_dbsnp	bool	
+variants	rs_ids	text	
+variants	sv_cipos_start_left	integer	
+variants	sv_cipos_end_left	integer	
+variants	sv_cipos_start_right	integer	
+variants	sv_cipos_end_right	integer	
+variants	sv_length	integer	
+variants	sv_is_precise	bool	
+variants	sv_tool	text	
+variants	sv_evidence_type	text	
+variants	sv_event_id	text	
+variants	sv_mate_id	text	
+variants	sv_strand	text	
+variants	in_omim	bool	
+variants	clinvar_sig	text	
+variants	clinvar_disease_name	text	
+variants	clinvar_dbsource	text	
+variants	clinvar_dbsource_id	text	
+variants	clinvar_origin	text	
+variants	clinvar_dsdb	text	
+variants	clinvar_dsdbid	text	
+variants	clinvar_disease_acc	text	
+variants	clinvar_in_locus_spec_db	bool	
+variants	clinvar_on_diag_assay	bool	
+variants	clinvar_causal_allele	text	
+variants	pfam_domain	text	
+variants	cyto_band	text	
+variants	rmsk	text	
+variants	in_cpg_island	bool	
+variants	in_segdup	bool	
+variants	is_conserved	bool	
+variants	gerp_bp_score	float	
+variants	gerp_element_pval	float	
+variants	num_hom_ref	integer	
+variants	num_het	integer	
+variants	num_hom_alt	integer	
+variants	num_unknown	integer	
+variants	aaf	real	
+variants	hwe	decimal(2,7)
+variants	inbreeding_coeff	decimal(2,7)
+variants	pi	decimal(2,7)
+variants	recomb_rate	decimal(2,7)
+variants	gene	text	
+variants	transcript	text	
+variants	is_exonic	bool	
+variants	is_coding	bool	
+variants	is_splicing	bool	
+variants	is_lof	bool	
+variants	exon	text	
+variants	codon_change	text	
+variants	aa_change	text	
+variants	aa_length	text	
+variants	biotype	text	
+variants	impact	text	
+variants	impact_so	text	
+variants	impact_severity	text	
+variants	polyphen_pred	text	
+variants	polyphen_score	float	
+variants	sift_pred	text	
+variants	sift_score	float	
+variants	anc_allele	text	
+variants	rms_bq	float	
+variants	cigar	text	
+variants	depth	integer	
+variants	strand_bias	float	
+variants	rms_map_qual	float	
+variants	in_hom_run	integer	
+variants	num_mapq_zero	integer	
+variants	num_alleles	integer	
+variants	num_reads_w_dels	float	
+variants	haplotype_score	float	
+variants	qual_depth	float	
+variants	allele_count	integer	
+variants	allele_bal	float	
+variants	in_hm2	bool	
+variants	in_hm3	bool	
+variants	is_somatic	bool	
+variants	somatic_score	float	
+variants	in_esp	bool	
+variants	aaf_esp_ea	decimal(2,7)
+variants	aaf_esp_aa	decimal(2,7)
+variants	aaf_esp_all	decimal(2,7)
+variants	exome_chip	bool	
+variants	in_1kg	bool	
+variants	aaf_1kg_amr	decimal(2,7)
+variants	aaf_1kg_eas	decimal(2,7)
+variants	aaf_1kg_sas	decimal(2,7)
+variants	aaf_1kg_afr	decimal(2,7)
+variants	aaf_1kg_eur	decimal(2,7)
+variants	aaf_1kg_all	decimal(2,7)
+variants	grc	text	
+variants	gms_illumina	float	
+variants	gms_solid	float	
+variants	gms_iontorrent	float	
+variants	in_cse	bool	
+variants	encode_tfbs	text	
+variants	encode_dnaseI_cell_count	integer	
+variants	encode_dnaseI_cell_list	text	
+variants	encode_consensus_gm12878	text	
+variants	encode_consensus_h1hesc	text	
+variants	encode_consensus_helas3	text	
+variants	encode_consensus_hepg2	text	
+variants	encode_consensus_huvec	text	
+variants	encode_consensus_k562	text	
+variants	vista_enhancers	text	
+variants	cosmic_ids	text	
+variants	info	blob	
+variants	cadd_raw	float	
+variants	cadd_scaled	float	
+variants	fitcons	float	
+variants	in_exac	bool	
+variants	aaf_exac_all	decimal(2,7)
+variants	aaf_adj_exac_all	decimal(2,7)
+variants	aaf_adj_exac_afr	decimal(2,7)
+variants	aaf_adj_exac_amr	decimal(2,7)
+variants	aaf_adj_exac_eas	decimal(2,7)
+variants	aaf_adj_exac_fin	decimal(2,7)
+variants	aaf_adj_exac_nfe	decimal(2,7)
+variants	aaf_adj_exac_oth	decimal(2,7)
+variants	aaf_adj_exac_sas	decimal(2,7)
+variants	exac_num_het	int	
+variants	exac_num_hom_alt	int	
+variants	exac_num_chroms	int	
+variants	max_aaf_all	REAL	
+variant_impacts	variant_id	integer	
+variant_impacts	anno_id	integer	
+variant_impacts	gene	text	
+variant_impacts	transcript	text	
+variant_impacts	is_exonic	bool	
+variant_impacts	is_coding	bool	
+variant_impacts	is_splicing	bool	
+variant_impacts	is_lof	bool	
+variant_impacts	exon	text	
+variant_impacts	codon_change	text	
+variant_impacts	aa_change	text	
+variant_impacts	aa_length	text	
+variant_impacts	biotype	text	
+variant_impacts	impact	text	
+variant_impacts	impact_so	text	
+variant_impacts	impact_severity	text	
+variant_impacts	polyphen_pred	text	
+variant_impacts	polyphen_score	float	
+variant_impacts	sift_pred	text	
+variant_impacts	sift_score	float	
+samples	sample_id	integer	
+samples	family_id	text	
+samples	name	text	
+samples	paternal_id	text	
+samples	maternal_id	text	
+samples	sex	text	
+samples	phenotype	text	
+samples	ethnicity	text	
+gene_detailed	uid	integer	
+gene_detailed	chrom	text	
+gene_detailed	gene	text	
+gene_detailed	is_hgnc	bool	
+gene_detailed	ensembl_gene_id	text	
+gene_detailed	transcript	text	
+gene_detailed	biotype	text	
+gene_detailed	transcript_status	text	
+gene_detailed	ccds_id	text	
+gene_detailed	hgnc_id	text	
+gene_detailed	entrez_id	text	
+gene_detailed	cds_length	text	
+gene_detailed	protein_length	text	
+gene_detailed	transcript_start	text	
+gene_detailed	transcript_end	text	
+gene_detailed	strand	text	
+gene_detailed	synonym	text	
+gene_detailed	rvis_pct	float	
+gene_detailed	mam_phenotype_id	text	
+gene_summary	uid	integer	
+gene_summary	chrom	text	
+gene_summary	gene	text	
+gene_summary	is_hgnc	bool	
+gene_summary	ensembl_gene_id	text	
+gene_summary	hgnc_id	text	
+gene_summary	transcript_min_start	text	
+gene_summary	transcript_max_end	text	
+gene_summary	strand	text	
+gene_summary	synonym	text	
+gene_summary	rvis_pct	float	
+gene_summary	mam_phenotype_id	text	
+gene_summary	in_cosmic_census	bool	
Binary file test-data/gemini_de_novo_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_de_novo_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,9 @@
+chrom	start	end	vcf_id	variant_id	anno_id	ref	alt	qual	filter	type	sub_type	call_rate	in_dbsnp	rs_ids	sv_cipos_start_left	sv_cipos_end_left	sv_cipos_start_right	sv_cipos_end_right	sv_length	sv_is_precise	sv_tool	sv_evidence_type	sv_event_id	sv_mate_id	sv_strand	in_omim	clinvar_sig	clinvar_disease_name	clinvar_dbsource	clinvar_dbsource_id	clinvar_origin	clinvar_dsdb	clinvar_dsdbid	clinvar_disease_acc	clinvar_in_locus_spec_db	clinvar_on_diag_assay	clinvar_causal_allele	pfam_domain	cyto_band	rmsk	in_cpg_island	in_segdup	is_conserved	gerp_bp_score	gerp_element_pval	num_hom_ref	num_het	num_hom_alt	num_unknown	aaf	hwe	inbreeding_coeff	pi	recomb_rate	gene	transcript	is_exonic	is_coding	is_splicing	is_lof	exon	codon_change	aa_change	aa_length	biotype	impact	impact_so	impact_severity	polyphen_pred	polyphen_score	sift_pred	sift_score	anc_allele	rms_bq	cigar	depth	strand_bias	rms_map_qual	in_hom_run	num_mapq_zero	num_alleles	num_reads_w_dels	haplotype_score	qual_depth	allele_count	allele_bal	in_hm2	in_hm3	is_somatic	somatic_score	in_esp	aaf_esp_ea	aaf_esp_aa	aaf_esp_all	exome_chip	in_1kg	aaf_1kg_amr	aaf_1kg_eas	aaf_1kg_sas	aaf_1kg_afr	aaf_1kg_eur	aaf_1kg_all	grc	gms_illumina	gms_solid	gms_iontorrent	in_cse	encode_tfbs	encode_dnaseI_cell_count	encode_dnaseI_cell_list	encode_consensus_gm12878	encode_consensus_h1hesc	encode_consensus_helas3	encode_consensus_hepg2	encode_consensus_huvec	encode_consensus_k562	vista_enhancers	cosmic_ids	info	cadd_raw	cadd_scaled	fitcons	in_exac	aaf_exac_all	aaf_adj_exac_all	aaf_adj_exac_afr	aaf_adj_exac_amr	aaf_adj_exac_eas	aaf_adj_exac_fin	aaf_adj_exac_nfe	aaf_adj_exac_oth	aaf_adj_exac_sas	exac_num_het	exac_num_hom_alt	exac_num_chroms	max_aaf_all	gts	gt_types	gt_phases	gt_depths	gt_ref_depths	gt_alt_depths	gt_quals	gt_copy_numbers	gt_phred_ll_homref	gt_phred_ll_het	gt_phred_ll_homalt	family_id	family_members	family_genotypes	samples	family_count
+chr10	48003991	48003992	None	2	1	C	T	1047.86999512	None	snp	ts	1.0	1	rs142685947,rs3739968	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	1	None	3.10871e-42	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	1	0.3112	0.4573	0.3855	0.1241	0.5149	0.346645	grc_fix	73.3	40.3	92.8	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.553676	1	0.443	0.448537771896	0.288974151858	0.281426746944	0.543088975937	0.524984286612	0.478147713207	0.463529411765	0.418641164716	17495	15317	107302	0.543088975937	['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T']	[1 1 3 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	C/C,C/C,C/T	2_kid	2
+chr10	48004991	48004992	None	3	1	C	T	1047.86999512	None	snp	ts	1.0	0	None	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	0	None	None	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	0	None	None	None	None	None	None	grc_fix	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.061011	0	None	None	None	None	None	None	None	None	None	None	None	None	-1.0	['C/T' 'C/T' 'C/T' 'C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T']	[1 1 1 1 1 3 0 0 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	C/C,C/C,C/T	3_kid	2
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	G/G,G/G,G/A	1_kid	2
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	G/G,G/G,G/A	2_kid	2
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	3
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	T/T,T/T,T/C	3_kid	3
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	T/T,T/T,T/C	2_kid	3
+chr10	1142207	1142208	None	1	4	T	C	3404.30004883	None	snp	ts	1.0	1	rs10794716	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10p15.3	None	0	0	0	None	None	7	1	1	0	0.166666666667	0.0718606383197	0.6	0.294117647059	0.200924	WDR37	ENST00000381329	1	1	0	1	exon_10_1142110_1142566	Tga/Cga	*250R	249	protein_coding	stop_lost	stop_lost	HIGH	None	None	None	None	None	None	None	122	None	36.0	0	0	8	0.0	2.67470002174	27.8999996185	8	None	None	None	None	None	1	0.999534883721	0.975034044485	0.991234814701	0	1	0.9942	1	1	0.9561	1	0.98762	None	None	None	None	0	None	2	Osteobl;Progfib	T	T	T	T	T	T	None	None	None	None	None	0.156188	1	0.997	0.997067786838	0.970305592927	0.998358956642	1	1	0.999595432887	0.998898678414	1	346	60354	121410	1.0	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']	[0 0 1 0 0 0 0 0 3]	[False False False False False False False False False]	[38 29 23 38 29 22 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 22 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_dump_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,97 @@
+chrom	start	end	ref	alt	type	sub_type	aaf	in_dbsnp	gene	sample	genotype
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_1	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_2	G/G
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_2	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_2	G/G
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_1	G/G
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_1	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_3	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_3	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_3	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 child_4	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 dad_4	G/A
+chr1	16976	16977	G	A	snp	ts	0.375	0	DDX11L1 mom_4	G/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_1	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_2	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_2	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_2	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_1	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_1	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_3	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_3	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_3	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 child_4	A/G
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 dad_4	A/A
+chr1	17221	17222	A	G	snp	ts	0.166666666667	1	DDX11L1 mom_4	A/A
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_1	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_2	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_2	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_2	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_1	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_1	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_3	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_3	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_3	TTCT/TTCT
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P child_4	TTCT/T
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P dad_4	TTCT/T
+chr1	17362	17366	TTCT	T	indel	del	0.0833333333333	0	WASH7P mom_4	TTCT/TTCT
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_1	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_2	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_2	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_2	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_1	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_1	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_3	G/A
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_3	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_3	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 child_4	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 dad_4	G/G
+chr1	17562	17563	G	A	snp	ts	0.0416666666667	0	DDX11L1 mom_4	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_1	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_2	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_2	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_2	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_1	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_1	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_3	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_3	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_3	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 child_4	G/C
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 dad_4	G/G
+chr1	17696	17697	G	C	snp	tv	0.166666666667	1	DDX11L1 mom_4	G/G
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_1	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_2	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_2	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_2	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_1	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_1	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_3	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_3	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_3	A/A
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 child_4	A/G
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 dad_4	A/G
+chr1	17721	17722	A	G	snp	ts	0.125	1	DDX11L1 mom_4	A/G
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_1	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_2	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_2	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_2	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_1	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_1	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_3	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_3	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_3	C/C
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P child_4	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P dad_4	C/A
+chr1	17729	17730	C	A	snp	tv	0.208333333333	0	WASH7P mom_4	C/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_1	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_2	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_2	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_2	A/A
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_1	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_1	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_3	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_3	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_3	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 child_4	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 dad_4	A/G
+chr1	17745	17746	A	G	snp	ts	0.333333333333	1	DDX11L1 mom_4	A/G
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_gene_wise_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,6 @@
+chrom	start	end	gene	impact	impact_severity	max_aaf_all	variant_filters	n_gene_variants	gene_filters
+chr10	48003991	48003992	ASAH2C	missense_variant	MED	0.543088975937	1	1	1
+chr10	126678091	126678092	CTBP2	stop_gained	HIGH	0.0904917363803	1	1	1
+chr10	135369531	135369532	SYCE1	missense_variant	MED	0.2698	1	1	1
+chr10	1142207	1142208	WDR37	stop_lost	HIGH	1.0	1	1	1
+chr16	72057434	72057435	DHODH	missense_variant	MED	0.000432002764818	1	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_interactions_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,25 @@
+sample	gene	order_of_interaction	interacting_gene
+M10475	CTBP2	0_order:	none
+M10475	CTBP2	1_order:	none
+M10475	CTBP2	2_order:	none
+M10475	CTBP2	3_order:	none
+M10475	CTBP2	4_order:	WDR37
+M10475	CTBP2	5_order:	none
+M128215	CTBP2	0_order:	CTBP2
+M128215	CTBP2	1_order:	none
+M128215	CTBP2	2_order:	none
+M128215	CTBP2	3_order:	none
+M128215	CTBP2	4_order:	WDR37
+M128215	CTBP2	5_order:	none
+M10478	CTBP2	0_order:	none
+M10478	CTBP2	1_order:	none
+M10478	CTBP2	2_order:	none
+M10478	CTBP2	3_order:	none
+M10478	CTBP2	4_order:	WDR37
+M10478	CTBP2	5_order:	MTG1
+M10500	CTBP2	0_order:	none
+M10500	CTBP2	1_order:	none
+M10500	CTBP2	2_order:	none
+M10500	CTBP2	3_order:	none
+M10500	CTBP2	4_order:	WDR37
+M10500	CTBP2	5_order:	MTG1
Binary file test-data/gemini_is_somatic_result.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_load_input.vcf	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,70 @@
+##fileformat=VCFv4.1
+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">
+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">
+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">
+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">
+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">
+##ALT=<ID=DEL,Description="Deletion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">
+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN">
+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">
+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">
+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">
+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">
+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">
+##reference=GRCh37
+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"
+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "
+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' ">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	10583	rs58108140	G	A	100.0	PASS	AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	10611	rs189107123	C	G	100.0	PASS	AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13302	rs180734498	C	T	100.0	PASS	THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13327	rs144762171	G	C	100.0	PASS	AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|)
+1	13957	.	TC	T	28.0	PASS	AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	13980	rs151276478	T	C	100.0	PASS	AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|)
+1	30923	rs140337953	G	T	100.0	PASS	AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|)
+1	46402	.	C	CTGT	31.0	PASS	AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	47190	.	G	GA	192.0	PASS	AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51476	rs187298206	T	C	100.0	PASS	ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51479	rs116400033	T	A	100.0	PASS	RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51914	rs190452223	T	G	100.0	PASS	ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51935	rs181754315	C	T	100.0	PASS	THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||)
+1	51954	rs185832753	G	C	100.0	PASS	LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52058	rs62637813	G	C	100.0	PASS	AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52144	rs190291950	T	A	100.0	PASS	THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52185	.	TTAA	T	244.0	PASS	AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	52238	rs150021059	T	G	100.0	PASS	THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||)
+1	53234	.	CAT	C	227.0	PASS	AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54353	rs140052487	C	A	100.0	PASS	THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54421	rs146477069	A	G	100.0	PASS	ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54490	rs141149254	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54676	rs2462492	C	T	100.0	PASS	LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)
+1	54753	rs143174675	T	G	100.0	PASS	AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55164	rs3091274	C	A	100.0	PASS	AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55249	.	C	CTATGG	443.0	PASS	AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55299	rs10399749	C	T	100.0	PASS	RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55313	rs182462964	A	T	100.0	PASS	ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55326	rs3107975	T	C	100.0	PASS	AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55330	rs185215913	G	A	100.0	PASS	ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55367	rs190850374	G	A	100.0	PASS	ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55388	rs182711216	C	T	100.0	PASS	THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55394	rs2949420	T	A	100.0	PASS	AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55416	rs193242050	G	A	100.0	PASS	AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55427	rs183189405	T	C	100.0	PASS	THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55816	rs187434873	G	A	100.0	PASS	AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55850	rs191890754	C	G	100.0	PASS	AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)
+1	55852	rs184233019	G	C	100.0	PASS	THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
Binary file test-data/gemini_load_result.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_lofsieve_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,11 @@
+chrom	start	end	ref	alt	highest_impact	aa_change	var_trans_pos	trans_aa_length	var_trans_pct	sample	genotype	gene	transcript	trans_type
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M10475	C/C	WDR37	ENST00000381329	protein_coding
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M10478	C/C	WDR37	ENST00000381329	protein_coding
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M10500	C/C	WDR37	ENST00000381329	protein_coding
+chr10	1142207	1142208	T	C	stop_lost	*/R	250	250/249	1.00401606426	M128215	C/C	WDR37	ENST00000381329	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000531469	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000309035	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000494626	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000337195	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000334808	protein_coding
+chr10	126678091	126678092	G	A	stop_gained	Q/*	445	445/445	1.0	M128215	G/A	CTBP2	ENST00000411419	protein_coding
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_mendel_errors_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,10 @@
+chrom	start	end	vcf_id	variant_id	anno_id	ref	alt	qual	filter	type	sub_type	call_rate	in_dbsnp	rs_ids	sv_cipos_start_left	sv_cipos_end_left	sv_cipos_start_right	sv_cipos_end_right	sv_length	sv_is_precise	sv_tool	sv_evidence_type	sv_event_id	sv_mate_id	sv_strand	in_omim	clinvar_sig	clinvar_disease_name	clinvar_dbsource	clinvar_dbsource_id	clinvar_origin	clinvar_dsdb	clinvar_dsdbid	clinvar_disease_acc	clinvar_in_locus_spec_db	clinvar_on_diag_assay	clinvar_causal_allele	pfam_domain	cyto_band	rmsk	in_cpg_island	in_segdup	is_conserved	gerp_bp_score	gerp_element_pval	num_hom_ref	num_het	num_hom_alt	num_unknown	aaf	hwe	inbreeding_coeff	pi	recomb_rate	gene	transcript	is_exonic	is_coding	is_splicing	is_lof	exon	codon_change	aa_change	aa_length	biotype	impact	impact_so	impact_severity	polyphen_pred	polyphen_score	sift_pred	sift_score	anc_allele	rms_bq	cigar	depth	strand_bias	rms_map_qual	in_hom_run	num_mapq_zero	num_alleles	num_reads_w_dels	haplotype_score	qual_depth	allele_count	allele_bal	in_hm2	in_hm3	is_somatic	somatic_score	in_esp	aaf_esp_ea	aaf_esp_aa	aaf_esp_all	exome_chip	in_1kg	aaf_1kg_amr	aaf_1kg_eas	aaf_1kg_sas	aaf_1kg_afr	aaf_1kg_eur	aaf_1kg_all	grc	gms_illumina	gms_solid	gms_iontorrent	in_cse	encode_tfbs	encode_dnaseI_cell_count	encode_dnaseI_cell_list	encode_consensus_gm12878	encode_consensus_h1hesc	encode_consensus_helas3	encode_consensus_hepg2	encode_consensus_huvec	encode_consensus_k562	vista_enhancers	cosmic_ids	info	cadd_raw	cadd_scaled	fitcons	in_exac	aaf_exac_all	aaf_adj_exac_all	aaf_adj_exac_afr	aaf_adj_exac_amr	aaf_adj_exac_eas	aaf_adj_exac_fin	aaf_adj_exac_nfe	aaf_adj_exac_oth	aaf_adj_exac_sas	exac_num_het	exac_num_hom_alt	exac_num_chroms	max_aaf_all	gts	gt_types	gt_phases	gt_depths	gt_ref_depths	gt_alt_depths	gt_quals	gt_copy_numbers	gt_phred_ll_homref	gt_phred_ll_het	gt_phred_ll_homalt	family_id	family_members	family_genotypes	samples	family_count	violation	violation_prob
+chr10	1142207	1142208	None	1	4	T	C	3404.30004883	None	snp	ts	1.0	1	rs10794716	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10p15.3	None	0	0	0	None	None	7	1	1	0	0.166666666667	0.0718606383197	0.6	0.294117647059	0.200924	WDR37	ENST00000381329	1	1	0	1	exon_10_1142110_1142566	Tga/Cga	*250R	249	protein_coding	stop_lost	stop_lost	HIGH	None	None	None	None	None	None	None	122	None	36.0	0	0	8	0.0	2.67470002174	27.8999996185	8	None	None	None	None	None	1	0.999534883721	0.975034044485	0.991234814701	0	1	0.9942	1	1	0.9561	1	0.98762	None	None	None	None	0	None	2	Osteobl;Progfib	T	T	T	T	T	T	None	None	None	None	None	0.156188	1	0.997	0.997067786838	0.970305592927	0.998358956642	1	1	0.999595432887	0.998898678414	1	346	60354	121410	1.0	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']	[0 0 1 0 0 0 0 0 3]	[False False False False False False False False False]	[38 29 23 38 29 22 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 22 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	2	plausible de novo;implausible de novo	0.00000
+chr10	1142207	1142208	None	1	4	T	C	3404.30004883	None	snp	ts	1.0	1	rs10794716	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10p15.3	None	0	0	0	None	None	7	1	1	0	0.166666666667	0.0718606383197	0.6	0.294117647059	0.200924	WDR37	ENST00000381329	1	1	0	1	exon_10_1142110_1142566	Tga/Cga	*250R	249	protein_coding	stop_lost	stop_lost	HIGH	None	None	None	None	None	None	None	122	None	36.0	0	0	8	0.0	2.67470002174	27.8999996185	8	None	None	None	None	None	1	0.999534883721	0.975034044485	0.991234814701	0	1	0.9942	1	1	0.9561	1	0.98762	None	None	None	None	0	None	2	Osteobl;Progfib	T	T	T	T	T	T	None	None	None	None	None	0.156188	1	0.997	0.997067786838	0.970305592927	0.998358956642	1	1	0.999595432887	0.998898678414	1	346	60354	121410	1.0	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']	[0 0 1 0 0 0 0 0 3]	[False False False False False False False False False]	[38 29 23 38 29 22 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 22 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	T/T,T/T,C/C	3_kid	2	plausible de novo;implausible de novo	0.00000
+chr10	48003991	48003992	None	2	1	C	T	1047.86999512	None	snp	ts	1.0	1	rs142685947,rs3739968	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	1	None	3.10871e-42	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	1	0.3112	0.4573	0.3855	0.1241	0.5149	0.346645	grc_fix	73.3	40.3	92.8	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.553676	1	0.443	0.448537771896	0.288974151858	0.281426746944	0.543088975937	0.524984286612	0.478147713207	0.463529411765	0.418641164716	17495	15317	107302	0.543088975937	['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T']	[1 1 3 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	C/C,C/C,C/T	2_kid	1	plausible de novo	0.00000
+chr10	48004991	48004992	None	3	1	C	T	1047.86999512	None	snp	ts	1.0	0	None	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	0	None	None	2	6	1	0	0.444444444444	0.29371811258	-0.35	0.522875816993	1.718591	ASAH2C	ENST00000420079	1	1	0	0	exon_10_48003968_48004056	tGt/tAt	C540Y	610	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	0	None	None	None	None	None	None	grc_fix	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.061011	0	None	None	None	None	None	None	None	None	None	None	None	None	-1.0	['C/T' 'C/T' 'C/T' 'C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T']	[1 1 1 1 1 3 0 0 1]	[False False False False False False False False False]	[38 29 23 38 29 23 38 29 23]	[1 0 0 1 0 0 1 0 0]	[37 29 23 37 29 23 37 29 23]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	C/C,C/C,C/T	3_kid	1	plausible de novo	0.00000
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	G/G,G/G,G/A	1_kid	2	plausible de novo;plausible de novo	0.00000
+chr10	135336655	135336656	None	4	1	G	A	38.3400001526	None	snp	ts	1.0	1	rs6537611	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	0	None	None	4	5	0	0	0.277777777778	0.248563248239	-0.384615384615	0.424836601307	0.43264	SPRN	ENST00000541506	0	0	0	0				151	protein_coding	intron_variant	intron_variant	LOW	None	None	None	None	None	None	None	2	None	37.0	4	0	4	0.0	0.0	19.1700000763	4	None	None	None	None	None	0	None	None	None	0	1	0.9957	1	1	0.9297	1	0.980831	None	None	None	None	0	None	None	None	R	R	R	R	unknown	R	None	None	None	None	None	0.056701	0	None	None	None	None	None	None	None	None	None	None	None	None	1.0	['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A']	[0 0 1 0 0 1 1 1 1]	[False False False False False False False False False]	[38 29 24 38 29 24 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 24 37 29 24 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	G/G,G/G,G/A	2_kid	2	plausible de novo;plausible de novo	0.00000
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	1	1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)	T/T,T/T,T/C	1_kid	3	plausible de novo;plausible de novo;plausible de novo	0.00000
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	3	3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)	T/T,T/T,T/C	3_kid	3	plausible de novo;plausible de novo;plausible de novo	0.00000
+chr10	135369531	135369532	None	5	6	T	C	122.620002747	None	snp	ts	1.0	1	rs3747881,rs386585367	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q26.3	None	0	0	1	None	3.86096e-59	6	3	0	0	0.166666666667	0.548506235587	-0.2	0.294117647059	0.022013	SYCE1	ENST00000368517	1	1	0	0	exon_10_135369485_135369551	aAg/aGg	K147R	282	protein_coding	missense_variant	missense_variant	MED	None	None	None	None	None	None	None	239	None	36.0200004578	2	0	8	0.0	5.71409988403	2.30999994278	2	None	None	None	None	None	1	0.0938372093023	0.163867453473	0.117561125634	1	1	0.1844	0.2698	0.2188	0.1997	0.1093	0.197284	None	None	None	None	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.487112	1	0.134	0.134286610119	0.184985563041	0.164938655607	0.256026889198	0.122313048744	0.0919761054243	0.113686534216	0.194096927001	13825	1225	121196	0.2698	['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']	[0 0 1 0 0 1 0 0 1]	[False False False False False False False False False]	[38 29 22 38 29 21 38 29 24]	[1 0 0 1 0 0 1 0 0]	[37 29 22 37 29 21 37 29 24]	[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]	None	[940 899 729 940 899 729 940 899 729]	[87 78 66 87 78 66 87 78 66]	[0 0 0 0 0 0 0 0 0]	2	2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)	T/T,T/T,T/C	2_kid	3	plausible de novo;plausible de novo;plausible de novo	0.00000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_pathways_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,17 @@
+chrom	start	end	ref	alt	impact	sample	genotype	gene	transcript	pathway
+chr10	52004314	52004315	T	C	intron_variant	M10500	C/C	ASAH2	ENST00000329428	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M128215	C/C	ASAH2	ENST00000329428	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M10500	C/C	ASAH2	ENST00000447815	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M128215	C/C	ASAH2	ENST00000447815	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M10500	C/C	ASAH2	ENST00000395526	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	52004314	52004315	T	C	intron_variant	M128215	C/C	ASAH2	ENST00000395526	hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000531469	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000309035	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000494626	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000337195	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	126678091	126678092	G	A	stop_gained	M128215	G/A	CTBP2	ENST00000411419	hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10	135336655	135336656	G	A	intron_variant	M10478	A/A	CYP2E1	ENST00000463117	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10	135336655	135336656	G	A	intron_variant	M128215	A/A	CYP2E1	ENST00000463117	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10	135336655	135336656	G	A	upstream_gene_variant	M10478	A/A	CYP2E1	ENST00000252945	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10	135336655	135336656	G	A	upstream_gene_variant	M128215	A/A	CYP2E1	ENST00000252945	hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr16	72057434	72057435	C	T	missense_variant	M10475	C/T	DHODH	ENST00000219240	hsa01100:Metabolic_pathways,hsa00240:Pyrimidine_metabolism
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_qc_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,5 @@
+sample	sex	chrX_homref	chrX_het	chrX_homalt	chrX_unknown
+M10475	male	0	0	0	0
+M10478	female	0	0	0	0
+M10500	female	0	0	0	0
+M128215	male	0	0	0	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_query_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,10 @@
+10582
+10610
+13301
+13326
+13956
+13979
+30922
+46401
+47189
+51475
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_region_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,1 @@
+chr10	48003991	48003992	None	2	1	C	T	1047.86999512	None	snp	ts	1.0	1	rs142685947,rs3739968	None	None	None	None	None	1	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	None	chr10q11.22	None	0	1	1	None	3.10871e-42	1	2	1	0	0.5	1	0	0.571428571429	1.718591	ASAH2C	ENST00000420079	1	1	0	0	16/17	tGt/tAt	C/Y	542/612	protein_coding	missense_variant	missense_variant	MED	benign	0.0	tolerated	1.0	None	None	None	165	None	20.9400005341	0	0	8	0.0	4.382999897	9.52999973297	4	None	None	None	None	None	0	None	None	None	0	1	0.3112	0.4573	0.3855	0.1241	0.5149	0.346645	grc_fix	73.3	40.3	92.8	0	None	None	None	R	R	R	R	R	R	None	None	None	None	None	0.553676	1	0.443	0.448537771896	0.288974151858	0.281426746944	0.543088975937	0.524984286612	0.478147713207	0.463529411765	0.418641164716	17495	15317	107302	1	0.543088975937
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_roh_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,7 @@
+chrom	start	end	sample	num_of_snps	density_per_kb	run_length_in_bp
+chr10	1142208	135369532	M10475	5	0.0	134227324
+chr10	1142208	135210791	M10500	5	0.0001	134068583
+chr10	1142208	135210791	M10478	5	0.0001	134068583
+chr10	1142208	135336656	M10478	4	0.0	134194448
+chr10	1142208	135336656	M128215	6	0.0001	134194448
+chr10	1142208	135369532	M128215	5	0.0	134227324
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_stats_result.tabular	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,5 @@
+sample	total
+M10475	3
+M10478	6
+M10500	6
+M128215	4
Binary file test-data/gemini_windower_input.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_databases.loc.sample	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,3 @@
+## GEMINI databases
+#Version	dbkey	Description	Path
+#08_08_2014	hg19	Database (08-08-2014)	/path/to/data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <table name="gemini_databases" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gemini_databases.loc" />
+    </table>
+</tables>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="gemini" version="0.18.1">
+        <repository changeset_revision="be869e11582f" name="package_gemini_0_18_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>