Repository 'gemini_load'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gemini_load

Changeset 0:269c40fdcccb (2016-02-18)
Next changeset 1:9f74d7b0704a (2016-03-22)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
added:
gemini_load.xml
gemini_macros.xml
readme.rst
repository_dependencies.xml
static/images/gemini_mendel_errors.png
test-data/anno.bed
test-data/gemini_actionable_mutations_result.tabular
test-data/gemini_amend_input.db
test-data/gemini_amend_input.ped
test-data/gemini_amend_result.db
test-data/gemini_annotate_input.db
test-data/gemini_annotate_result.tabular
test-data/gemini_autosomal_dominant_result.tabular
test-data/gemini_autosomal_input.db
test-data/gemini_autosomal_recessive.tabular
test-data/gemini_burden_input.db
test-data/gemini_burden_result.tabular
test-data/gemini_comphets_input.db
test-data/gemini_comphets_result.tabular
test-data/gemini_dbinfo_result.tabular
test-data/gemini_de_novo_input.db
test-data/gemini_de_novo_result.tabular
test-data/gemini_dump_result.tabular
test-data/gemini_fusions_result.tabular
test-data/gemini_gene_wise_result.tabular
test-data/gemini_interactions_result.tabular
test-data/gemini_is_somatic_result.db
test-data/gemini_load_input.vcf
test-data/gemini_load_result.db
test-data/gemini_lofsieve_result.tabular
test-data/gemini_mendel_errors_result.tabular
test-data/gemini_pathways_result.tabular
test-data/gemini_qc_result.tabular
test-data/gemini_query_result.tabular
test-data/gemini_region_result.tabular
test-data/gemini_roh_result.tabular
test-data/gemini_stats_result.tabular
test-data/gemini_windower_input.db
tool-data/gemini_databases.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 269c40fdcccb gemini_load.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_load.xml Thu Feb 18 08:55:06 2016 -0500
[
@@ -0,0 +1,101 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Loading a VCF file into GEMINI</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">load</token> <!-- gemini subcommand; also substituted into the tool id and name -->
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        ln -s "${ infile }" input.vcf &&
+        bgzip -c input.vcf > input.vcf.gz &&
+        tabix -p vcf input.vcf.gz &&
+        ## The bgzip-compressed, tabix-indexed copy created above is what gemini receives via -v.
+        gemini
+            #if $annotation_databases:
+                --annotation-dir "${annotation_databases.fields.path}"
+            #end if
+            @BINARY@
+            -v input.vcf.gz
+            #if str( $annotation_type ) != "None":
+                -t "$annotation_type"
+            #end if
+
+            #if $ped:
+                -p "${ ped }"
+            #end if
+
+            $skip_gerp_bp
+            $skip_cadd
+            $skip_gene_tables
+            $no_load_genotypes
+            $no_genotypes
+            $passonly
+            $infostring
+            --cores \${GALAXY_SLOTS:-4}
+            ## The backslash above keeps Cheetah from expanding GALAXY_SLOTS, so the shell resolves it (default 4).
+            "${ outfile }"
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="vcf" label="VCF file to be loaded in the GEMINI database" help="Only build 37 (aka hg19) of the human genome is supported.">
+            <options>
+                <filter type="add_value" value="hg19" />
+                <filter type="add_value" value="Homo_sapiens_nuHg19_mtrCRS" />
+                <filter type="add_value" value="hg_g1k_v37" />
+            </options>
+        </param>
+
+        <param name="annotation_type" type="select" label="The annotations to be used with the input vcf" help="(-t)">
+            <option value="None">None (not recommended)</option>
+            <option value="snpEff" selected="True">snpEff annotated VCF file</option>
+            <option value="VEP">VEP annotated VCF file</option>
+        </param>
+        <param name="ped" type="data" format="tabular" optional="True" label="Sample information file in PED+ format" help="(-p)" />
+        <expand macro="annotation_dir" />
+        <!-- Optional switches: each boolean below renders its flag when checked and an empty string otherwise. -->
+        <param name="skip_gerp_bp" type="boolean" truevalue="--skip-gerp-bp" falsevalue="" checked="False"
+            label="Do not load GERP scores at base pair resolution" help="(--skip-gerp-bp)"/>
+
+        <param name="skip_cadd" type="boolean" truevalue="--skip-cadd" falsevalue="" checked="False"
+            label="Do not load CADD scores" help="(--skip-cadd)"/>
+
+        <param name="skip_gene_tables" type="boolean" truevalue="--skip-gene-tables" falsevalue="" checked="False"
+            label="Do not load gene tables" help="(--skip-gene-tables)"/>
+
+        <param name="no_load_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False"
+            label="Genotypes exist in the file, but should not be stored" help="(--no-load-genotypes)"/>
+
+        <param name="no_genotypes" type="boolean" truevalue="--no-genotypes" falsevalue="" checked="False"
+            label="There are no genotypes in the file" help="e.g. some 1000G VCFs (--no-genotypes)"/>
+
+        <param name="passonly" type="boolean" truevalue="--passonly" falsevalue="" checked="False"
+            label="Keep only variants that pass all filters" help="(--passonly)"/>
+
+        <param name="infostring" type="boolean" truevalue="--save-info-string" falsevalue="" checked="False"
+            label="Load INFO string from VCF file"  help="(--save-info-string)"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="gemini.sqlite" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="gemini_load_input.vcf" ftype="vcf" />
+            <param name="skip_gene_tables" value="True" />
+            <param name="skip_gerp_bp" value="True" />
+            <param name="skip_cadd" value="True" />
+            <param name="no_genotypes" value="True" />
+            <output name="outfile" file="gemini_load_result.db" compare="sim_size" delta="1000"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Before we can use GEMINI to explore genetic variation, we must first load our VCF file into the GEMINI database framework.
+We expect you to have first annotated the functional consequence of each variant in your VCF using either VEP or snpEff.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 269c40fdcccb gemini_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,146 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.18.1">gemini</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <version_command>gemini --version</version_command>
+    </xml>
+    <!-- Flag non-zero exit codes and any "Error:" / "Exception:" text as job errors. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+    <!-- Optional selector populated from the gemini_databases data table. -->
+    <xml name="annotation_dir">
+        <param name="annotation_databases" type="select" optional="True" label="Choose a gemini annotation database">
+            <options from_data_table="gemini_databases">
+                <filter type="sort_by" column="0" />
+            </options>
+        </param>
+    </xml>
+
+    <xml name="add_header_column">
+        <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False"
+            label="Add a header of column names to the output" help="(--header)"/>
+    </xml>
+
+    <xml name="radius">
+        <param name="radius" type="integer" value="3" label="Set filter for Breadth-first search (BFS) in the Protein-Protein Interaction network" help="(-r)">
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+    <xml name="variant_mode">
+        <param name="variant_mode" type="boolean" truevalue="--var" falsevalue="" checked="False"
+            label="Returns variant info (e.g. impact, biotype) for interacting genes" help="(--var)"/>
+    </xml>
+
+    <xml name="column_filter">
+        <conditional name="report">
+            <param name="report_selector" type="select" label="Columns to include in the report"
+                help="By default, this tool reports all columns in the variants table. One may choose to report only a subset of the columns.">
+                <option value="all" selected="True">all</option>
+                <option value="column_filter">User given columns</option>
+            </param>
+            <when value="all"/>
+            <when value="column_filter">
+                <param name="columns" type="select" display="checkboxes" multiple="True" label="Choose columns to include in the report" help="(--columns)">
+                    <option value="gene">gene</option>
+                    <option value="chrom">chrom</option>
+                    <option value="start">start</option>
+                    <option value="end">end</option>
+                    <option value="ref">ref</option>
+                    <option value="alt">alt</option>
+                    <option value="impact">impact</option>
+                    <option value="impact_severity">impact_severity</option>
+                    <option value="max_aaf_all">alternative allele frequency</option>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="filter">
+        <conditional name="filter">
+            <param name="filter_selector" type="select" label="Apply additional constraints"
+                help="By default, this tool will report all variants regardless of their putative functional impact. In order to apply additional constraints on the variants returned, you can use this optional filter.">
+                <option value="no">No additional constraints</option>
+                <option value="yes">Apply additional constraints</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="filter" type="text" label="Constraints in SQL syntax" help="Conditions applied here will become WHERE clauses in the query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'. (--filter)">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Sanitizer for free-text SQL: accepts every printable character and drops anything else. -->
+    <xml name="sanitize_query">
+        <sanitizer invalid_char="">
+            <valid initial="string.printable"/>
+        </sanitizer>
+    </xml>
+    <!-- Cheetah snippet for tools using the "filter" macro: passes the shell-quoted constraint when one was entered. -->
+    <token name="@CMDLN_SQL_FILTER_FILTER_OPTION@">
+        #if str($filter.filter_selector) == 'yes' and $filter.filter:
+            #import pipes
+            --filter ${ pipes.quote( str( $filter.filter ) ) or "''" }
+        #end if
+    </token>
+
+    <xml name="family">
+        <param name="families" type="text" value="" label="Comma separated list of families to restrict the analysis to." help="e.g. Family1,Family3 (--families)"/>
+    </xml>
+
+    <xml name="lenient">
+        <param name="lenient" type="boolean" truevalue="--lenient" falsevalue="" checked="False" label="Loosen the restrictions on family structure"/>
+    </xml>
+
+    <xml name="unaffected">
+        <param name="allow_unaffected" type="boolean" truevalue="--allow-unaffected" falsevalue="" checked="False" label="Report candidates that also impact samples labeled as unaffected." help="(--allow-unaffected)"/>
+    </xml>
+
+    <xml name="min_kindreds">
+        <param name="min_kindreds" type="integer" value="1" label="The min. number of kindreds that must have a candidate variant in a gene" help="default: 1 (--min-kindreds)" />
+    </xml>
+
+    <xml name="min_sequence_depth">
+        <param name="d" type="integer" value="0" min="0" label="The minimum aligned sequence depth (genotype DP) required for each sample"
+                help="default: 0 (-d)" />
+    </xml>
+
+    <xml name="min_gq">
+        <param name="min_gq" type="integer" value="0" label="the minimum genotype quality required for each sample in a family" help="default: 0 (--min-gq)">
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+
+    <xml name="gt_pl_max">
+        <param name="gt_pl_max" type="integer" value="-1" min="-1" label="The maximum phred-scaled genotype likelihood (PL) allowed for each sample in a family" help="default: -1 (not set) (--gt-pl-max)" />
+    </xml>
+    <token name="@VERSION@">0.18.1</token> <!-- same version as the gemini package requirement above -->
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>
+            <yield />
+        </citations>
+    </xml>
+    <!-- GEMINI database input restricted to datasets whose gemini_version metadata equals @VERSION@. -->
+    <xml name="infile">
+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted.">
+            <options options_filter_attribute="metadata.gemini_version">
+                <filter type="add_value" value="@VERSION@" />
+            </options>
+            <validator type="expression" message="This version of Gemini will only work with Gemini files that are for version @VERSION@.">value is not None and value.metadata.gemini_version == "@VERSION@"</validator>
+        </param>
+    </xml>
+
+</macros>
b
diff -r 000000000000 -r 269c40fdcccb readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Thu Feb 18 08:55:06 2016 -0500
[
@@ -0,0 +1,65 @@
+=========================
+Galaxy wrapper for GEMINI
+=========================
+
+
+GEMINI: a flexible framework for exploring genome variation
+
+GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of 
+the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, 
+and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very 
+powerful system for exploring genetic variation for disease and population genetics.
+
+Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically 
+annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks,
+OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows
+one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an 
+enhanced SQL engine.
+
+Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_.
+
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
+
+
+=======
+History
+=======
+- 0.9.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
b
diff -r 000000000000 -r 269c40fdcccb repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+    <repository changeset_revision="345412d58d75" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> <!-- pinned to one changeset of the data manager repository -->
+</repositories>
b
diff -r 000000000000 -r 269c40fdcccb static/images/gemini_mendel_errors.png
b
Binary file static/images/gemini_mendel_errors.png has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/anno.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno.bed Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,3 @@
+chr1 30547 30548
+chr1 30920 30925
+chr1 30922 30923
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_actionable_mutations_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_actionable_mutations_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,1 @@
+tum_name chrom start end ref alt gene impact is_somatic in_cosmic_census dgidb_info
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_amend_input.db
b
Binary file test-data/gemini_amend_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_amend_input.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend_input.ped Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,5 @@
+#family_id name paternal_id maternal_id sex phenotype ethnicity hair_color
+1 M10475    None None  1    1    None brown
+1 M10478     M10475  M10500    2    2    None red
+1 M10500     None    None    2    2    None
+1 M128215    M10475  M10500    1    1    None green
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_amend_result.db
b
Binary file test-data/gemini_amend_result.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_annotate_input.db
b
Binary file test-data/gemini_annotate_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_annotate_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_annotate_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,1 @@
+updated 10 variants
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_autosomal_dominant_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_autosomal_dominant_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,9 @@
+gene chrom impact variant_id family_id family_members family_genotypes samples family_count
+ASAH2C chr10 missense_variant 3 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) C/T,C/C,C/T 3_dad,3_kid 2
+ASAH2C chr10 missense_variant 3 2 2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected) C/C,C/T,C/T 2_mom,2_kid 2
+ASAH2C chr10 missense_variant 4 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) C/T,C/C,C/T 3_dad,3_kid 2
+ASAH2C chr10 missense_variant 4 2 2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected) C/C,C/T,C/T 2_mom,2_kid 2
+SPRN chr10 intron_variant 5 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) G/A,G/G,G/A 3_dad,3_kid 1
+WDR37 chr10 stop_lost 1 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) T/C,T/T,T/C 3_dad,3_kid 2
+WDR37 chr10 stop_lost 1 2 2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected) T/T,T/C,T/C 2_mom,2_kid 2
+WDR37 chr10 stop_lost 2 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) T/C,T/C,T/C 3_dad,3_kid 2
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_autosomal_input.db
b
Binary file test-data/gemini_autosomal_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_autosomal_recessive.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_autosomal_recessive.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,2 @@
+gene chrom impact variant_id family_id family_members family_genotypes samples family_count
+WDR37 chr10 stop_lost 2 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) T/C,T/C,C/C 1_kid 1
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_burden_input.db
b
Binary file test-data/gemini_burden_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_burden_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,6 @@
+gene T c Z p_value
+SYCE1 -0.5 0.25 -1.0 0.00699300699301
+DHODH 0.0 0.0 nan nan
+WDR37 -1.0 1.5 -0.816496580928 0.00699300699301
+ASAH2C -0.5 0.75 -0.57735026919 0.00699300699301
+CTBP2 0.0 0.0 nan nan
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_comphets_input.db
b
Binary file test-data/gemini_comphets_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_comphets_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_comphets_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,3 @@
+chrom start end ref alt gene impact variant_id family_id family_members family_genotypes samples family_count comp_het_id priority
+chr1 17362 17366 TTCT T WASH7P splice_acceptor_variant 3 4 child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female) TTCT|T,TTCT/T,TTCT/TTCT child_4 1 1_3_7 3
+chr1 17729 17730 C A WASH7P splice_acceptor_variant 7 4 child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female) C/A,C/A,C/A child_4 1 1_3_7 3
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_dbinfo_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_dbinfo_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,209 @@
+table_name column_name type
+variants chrom text
+variants start integer
+variants end integer
+variants vcf_id text
+variants variant_id integer
+variants anno_id integer
+variants ref text
+variants alt text
+variants qual float
+variants filter text
+variants type text
+variants sub_type text
+variants gts blob
+variants gt_types blob
+variants gt_phases blob
+variants gt_depths blob
+variants gt_ref_depths blob
+variants gt_alt_depths blob
+variants gt_quals blob
+variants gt_copy_numbers blob
+variants gt_phred_ll_homref blob
+variants gt_phred_ll_het blob
+variants gt_phred_ll_homalt blob
+variants call_rate float
+variants in_dbsnp bool
+variants rs_ids text
+variants sv_cipos_start_left integer
+variants sv_cipos_end_left integer
+variants sv_cipos_start_right integer
+variants sv_cipos_end_right integer
+variants sv_length integer
+variants sv_is_precise bool
+variants sv_tool text
+variants sv_evidence_type text
+variants sv_event_id text
+variants sv_mate_id text
+variants sv_strand text
+variants in_omim bool
+variants clinvar_sig text
+variants clinvar_disease_name text
+variants clinvar_dbsource text
+variants clinvar_dbsource_id text
+variants clinvar_origin text
+variants clinvar_dsdb text
+variants clinvar_dsdbid text
+variants clinvar_disease_acc text
+variants clinvar_in_locus_spec_db bool
+variants clinvar_on_diag_assay bool
+variants clinvar_causal_allele text
+variants pfam_domain text
+variants cyto_band text
+variants rmsk text
+variants in_cpg_island bool
+variants in_segdup bool
+variants is_conserved bool
+variants gerp_bp_score float
+variants gerp_element_pval float
+variants num_hom_ref integer
+variants num_het integer
+variants num_hom_alt integer
+variants num_unknown integer
+variants aaf real
+variants hwe decimal(2,7)
+variants inbreeding_coeff decimal(2,7)
+variants pi decimal(2,7)
+variants recomb_rate decimal(2,7)
+variants gene text
+variants transcript text
+variants is_exonic bool
+variants is_coding bool
+variants is_splicing bool
+variants is_lof bool
+variants exon text
+variants codon_change text
+variants aa_change text
+variants aa_length text
+variants biotype text
+variants impact text
+variants impact_so text
+variants impact_severity text
+variants polyphen_pred text
+variants polyphen_score float
+variants sift_pred text
+variants sift_score float
+variants anc_allele text
+variants rms_bq float
+variants cigar text
+variants depth integer
+variants strand_bias float
+variants rms_map_qual float
+variants in_hom_run integer
+variants num_mapq_zero integer
+variants num_alleles integer
+variants num_reads_w_dels float
+variants haplotype_score float
+variants qual_depth float
+variants allele_count integer
+variants allele_bal float
+variants in_hm2 bool
+variants in_hm3 bool
+variants is_somatic bool
+variants somatic_score float
+variants in_esp bool
+variants aaf_esp_ea decimal(2,7)
+variants aaf_esp_aa decimal(2,7)
+variants aaf_esp_all decimal(2,7)
+variants exome_chip bool
+variants in_1kg bool
+variants aaf_1kg_amr decimal(2,7)
+variants aaf_1kg_eas decimal(2,7)
+variants aaf_1kg_sas decimal(2,7)
+variants aaf_1kg_afr decimal(2,7)
+variants aaf_1kg_eur decimal(2,7)
+variants aaf_1kg_all decimal(2,7)
+variants grc text
+variants gms_illumina float
+variants gms_solid float
+variants gms_iontorrent float
+variants in_cse bool
+variants encode_tfbs text
+variants encode_dnaseI_cell_count integer
+variants encode_dnaseI_cell_list text
+variants encode_consensus_gm12878 text
+variants encode_consensus_h1hesc text
+variants encode_consensus_helas3 text
+variants encode_consensus_hepg2 text
+variants encode_consensus_huvec text
+variants encode_consensus_k562 text
+variants vista_enhancers text
+variants cosmic_ids text
+variants info blob
+variants cadd_raw float
+variants cadd_scaled float
+variants fitcons float
+variants in_exac bool
+variants aaf_exac_all decimal(2,7)
+variants aaf_adj_exac_all decimal(2,7)
+variants aaf_adj_exac_afr decimal(2,7)
+variants aaf_adj_exac_amr decimal(2,7)
+variants aaf_adj_exac_eas decimal(2,7)
+variants aaf_adj_exac_fin decimal(2,7)
+variants aaf_adj_exac_nfe decimal(2,7)
+variants aaf_adj_exac_oth decimal(2,7)
+variants aaf_adj_exac_sas decimal(2,7)
+variants exac_num_het int
+variants exac_num_hom_alt int
+variants exac_num_chroms int
+variants max_aaf_all REAL
+variant_impacts variant_id integer
+variant_impacts anno_id integer
+variant_impacts gene text
+variant_impacts transcript text
+variant_impacts is_exonic bool
+variant_impacts is_coding bool
+variant_impacts is_splicing bool
+variant_impacts is_lof bool
+variant_impacts exon text
+variant_impacts codon_change text
+variant_impacts aa_change text
+variant_impacts aa_length text
+variant_impacts biotype text
+variant_impacts impact text
+variant_impacts impact_so text
+variant_impacts impact_severity text
+variant_impacts polyphen_pred text
+variant_impacts polyphen_score float
+variant_impacts sift_pred text
+variant_impacts sift_score float
+samples sample_id integer
+samples family_id text
+samples name text
+samples paternal_id text
+samples maternal_id text
+samples sex text
+samples phenotype text
+samples ethnicity text
+gene_detailed uid integer
+gene_detailed chrom text
+gene_detailed gene text
+gene_detailed is_hgnc bool
+gene_detailed ensembl_gene_id text
+gene_detailed transcript text
+gene_detailed biotype text
+gene_detailed transcript_status text
+gene_detailed ccds_id text
+gene_detailed hgnc_id text
+gene_detailed entrez_id text
+gene_detailed cds_length text
+gene_detailed protein_length text
+gene_detailed transcript_start text
+gene_detailed transcript_end text
+gene_detailed strand text
+gene_detailed synonym text
+gene_detailed rvis_pct float
+gene_detailed mam_phenotype_id text
+gene_summary uid integer
+gene_summary chrom text
+gene_summary gene text
+gene_summary is_hgnc bool
+gene_summary ensembl_gene_id text
+gene_summary hgnc_id text
+gene_summary transcript_min_start text
+gene_summary transcript_max_end text
+gene_summary strand text
+gene_summary synonym text
+gene_summary rvis_pct float
+gene_summary mam_phenotype_id text
+gene_summary in_cosmic_census bool
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_de_novo_input.db
b
Binary file test-data/gemini_de_novo_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_de_novo_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_de_novo_result.tabular Thu Feb 18 08:55:06 2016 -0500
[
b"@@ -0,0 +1,9 @@\n+chrom\tstart\tend\tvcf_id\tvariant_id\tanno_id\tref\talt\tqual\tfilter\ttype\tsub_type\tcall_rate\tin_dbsnp\trs_ids\tsv_cipos_start_left\tsv_cipos_end_left\tsv_cipos_start_right\tsv_cipos_end_right\tsv_length\tsv_is_precise\tsv_tool\tsv_evidence_type\tsv_event_id\tsv_mate_id\tsv_strand\tin_omim\tclinvar_sig\tclinvar_disease_name\tclinvar_dbsource\tclinvar_dbsource_id\tclinvar_origin\tclinvar_dsdb\tclinvar_dsdbid\tclinvar_disease_acc\tclinvar_in_locus_spec_db\tclinvar_on_diag_assay\tclinvar_causal_allele\tpfam_domain\tcyto_band\trmsk\tin_cpg_island\tin_segdup\tis_conserved\tgerp_bp_score\tgerp_element_pval\tnum_hom_ref\tnum_het\tnum_hom_alt\tnum_unknown\taaf\thwe\tinbreeding_coeff\tpi\trecomb_rate\tgene\ttranscript\tis_exonic\tis_coding\tis_splicing\tis_lof\texon\tcodon_change\taa_change\taa_length\tbiotype\timpact\timpact_so\timpact_severity\tpolyphen_pred\tpolyphen_score\tsift_pred\tsift_score\tanc_allele\trms_bq\tcigar\tdepth\tstrand_bias\trms_map_qual\tin_hom_run\tnum_mapq_zero\tnum_alleles\tnum_reads_w_dels\thaplotype_score\tqual_depth\tallele_count\tallele_bal\tin_hm2\tin_hm3\tis_somatic\tsomatic_score\tin_esp\taaf_esp_ea\taaf_esp_aa\taaf_esp_all\texome_chip\tin_1kg\taaf_1kg_amr\taaf_1kg_eas\taaf_1kg_sas\taaf_1kg_afr\taaf_1kg_eur\taaf_1kg_all\tgrc\tgms_illumina\tgms_solid\tgms_iontorrent\tin_cse\tencode_tfbs\tencode_dnaseI_cell_count\tencode_dnaseI_cell_list\tencode_consensus_gm12878\tencode_consensus_h1hesc\tencode_consensus_helas3\tencode_consensus_hepg2\tencode_consensus_huvec\tencode_consensus_k562\tvista_enhancers\tcosmic_ids\tinfo\tcadd_raw\tcadd_scaled\tfitcons\tin_exac\taaf_exac_all\taaf_adj_exac_all\taaf_adj_exac_afr\taaf_adj_exac_amr\taaf_adj_exac_eas\taaf_adj_exac_fin\taaf_adj_exac_nfe\taaf_adj_exac_oth\taaf_adj_exac_sas\texac_num_het\texac_num_hom_alt\texac_num_chroms\tmax_aaf_all\tgts\tgt_types\tgt_phases\tgt_depths\tgt_ref_depths\tgt_alt_depths\tgt_quals\tgt_copy_numbers\tgt_phred_ll_homref\tgt_phred_ll_het\tgt_phred_ll_homalt\
tfamily_id\tfamily_members\tfamily_genotypes\tsamples\tfamily_count\n+chr10\t48003991\t48003992\tNone\t2\t1\tC\tT\t1047.86999512\tNone\tsnp\tts\t1.0\t1\trs142685947,rs3739968\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10q11.22\tNone\t0\t1\t1\tNone\t3.10871e-42\t2\t6\t1\t0\t0.444444444444\t0.29371811258\t-0.35\t0.522875816993\t1.718591\tASAH2C\tENST00000420079\t1\t1\t0\t0\texon_10_48003968_48004056\ttGt/tAt\tC540Y\t610\tprotein_coding\tmissense_variant\tmissense_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t165\tNone\t20.9400005341\t0\t0\t8\t0.0\t4.382999897\t9.52999973297\t4\tNone\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\t0\t1\t0.3112\t0.4573\t0.3855\t0.1241\t0.5149\t0.346645\tgrc_fix\t73.3\t40.3\t92.8\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.553676\t1\t0.443\t0.448537771896\t0.288974151858\t0.281426746944\t0.543088975937\t0.524984286612\t0.478147713207\t0.463529411765\t0.418641164716\t17495\t15317\t107302\t0.543088975937\t['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T']\t[1 1 3 0 0 1 1 1 1]\t[False False False False False False False False False]\t[38 29 23 38 29 23 38 29 23]\t[1 0 0 1 0 0 1 0 0]\t[37 29 23 37 29 23 37 29 23]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 
0]\t2\t2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)\tC/C,C/C,C/T\t2_kid\t2\n+chr10\t48004991\t48004992\tNone\t3\t1\tC\tT\t1047.86999512\tNone\tsnp\tts\t1.0\t0\tNone\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10q11.22\tNone\t0\t1\t0\tNone\tNone\t2\t6\t1\t0\t0.444444444444\t0.29371811258\t-0.35\t0.522875816993\t1.718591\tASAH2C\tENST00000420079\t1\t1\t0\t0\texon_10_48003968_48004056\ttGt/tAt\tC540Y\t610\tprotein_coding\tmissense_variant\tmissense_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t165\tNone\t20.9400005341\t0\t0\t8\t0.0\t4.382999897\t9.52999973297\t4\tNone\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\t0\t0\tNone\tNone\tNone\tNone\tNone\tNone\tgrc_fix\tNone\tNone\tNone\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.061011\t0\tNone\tNone\tNone\t"..b"\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10q26.3\tNone\t0\t0\t1\tNone\t3.86096e-59\t6\t3\t0\t0\t0.166666666667\t0.548506235587\t-0.2\t0.294117647059\t0.022013\tSYCE1\tENST00000368517\t1\t1\t0\t0\texon_10_135369485_135369551\taAg/aGg\tK147R\t282\tprotein_coding\tmissense_variant\tmissense_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t239\tNone\t36.0200004578\t2\t0\t8\t0.0\t5.71409988403\t2.30999994278\t2\tNone\tNone\tNone\tNone\tNone\t1\t0.0938372093023\t0.163867453473\t0.117561125634\t1\t1\t0.1844\t0.2698\t0.2188\t0.1997\t0.1093\t0.197284\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.487112\t1\t0.134\t0.134286610119\t0.184985563041\t0.164938655607\t0.256026889198\t0.122313048744\t0.0919761054243\t0.113686534216\t0.194096927001\t13825\t1225\t121196\t0.2698\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']\t[0 0 1 0 0 1 0 0 1]\t[False False False False False False False False False]\t[38 29 22 38 29 21 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 22 37 29 
21 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 0]\t3\t3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)\tT/T,T/T,T/C\t3_kid\t3\n+chr10\t135369531\t135369532\tNone\t5\t6\tT\tC\t122.620002747\tNone\tsnp\tts\t1.0\t1\trs3747881,rs386585367\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10q26.3\tNone\t0\t0\t1\tNone\t3.86096e-59\t6\t3\t0\t0\t0.166666666667\t0.548506235587\t-0.2\t0.294117647059\t0.022013\tSYCE1\tENST00000368517\t1\t1\t0\t0\texon_10_135369485_135369551\taAg/aGg\tK147R\t282\tprotein_coding\tmissense_variant\tmissense_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t239\tNone\t36.0200004578\t2\t0\t8\t0.0\t5.71409988403\t2.30999994278\t2\tNone\tNone\tNone\tNone\tNone\t1\t0.0938372093023\t0.163867453473\t0.117561125634\t1\t1\t0.1844\t0.2698\t0.2188\t0.1997\t0.1093\t0.197284\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.487112\t1\t0.134\t0.134286610119\t0.184985563041\t0.164938655607\t0.256026889198\t0.122313048744\t0.0919761054243\t0.113686534216\t0.194096927001\t13825\t1225\t121196\t0.2698\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']\t[0 0 1 0 0 1 0 0 1]\t[False False False False False False False False False]\t[38 29 22 38 29 21 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 22 37 29 21 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 
0]\t2\t2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)\tT/T,T/T,T/C\t2_kid\t3\n+chr10\t1142207\t1142208\tNone\t1\t4\tT\tC\t3404.30004883\tNone\tsnp\tts\t1.0\t1\trs10794716\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10p15.3\tNone\t0\t0\t0\tNone\tNone\t7\t1\t1\t0\t0.166666666667\t0.0718606383197\t0.6\t0.294117647059\t0.200924\tWDR37\tENST00000381329\t1\t1\t0\t1\texon_10_1142110_1142566\tTga/Cga\t*250R\t249\tprotein_coding\tstop_lost\tstop_lost\tHIGH\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t122\tNone\t36.0\t0\t0\t8\t0.0\t2.67470002174\t27.8999996185\t8\tNone\tNone\tNone\tNone\tNone\t1\t0.999534883721\t0.975034044485\t0.991234814701\t0\t1\t0.9942\t1\t1\t0.9561\t1\t0.98762\tNone\tNone\tNone\tNone\t0\tNone\t2\tOsteobl;Progfib\tT\tT\tT\tT\tT\tT\tNone\tNone\tNone\tNone\tNone\t0.156188\t1\t0.997\t0.997067786838\t0.970305592927\t0.998358956642\t1\t1\t0.999595432887\t0.998898678414\t1\t346\t60354\t121410\t1.0\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']\t[0 0 1 0 0 0 0 0 3]\t[False False False False False False False False False]\t[38 29 23 38 29 22 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 23 37 29 22 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 0]\t1\t1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)\tT/T,T/T,T/C\t1_kid\t1\n"
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_dump_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_dump_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,97 @@
+chrom start end ref alt type sub_type aaf in_dbsnp gene sample genotype
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_1 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_2 G/G
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_2 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_2 G/G
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_1 G/G
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_1 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_3 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_3 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_3 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_4 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_4 G/A
+chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_4 G/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_1 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_2 A/G
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_2 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_2 A/G
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_1 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_1 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_3 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_3 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_3 A/G
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_4 A/G
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_4 A/A
+chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_4 A/A
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_1 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_2 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_2 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_2 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_1 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_1 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_3 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_3 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_3 TTCT/TTCT
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_4 TTCT/T
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_4 TTCT/T
+chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_4 TTCT/TTCT
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_1 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_2 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_2 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_2 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_1 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_1 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_3 G/A
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_3 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_3 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_4 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_4 G/G
+chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_4 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_1 G/C
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_2 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_2 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_2 G/C
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_1 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_1 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_3 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_3 G/C
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_3 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_4 G/C
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_4 G/G
+chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_4 G/G
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_1 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_2 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_2 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_2 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_1 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_1 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_3 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_3 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_3 A/A
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_4 A/G
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_4 A/G
+chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_4 A/G
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_1 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_2 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_2 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_2 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_1 C/A
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_1 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_3 C/A
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_3 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_3 C/C
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_4 C/A
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_4 C/A
+chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_4 C/A
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_1 A/A
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_2 A/A
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_2 A/A
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_2 A/A
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_1 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_1 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_3 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_3 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_3 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_4 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_4 A/G
+chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_4 A/G
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_gene_wise_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_gene_wise_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,6 @@
+chrom start end gene impact impact_severity max_aaf_all variant_filters n_gene_variants gene_filters
+chr10 48003991 48003992 ASAH2C missense_variant MED 0.543088975937 1 1 1
+chr10 126678091 126678092 CTBP2 stop_gained HIGH 0.0904917363803 1 1 1
+chr10 135369531 135369532 SYCE1 missense_variant MED 0.2698 1 1 1
+chr10 1142207 1142208 WDR37 stop_lost HIGH 1.0 1 1 1
+chr16 72057434 72057435 DHODH missense_variant MED 0.000432002764818 1 1 1
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_interactions_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_interactions_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,25 @@
+sample gene order_of_interaction interacting_gene
+M10475 CTBP2 0_order: none
+M10475 CTBP2 1_order: none
+M10475 CTBP2 2_order: none
+M10475 CTBP2 3_order: none
+M10475 CTBP2 4_order: WDR37
+M10475 CTBP2 5_order: none
+M128215 CTBP2 0_order: CTBP2
+M128215 CTBP2 1_order: none
+M128215 CTBP2 2_order: none
+M128215 CTBP2 3_order: none
+M128215 CTBP2 4_order: WDR37
+M128215 CTBP2 5_order: none
+M10478 CTBP2 0_order: none
+M10478 CTBP2 1_order: none
+M10478 CTBP2 2_order: none
+M10478 CTBP2 3_order: none
+M10478 CTBP2 4_order: WDR37
+M10478 CTBP2 5_order: MTG1
+M10500 CTBP2 0_order: none
+M10500 CTBP2 1_order: none
+M10500 CTBP2 2_order: none
+M10500 CTBP2 3_order: none
+M10500 CTBP2 4_order: WDR37
+M10500 CTBP2 5_order: MTG1
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_is_somatic_result.db
b
Binary file test-data/gemini_is_somatic_result.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_load_input.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_load_input.vcf Thu Feb 18 08:55:06 2016 -0500
[
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER||'..b'=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55164\trs3091274\tC\tA\t100.0\tPASS\tA
N=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55416\trs193242050\tG\tA\t100.0\tPAS
S\tAA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n'
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_load_result.db
b
Binary file test-data/gemini_load_result.db has changed
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_lofsieve_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_lofsieve_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,11 @@
+chrom start end ref alt highest_impact aa_change var_trans_pos trans_aa_length var_trans_pct sample genotype gene transcript trans_type
+chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M10475 C/C WDR37 ENST00000381329 protein_coding
+chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M10478 C/C WDR37 ENST00000381329 protein_coding
+chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M10500 C/C WDR37 ENST00000381329 protein_coding
+chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M128215 C/C WDR37 ENST00000381329 protein_coding
+chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000531469 protein_coding
+chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000309035 protein_coding
+chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000494626 protein_coding
+chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000337195 protein_coding
+chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000334808 protein_coding
+chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000411419 protein_coding
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_mendel_errors_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_mendel_errors_result.tabular Thu Feb 18 08:55:06 2016 -0500
[
b"@@ -0,0 +1,10 @@\n+chrom\tstart\tend\tvcf_id\tvariant_id\tanno_id\tref\talt\tqual\tfilter\ttype\tsub_type\tcall_rate\tin_dbsnp\trs_ids\tsv_cipos_start_left\tsv_cipos_end_left\tsv_cipos_start_right\tsv_cipos_end_right\tsv_length\tsv_is_precise\tsv_tool\tsv_evidence_type\tsv_event_id\tsv_mate_id\tsv_strand\tin_omim\tclinvar_sig\tclinvar_disease_name\tclinvar_dbsource\tclinvar_dbsource_id\tclinvar_origin\tclinvar_dsdb\tclinvar_dsdbid\tclinvar_disease_acc\tclinvar_in_locus_spec_db\tclinvar_on_diag_assay\tclinvar_causal_allele\tpfam_domain\tcyto_band\trmsk\tin_cpg_island\tin_segdup\tis_conserved\tgerp_bp_score\tgerp_element_pval\tnum_hom_ref\tnum_het\tnum_hom_alt\tnum_unknown\taaf\thwe\tinbreeding_coeff\tpi\trecomb_rate\tgene\ttranscript\tis_exonic\tis_coding\tis_splicing\tis_lof\texon\tcodon_change\taa_change\taa_length\tbiotype\timpact\timpact_so\timpact_severity\tpolyphen_pred\tpolyphen_score\tsift_pred\tsift_score\tanc_allele\trms_bq\tcigar\tdepth\tstrand_bias\trms_map_qual\tin_hom_run\tnum_mapq_zero\tnum_alleles\tnum_reads_w_dels\thaplotype_score\tqual_depth\tallele_count\tallele_bal\tin_hm2\tin_hm3\tis_somatic\tsomatic_score\tin_esp\taaf_esp_ea\taaf_esp_aa\taaf_esp_all\texome_chip\tin_1kg\taaf_1kg_amr\taaf_1kg_eas\taaf_1kg_sas\taaf_1kg_afr\taaf_1kg_eur\taaf_1kg_all\tgrc\tgms_illumina\tgms_solid\tgms_iontorrent\tin_cse\tencode_tfbs\tencode_dnaseI_cell_count\tencode_dnaseI_cell_list\tencode_consensus_gm12878\tencode_consensus_h1hesc\tencode_consensus_helas3\tencode_consensus_hepg2\tencode_consensus_huvec\tencode_consensus_k562\tvista_enhancers\tcosmic_ids\tinfo\tcadd_raw\tcadd_scaled\tfitcons\tin_exac\taaf_exac_all\taaf_adj_exac_all\taaf_adj_exac_afr\taaf_adj_exac_amr\taaf_adj_exac_eas\taaf_adj_exac_fin\taaf_adj_exac_nfe\taaf_adj_exac_oth\taaf_adj_exac_sas\texac_num_het\texac_num_hom_alt\texac_num_chroms\tmax_aaf_all\tgts\tgt_types\tgt_phases\tgt_depths\tgt_ref_depths\tgt_alt_depths\tgt_quals\tgt_copy_numbers\tgt_phred_ll_homref\tgt_phred_ll_het\tgt_phred_ll_homalt
\tfamily_id\tfamily_members\tfamily_genotypes\tsamples\tfamily_count\tviolation\tviolation_prob\n+chr10\t1142207\t1142208\tNone\t1\t4\tT\tC\t3404.30004883\tNone\tsnp\tts\t1.0\t1\trs10794716\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10p15.3\tNone\t0\t0\t0\tNone\tNone\t7\t1\t1\t0\t0.166666666667\t0.0718606383197\t0.6\t0.294117647059\t0.200924\tWDR37\tENST00000381329\t1\t1\t0\t1\texon_10_1142110_1142566\tTga/Cga\t*250R\t249\tprotein_coding\tstop_lost\tstop_lost\tHIGH\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t122\tNone\t36.0\t0\t0\t8\t0.0\t2.67470002174\t27.8999996185\t8\tNone\tNone\tNone\tNone\tNone\t1\t0.999534883721\t0.975034044485\t0.991234814701\t0\t1\t0.9942\t1\t1\t0.9561\t1\t0.98762\tNone\tNone\tNone\tNone\t0\tNone\t2\tOsteobl;Progfib\tT\tT\tT\tT\tT\tT\tNone\tNone\tNone\tNone\tNone\t0.156188\t1\t0.997\t0.997067786838\t0.970305592927\t0.998358956642\t1\t1\t0.999595432887\t0.998898678414\t1\t346\t60354\t121410\t1.0\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C']\t[0 0 1 0 0 0 0 0 3]\t[False False False False False False False False False]\t[38 29 23 38 29 22 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 23 37 29 22 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 0]\t1\t1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)\tT/T,T/T,T/C\t1_kid\t2\tplausible de novo;implausible de 
novo\t0.00000\n+chr10\t1142207\t1142208\tNone\t1\t4\tT\tC\t3404.30004883\tNone\tsnp\tts\t1.0\t1\trs10794716\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10p15.3\tNone\t0\t0\t0\tNone\tNone\t7\t1\t1\t0\t0.166666666667\t0.0718606383197\t0.6\t0.294117647059\t0.200924\tWDR37\tENST00000381329\t1\t1\t0\t1\texon_10_1142110_1142566\tTga/Cga\t*250R\t249\tprotein_coding\tstop_lost\tstop_lost\tHIGH\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t122\tNone\t36.0\t0\t0\t8\t0.0\t2.67470002174\t27.8999996185\t8\tNone\tNone\tNone\tNone\tNone\t1\t0.999534883721\t0.975034044485\t0.991234814701\t0\t1\t0.9942\t1\t1\t0.9561\t1\t0.98762\tNone\tNone\tNone\tNone\t0\tNone\t2\tOsteobl;Progfib\tT\tT\tT\tT\tT\tT\tNone\tNone\tNone\tNone\tNone\t0.156188\t1\t0.997\t0"..b"e_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t239\tNone\t36.0200004578\t2\t0\t8\t0.0\t5.71409988403\t2.30999994278\t2\tNone\tNone\tNone\tNone\tNone\t1\t0.0938372093023\t0.163867453473\t0.117561125634\t1\t1\t0.1844\t0.2698\t0.2188\t0.1997\t0.1093\t0.197284\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.487112\t1\t0.134\t0.134286610119\t0.184985563041\t0.164938655607\t0.256026889198\t0.122313048744\t0.0919761054243\t0.113686534216\t0.194096927001\t13825\t1225\t121196\t0.2698\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']\t[0 0 1 0 0 1 0 0 1]\t[False False False False False False False False False]\t[38 29 22 38 29 21 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 22 37 29 21 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 0]\t1\t1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected)\tT/T,T/T,T/C\t1_kid\t3\tplausible de novo;plausible de novo;plausible de 
novo\t0.00000\n+chr10\t135369531\t135369532\tNone\t5\t6\tT\tC\t122.620002747\tNone\tsnp\tts\t1.0\t1\trs3747881,rs386585367\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10q26.3\tNone\t0\t0\t1\tNone\t3.86096e-59\t6\t3\t0\t0\t0.166666666667\t0.548506235587\t-0.2\t0.294117647059\t0.022013\tSYCE1\tENST00000368517\t1\t1\t0\t0\texon_10_135369485_135369551\taAg/aGg\tK147R\t282\tprotein_coding\tmissense_variant\tmissense_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t239\tNone\t36.0200004578\t2\t0\t8\t0.0\t5.71409988403\t2.30999994278\t2\tNone\tNone\tNone\tNone\tNone\t1\t0.0938372093023\t0.163867453473\t0.117561125634\t1\t1\t0.1844\t0.2698\t0.2188\t0.1997\t0.1093\t0.197284\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.487112\t1\t0.134\t0.134286610119\t0.184985563041\t0.164938655607\t0.256026889198\t0.122313048744\t0.0919761054243\t0.113686534216\t0.194096927001\t13825\t1225\t121196\t0.2698\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']\t[0 0 1 0 0 1 0 0 1]\t[False False False False False False False False False]\t[38 29 22 38 29 21 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 22 37 29 21 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 0]\t3\t3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected)\tT/T,T/T,T/C\t3_kid\t3\tplausible de novo;plausible de novo;plausible de 
novo\t0.00000\n+chr10\t135369531\t135369532\tNone\t5\t6\tT\tC\t122.620002747\tNone\tsnp\tts\t1.0\t1\trs3747881,rs386585367\tNone\tNone\tNone\tNone\tNone\t1\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tNone\tchr10q26.3\tNone\t0\t0\t1\tNone\t3.86096e-59\t6\t3\t0\t0\t0.166666666667\t0.548506235587\t-0.2\t0.294117647059\t0.022013\tSYCE1\tENST00000368517\t1\t1\t0\t0\texon_10_135369485_135369551\taAg/aGg\tK147R\t282\tprotein_coding\tmissense_variant\tmissense_variant\tMED\tNone\tNone\tNone\tNone\tNone\tNone\tNone\t239\tNone\t36.0200004578\t2\t0\t8\t0.0\t5.71409988403\t2.30999994278\t2\tNone\tNone\tNone\tNone\tNone\t1\t0.0938372093023\t0.163867453473\t0.117561125634\t1\t1\t0.1844\t0.2698\t0.2188\t0.1997\t0.1093\t0.197284\tNone\tNone\tNone\tNone\t0\tNone\tNone\tNone\tR\tR\tR\tR\tR\tR\tNone\tNone\tNone\tNone\tNone\t0.487112\t1\t0.134\t0.134286610119\t0.184985563041\t0.164938655607\t0.256026889198\t0.122313048744\t0.0919761054243\t0.113686534216\t0.194096927001\t13825\t1225\t121196\t0.2698\t['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C']\t[0 0 1 0 0 1 0 0 1]\t[False False False False False False False False False]\t[38 29 22 38 29 21 38 29 24]\t[1 0 0 1 0 0 1 0 0]\t[37 29 22 37 29 21 37 29 24]\t[ 87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939  87.16000366  78.19999695  66.13999939]\tNone\t[940 899 729 940 899 729 940 899 729]\t[87 78 66 87 78 66 87 78 66]\t[0 0 0 0 0 0 0 0 0]\t2\t2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected)\tT/T,T/T,T/C\t2_kid\t3\tplausible de novo;plausible de novo;plausible de novo\t0.00000\n"
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_pathways_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_pathways_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,17 @@
+chrom start end ref alt impact sample genotype gene transcript pathway
+chr10 52004314 52004315 T C intron_variant M10500 C/C ASAH2 ENST00000329428 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10 52004314 52004315 T C intron_variant M128215 C/C ASAH2 ENST00000329428 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10 52004314 52004315 T C intron_variant M10500 C/C ASAH2 ENST00000447815 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10 52004314 52004315 T C intron_variant M128215 C/C ASAH2 ENST00000447815 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10 52004314 52004315 T C intron_variant M10500 C/C ASAH2 ENST00000395526 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10 52004314 52004315 T C intron_variant M128215 C/C ASAH2 ENST00000395526 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways
+chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000531469 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000309035 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000494626 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000337195 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000411419 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer
+chr10 135336655 135336656 G A intron_variant M10478 A/A CYP2E1 ENST00000463117 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10 135336655 135336656 G A intron_variant M128215 A/A CYP2E1 ENST00000463117 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10 135336655 135336656 G A upstream_gene_variant M10478 A/A CYP2E1 ENST00000252945 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr10 135336655 135336656 G A upstream_gene_variant M128215 A/A CYP2E1 ENST00000252945 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism
+chr16 72057434 72057435 C T missense_variant M10475 C/T DHODH ENST00000219240 hsa01100:Metabolic_pathways,hsa00240:Pyrimidine_metabolism
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_qc_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_qc_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,5 @@
+sample sex chrX_homref chrX_het chrX_homalt chrX_unknown
+M10475 male 0 0 0 0
+M10478 female 0 0 0 0
+M10500 female 0 0 0 0
+M128215 male 0 0 0 0
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_query_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_query_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,10 @@
+10582
+10610
+13301
+13326
+13956
+13979
+30922
+46401
+47189
+51475
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_region_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_region_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,1 @@
+chr10 48003991 48003992 None 2 1 C T 1047.86999512 None snp ts 1.0 1 rs142685947,rs3739968 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q11.22 None 0 1 1 None 3.10871e-42 1 2 1 0 0.5 1 0 0.571428571429 1.718591 ASAH2C ENST00000420079 1 1 0 0 16/17 tGt/tAt C/Y 542/612 protein_coding missense_variant missense_variant MED benign 0.0 tolerated 1.0 None None None 165 None 20.9400005341 0 0 8 0.0 4.382999897 9.52999973297 4 None None None None None 0 None None None 0 1 0.3112 0.4573 0.3855 0.1241 0.5149 0.346645 grc_fix 73.3 40.3 92.8 0 None None None R R R R R R None None None None None 0.553676 1 0.443 0.448537771896 0.288974151858 0.281426746944 0.543088975937 0.524984286612 0.478147713207 0.463529411765 0.418641164716 17495 15317 107302 1 0.543088975937
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_roh_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_roh_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,7 @@
+chrom start end sample num_of_snps density_per_kb run_length_in_bp
+chr10 1142208 135369532 M10475 5 0.0 134227324
+chr10 1142208 135210791 M10500 5 0.0001 134068583
+chr10 1142208 135210791 M10478 5 0.0001 134068583
+chr10 1142208 135336656 M10478 4 0.0 134194448
+chr10 1142208 135336656 M128215 6 0.0001 134194448
+chr10 1142208 135369532 M128215 5 0.0 134227324
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_stats_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_stats_result.tabular Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,5 @@
+sample total
+M10475 3
+M10478 6
+M10500 6
+M128215 4
b
diff -r 000000000000 -r 269c40fdcccb test-data/gemini_windower_input.db
b
Binary file test-data/gemini_windower_input.db has changed
b
diff -r 000000000000 -r 269c40fdcccb tool-data/gemini_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_databases.loc.sample Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,3 @@
+## GEMINI databases
+#Version dbkey Description Path
+#08_08_2014 hg19 Database (08-08-2014) /path/to/data
b
diff -r 000000000000 -r 269c40fdcccb tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,7 @@
+<tables>
+    <table name="gemini_databases" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gemini_databases.loc" />
+    </table>
+</tables>
+
b
diff -r 000000000000 -r 269c40fdcccb tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Feb 18 08:55:06 2016 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="gemini" version="0.18.1">
+        <repository changeset_revision="be869e11582f" name="package_gemini_0_18_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>