Mercurial > repos > iuc > gemini_annotate
changeset 0:685b3408c181 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_annotate.xml Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,226 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <!-- Wraps `gemini annotate`: adds a user-named column to a GEMINI database from overlaps with a BED file. -->
+    <description>adding your own custom annotations</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">annotate</token>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <!--
+        The BED input is bgzip-compressed and tabix-indexed in the working directory
+        (tabixed.gz) and then handed to gemini via -f.
+        NOTE(review): only gemini's standard output is redirected to the output dataset
+        (the test expects the message "updated 10 variants"); the annotation itself
+        presumably changes the input database in place. Confirm this is intended.
+    -->
+    <command>
+<![CDATA[
+
+    bgzip -c "$annotate_source" > tabixed.gz &&
+    tabix -p bed tabixed.gz &&
+
+    gemini @BINARY@
+        -f tabixed.gz
+        -c "$column_name"
+        -a $a.a_selector
+        #if $a.a_selector == 'extract':
+            -t $a.column_type
+            -e $a.column_extracts
+            -o $a.operation
+        #end if
+        $region_only
+        "${ infile }"
+        > "${ outfile }"
+]]>
+
+    </command>
+    <inputs>
+        <expand macro="infile" />
+        <param name="annotate_source" type="data" format="bed" label="File containing the annotations in BED format" help="(-f)"/>
+
+        <param name="column_name" type="text" value=""
+            label="The name of the column to be added to the variant table"
+            help=" If the input file is a VCF, then this is the name of the info field to pull. (-c)">
+            <!-- Only letters, digits and "_" survive; other characters are dropped so the value stays one shell word. -->
+            <validator type="empty_field" message="Please provide a name for the new column." />
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+        </param>
+        <!-- The extra options (-t, -e, -o) are only offered, and only passed to gemini, in "extract" mode. -->
+        <conditional name="a">
+            <param name="a_selector" type="select" label="How should the annotation file be used?" help="(-a)">
+                <option value="boolean">Did a variant overlap a region or not? (boolean)</option>
+                <option value="count">How many regions did a variant overlap? (count)</option>
+                <option value="extract" selected="True">Extract specific values from a BED file. (extract)</option>
+            </param>
+            <when value="extract">
+
+                <param name="column_extracts" label="Column to extract information from for list annotations"
+                    type="data_column" data_ref="annotate_source" force_select="true" help="(-e)"/>
+
+
+                <param name="column_type" type="select" label="What data type(s) should be used to represent the new values in the database?"
+                    help="(-t)">
+                    <option value="float">Decimal precision number (float)</option>
+                    <option value="integer">Integer number (integer)</option>
+                    <option value="text">Text columns such as “valid”, “yes” (text)</option>
+                </param>
+
+                <param name="operation" type="select" label="Operation to apply to the extract column values ..."
+                    help="in the event that a variant overlaps multiple annotations in your annotation file. (-o)">
+                    <option value="mean">Compute the average of the (numeric) values</option>
+                    <option value="sum">Compute the sum of the (numeric) values</option>
+                    <option value="median">Compute the median of the (numeric) values</option>
+                    <option value="min">Compute the minimum of the (numeric) values</option>
+                    <option value="max">Compute the maximum of the (numeric) values</option>
+                    <option value="mode">Compute the mode (most frequent value) of the (numeric) values</option>
+                    <option value="first">Use the value from the first record in the annotation file</option>
+                    <option value="last">Use the value from the last record in the annotation file</option>
+                    <option value="list">Create a comma-separated list of the observed (text) values</option>
+                    <option value="uniq_list">Create a comma-separated list of non-redundant observed (text) values</option>
+                </param>
+
+            </when>
+            <when value="boolean"/>
+            <when value="count"/>
+        </conditional>
+        <param name="region_only" argument="--region-only" type="boolean" checked="false"
+            truevalue="--region-only" falsevalue=""
+            label="If set, only region coordinates will be considered when annotating variants."
+            help="The default is to annotate using region coordinates as well as REF and ALT
+            variant values. This option is only valid if annotation is a VCF file"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="gemini_annotate_input.db" ftype="gemini.sqlite" />
+            <param name="annotate_source" value="anno.bed" />
+            <param name="a_selector" value="count" />
+            <param name="column_name" value="anno5" />
+            <output name="outfile" file="gemini_annotate_result.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+It is inevitable that researchers will want to enhance the GEMINI framework with their own, custom annotations. GEMINI provides a sub-command called annotate for exactly this purpose.
+
+**Details**
+
+It is inevitable that researchers will want to enhance the GEMINI framework with their own, custom annotations. GEMINI provides a sub-command called annotate for exactly this purpose. As long as you provide a tabix’ed annotation file in BED or VCF format, the annotate tool will, for each variant in the variants table, screen for overlaps in your annotation file and update one or more new columns in the variants table that you may specify on the command line. This is best illustrated by the following **example**.
+
+**Input files**
+
+Let’s assume you have already created a GEMINI database of a **VCF file** using the *load module*.
+
+Now, let’s imagine you have an annotated file in **BED format** (important.bed) that describes regions of the genome that are particularly relevant to your lab’s research. You would like to annotate in the GEMINI database which variants overlap these crucial regions. We want to store this knowledge in a new column in the variants table called important_variant that tracks whether a given variant overlapped (1) or did not overlap (0) intervals in your annotation file.
+
+    *To do this, you must first TABIX your BED file*
+
+**-a boolean - Did a variant overlap a region or not?**
+
+Now, you can use this *TABIX*’ed file to annotate which variants overlap your important regions. In the example below, the results will be stored in a new column called “important”. The **-a boolean** option says that you just want to track whether (1) or not (0) the variant overlapped one or more of your regions.
+
+Since a new column has been created in the database, we can now directly query the new column. In the example results below, the first and third variants overlapped a crucial region while the second did not::
+
+    chr22 100 101 1 1
+    chr22 200 201 2 0
+    chr22 300 500 3 1
+
+**-a count - How many regions did a variant overlap?**
+
+Instead of a simple yes or no, we can use the **-a count** option to count how many important regions a variant overlapped. It turns out that the 3rd variant actually overlapped two important regions::
+
+    chr22 100 101 1 1
+    chr22 200 201 2 0
+    chr22 300 500 3 2
+
+**-a extract - Extract specific values from a BED file**
+
+Lastly, we may also extract values from specific fields in a BED file (or from the INFO field in a VCF) and populate one or more new columns in the database based on overlaps with the annotation file and the values of the fields therein. To do this, we use the **-a extract** option.
+
+This is best described with an example. To set this up, let’s imagine that we have a VCF file from a different experiment and we want to annotate the variants in our GEMINI database with the allele frequency and depth tags from the INFO fields for the same variants in this other VCF file.
+
+Now that we have a proper *TABIX*’ed VCF file, we can use the **-a extract** option to populate new columns in the GEMINI database. In order to do so, we must specify:
+
+    1) its type (e.g., text, integer, float) (**-t**)
+    2) the field in the INFO column of the VCF file that we should use to extract data with which to populate the new column (**-e**)
+    3) what operation should be used to summarize the data in the event of multiple overlaps in the annotation file (**-o**)
+    4) (optionally) the name of the column we want to add (**-c**), if this is not specified, it will use the value from **-e**.
+
+For example, let’s imagine we want to create a new column called “other_allele_freq” (**-c**) using the AF field in our VCF file to populate it.
+
+This creates a new column in my.db called other_allele_freq and this new column will be a FLOAT (**-t float**). In the event of multiple records in the VCF file overlapping a variant in the database, the average (**-o mean**) of the allele frequencies values from the VCF file will be used.
+
+At this point, one can query the database based on the values of the new other_allele_freq column (using **GEMINI query**).
+
+**-t TYPE - Specifying the column type(s) when using -a extract**
+
+The annotate tool will create three different types of columns via the **-t** option:
+
+    1) Floating point columns for annotations with decimal precision as above (-t float)
+    2) Integer columns for integral annotations (-t integer)
+    3) Text columns for string columns such as “valid”, “yes”, etc. (-t text)
+
+    *The -t option is only valid when using the -a extract option.*
+
+**-o OPERATION - Specifying the summary operations when using -a extract**
+
+In the event of multiple overlaps between a variant and records in the annotation file, the annotate tool can summarize the values observed with multiple options:
+
+    - -o mean Compute the average of the values. They must be numeric.
+    - -o median Compute the median of the values. They must be numeric.
+    - -o min Compute the minimum of the values. They must be numeric.
+    - -o max Compute the maximum of the values. They must be numeric.
+    - -o mode Compute the mode (most frequent value) of the values. They must be numeric.
+    - -o first Use the value from the first record in the annotation file.
+    - -o last Use the value from the last record in the annotation file.
+    - -o list Create a comma-separated list of the observed values.
+    - -o uniq_list Create a comma-separated list of the distinct observed values.
+    - -o sum Compute the sum of the values. They must be numeric.
+
+The -o option is only valid when using the -a extract option.
+
+**Annotating with VCF**
+
+Most of the examples to this point have pulled a column from a tabix indexed bed file. It is likewise possible to pull from the INFO field of a tabix indexed VCF. The syntax is identical but the **-e** operation will specify the names of fields in the INFO column to pull. By default, those names will be used, but that can still be specified with the **-c column**.
+
+To put a DP column in the db, set:
+
+    -o list, -e DP, -t integer
+
+... and name it 'depth', set:
+
+    -o list, -e DP, -c depth, -t integer
+
+
+Missing values are allowed since we expect that in some cases an annotation VCF will not have all INFO fields specified for all variants.
+
+*We recommend decomposing and normalizing variants before annotating. See Step 1. split, left-align, and trim variants for a detailed explanation of how to do this. To do that see the GEMINI* preprocessing_ *website.*
+
+**Extracting and populating multiple columns at once**
+
+One can also extract and populate multiple columns at once by providing comma-separated lists (no spaces) of column names (**-c**), types (**-t**), numbers (**-e**), and summary operations (**-o**). For example, recall that in the VCF example above, we created a *TABIX*’ed BED file containing the allele frequency and depth values from the INFO field as the 4th and 5th columns in the BED, respectively.
+
+Instead of running the annotate tool twice (once for each column), we can run the tool once and load both columns in the same run. For example with settings:
+
+    - -a extract
+    - -c other_allele_freq,other_depth
+    - -t float,integer
+    - -e 4,5
+    - -o mean,max
+
+We can then use each of the new columns to filter variants with a *GEMINI query*.
+
+.. _preprocessing: https://gemini.readthedocs.org/en/latest/content/preprocessing.html#preprocess
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,152 @@
+<macros>
+    <!-- Macros and tokens imported by the GEMINI tool wrappers (e.g. gemini_annotate.xml). -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">gemini</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <version_command>gemini --version</version_command>
+    </xml>
+
+    <!-- A job fails on any non-zero exit code or when "Error:" / "Exception:" appears in its output. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+
+    <xml name="annotation_dir">
+        <param name="annotation_databases" type="select" optional="True" label="Choose a gemini annotation database">
+            <options from_data_table="gemini_databases">
+                <filter type="sort_by" column="0" />
+            </options>
+        </param>
+    </xml>
+
+    <xml name="add_header_column">
+        <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False"
+            label="Add a header of column names to the output" help="(--header)"/>
+    </xml>
+
+    <xml name="radius">
+        <param name="radius" type="integer" value="3" label="Set filter for Breadth-first search (BFS) in the Protein-Protein Interaction network" help="(-r)" >
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+    <xml name="variant_mode">
+        <param name="variant_mode" type="boolean" truevalue="--var" falsevalue="" checked="False"
+            label="Returns variant info (e.g. impact, biotype) for interacting genes" help="(--var)"/>
+    </xml>
+
+    <xml name="column_filter">
+        <conditional name="report">
+            <param name="report_selector" type="select" label="Columns to include in the report"
+                help="By default, this tool reports all columns in the variants table. One may choose to report only a subset of the columns.">
+                <option value="all" selected="True">all</option>
+                <option value="column_filter">User given columns</option>
+            </param>
+            <when value="all"/>
+            <when value="column_filter">
+                <param name="columns" type="select" display="checkboxes" multiple="True" label="Choose columns to include in the report" help="(--columns)">
+                    <option value="gene">gene</option>
+                    <option value="chrom">chrom</option>
+                    <option value="start">start</option>
+                    <option value="end">end</option>
+                    <option value="ref">ref</option>
+                    <option value="alt">alt</option>
+                    <option value="impact">impact</option>
+                    <option value="impact_severity">impact_severity</option>
+                    <option value="max_aaf_all">alternative allele frequency</option>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="filter">
+        <conditional name="filter">
+            <param name="filter_selector" type="select" label="Apply additional constraints"
+                help="By default, this tool will report all variants regardless of their putative functional impact. In order to apply additional constraints on the variants returned, you can use this optional filter.">
+                <option value="no">No additional constraints</option>
+                <option value="yes">Apply additional constraints</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="filter" type="text" label="Constraints in SQL syntax" help="Conditions applied here will become WHERE clauses in the query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'. (--filter)">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Accepts every printable character; anything else is removed. -->
+    <xml name="sanitize_query">
+        <sanitizer invalid_char="">
+            <valid initial="string.printable"/>
+        </sanitizer>
+    </xml>
+
+    <!-- Emits the shell-quoted filter option when the user enabled the extra constraints and entered a value. -->
+    <token name="@CMDLN_SQL_FILTER_FILTER_OPTION@">
+        #if str($filter.filter_selector) == 'yes' and $filter.filter:
+            #import pipes
+            --filter ${ pipes.quote( str( $filter.filter ) ) or "''" }
+        #end if
+    </token>
+
+    <xml name="family">
+        <param name="families" type="text" value="" label="Comma-separated list of families to restrict the analysis to." help="e.g. Family1,Family3 (--families)"/>
+    </xml>
+
+    <xml name="lenient">
+        <param name="lenient" type="boolean" truevalue="--lenient" falsevalue="" checked="False" label="Loosen the restrictions on family structure"/>
+    </xml>
+
+    <xml name="unaffected">
+        <param name="allow_unaffected" type="boolean" truevalue="--allow-unaffected" falsevalue="" checked="False" label="Report candidates that also impact samples labeled as unaffected." help="(--allow-unaffected)"/>
+    </xml>
+
+    <xml name="min_kindreds">
+        <param name="min_kindreds" type="integer" value="1" label="The min. number of kindreds that must have a candidate variant in a gene" help="default: 1 (--min-kindreds)" />
+    </xml>
+
+    <xml name="min_sequence_depth">
+        <param name="d" type="integer" value="0" min="0" label="The minimum aligned sequence depth (genotype DP) required for each sample"
+            help="default: 0 (-d)" />
+    </xml>
+
+    <xml name="min_gq">
+        <param name="min_gq" type="integer" value="0" label="The minimum genotype quality required for each sample in a family" help="default: 0 (--min-gq)">
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+
+    <xml name="gt_pl_max">
+        <param name="gt_pl_max" type="integer" value="-1" min="-1" label="The maximum phred-scaled genotype likelihood (PL) allowed for each sample in a family" help="default: -1 (not set) (--gt-pl-max)" />
+    </xml>
+    <!-- GEMINI version: used for the package requirement, the tool version and the database compatibility check. -->
+    <token name="@VERSION@">0.18.1</token>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>
+            <yield />
+        </citations>
+    </xml>
+
+    <!-- Only databases whose gemini_version metadata equals the version token pass the validator. -->
+    <xml name="infile">
+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." >
+            <options options_filter_attribute="metadata.gemini_version" >
+                <filter type="add_value" value="@VERSION@" />
+            </options>
+            <validator type="expression" message="This version of Gemini will only work with Gemini files that are for version @VERSION@.">value is not None and value.metadata.gemini_version == "@VERSION@"</validator>
+        </param>
+    </xml>
+
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,65 @@ +========================= +Galaxy wrapper for GEMINI +========================= + + +GEMINI: a flexible framework for exploring genome variation + +GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of +the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, +and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very +powerful system for exploring genetic variation for disease and population genetics. + +Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically +annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, +OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows +one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an +enhanced SQL engine. + +Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_. + + +============ +Installation +============ + +It is recommended to install this wrapper via the `Galaxy Tool Shed`_. + +.. 
_`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini + + +======= +History +======= +- 0.9.1: Initial public release + + +==================== +Detailed description +==================== + +View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html + + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. + +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Thu Feb 18 08:56:22 2016 -0500
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!-- Declares the single Tool Shed repository this wrapper depends on, pinned to one changeset. -->
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+    <repository
+        name="data_manager_gemini_database_downloader"
+        owner="iuc"
+        toolshed="https://toolshed.g2.bx.psu.edu"
+        changeset_revision="345412d58d75" />
+</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/anno.bed Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,3 @@ +chr1 30547 30548 +chr1 30920 30925 +chr1 30922 30923
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_actionable_mutations_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,1 @@ +tum_name chrom start end ref alt gene impact is_somatic in_cosmic_census dgidb_info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_amend_input.ped Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,5 @@ +#family_id name paternal_id maternal_id sex phenotype ethnicity hair_color +1 M10475 None None 1 1 None brown +1 M10478 M10475 M10500 2 2 None red +1 M10500 None None 2 2 None +1 M128215 M10475 M10500 1 1 None green
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_annotate_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,1 @@ +updated 10 variants
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_autosomal_dominant_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,9 @@ +gene chrom impact variant_id family_id family_members family_genotypes samples family_count +ASAH2C chr10 missense_variant 3 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) C/T,C/C,C/T 3_dad,3_kid 2 +ASAH2C chr10 missense_variant 3 2 2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected) C/C,C/T,C/T 2_mom,2_kid 2 +ASAH2C chr10 missense_variant 4 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) C/T,C/C,C/T 3_dad,3_kid 2 +ASAH2C chr10 missense_variant 4 2 2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected) C/C,C/T,C/T 2_mom,2_kid 2 +SPRN chr10 intron_variant 5 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) G/A,G/G,G/A 3_dad,3_kid 1 +WDR37 chr10 stop_lost 1 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) T/C,T/T,T/C 3_dad,3_kid 2 +WDR37 chr10 stop_lost 1 2 2_dad(2_dad;unaffected),2_mom(2_mom;affected),2_kid(2_kid;affected) T/T,T/C,T/C 2_mom,2_kid 2 +WDR37 chr10 stop_lost 2 3 3_dad(3_dad;affected),3_mom(3_mom;unknown),3_kid(3_kid;affected) T/C,T/C,T/C 3_dad,3_kid 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_autosomal_recessive.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,2 @@ +gene chrom impact variant_id family_id family_members family_genotypes samples family_count +WDR37 chr10 stop_lost 2 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) T/C,T/C,C/C 1_kid 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,6 @@ +gene T c Z p_value +SYCE1 -0.5 0.25 -1.0 0.00699300699301 +DHODH 0.0 0.0 nan nan +WDR37 -1.0 1.5 -0.816496580928 0.00699300699301 +ASAH2C -0.5 0.75 -0.57735026919 0.00699300699301 +CTBP2 0.0 0.0 nan nan
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_comphets_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,3 @@ +chrom start end ref alt gene impact variant_id family_id family_members family_genotypes samples family_count comp_het_id priority +chr1 17362 17366 TTCT T WASH7P splice_acceptor_variant 3 4 child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female) TTCT|T,TTCT/T,TTCT/TTCT child_4 1 1_3_7 3 +chr1 17729 17730 C A WASH7P splice_acceptor_variant 7 4 child_4(child_4;affected;male),dad_4(dad_4;unaffected;male),mom_4(mom_4;unaffected;female) C/A,C/A,C/A child_4 1 1_3_7 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_dbinfo_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,209 @@ +table_name column_name type +variants chrom text +variants start integer +variants end integer +variants vcf_id text +variants variant_id integer +variants anno_id integer +variants ref text +variants alt text +variants qual float +variants filter text +variants type text +variants sub_type text +variants gts blob +variants gt_types blob +variants gt_phases blob +variants gt_depths blob +variants gt_ref_depths blob +variants gt_alt_depths blob +variants gt_quals blob +variants gt_copy_numbers blob +variants gt_phred_ll_homref blob +variants gt_phred_ll_het blob +variants gt_phred_ll_homalt blob +variants call_rate float +variants in_dbsnp bool +variants rs_ids text +variants sv_cipos_start_left integer +variants sv_cipos_end_left integer +variants sv_cipos_start_right integer +variants sv_cipos_end_right integer +variants sv_length integer +variants sv_is_precise bool +variants sv_tool text +variants sv_evidence_type text +variants sv_event_id text +variants sv_mate_id text +variants sv_strand text +variants in_omim bool +variants clinvar_sig text +variants clinvar_disease_name text +variants clinvar_dbsource text +variants clinvar_dbsource_id text +variants clinvar_origin text +variants clinvar_dsdb text +variants clinvar_dsdbid text +variants clinvar_disease_acc text +variants clinvar_in_locus_spec_db bool +variants clinvar_on_diag_assay bool +variants clinvar_causal_allele text +variants pfam_domain text +variants cyto_band text +variants rmsk text +variants in_cpg_island bool +variants in_segdup bool +variants is_conserved bool +variants gerp_bp_score float +variants gerp_element_pval float +variants num_hom_ref integer +variants num_het integer +variants num_hom_alt integer +variants num_unknown integer +variants aaf real +variants hwe decimal(2,7) +variants inbreeding_coeff decimal(2,7) +variants pi decimal(2,7) +variants 
recomb_rate decimal(2,7) +variants gene text +variants transcript text +variants is_exonic bool +variants is_coding bool +variants is_splicing bool +variants is_lof bool +variants exon text +variants codon_change text +variants aa_change text +variants aa_length text +variants biotype text +variants impact text +variants impact_so text +variants impact_severity text +variants polyphen_pred text +variants polyphen_score float +variants sift_pred text +variants sift_score float +variants anc_allele text +variants rms_bq float +variants cigar text +variants depth integer +variants strand_bias float +variants rms_map_qual float +variants in_hom_run integer +variants num_mapq_zero integer +variants num_alleles integer +variants num_reads_w_dels float +variants haplotype_score float +variants qual_depth float +variants allele_count integer +variants allele_bal float +variants in_hm2 bool +variants in_hm3 bool +variants is_somatic bool +variants somatic_score float +variants in_esp bool +variants aaf_esp_ea decimal(2,7) +variants aaf_esp_aa decimal(2,7) +variants aaf_esp_all decimal(2,7) +variants exome_chip bool +variants in_1kg bool +variants aaf_1kg_amr decimal(2,7) +variants aaf_1kg_eas decimal(2,7) +variants aaf_1kg_sas decimal(2,7) +variants aaf_1kg_afr decimal(2,7) +variants aaf_1kg_eur decimal(2,7) +variants aaf_1kg_all decimal(2,7) +variants grc text +variants gms_illumina float +variants gms_solid float +variants gms_iontorrent float +variants in_cse bool +variants encode_tfbs text +variants encode_dnaseI_cell_count integer +variants encode_dnaseI_cell_list text +variants encode_consensus_gm12878 text +variants encode_consensus_h1hesc text +variants encode_consensus_helas3 text +variants encode_consensus_hepg2 text +variants encode_consensus_huvec text +variants encode_consensus_k562 text +variants vista_enhancers text +variants cosmic_ids text +variants info blob +variants cadd_raw float +variants cadd_scaled float +variants fitcons float +variants in_exac bool 
+variants aaf_exac_all decimal(2,7) +variants aaf_adj_exac_all decimal(2,7) +variants aaf_adj_exac_afr decimal(2,7) +variants aaf_adj_exac_amr decimal(2,7) +variants aaf_adj_exac_eas decimal(2,7) +variants aaf_adj_exac_fin decimal(2,7) +variants aaf_adj_exac_nfe decimal(2,7) +variants aaf_adj_exac_oth decimal(2,7) +variants aaf_adj_exac_sas decimal(2,7) +variants exac_num_het int +variants exac_num_hom_alt int +variants exac_num_chroms int +variants max_aaf_all REAL +variant_impacts variant_id integer +variant_impacts anno_id integer +variant_impacts gene text +variant_impacts transcript text +variant_impacts is_exonic bool +variant_impacts is_coding bool +variant_impacts is_splicing bool +variant_impacts is_lof bool +variant_impacts exon text +variant_impacts codon_change text +variant_impacts aa_change text +variant_impacts aa_length text +variant_impacts biotype text +variant_impacts impact text +variant_impacts impact_so text +variant_impacts impact_severity text +variant_impacts polyphen_pred text +variant_impacts polyphen_score float +variant_impacts sift_pred text +variant_impacts sift_score float +samples sample_id integer +samples family_id text +samples name text +samples paternal_id text +samples maternal_id text +samples sex text +samples phenotype text +samples ethnicity text +gene_detailed uid integer +gene_detailed chrom text +gene_detailed gene text +gene_detailed is_hgnc bool +gene_detailed ensembl_gene_id text +gene_detailed transcript text +gene_detailed biotype text +gene_detailed transcript_status text +gene_detailed ccds_id text +gene_detailed hgnc_id text +gene_detailed entrez_id text +gene_detailed cds_length text +gene_detailed protein_length text +gene_detailed transcript_start text +gene_detailed transcript_end text +gene_detailed strand text +gene_detailed synonym text +gene_detailed rvis_pct float +gene_detailed mam_phenotype_id text +gene_summary uid integer +gene_summary chrom text +gene_summary gene text +gene_summary is_hgnc bool 
+gene_summary ensembl_gene_id text +gene_summary hgnc_id text +gene_summary transcript_min_start text +gene_summary transcript_max_end text +gene_summary strand text +gene_summary synonym text +gene_summary rvis_pct float +gene_summary mam_phenotype_id text +gene_summary in_cosmic_census bool
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_de_novo_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,9 @@ +chrom start end vcf_id variant_id anno_id ref alt qual filter type sub_type call_rate in_dbsnp rs_ids sv_cipos_start_left sv_cipos_end_left sv_cipos_start_right sv_cipos_end_right sv_length sv_is_precise sv_tool sv_evidence_type sv_event_id sv_mate_id sv_strand in_omim clinvar_sig clinvar_disease_name clinvar_dbsource clinvar_dbsource_id clinvar_origin clinvar_dsdb clinvar_dsdbid clinvar_disease_acc clinvar_in_locus_spec_db clinvar_on_diag_assay clinvar_causal_allele pfam_domain cyto_band rmsk in_cpg_island in_segdup is_conserved gerp_bp_score gerp_element_pval num_hom_ref num_het num_hom_alt num_unknown aaf hwe inbreeding_coeff pi recomb_rate gene transcript is_exonic is_coding is_splicing is_lof exon codon_change aa_change aa_length biotype impact impact_so impact_severity polyphen_pred polyphen_score sift_pred sift_score anc_allele rms_bq cigar depth strand_bias rms_map_qual in_hom_run num_mapq_zero num_alleles num_reads_w_dels haplotype_score qual_depth allele_count allele_bal in_hm2 in_hm3 is_somatic somatic_score in_esp aaf_esp_ea aaf_esp_aa aaf_esp_all exome_chip in_1kg aaf_1kg_amr aaf_1kg_eas aaf_1kg_sas aaf_1kg_afr aaf_1kg_eur aaf_1kg_all grc gms_illumina gms_solid gms_iontorrent in_cse encode_tfbs encode_dnaseI_cell_count encode_dnaseI_cell_list encode_consensus_gm12878 encode_consensus_h1hesc encode_consensus_helas3 encode_consensus_hepg2 encode_consensus_huvec encode_consensus_k562 vista_enhancers cosmic_ids info cadd_raw cadd_scaled fitcons in_exac aaf_exac_all aaf_adj_exac_all aaf_adj_exac_afr aaf_adj_exac_amr aaf_adj_exac_eas aaf_adj_exac_fin aaf_adj_exac_nfe aaf_adj_exac_oth aaf_adj_exac_sas exac_num_het exac_num_hom_alt exac_num_chroms max_aaf_all gts gt_types gt_phases gt_depths gt_ref_depths gt_alt_depths gt_quals gt_copy_numbers gt_phred_ll_homref gt_phred_ll_het gt_phred_ll_homalt family_id family_members 
family_genotypes samples family_count +chr10 48003991 48003992 None 2 1 C T 1047.86999512 None snp ts 1.0 1 rs142685947,rs3739968 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q11.22 None 0 1 1 None 3.10871e-42 2 6 1 0 0.444444444444 0.29371811258 -0.35 0.522875816993 1.718591 ASAH2C ENST00000420079 1 1 0 0 exon_10_48003968_48004056 tGt/tAt C540Y 610 protein_coding missense_variant missense_variant MED None None None None None None None 165 None 20.9400005341 0 0 8 0.0 4.382999897 9.52999973297 4 None None None None None 0 None None None 0 1 0.3112 0.4573 0.3855 0.1241 0.5149 0.346645 grc_fix 73.3 40.3 92.8 0 None None None R R R R R R None None None None None 0.553676 1 0.443 0.448537771896 0.288974151858 0.281426746944 0.543088975937 0.524984286612 0.478147713207 0.463529411765 0.418641164716 17495 15317 107302 0.543088975937 ['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T'] [1 1 3 0 0 1 1 1 1] [False False False False False False False False False] [38 29 23 38 29 23 38 29 23] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 23 37 29 23] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 2 2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected) C/C,C/C,C/T 2_kid 2 +chr10 48004991 48004992 None 3 1 C T 1047.86999512 None snp ts 1.0 0 None None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q11.22 None 0 1 0 None None 2 6 1 0 0.444444444444 0.29371811258 -0.35 0.522875816993 1.718591 ASAH2C ENST00000420079 1 1 0 0 exon_10_48003968_48004056 tGt/tAt C540Y 610 protein_coding missense_variant missense_variant MED None None None None None None None 165 None 20.9400005341 0 0 8 0.0 4.382999897 9.52999973297 4 None None None None None 0 None None None 0 0 None None None 
None None None grc_fix None None None 0 None None None R R R R R R None None None None None 0.061011 0 None None None None None None None None None None None None -1.0 ['C/T' 'C/T' 'C/T' 'C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T'] [1 1 1 1 1 3 0 0 1] [False False False False False False False False False] [38 29 23 38 29 23 38 29 23] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 23 37 29 23] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 3 3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected) C/C,C/C,C/T 3_kid 2 +chr10 135336655 135336656 None 4 1 G A 38.3400001526 None snp ts 1.0 1 rs6537611 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 0 None None 4 5 0 0 0.277777777778 0.248563248239 -0.384615384615 0.424836601307 0.43264 SPRN ENST00000541506 0 0 0 0 151 protein_coding intron_variant intron_variant LOW None None None None None None None 2 None 37.0 4 0 4 0.0 0.0 19.1700000763 4 None None None None None 0 None None None 0 1 0.9957 1 1 0.9297 1 0.980831 None None None None 0 None None None R R R R unknown R None None None None None 0.056701 0 None None None None None None None None None None None None 1.0 ['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A'] [0 0 1 0 0 1 1 1 1] [False False False False False False False False False] [38 29 24 38 29 24 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 24 37 29 24 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) G/G,G/G,G/A 1_kid 2 +chr10 135336655 135336656 None 4 1 G A 38.3400001526 None snp ts 1.0 1 rs6537611 None None None None None 1 None None None 
None None None None None None None None None None None None None None None chr10q26.3 None 0 0 0 None None 4 5 0 0 0.277777777778 0.248563248239 -0.384615384615 0.424836601307 0.43264 SPRN ENST00000541506 0 0 0 0 151 protein_coding intron_variant intron_variant LOW None None None None None None None 2 None 37.0 4 0 4 0.0 0.0 19.1700000763 4 None None None None None 0 None None None 0 1 0.9957 1 1 0.9297 1 0.980831 None None None None 0 None None None R R R R unknown R None None None None None 0.056701 0 None None None None None None None None None None None None 1.0 ['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A'] [0 0 1 0 0 1 1 1 1] [False False False False False False False False False] [38 29 24 38 29 24 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 24 37 29 24 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 2 2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected) G/G,G/G,G/A 2_kid 2 +chr10 135369531 135369532 None 5 6 T C 122.620002747 None snp ts 1.0 1 rs3747881,rs386585367 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 1 None 3.86096e-59 6 3 0 0 0.166666666667 0.548506235587 -0.2 0.294117647059 0.022013 SYCE1 ENST00000368517 1 1 0 0 exon_10_135369485_135369551 aAg/aGg K147R 282 protein_coding missense_variant missense_variant MED None None None None None None None 239 None 36.0200004578 2 0 8 0.0 5.71409988403 2.30999994278 2 None None None None None 1 0.0938372093023 0.163867453473 0.117561125634 1 1 0.1844 0.2698 0.2188 0.1997 0.1093 0.197284 None None None None 0 None None None R R R R R R None None None None None 0.487112 1 0.134 0.134286610119 0.184985563041 0.164938655607 0.256026889198 0.122313048744 0.0919761054243 0.113686534216 0.194096927001 13825 1225 121196 0.2698 ['T/T' 'T/T' 'T/C' 
'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C'] [0 0 1 0 0 1 0 0 1] [False False False False False False False False False] [38 29 22 38 29 21 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 22 37 29 21 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) T/T,T/T,T/C 1_kid 3 +chr10 135369531 135369532 None 5 6 T C 122.620002747 None snp ts 1.0 1 rs3747881,rs386585367 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 1 None 3.86096e-59 6 3 0 0 0.166666666667 0.548506235587 -0.2 0.294117647059 0.022013 SYCE1 ENST00000368517 1 1 0 0 exon_10_135369485_135369551 aAg/aGg K147R 282 protein_coding missense_variant missense_variant MED None None None None None None None 239 None 36.0200004578 2 0 8 0.0 5.71409988403 2.30999994278 2 None None None None None 1 0.0938372093023 0.163867453473 0.117561125634 1 1 0.1844 0.2698 0.2188 0.1997 0.1093 0.197284 None None None None 0 None None None R R R R R R None None None None None 0.487112 1 0.134 0.134286610119 0.184985563041 0.164938655607 0.256026889198 0.122313048744 0.0919761054243 0.113686534216 0.194096927001 13825 1225 121196 0.2698 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C'] [0 0 1 0 0 1 0 0 1] [False False False False False False False False False] [38 29 22 38 29 21 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 22 37 29 21 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 3 3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected) T/T,T/T,T/C 3_kid 3 +chr10 135369531 135369532 None 5 6 T C 122.620002747 None snp ts 1.0 1 rs3747881,rs386585367 None None 
None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 1 None 3.86096e-59 6 3 0 0 0.166666666667 0.548506235587 -0.2 0.294117647059 0.022013 SYCE1 ENST00000368517 1 1 0 0 exon_10_135369485_135369551 aAg/aGg K147R 282 protein_coding missense_variant missense_variant MED None None None None None None None 239 None 36.0200004578 2 0 8 0.0 5.71409988403 2.30999994278 2 None None None None None 1 0.0938372093023 0.163867453473 0.117561125634 1 1 0.1844 0.2698 0.2188 0.1997 0.1093 0.197284 None None None None 0 None None None R R R R R R None None None None None 0.487112 1 0.134 0.134286610119 0.184985563041 0.164938655607 0.256026889198 0.122313048744 0.0919761054243 0.113686534216 0.194096927001 13825 1225 121196 0.2698 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C'] [0 0 1 0 0 1 0 0 1] [False False False False False False False False False] [38 29 22 38 29 21 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 22 37 29 21 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 2 2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected) T/T,T/T,T/C 2_kid 3 +chr10 1142207 1142208 None 1 4 T C 3404.30004883 None snp ts 1.0 1 rs10794716 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10p15.3 None 0 0 0 None None 7 1 1 0 0.166666666667 0.0718606383197 0.6 0.294117647059 0.200924 WDR37 ENST00000381329 1 1 0 1 exon_10_1142110_1142566 Tga/Cga *250R 249 protein_coding stop_lost stop_lost HIGH None None None None None None None 122 None 36.0 0 0 8 0.0 2.67470002174 27.8999996185 8 None None None None None 1 0.999534883721 0.975034044485 0.991234814701 0 1 0.9942 1 1 0.9561 1 0.98762 None None None None 0 None 2 Osteobl;Progfib T T T T T T None None None None None 0.156188 1 
0.997 0.997067786838 0.970305592927 0.998358956642 1 1 0.999595432887 0.998898678414 1 346 60354 121410 1.0 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C'] [0 0 1 0 0 0 0 0 3] [False False False False False False False False False] [38 29 23 38 29 22 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 22 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) T/T,T/T,T/C 1_kid 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_dump_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,97 @@ +chrom start end ref alt type sub_type aaf in_dbsnp gene sample genotype +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_1 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_2 G/G +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_2 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_2 G/G +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_1 G/G +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_1 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_3 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_3 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_3 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 child_4 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 dad_4 G/A +chr1 16976 16977 G A snp ts 0.375 0 DDX11L1 mom_4 G/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_1 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_2 A/G +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_2 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_2 A/G +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_1 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_1 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_3 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_3 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_3 A/G +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 child_4 A/G +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 dad_4 A/A +chr1 17221 17222 A G snp ts 0.166666666667 1 DDX11L1 mom_4 A/A +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_1 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_2 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_2 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_2 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 
0.0833333333333 0 WASH7P dad_1 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_1 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_3 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_3 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_3 TTCT/TTCT +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P child_4 TTCT/T +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P dad_4 TTCT/T +chr1 17362 17366 TTCT T indel del 0.0833333333333 0 WASH7P mom_4 TTCT/TTCT +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_1 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_2 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_2 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_2 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_1 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_1 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_3 G/A +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_3 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_3 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 child_4 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 dad_4 G/G +chr1 17562 17563 G A snp ts 0.0416666666667 0 DDX11L1 mom_4 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_1 G/C +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_2 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_2 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_2 G/C +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_1 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_1 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 child_3 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_3 G/C +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_3 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 
child_4 G/C +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 dad_4 G/G +chr1 17696 17697 G C snp tv 0.166666666667 1 DDX11L1 mom_4 G/G +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_1 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_2 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_2 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_2 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_1 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_1 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_3 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_3 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_3 A/A +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 child_4 A/G +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 dad_4 A/G +chr1 17721 17722 A G snp ts 0.125 1 DDX11L1 mom_4 A/G +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_1 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_2 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_2 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_2 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_1 C/A +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_1 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_3 C/A +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_3 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_3 C/C +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P child_4 C/A +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P dad_4 C/A +chr1 17729 17730 C A snp tv 0.208333333333 0 WASH7P mom_4 C/A +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_1 A/A +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_2 A/A +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_2 A/A +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_2 A/A +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_1 A/G +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_1 A/G +chr1 17745 17746 A G snp ts 
0.333333333333 1 DDX11L1 child_3 A/G +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_3 A/G +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_3 A/G +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 child_4 A/G +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 dad_4 A/G +chr1 17745 17746 A G snp ts 0.333333333333 1 DDX11L1 mom_4 A/G
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_gene_wise_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,6 @@ +chrom start end gene impact impact_severity max_aaf_all variant_filters n_gene_variants gene_filters +chr10 48003991 48003992 ASAH2C missense_variant MED 0.543088975937 1 1 1 +chr10 126678091 126678092 CTBP2 stop_gained HIGH 0.0904917363803 1 1 1 +chr10 135369531 135369532 SYCE1 missense_variant MED 0.2698 1 1 1 +chr10 1142207 1142208 WDR37 stop_lost HIGH 1.0 1 1 1 +chr16 72057434 72057435 DHODH missense_variant MED 0.000432002764818 1 1 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_interactions_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,25 @@ +sample gene order_of_interaction interacting_gene +M10475 CTBP2 0_order: none +M10475 CTBP2 1_order: none +M10475 CTBP2 2_order: none +M10475 CTBP2 3_order: none +M10475 CTBP2 4_order: WDR37 +M10475 CTBP2 5_order: none +M128215 CTBP2 0_order: CTBP2 +M128215 CTBP2 1_order: none +M128215 CTBP2 2_order: none +M128215 CTBP2 3_order: none +M128215 CTBP2 4_order: WDR37 +M128215 CTBP2 5_order: none +M10478 CTBP2 0_order: none +M10478 CTBP2 1_order: none +M10478 CTBP2 2_order: none +M10478 CTBP2 3_order: none +M10478 CTBP2 4_order: WDR37 +M10478 CTBP2 5_order: MTG1 +M10500 CTBP2 0_order: none +M10500 CTBP2 1_order: none +M10500 CTBP2 2_order: none +M10500 CTBP2 3_order: none +M10500 CTBP2 4_order: WDR37 +M10500 CTBP2 5_order: MTG1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_load_input.vcf Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,70 @@ +##fileformat=VCFv4.1 +##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD"> +##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder"> +##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder"> +##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder"> +##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints"> +##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count"> +##ALT=<ID=DEL,Description="Deletion"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder"> +##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods"> +##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README"> 
+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on AC/AN"> +##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN"> +##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN"> +##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN"> +##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN"> +##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents"> +##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data"> +##reference=GRCh37 +##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani" +##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz " +##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )' "> +#CHROM POS ID REF ALT QUAL FILTER INFO +1 10583 rs58108140 G A 100.0 PASS 
AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 10611 rs189107123 C G 100.0 PASS AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13302 rs180734498 C T 100.0 PASS 
THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13327 rs144762171 G C 100.0 PASS 
AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13957 . 
TC T 28.0 PASS AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 13980 rs151276478 T C 100.0 PASS 
AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 30923 rs140337953 G T 100.0 PASS 
AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|) +1 46402 . C CTGT 31.0 PASS AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 47190 . 
G GA 192.0 PASS AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||) +1 51476 rs187298206 T C 100.0 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 51479 rs116400033 T A 100.0 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||) +1 51914 rs190452223 T G 100.0 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 51935 rs181754315 C T 100.0 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||) +1 51954 rs185832753 G C 100.0 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52058 rs62637813 G C 100.0 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||) +1 52144 rs190291950 T A 100.0 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52185 . 
TTAA T 244.0 PASS AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 52238 rs150021059 T G 100.0 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||) +1 53234 . CAT C 227.0 PASS AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54353 rs140052487 C A 100.0 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) +1 54421 rs146477069 A G 100.0 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54490 rs141149254 G A 100.0 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||) +1 54676 rs2462492 C T 100.0 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||) +1 54753 rs143174675 T G 100.0 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||) +1 55164 rs3091274 C A 100.0 PASS 
AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||) +1 55249 . C CTATGG 443.0 PASS AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55299 rs10399749 C T 100.0 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||) +1 55313 rs182462964 A T 100.0 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55326 rs3107975 T C 100.0 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55330 rs185215913 G A 100.0 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55367 rs190850374 G A 100.0 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55388 rs182711216 C T 100.0 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 55394 rs2949420 T A 100.0 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55416 rs193242050 G A 100.0 PASS 
AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55427 rs183189405 T C 100.0 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55816 rs187434873 G A 100.0 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55850 rs191890754 C G 100.0 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55852 rs184233019 G C 100.0 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_lofsieve_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,11 @@ +chrom start end ref alt highest_impact aa_change var_trans_pos trans_aa_length var_trans_pct sample genotype gene transcript trans_type +chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M10475 C/C WDR37 ENST00000381329 protein_coding +chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M10478 C/C WDR37 ENST00000381329 protein_coding +chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M10500 C/C WDR37 ENST00000381329 protein_coding +chr10 1142207 1142208 T C stop_lost */R 250 250/249 1.00401606426 M128215 C/C WDR37 ENST00000381329 protein_coding +chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000531469 protein_coding +chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000309035 protein_coding +chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000494626 protein_coding +chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000337195 protein_coding +chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000334808 protein_coding +chr10 126678091 126678092 G A stop_gained Q/* 445 445/445 1.0 M128215 G/A CTBP2 ENST00000411419 protein_coding
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_mendel_errors_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,10 @@ +chrom start end vcf_id variant_id anno_id ref alt qual filter type sub_type call_rate in_dbsnp rs_ids sv_cipos_start_left sv_cipos_end_left sv_cipos_start_right sv_cipos_end_right sv_length sv_is_precise sv_tool sv_evidence_type sv_event_id sv_mate_id sv_strand in_omim clinvar_sig clinvar_disease_name clinvar_dbsource clinvar_dbsource_id clinvar_origin clinvar_dsdb clinvar_dsdbid clinvar_disease_acc clinvar_in_locus_spec_db clinvar_on_diag_assay clinvar_causal_allele pfam_domain cyto_band rmsk in_cpg_island in_segdup is_conserved gerp_bp_score gerp_element_pval num_hom_ref num_het num_hom_alt num_unknown aaf hwe inbreeding_coeff pi recomb_rate gene transcript is_exonic is_coding is_splicing is_lof exon codon_change aa_change aa_length biotype impact impact_so impact_severity polyphen_pred polyphen_score sift_pred sift_score anc_allele rms_bq cigar depth strand_bias rms_map_qual in_hom_run num_mapq_zero num_alleles num_reads_w_dels haplotype_score qual_depth allele_count allele_bal in_hm2 in_hm3 is_somatic somatic_score in_esp aaf_esp_ea aaf_esp_aa aaf_esp_all exome_chip in_1kg aaf_1kg_amr aaf_1kg_eas aaf_1kg_sas aaf_1kg_afr aaf_1kg_eur aaf_1kg_all grc gms_illumina gms_solid gms_iontorrent in_cse encode_tfbs encode_dnaseI_cell_count encode_dnaseI_cell_list encode_consensus_gm12878 encode_consensus_h1hesc encode_consensus_helas3 encode_consensus_hepg2 encode_consensus_huvec encode_consensus_k562 vista_enhancers cosmic_ids info cadd_raw cadd_scaled fitcons in_exac aaf_exac_all aaf_adj_exac_all aaf_adj_exac_afr aaf_adj_exac_amr aaf_adj_exac_eas aaf_adj_exac_fin aaf_adj_exac_nfe aaf_adj_exac_oth aaf_adj_exac_sas exac_num_het exac_num_hom_alt exac_num_chroms max_aaf_all gts gt_types gt_phases gt_depths gt_ref_depths gt_alt_depths gt_quals gt_copy_numbers gt_phred_ll_homref gt_phred_ll_het gt_phred_ll_homalt family_id 
family_members family_genotypes samples family_count violation violation_prob +chr10 1142207 1142208 None 1 4 T C 3404.30004883 None snp ts 1.0 1 rs10794716 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10p15.3 None 0 0 0 None None 7 1 1 0 0.166666666667 0.0718606383197 0.6 0.294117647059 0.200924 WDR37 ENST00000381329 1 1 0 1 exon_10_1142110_1142566 Tga/Cga *250R 249 protein_coding stop_lost stop_lost HIGH None None None None None None None 122 None 36.0 0 0 8 0.0 2.67470002174 27.8999996185 8 None None None None None 1 0.999534883721 0.975034044485 0.991234814701 0 1 0.9942 1 1 0.9561 1 0.98762 None None None None 0 None 2 Osteobl;Progfib T T T T T T None None None None None 0.156188 1 0.997 0.997067786838 0.970305592927 0.998358956642 1 1 0.999595432887 0.998898678414 1 346 60354 121410 1.0 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C'] [0 0 1 0 0 0 0 0 3] [False False False False False False False False False] [38 29 23 38 29 22 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 22 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) T/T,T/T,T/C 1_kid 2 plausible de novo;implausible de novo 0.00000 +chr10 1142207 1142208 None 1 4 T C 3404.30004883 None snp ts 1.0 1 rs10794716 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10p15.3 None 0 0 0 None None 7 1 1 0 0.166666666667 0.0718606383197 0.6 0.294117647059 0.200924 WDR37 ENST00000381329 1 1 0 1 exon_10_1142110_1142566 Tga/Cga *250R 249 protein_coding stop_lost stop_lost HIGH None None None None None None None 122 None 36.0 0 0 8 0.0 2.67470002174 27.8999996185 8 None None None None None 1 0.999534883721 0.975034044485 0.991234814701 0 
1 0.9942 1 1 0.9561 1 0.98762 None None None None 0 None 2 Osteobl;Progfib T T T T T T None None None None None 0.156188 1 0.997 0.997067786838 0.970305592927 0.998358956642 1 1 0.999595432887 0.998898678414 1 346 60354 121410 1.0 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/T' 'T/T' 'T/T' 'C/C'] [0 0 1 0 0 0 0 0 3] [False False False False False False False False False] [38 29 23 38 29 22 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 22 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 3 3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected) T/T,T/T,C/C 3_kid 2 plausible de novo;implausible de novo 0.00000 +chr10 48003991 48003992 None 2 1 C T 1047.86999512 None snp ts 1.0 1 rs142685947,rs3739968 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q11.22 None 0 1 1 None 3.10871e-42 2 6 1 0 0.444444444444 0.29371811258 -0.35 0.522875816993 1.718591 ASAH2C ENST00000420079 1 1 0 0 exon_10_48003968_48004056 tGt/tAt C540Y 610 protein_coding missense_variant missense_variant MED None None None None None None None 165 None 20.9400005341 0 0 8 0.0 4.382999897 9.52999973297 4 None None None None None 0 None None None 0 1 0.3112 0.4573 0.3855 0.1241 0.5149 0.346645 grc_fix 73.3 40.3 92.8 0 None None None R R R R R R None None None None None 0.553676 1 0.443 0.448537771896 0.288974151858 0.281426746944 0.543088975937 0.524984286612 0.478147713207 0.463529411765 0.418641164716 17495 15317 107302 0.543088975937 ['C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T' 'C/T' 'C/T' 'C/T'] [1 1 3 0 0 1 1 1 1] [False False False False False False False False False] [38 29 23 38 29 23 38 29 23] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 23 37 29 23] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 
940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 2 2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected) C/C,C/C,C/T 2_kid 1 plausible de novo 0.00000 +chr10 48004991 48004992 None 3 1 C T 1047.86999512 None snp ts 1.0 0 None None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q11.22 None 0 1 0 None None 2 6 1 0 0.444444444444 0.29371811258 -0.35 0.522875816993 1.718591 ASAH2C ENST00000420079 1 1 0 0 exon_10_48003968_48004056 tGt/tAt C540Y 610 protein_coding missense_variant missense_variant MED None None None None None None None 165 None 20.9400005341 0 0 8 0.0 4.382999897 9.52999973297 4 None None None None None 0 None None None 0 0 None None None None None None grc_fix None None None 0 None None None R R R R R R None None None None None 0.061011 0 None None None None None None None None None None None None -1.0 ['C/T' 'C/T' 'C/T' 'C/T' 'C/T' 'T/T' 'C/C' 'C/C' 'C/T'] [1 1 1 1 1 3 0 0 1] [False False False False False False False False False] [38 29 23 38 29 23 38 29 23] [1 0 0 1 0 0 1 0 0] [37 29 23 37 29 23 37 29 23] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 3 3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected) C/C,C/C,C/T 3_kid 1 plausible de novo 0.00000 +chr10 135336655 135336656 None 4 1 G A 38.3400001526 None snp ts 1.0 1 rs6537611 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 0 None None 4 5 0 0 0.277777777778 0.248563248239 -0.384615384615 0.424836601307 0.43264 SPRN ENST00000541506 0 0 0 0 151 protein_coding intron_variant intron_variant LOW None None None None None None None 2 None 37.0 4 0 4 0.0 0.0 19.1700000763 4 None None None None None 0 None None None 0 1 0.9957 1 1 0.9297 
1 0.980831 None None None None 0 None None None R R R R unknown R None None None None None 0.056701 0 None None None None None None None None None None None None 1.0 ['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A'] [0 0 1 0 0 1 1 1 1] [False False False False False False False False False] [38 29 24 38 29 24 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 24 37 29 24 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) G/G,G/G,G/A 1_kid 2 plausible de novo;plausible de novo 0.00000 +chr10 135336655 135336656 None 4 1 G A 38.3400001526 None snp ts 1.0 1 rs6537611 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 0 None None 4 5 0 0 0.277777777778 0.248563248239 -0.384615384615 0.424836601307 0.43264 SPRN ENST00000541506 0 0 0 0 151 protein_coding intron_variant intron_variant LOW None None None None None None None 2 None 37.0 4 0 4 0.0 0.0 19.1700000763 4 None None None None None 0 None None None 0 1 0.9957 1 1 0.9297 1 0.980831 None None None None 0 None None None R R R R unknown R None None None None None 0.056701 0 None None None None None None None None None None None None 1.0 ['G/G' 'G/G' 'G/A' 'G/G' 'G/G' 'G/A' 'G/A' 'G/A' 'G/A'] [0 0 1 0 0 1 1 1 1] [False False False False False False False False False] [38 29 24 38 29 24 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 24 37 29 24 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 2 2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected) G/G,G/G,G/A 2_kid 2 plausible de novo;plausible de novo 0.00000 +chr10 135369531 135369532 None 5 6 T 
C 122.620002747 None snp ts 1.0 1 rs3747881,rs386585367 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 1 None 3.86096e-59 6 3 0 0 0.166666666667 0.548506235587 -0.2 0.294117647059 0.022013 SYCE1 ENST00000368517 1 1 0 0 exon_10_135369485_135369551 aAg/aGg K147R 282 protein_coding missense_variant missense_variant MED None None None None None None None 239 None 36.0200004578 2 0 8 0.0 5.71409988403 2.30999994278 2 None None None None None 1 0.0938372093023 0.163867453473 0.117561125634 1 1 0.1844 0.2698 0.2188 0.1997 0.1093 0.197284 None None None None 0 None None None R R R R R R None None None None None 0.487112 1 0.134 0.134286610119 0.184985563041 0.164938655607 0.256026889198 0.122313048744 0.0919761054243 0.113686534216 0.194096927001 13825 1225 121196 0.2698 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C'] [0 0 1 0 0 1 0 0 1] [False False False False False False False False False] [38 29 22 38 29 21 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 22 37 29 21 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 1 1_dad(1_dad;unaffected),1_mom(1_mom;unaffected),1_kid(1_kid;affected) T/T,T/T,T/C 1_kid 3 plausible de novo;plausible de novo;plausible de novo 0.00000 +chr10 135369531 135369532 None 5 6 T C 122.620002747 None snp ts 1.0 1 rs3747881,rs386585367 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 1 None 3.86096e-59 6 3 0 0 0.166666666667 0.548506235587 -0.2 0.294117647059 0.022013 SYCE1 ENST00000368517 1 1 0 0 exon_10_135369485_135369551 aAg/aGg K147R 282 protein_coding missense_variant missense_variant MED None None None None None None None 239 None 36.0200004578 2 0 8 0.0 5.71409988403 2.30999994278 2 None None None None None 
1 0.0938372093023 0.163867453473 0.117561125634 1 1 0.1844 0.2698 0.2188 0.1997 0.1093 0.197284 None None None None 0 None None None R R R R R R None None None None None 0.487112 1 0.134 0.134286610119 0.184985563041 0.164938655607 0.256026889198 0.122313048744 0.0919761054243 0.113686534216 0.194096927001 13825 1225 121196 0.2698 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C'] [0 0 1 0 0 1 0 0 1] [False False False False False False False False False] [38 29 22 38 29 21 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 22 37 29 21 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 3 3_dad(3_dad;unaffected),3_mom(3_mom;unaffected),3_kid(3_kid;affected) T/T,T/T,T/C 3_kid 3 plausible de novo;plausible de novo;plausible de novo 0.00000 +chr10 135369531 135369532 None 5 6 T C 122.620002747 None snp ts 1.0 1 rs3747881,rs386585367 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q26.3 None 0 0 1 None 3.86096e-59 6 3 0 0 0.166666666667 0.548506235587 -0.2 0.294117647059 0.022013 SYCE1 ENST00000368517 1 1 0 0 exon_10_135369485_135369551 aAg/aGg K147R 282 protein_coding missense_variant missense_variant MED None None None None None None None 239 None 36.0200004578 2 0 8 0.0 5.71409988403 2.30999994278 2 None None None None None 1 0.0938372093023 0.163867453473 0.117561125634 1 1 0.1844 0.2698 0.2188 0.1997 0.1093 0.197284 None None None None 0 None None None R R R R R R None None None None None 0.487112 1 0.134 0.134286610119 0.184985563041 0.164938655607 0.256026889198 0.122313048744 0.0919761054243 0.113686534216 0.194096927001 13825 1225 121196 0.2698 ['T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C' 'T/T' 'T/T' 'T/C'] [0 0 1 0 0 1 0 0 1] [False False False False False False False False False] [38 29 22 38 29 21 38 29 24] [1 0 0 1 0 0 1 0 0] [37 29 22 37 29 
21 37 29 24] [ 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939 87.16000366 78.19999695 66.13999939] None [940 899 729 940 899 729 940 899 729] [87 78 66 87 78 66 87 78 66] [0 0 0 0 0 0 0 0 0] 2 2_dad(2_dad;unaffected),2_mom(2_mom;unaffected),2_kid(2_kid;affected) T/T,T/T,T/C 2_kid 3 plausible de novo;plausible de novo;plausible de novo 0.00000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_pathways_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,17 @@ +chrom start end ref alt impact sample genotype gene transcript pathway +chr10 52004314 52004315 T C intron_variant M10500 C/C ASAH2 ENST00000329428 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways +chr10 52004314 52004315 T C intron_variant M128215 C/C ASAH2 ENST00000329428 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways +chr10 52004314 52004315 T C intron_variant M10500 C/C ASAH2 ENST00000447815 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways +chr10 52004314 52004315 T C intron_variant M128215 C/C ASAH2 ENST00000447815 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways +chr10 52004314 52004315 T C intron_variant M10500 C/C ASAH2 ENST00000395526 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways +chr10 52004314 52004315 T C intron_variant M128215 C/C ASAH2 ENST00000395526 hsa00600:Sphingolipid_metabolism,hsa01100:Metabolic_pathways +chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000531469 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer +chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000309035 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer +chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000494626 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer +chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000337195 hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer +chr10 126678091 126678092 G A stop_gained M128215 G/A CTBP2 ENST00000411419 
hsa05220:Chronic_myeloid_leukemia,hsa04310:Wnt_signaling_pathway,hsa04330:Notch_signaling_pathway,hsa05200:Pathways_in_cancer +chr10 135336655 135336656 G A intron_variant M10478 A/A CYP2E1 ENST00000463117 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism +chr10 135336655 135336656 G A intron_variant M128215 A/A CYP2E1 ENST00000463117 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism +chr10 135336655 135336656 G A upstream_gene_variant M10478 A/A CYP2E1 ENST00000252945 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism +chr10 135336655 135336656 G A upstream_gene_variant M128215 A/A CYP2E1 ENST00000252945 hsa00982:Drug_metabolism_cytochrome_P450,hsa01100:Metabolic_pathways,hsa00590:Arachidonic_acid_metabolism,hsa00980:Metabolism_of_xenobiotics_by_cytochrome_P450,hsa00591:Linoleic_acid_metabolism +chr16 72057434 72057435 C T missense_variant M10475 C/T DHODH ENST00000219240 hsa01100:Metabolic_pathways,hsa00240:Pyrimidine_metabolism
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_qc_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,5 @@ +sample sex chrX_homref chrX_het chrX_homalt chrX_unknown +M10475 male 0 0 0 0 +M10478 female 0 0 0 0 +M10500 female 0 0 0 0 +M128215 male 0 0 0 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_query_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,10 @@ +10582 +10610 +13301 +13326 +13956 +13979 +30922 +46401 +47189 +51475
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_region_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,1 @@ +chr10 48003991 48003992 None 2 1 C T 1047.86999512 None snp ts 1.0 1 rs142685947,rs3739968 None None None None None 1 None None None None None None None None None None None None None None None None None None chr10q11.22 None 0 1 1 None 3.10871e-42 1 2 1 0 0.5 1 0 0.571428571429 1.718591 ASAH2C ENST00000420079 1 1 0 0 16/17 tGt/tAt C/Y 542/612 protein_coding missense_variant missense_variant MED benign 0.0 tolerated 1.0 None None None 165 None 20.9400005341 0 0 8 0.0 4.382999897 9.52999973297 4 None None None None None 0 None None None 0 1 0.3112 0.4573 0.3855 0.1241 0.5149 0.346645 grc_fix 73.3 40.3 92.8 0 None None None R R R R R R None None None None None 0.553676 1 0.443 0.448537771896 0.288974151858 0.281426746944 0.543088975937 0.524984286612 0.478147713207 0.463529411765 0.418641164716 17495 15317 107302 1 0.543088975937
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_roh_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,7 @@ +chrom start end sample num_of_snps density_per_kb run_length_in_bp +chr10 1142208 135369532 M10475 5 0.0 134227324 +chr10 1142208 135210791 M10500 5 0.0001 134068583 +chr10 1142208 135210791 M10478 5 0.0001 134068583 +chr10 1142208 135336656 M10478 4 0.0 134194448 +chr10 1142208 135336656 M128215 6 0.0001 134194448 +chr10 1142208 135369532 M128215 5 0.0 134227324
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_stats_result.tabular Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,5 @@ +sample total +M10475 3 +M10478 6 +M10500 6 +M128215 4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gemini_databases.loc.sample Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,3 @@ +## GEMINI databases +#Version dbkey Description Path +#08_08_2014 hg19 Database (08-08-2014) /path/to/data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,7 @@ +<tables> + <table name="gemini_databases" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/gemini_databases.loc" /> + </table> +</tables> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Feb 18 08:56:22 2016 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="gemini" version="0.18.1"> + <repository changeset_revision="be869e11582f" name="package_gemini_0_18_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>
