Repository 'gemini_inheritance'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gemini_inheritance

Changeset 0:3123ce7acd0e (2019-01-11)
Next changeset 1:162a9d4a3bdc (2020-01-24)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
added:
gemini_inheritance.xml
gemini_macros.xml
readme.rst
repository_dependencies.xml
static/images/gemini_mendel_errors.png
test-data/anno.bed
test-data/gemini_actionable_mutations_result.tabular
test-data/gemini_amend.ped
test-data/gemini_amend.vcf
test-data/gemini_amend_input.db
test-data/gemini_annotate_result.db
test-data/gemini_auto_dom_input.db
test-data/gemini_auto_rec_input.db
test-data/gemini_burden_calpha_template.tabular
test-data/gemini_burden_count_highimpact_result.tabular
test-data/gemini_burden_count_nonsynonymous_result.tabular
test-data/gemini_comphets_input.db
test-data/gemini_de_novo_input.db
test-data/gemini_fusions_result.tabular
test-data/gemini_is_somatic_result.db
test-data/gemini_load_input.vcf
test-data/gemini_load_result1.db
test-data/gemini_load_result2.db
test-data/gemini_versioned_databases.loc
test-data/gemini_windower_template.tabular
test-data/test-cache/gemini-config.yaml
test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz
test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi
test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz
test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz
test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi
test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz
test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz
test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi
test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz
test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi
test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz
test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz
test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi
test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz
test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/detailed_gene_table_v75
test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz
test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi
test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz
test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi
test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz
test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz
test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/hg19.CpG.bed.gz
test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.cytoband.bed.gz
test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.dgv.bed.gz
test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz
test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.gwas.bed.gz
test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz
test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.rmsk.bed.gz
test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.segdup.bed.gz
test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi
test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz
test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi
test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz
test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi
test-data/test-cache/gemini/data/hprd_interaction_edges.gz
test-data/test-cache/gemini/data/kegg_pathways_ensembl66
test-data/test-cache/gemini/data/kegg_pathways_ensembl67
test-data/test-cache/gemini/data/kegg_pathways_ensembl68
test-data/test-cache/gemini/data/kegg_pathways_ensembl69
test-data/test-cache/gemini/data/kegg_pathways_ensembl70
test-data/test-cache/gemini/data/kegg_pathways_ensembl71
test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz
test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi
test-data/test-cache/gemini/data/summary_gene_table_v75
test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz
test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi
test-data/util/README.rst
test-data/util/build-data/anno.bed
test-data/util/build-data/anno.bed.gz
test-data/util/build-data/anno.bed.gz.tbi
test-data/util/build-data/gemini_load_input.vcf
test-data/util/build-data/test.auto_dom.ped
test-data/util/build-data/test.auto_dom.vcf
test-data/util/build-data/test.auto_rec.ped
test-data/util/build-data/test.auto_rec.vcf
test-data/util/build-data/test.comp_het.ped
test-data/util/build-data/test.comp_het.vcf
test-data/util/build-data/test.de_novo.ped
test-data/util/build-data/test.de_novo.vcf
test-data/util/build-gemini-testdata.sh
test-data/util/shrink_simple_tab.py
test-data/util/shrink_tabix.py
tool-data/gemini_versioned_databases.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 3123ce7acd0e gemini_inheritance.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_inheritance.xml Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,486 @@\n+<tool id="gemini_inheritance" name="GEMINI inheritance pattern" version="@VERSION@">\n+    <description>based identification of candidate genes</description>\n+    <macros>\n+        <import>gemini_macros.xml</import>\n+        <xml name="name_X">\n+            <param name="X" type="text" value=""\n+            label="Alias to use for X chromosome"\n+            help="The tool expects the X chromosome to be named \'X\' or \'chrX\'. If the reference genome used for variant calling had a different name for it, you will have to specify it here." />\n+        </xml>\n+    </macros>\n+    <expand macro="requirements" />\n+    <expand macro="stdio" />\n+    <expand macro="version_command" />\n+    <command>\n+<![CDATA[\n+        gemini ${inheritance.pattern_type}\n+\n+            #for $cond in $inheritance.constraint:\n+                #set $filter = str($cond.filter).strip()\n+                #if str($filter):\n+                    #if str($inheritance.pattern_type) == "comp_hets" and $cond.overwrite_default_filter:\n+                        --gene-where \'$filter\'\n+                    #else:\n+                        --filter \'$filter\'\n+                    #end if\n+                #end if\n+            #end for\n+\n+            #if str($inheritance.pattern_type) in ("comp_hets", "mendel_errors"):\n+                ${inheritance.by_pattern_only}\n+            #end if\n+            \n+            ${inheritance.lenient}\n+            ${inheritance.allow_unaffected}\n+            \n+            #if str($inheritance.pattern_type).startswith(\'x_linked_\') and str($inheritance.X).strip():\n+                -X "${inheritance.X}"\n+            #end if\n+\n+            #if int($family_wise.min_kindreds) > 0:\n+                --min-kindreds ${family_wise.min_kindreds}\n+            #end if\n+\n+            #if str($family_wise.families).strip():\n+                #set $families = \',\'.join([f.strip() for f in $family_wise.families.split(\',\')])\n+  
              --families "$families"\n+            #end if            \n+\n+            #if int($family_wise.per_variant_selection.min_dp) > 0:\n+                -d ${family_wise.per_variant_selection.min_dp}\n+            #end if\n+\n+            #if int($family_wise.per_variant_selection.min_gq) > 0:\n+                --min-gq ${family_wise.per_variant_selection.min_gq}\n+            #end if\n+\n+            #if int($family_wise.per_variant_selection.max_pl) > -1:\n+                --gt-pl-max ${family_wise.per_variant_selection.max_pl}\n+            #end if\n+            \n+            #set $report = $oformat.report\n+            @COLUMN_SELECT@\n+\n+            "${ infile }"\n+            > "${ outfile }"\n+]]>\n+    </command>\n+    <inputs>\n+        <expand macro="infile" />\n+        <conditional name="inheritance">\n+            <param name="pattern_type" type="select"\n+            label="Your assumption about the inheritance pattern of the phenotype of interest">\n+                <option value="autosomal_recessive">Autosomal recessive</option>\n+                <option value="autosomal_dominant">Autosomal dominant</option>\n+                <option value="x_linked_recessive">X-linked recessive</option>\n+                <option value="x_linked_dominant">X-linked dominant</option>\n+                <option value="de_novo">Autosomal de-novo</option>\n+                <option value="x_linked_de_novo">X-linked de-novo</option>\n+                <option value="comp_hets">Compound heterozygous</option>\n+                <option value="mendel_errors">Violation of mendelian laws (LOH, plausible and implausible de-novo, uniparental disomy)</option>\n+            </param>\n+            <when value="comp_hets">\n+                <expand macro="insert_constraint">\n+                    <expand macro="overwritable_where_default" default_where="exonic and high-impact variants (SQL clause: is_exonic = 1 or impact_severity != \'LOW\')" />\n+                </expand>\n+ 
               <param argument="--pattern-only" name="by_pattern_only" type="boolean" truevalue="--patt'..b'ations*\n+\n+Criteria:\n+\n+- all affected must be het\n+- [affected] all unaffected must be homref or homalt\n+- at least 1 affected kid must have unaffected parents\n+- [default] if an affected has affected parents, it\xe2\x80\x99s not de_novo\n+- [default] all affected kids must have unaffected (or no) parents\n+- [default] warning if none of the affected samples have parents.\n+\n+The last 3 items, prefixed with [default] can be turned off with ``--lenient``.\n+\n+If ``--allow-unaffected`` is specified, then the criterion prefixed [affected]\n+is not enforced.\n+\n+----\n+\n+*X-linked de-novo mutations*\n+\n+Criteria:\n+\n+- affected female child must be het\n+- affected male child must be hom_alt (or het)\n+- parents should be unaffected and hom_ref\n+\n+Note: Pseudo-autosomal regions are not accounted for by the tool.\n+\n+----\n+\n+*Compound heterozygosity*\n+\n+Unlike canonical recessive sites where the same recessive allele is inherited\n+from both parents at the *same* site in the gene, compound heterozygosity\n+occurs when the individual\xe2\x80\x99s phenotype is caused by two heterozygous recessive\n+alleles at *different* sites in a particular gene.\n+\n+To detect compound heterozygosity, the tool looks for two heterozygous variants\n+impacting the same gene at different loci. The complicating factor is that this\n+is a case of *recessive* inheritance and as such, we must also require that the\n+consequential alleles at each heterozygous site were inherited on different\n+chromosomes (one from each parent). 
Hence, where possible, the tool will phase\n+by transmission.\n+\n+Criteria (default):\n+\n+- All affected individuals must be heterozygous at both sites.\n+- No unaffected can be homozygous alterate at either site.\n+- Neither parent of an affected sample can be homozygous reference at both\n+  sites.\n+- If any unphased-unaffected is het at both sites, the site will be given lower\n+  priority.\n+- No phased-unaffected can be heterozygous at both sites.\n+\n+  a. ``--allow-unaffected`` keeps sites where a phased unaffected shares the\n+     het-pair\n+  b. unphased, unaffected that share the het pair are counted and reported for\n+     each candidate pair.\n+- Candidates where an affected from the same family does NOT share the same het\n+  pair are removed.\n+- Sites are automatically phased by transmission when parents are present in\n+  order to remove false positive candidates.\n+  \n+  If data from one or both parents are unavailable and the child\xe2\x80\x99s data was not\n+  phased prior to loading into GEMINI, all comp_het variant pairs will\n+  automatically be given at most priority == 2. 
If there\xe2\x80\x99s only a single parent\n+  and both the parent and the affected are HET at both sites, the candidate\n+  will have priority 3.\n+  \n+Criteria (``--pattern-only``):\n+\n+- Kid must be HET at both sites.\n+- Kid must have alts on different chromosomes.\n+- Neither parent can be HOM_ALT at either site.\n+- If either parent is phased at both sites and matches the kid, it\xe2\x80\x99s excluded.\n+- When the above criteria are met, and both parents and kid are phased or\n+  parents are HET at different sites, the priority is 1.\n+- If either parent is HET at both sites, priority is reduced.\n+- If both parents are not phased, the priority is 2.\n+- For every parent that\xe2\x80\x99s a het at both sites, the priority is incremented by 1.\n+- The priority in a family is the minimum found among all kids.\n+\n+----\n+\n+*Violation of Mendelian laws*\n+\n+The tool can be used to detect the following kinds of non-Mendelian patterns:\n+\n+- loss of heterozygosity (LOH) events\n+- de-novo mutations\n+- implausible de-novo mutations\n+- potential cases of uniparental disomy\n+\n+Criteria:\n+\n+- LOH: child and one parent are opposite homozygotes; other parent is HET\n+- plausible de novo: kid is het. parents are same homozygotes\n+- implausible de novo: kid is homozygote. parents are same homozygotes and opposite to kid.\n+- uniparental disomy: parents are opposite homozygotes; kid is homozygote\n+\n+]]>\n+    </help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 3123ce7acd0e gemini_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,254 @@\n+<macros>\n+    <!-- gemini version to be used -->\n+    <token name="@VERSION@">0.20.1</token>\n+    <!-- minimal annotation files version required by this version of gemini -->\n+    <token name="@DB_VERSION@">200</token>\n+\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@VERSION@">gemini</requirement>\n+            <yield />\n+        </requirements>\n+    </xml>\n+\n+    <xml name="version_command">\n+        <version_command>gemini --version</version_command>\n+    </xml>\n+\n+    <xml name="stdio">\n+        <stdio>\n+            <exit_code range="1:" />\n+            <exit_code range=":-1" />\n+            <regex match="Error:" />\n+            <regex match="Exception:" />\n+            <yield />\n+        </stdio>\n+    </xml>\n+\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>\n+            <yield />\n+        </citations>\n+    </xml>\n+\n+    <xml name="annotation_dir">\n+        <param name="annotation_databases" type="select" label="Choose a gemini annotation source">\n+            <options from_data_table="gemini_versioned_databases">\n+                <filter type="sort_by" column="0" />\n+                <filter type="static_value" column="2" value="@DB_VERSION@" />\n+            </options>\n+        </param>\n+    </xml>\n+\n+    <xml name="infile">\n+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." 
>\n+            <options options_filter_attribute="metadata.gemini_version" >\n+                <filter type="add_value" value="@VERSION@" />\n+            </options>\n+        </param>\n+    </xml>\n+\n+    <xml name="add_header_column">\n+        <param argument="--header" name="header" type="boolean" truevalue="--header" falsevalue="" checked="True" \n+        label="Add a header of column names to the output" />\n+    </xml>\n+\n+    <xml name="column_filter" token_help="" token_minimalset="variant_id, gene">\n+        <conditional name="report">\n+            <param name="report_selector" type="select"\n+            label="Set of columns to include in the variant report table"\n+            help="@HELP@">\n+                <option value="minimal">Minimal (report only a preconfigured minimal set of columns)</option>\n+                <option value="full">Full (report all columns defined in the GEMINI database variants table)</option>\n+                <option value="custom">Custom (report user-specified columns)</option>\n+            </param>\n+            <when value="full" />\n+            <when value="minimal">\n+                <param name="columns" type="hidden" value="@MINIMALSET@" />\n+                <param name="extra_cols" type="hidden" value="" />\n+            </when>\n+            <when value="custom">\n+                <param name="columns" type="select" display="checkboxes" multiple="true" optional="true"\n+                label="Choose columns to include in the report" help="(--columns)">\n+                    <option value="gene">gene</option>\n+                    <option value="chrom">chrom</option>\n+                    <option value="start">start</option>\n+                    <option value="end">end</option>\n+                    <option value="ref">ref</option>\n+                    <option value="alt">alt</option>\n+                    <option value="impact">impact</option>\n+                    <option 
value="impact_severity">impact_severity</option>\n+                    <option value="max_aaf_all">alternative allele frequency (max_aaf_all)</option>\n+                </param>\n+                <param name="extra_cols" type="text"\n+                label="Additional columns (comma-separated)"\n+                help="Column must be specified by the exact name they have in the GEMINI database, e.g., is_exonic or num_hom_alt, but, for genotype columns, GEMINI wildcard syntax is supported. The order of columns in the list is m'..b'              <option value="--in only all">Return a variant if is found in ALL samples passing the sample filter, and in NO sample NOT passing it. (only all)</option>\n+            </param>\n+            <expand macro="min_kindreds"\n+            label="Minimum number of families in which a variant must pass the sample filter" help=""/>\n+            <param argument="--family-wise" name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False"\n+            label="Apply the sample-filter on a family-wise basis" help="If a variant passes the sample filter in at least the minimum number of families specified above it is retained." />\n+        </repeat>\n+    </xml>\n+\n+    <xml name="region_filter">\n+        <repeat name="regions" title="Region Filter" default="0" min="0"\n+        help="Filter variant sites by their position in the genome. 
If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">\n+            <param name="chrom" type="text" label="Chromosome">\n+                <validator type="expression" message="A chromosome identifier is required when specifying a region filter">value.strip()</validator>\n+            </param>\n+            <param name="start" type="text" label="Region Start">\n+                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>\n+            </param>\n+            <param name="stop" type="text" label="Region End">\n+                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>\n+            </param>\n+        </repeat>\n+    </xml>\n+\n+    <token name="@PROVIDE_ANNO_DATA@"><![CDATA[\n+        mkdir gemini &&\n+        ln -s \'${annotation_databases.fields.path}/gemini/data\' gemini/data &&\n+        export GEMINI_CONFIG=\'${annotation_databases.fields.path}\' &&\n+    ]]></token>\n+\n+    <token name="@MULTILN_SQL_EXPR_TO_CMDLN@">\n+        #set $sql_expr = str($multiline_sql_expr).strip()\n+        #if str($sql_expr):\n+            #set $sql_expr = $sql_expr.replace(\'\\r\\n\', \'\\n\')\n+            #set $sql_expr = $sql_expr.replace(\'\\r\', \'\\n\')\n+            #set $sql_expr = $sql_expr.replace(\'\\\\\\n\', \' \')\n+            $cmdln_param \'$sql_expr\'\n+        #end if\n+    </token>\n+\n+    <token name="@SET_COLS@">\n+        #if str($report.report_selector) == \'full\':\n+            #set cols = "*"\n+        #else:\n+            #if $report.columns and str($report.columns) != \'\':\n+                #set $cols = str($report.columns)\n+            #else\n+                #set $cols = \'\'\n+            #end if\n+            #if str($report.extra_cols).strip():\n+                #if $cols:\n+                    #set $cols = $cols + \', \' + str($report.extra_cols)\n+                
#else:\n+                    #set $cols = str($report.extra_cols)\n+                #end if\n+            #end if\n+            #if not $cols:\n+                #set $cols = "variant_id, gene"\n+            #end if\n+        #end if\n+    </token>\n+\n+    <token name="@COLUMN_SELECT@">\n+        @SET_COLS@\n+        #if $cols != "*"\n+            --columns \'$cols\'\n+        #end if\n+    </token>\n+\n+    <token name="@PARSE_REGION_ELEMENTS@"><![CDATA[\n+        #set $region_elements = []\n+        #for $r in $regions:\n+            ## The actual chromosome name needs to be single-quoted\n+            ## in SQL, so we need to quote the single quotes like the\n+            ## sanitize_query macro would if the whole was a parameter.\n+            #set $r_elements = ["chrom = \'\\"\'\\"\'%s\'\\"\'\\"\'" % str($r.chrom).strip()]\n+            #if str($r.start).strip():\n+                #silent $r_elements.append("start >= %d" % int($r.start))\n+            #end if\n+            #if str($r.stop).strip():\n+                #silent $r_elements.append("end <= %d" % int($r.stop))\n+            #end if\n+            #silent $region_elements.append("(%s)" % " AND ".join($r_elements))\n+        #end for\n+    ]]>\n+    </token>\n+</macros>\n'
b
diff -r 000000000000 -r 3123ce7acd0e readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Fri Jan 11 17:50:55 2019 -0500
[
@@ -0,0 +1,65 @@
+=========================
+Galaxy wrapper for GEMINI
+=========================
+
+
+GEMINI: a flexible framework for exploring genome variation
+
+GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of 
+the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, 
+and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very 
+powerful system for exploring genetic variation for disease and population genetics.
+
+Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically 
+annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, 
+OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows 
+one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an 
+enhanced SQL engine.
+
+Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_.
+
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`_.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
+
+
+=======
+History
+=======
+- 0.9.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
b
diff -r 000000000000 -r 3123ce7acd0e repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,4 @@
+<?xml version="1.0" ?>
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+    <repository changeset_revision="f57426daa04d" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/>
+</repositories>
\ No newline at end of file
b
diff -r 000000000000 -r 3123ce7acd0e static/images/gemini_mendel_errors.png
b
Binary file static/images/gemini_mendel_errors.png has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/anno.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno.bed Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,3 @@
+chr3 187000000 187150000
+chr3 187150000 187300000
+chr3 187300000 187450000
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_actionable_mutations_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_actionable_mutations_result.tabular Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,1 @@
+tum_name chrom start end ref alt gene impact is_somatic in_cosmic_census dgidb_info
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_amend.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend.ped Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,10 @@
+#family_id sample_id paternal_id maternal_id sex phenotype
+1 1_dad 0 0 -1 1
+1 1_mom 0 0 -1 1
+1 1_kid 1_dad 1_mom -1 2
+2 2_dad 0 0 -1 1
+2 2_mom 0 0 -1 1
+2 2_kid 2_dad 2_mom -1 2
+3 3_dad 0 0 -1 1
+3 3_mom 0 0 -1 1
+3 3_kid 3_dad 3_mom -1 2
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_amend.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_amend.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,127 @@\n+##fileformat=VCFv4.1\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">\n+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">\n+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">\n+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher\'s exact test to detect strand bias">\n+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">\n+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">\n+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">\n+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">\n+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">\n+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">\n+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">\n+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias">\n+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding 
name='..b'29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t135336656\t.\tG\tA\t38.34\t.\tAC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)\tGT:AD:DP:GQ:PL\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t135369532\t.\tT\tC\t122.62\t.\tAC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=
5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551)\tGT:AD:DP:GQ:PL\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t1/1:0,24:
24:66.14:729,66,0\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_amend_input.db
b
Binary file test-data/gemini_amend_input.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_annotate_result.db
b
Binary file test-data/gemini_annotate_result.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_auto_dom_input.db
b
Binary file test-data/gemini_auto_dom_input.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_auto_rec_input.db
b
Binary file test-data/gemini_auto_rec_input.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_burden_calpha_template.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_calpha_template.tabular Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,4 @@
+gene T c Z p_value
+SYCE1 .+ .+ .+ .+
+WDR37 .+ .+ .+ .+
+ASAH2C .+ .+ .+ .+
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_burden_count_highimpact_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_count_highimpact_result.tabular Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,2 @@
+gene 1_kid 3_kid
+WDR37 1 2
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_burden_count_nonsynonymous_result.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_burden_count_nonsynonymous_result.tabular Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,5 @@
+gene 1_dad 1_kid 1_mom 2_dad 2_kid 2_mom 3_dad 3_kid 3_mom
+SYCE1 0 1 0 0 1 0 0 1 0
+SPRN 0 1 0 0 1 0 1 1 1
+WDR37 0 1 0 0 0 0 0 2 0
+ASAH2C 2 3 2 1 3 1 1 2 1
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_comphets_input.db
b
Binary file test-data/gemini_comphets_input.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_de_novo_input.db
b
Binary file test-data/gemini_de_novo_input.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_is_somatic_result.db
b
Binary file test-data/gemini_is_somatic_result.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_load_input.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_load_input.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER||'..b'=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55164\trs3091274\tC\tA\t100.0\tPASS\tA
N=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55416\trs193242050\tG\tA\t100.0\tPAS
S\tAA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_load_result1.db
b
Binary file test-data/gemini_load_result1.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_load_result2.db
b
Binary file test-data/gemini_load_result2.db has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_versioned_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_versioned_databases.loc Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,3 @@
+## GEMINI versioned databases
+#DownloadDate dbkey DBversion Description Path
+1999-01-01 hg19 200 GEMINI annotations (test snapshot) ${__HERE__}/test-cache
b
diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_windower_template.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_windower_template.tabular Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,142 @@
+chr1 0 50000 .
+chr1 50000000 50050000 .
+chr1 100000000 100050000 .
+chr1 150000000 150050000 .
+chr1 200000000 200050000 .
+chr10 0 50000 .
+chr10 50000000 50050000 .
+chr10 100000000 100050000 .
+chr11 0 50000 .
+chr11 50000000 50050000 .
+chr11 100000000 100050000 .
+chr11_gl000202_random 0 40103 .
+chr12 0 50000 .
+chr12 50000000 50050000 .
+chr12 100000000 100050000 .
+chr13 0 50000 .
+chr13 50000000 50050000 .
+chr13 100000000 100050000 .
+chr14 0 50000 .
+chr14 50000000 50050000 .
+chr14 100000000 100050000 .
+chr15 0 50000 .
+chr15 50000000 50050000 .
+chr15 100000000 100050000 .
+chr16 0 50000 .
+chr16 50000000 50050000 .
+chr17 0 50000 .
+chr17 50000000 50050000 .
+chr17_ctg5_hap1 0 50000 .
+chr17_gl000203_random 0 37498 .
+chr17_gl000204_random 0 50000 .
+chr17_gl000205_random 0 50000 .
+chr17_gl000206_random 0 41001 .
+chr18 0 50000 .
+chr18 50000000 50050000 .
+chr18_gl000207_random 0 4262 .
+chr19 0 50000 .
+chr19 50000000 50050000 .
+chr19_gl000208_random 0 50000 .
+chr19_gl000209_random 0 50000 .
+chr1_gl000191_random 0 50000 .
+chr1_gl000192_random 0 50000 .
+chr2 0 50000 .
+chr2 50000000 50050000 .
+chr2 100000000 100050000 .
+chr2 150000000 150050000 .
+chr2 200000000 200050000 .
+chr20 0 50000 .
+chr20 50000000 50050000 .
+chr21 0 50000 .
+chr21_gl000210_random 0 27682 .
+chr22 0 50000 .
+chr22 50000000 50050000 .
+chr3 0 50000 .
+chr3 50000000 50050000 .
+chr3 100000000 100050000 .
+chr3 150000000 150050000 .
+chr4 0 50000 .
+chr4 50000000 50050000 .
+chr4 100000000 100050000 .
+chr4 150000000 150050000 .
+chr4_ctg9_hap1 0 50000 .
+chr4_gl000193_random 0 50000 .
+chr4_gl000194_random 0 50000 .
+chr5 0 50000 .
+chr5 50000000 50050000 .
+chr5 100000000 100050000 .
+chr5 150000000 150050000 .
+chr6 0 50000 .
+chr6 50000000 50050000 .
+chr6 100000000 100050000 .
+chr6 150000000 150050000 .
+chr6_apd_hap1 0 50000 .
+chr6_cox_hap2 0 50000 .
+chr6_dbb_hap3 0 50000 .
+chr6_mann_hap4 0 50000 .
+chr6_mcf_hap5 0 50000 .
+chr6_qbl_hap6 0 50000 .
+chr6_ssto_hap7 0 50000 .
+chr7 0 50000 .
+chr7 50000000 50050000 .
+chr7 100000000 100050000 .
+chr7 150000000 150050000 .
+chr7_gl000195_random 0 50000 .
+chr8 0 50000 .
+chr8 50000000 50050000 .
+chr8 100000000 100050000 .
+chr8_gl000196_random 0 38914 .
+chr8_gl000197_random 0 37175 .
+chr9 0 50000 .
+chr9 50000000 50050000 .
+chr9 100000000 100050000 .
+chr9_gl000198_random 0 50000 .
+chr9_gl000199_random 0 50000 .
+chr9_gl000200_random 0 50000 .
+chr9_gl000201_random 0 36148 .
+chrM 0 16571 .
+chrUn_gl000211 0 50000 .
+chrUn_gl000212 0 50000 .
+chrUn_gl000213 0 50000 .
+chrUn_gl000214 0 50000 .
+chrUn_gl000215 0 50000 .
+chrUn_gl000216 0 50000 .
+chrUn_gl000217 0 50000 .
+chrUn_gl000218 0 50000 .
+chrUn_gl000219 0 50000 .
+chrUn_gl000220 0 50000 .
+chrUn_gl000221 0 50000 .
+chrUn_gl000222 0 50000 .
+chrUn_gl000223 0 50000 .
+chrUn_gl000224 0 50000 .
+chrUn_gl000225 0 50000 .
+chrUn_gl000226 0 15008 .
+chrUn_gl000227 0 50000 .
+chrUn_gl000228 0 50000 .
+chrUn_gl000229 0 19913 .
+chrUn_gl000230 0 43691 .
+chrUn_gl000231 0 27386 .
+chrUn_gl000232 0 40652 .
+chrUn_gl000233 0 45941 .
+chrUn_gl000234 0 40531 .
+chrUn_gl000235 0 34474 .
+chrUn_gl000236 0 41934 .
+chrUn_gl000237 0 45867 .
+chrUn_gl000238 0 39939 .
+chrUn_gl000239 0 33824 .
+chrUn_gl000240 0 41933 .
+chrUn_gl000241 0 42152 .
+chrUn_gl000242 0 43523 .
+chrUn_gl000243 0 43341 .
+chrUn_gl000244 0 39929 .
+chrUn_gl000245 0 36651 .
+chrUn_gl000246 0 38154 .
+chrUn_gl000247 0 36422 .
+chrUn_gl000248 0 39786 .
+chrUn_gl000249 0 38502 .
+chrX 0 50000 .
+chrX 50000000 50050000 .
+chrX 100000000 100050000 .
+chrX 150000000 150050000 .
+chrY 0 50000 .
+chrY 50000000 50050000 .
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini-config.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini-config.yaml Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,15 @@
+annotation_dir: gemini/data
+versions:
+  ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4
+  ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2
+  ExAC.r0.3.sites.vep.tidy.vcf.gz: 4
+  GRCh37-gms-mappability.vcf.gz: 2
+  clinvar_20170130.tidy.vcf.gz: 5
+  cosmic-v68-GRCh37.tidy.vcf.gz: 3
+  dbsnp.b147.20160601.tidy.vcf.gz: 1
+  detailed_gene_table_v75: 2
+  geno2mp.variants.tidy.vcf.gz: 1
+  gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2
+  hg19.rmsk.bed.gz: 2
+  summary_gene_table_v75: 2
+  whole_genome_SNVs.tsv.compressed.gz: 2
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz
b
Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz
b
Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz
b
Binary file test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz
b
Binary file test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,19 @@
+ARHH "RAS homolog gene family, member H (TTF)" 399 4 4p13  yes NHL L Dom T BCL6
+BCL5 B-cell CLL/lymphoma 5 603 17 17q22  yes CLL L Dom T MYC
+BCL6 B-cell CLL/lymphoma 6 604 3 3q27  yes "NHL, CLL" L Dom "T, Mis" "IG loci, ZNFN1A1, LCP1, PIM1, TFRC, CIITA, NACA, HSPCB, HSPCA, HIST1H4I, IL21R,  POU2AF1, ARHH, EIF4A2, SFRS3"
+BCOR BCL6 corepressor 54880 X Xp11.4 yes "retinoblastoma, AML, APL (translocation)" Rec "F, N, S, T" RARA yes oculo-facio-cardio-dental genetic
+CIITA "class II, major histocompatibility complex, transactivator" 4261 16 16p13 yes "PMBL, Hodgkin lymphoma" L Dom T "FLJ27352, CD274, CD273, RALGDS, RUNDC2A, C16orf75, BCL6"
+EIF4A2 "eukaryotic translation initiation factor 4A, isoform 2" 1974 3 3q27.3 yes NHL L Dom T BCL6
+HIST1H4I "histone 1, H4i (H4FM)" 8294 6 6p21.3  yes NHL L Dom T BCL6
+HSPCA "heat shock 90kDa protein 1, alpha" 3320 14 14q32.31 yes NHL L Dom T BCL6
+HSPCB "heat shock 90kDa protein 1, beta" 3326 6 6p12 yes NHL L Dom T BCL6
+IGH@ immunoglobulin heavy locus 3492 14 14q32.33 yes "MM, Burkitt lymphoma, NHL, CLL, B-ALL, MALT, MLCLS" L Dom T "MYC, FGFR3,PAX5, IRTA1, IRF4, CCND1, BCL9, BCL8, BCL6, BCL2, BCL3, BCL10, BCL11A. LHX4, DDX6, NFKB2, PAFAH1B2, PCSK7, CRLF2"
+IKZF1 IKAROS family zinc finger 1 10320 7 7p12.2 yes "ALL, DLBCL" L "Rec,Dom" "D,T" BCL6
+IL21R interleukin 21 receptor 50615 16 16p11  yes NHL L Dom T BCL6
+LCP1 lymphocyte cytosolic protein 1 (L-plastin) 3936 13  13q14.1-q14.3  yes NHL  L Dom T BCL6
+MYC v-myc myelocytomatosis viral oncogene homolog (avian) 4609 8 8q24.12-q24.13  yes "Burkitt lymphoma, amplified in other cancers, B-CLL" "L, E" Dom "A, T" "IGK@, BCL5, BCL7A , BTG1, TRA@, IGH@"
+NACA nascent-polypeptide-associated complex alpha polypeptide 4666 12 12q23-q24.1  yes NHL L Dom T BCL6
+PIM1 pim-1 oncogene 5292 6 6p21.2  yes NHL L Dom T BCL6
+POU2AF1 "POU domain, class 2, associating factor 1 (OBF1)" 5450 11 11q23.1  yes NHL L Dom T BCL6
+SFRS3 "splicing factor, arginine/serine-rich 3" 6428 6 6p21 yes follicular lymphoma L Dom T BCL6
+TFRC "transferrin receptor (p90, CD71)" 7037 3 3q29  yes NHL L Dom T BCL6
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz
b
Binary file test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/detailed_gene_table_v75
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/detailed_gene_table_v75 Fri Jan 11 17:50:55 2019 -0500
b
b'@@ -0,0 +1,114 @@\n+Chromosome\tGene_name\tIs_hgnc\tEnsembl_gene_id\tEnsembl_transcript_id\tBiotype\tTranscript_status\tCCDS_id\tHGNC_id\tCDS_length\tProtein_length\tTranscript_start\tTranscript_end\tstrand\tSynonyms\tRvis_pct\tentrez_gene_id\tmammalian_phenotype_id\n+chr3\tNone\t0\tENSG00000239093\tENST00000459452\tsnoRNA\tKNOWN\tNone\tNone\tNone\tNone\t187141103\t187141207\t1\tNone\tNone\tNone\tNone\n+chr3\tNone\t0\tENSG00000228952\tENST00000440726\tlincRNA\tKNOWN\tNone\tNone\tNone\tNone\t187166633\t187167238\t1\tNone\tNone\tNone\tNone\n+chr3\tNone\t0\tENSG00000223401\tENST00000450760\tlincRNA\tKNOWN\tNone\tNone\tNone\tNone\t187461474\t187463208\t1\tNone\tNone\tNone\tNone\n+chr3\tMASP\t0\tENSG00000127241\tENST00000337774\tprotein_coding\tKNOWN\tCCDS33907\tNone\t2100\t699\t186935942\t187009810\t-1\tPRSS5,MASP1,CRARF\t16.8141071\t5648\tNone\n+chr3\tPRSS5\t0\tENSG00000127241\tENST00000337774\tprotein_coding\tKNOWN\tCCDS33907\tNone\t2100\t699\t186935942\t187009810\t-1\tMASP1,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP1\t1\tENSG00000127241\tENST00000337774\tprotein_coding\tKNOWN\tCCDS33907\t6901\t2100\t699\t186935942\t187009810\t-1\tPRSS5,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tCRARF\t0\tENSG00000127241\tENST00000337774\tprotein_coding\tKNOWN\tCCDS33907\tNone\t2100\t699\t186935942\t187009810\t-1\tPRSS5,MASP1,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP\t0\tENSG00000127241\tENST00000296280\tprotein_coding\tKNOWN\tCCDS33908\tNone\t2187\t728\t186951870\t187009646\t-1\tPRSS5,MASP1,CRARF\t16.8141071\t5648\tNone\n+chr3\tPRSS5\t0\tENSG00000127241\tENST00000296280\tprotein_coding\tKNOWN\tCCDS33908\tNone\t2187\t728\t186951870\t187009646\t-1\tMASP1,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP1\t1\tENSG00000127241\tENST00000296280\tprotein_coding\tKNOWN\tCCDS33908\t6901\t2187\t728\t186951870\t187009646\t-1\tPRSS5,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tCRARF\t0\tENSG00000127241\tENST00000296280\tprotein_coding\tKNOWN\tCCDS33908\tNone\t2187\t728\t186951870\t187009646\t
-1\tPRSS5,MASP1,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP\t0\tENSG00000127241\tENST00000392472\tprotein_coding\tPUTATIVE\tNone\tNone\t1848\t615\t186951872\t187009765\t-1\tPRSS5,MASP1,CRARF\t16.8141071\t5648\tNone\n+chr3\tPRSS5\t0\tENSG00000127241\tENST00000392472\tprotein_coding\tPUTATIVE\tNone\tNone\t1848\t615\t186951872\t187009765\t-1\tMASP1,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP1\t1\tENSG00000127241\tENST00000392472\tprotein_coding\tPUTATIVE\tNone\t6901\t1848\t615\t186951872\t187009765\t-1\tPRSS5,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tCRARF\t0\tENSG00000127241\tENST00000392472\tprotein_coding\tPUTATIVE\tNone\tNone\t1848\t615\t186951872\t187009765\t-1\tPRSS5,MASP1,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP\t0\tENSG00000127241\tENST00000495249\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t186953655\t187009542\t-1\tPRSS5,MASP1,CRARF\t16.8141071\t5648\tNone\n+chr3\tPRSS5\t0\tENSG00000127241\tENST00000495249\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t186953655\t187009542\t-1\tMASP1,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP1\t1\tENSG00000127241\tENST00000495249\tprocessed_transcript\tPUTATIVE\tNone\t6901\tNone\tNone\t186953655\t187009542\t-1\tPRSS5,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tCRARF\t0\tENSG00000127241\tENST00000495249\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t186953655\t187009542\t-1\tPRSS5,MASP1,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP\t0\tENSG00000127241\tENST00000169293\tprotein_coding\tKNOWN\tCCDS33909\tNone\t1143\t380\t186964149\t187009745\t-1\tPRSS5,MASP1,CRARF\t16.8141071\t5648\tNone\n+chr3\tPRSS5\t0\tENSG00000127241\tENST00000169293\tprotein_coding\tKNOWN\tCCDS33909\tNone\t1143\t380\t186964149\t187009745\t-1\tMASP1,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP1\t1\tENSG00000127241\tENST00000169293\tprotein_coding\tKNOWN\tCCDS33909\t6901\t1143\t380\t186964149\t187009745\t-1\tPRSS5,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tCRARF\t0\tENSG00000127241\tENST00000169293\tprotein_coding\tKNOWN
\tCCDS33909\tNone\t1143\t380\t186964149\t187009745\t-1\tPRSS5,MASP1,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP\t0\tENSG00000127241\tENST00000392470\tprotein_coding\tPUTATIVE\tNone\tNone\t1065\t354\t186964947\t187009670\t-1\tPRSS5,MASP1,CRARF\t16.8141071\t5648\tNone\n+chr3\tPRSS5\t0\tENSG00000127241\tENST00000392470\tprotein_coding\tPUTATIVE\tNone\tNone\t1065\t354\t186964947\t187009670\t-1\tMASP1,CRARF,MASP\t16.8141071\t5648\tNone\n+chr3\tMASP1\t1\tENSG00000127241\tENST00'..b'otein_coding\tKNOWN\tNone\t1001\t312\t103\t187449568\t187455732\t-1\tLAZ3,ZNF51,ZBTB27,BCL5,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tZBTB27\t0\tENSG00000113916\tENST00000438077\tprotein_coding\tKNOWN\tNone\tNone\t312\t103\t187449568\t187455732\t-1\tLAZ3,ZNF51,BCL6,BCL5,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tZNF51\t0\tENSG00000113916\tENST00000470319\tretained_intron\tKNOWN\tNone\tNone\tNone\tNone\t187452233\t187463260\t-1\tLAZ3,BCL5,BCL6,ZBTB27,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tLAZ3\t0\tENSG00000113916\tENST00000470319\tretained_intron\tKNOWN\tNone\tNone\tNone\tNone\t187452233\t187463260\t-1\tBCL5,ZNF51,BCL6,ZBTB27,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tBCL5\t0\tENSG00000113916\tENST00000470319\tretained_intron\tKNOWN\tNone\tNone\tNone\tNone\t187452233\t187463260\t-1\tLAZ3,ZNF51,BCL6,ZBTB27,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tBCL6A\t0\tENSG00000113916\tENST00000470319\tretained
_intron\tKNOWN\tNone\tNone\tNone\tNone\t187452233\t187463260\t-1\tLAZ3,ZNF51,BCL6,BCL5,ZBTB27\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tBCL6\t1\tENSG00000113916\tENST00000470319\tretained_intron\tKNOWN\tNone\t1001\tNone\tNone\t187452233\t187463260\t-1\tLAZ3,ZNF51,ZBTB27,BCL5,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tZBTB27\t0\tENSG00000113916\tENST00000470319\tretained_intron\tKNOWN\tNone\tNone\tNone\tNone\t187452233\t187463260\t-1\tLAZ3,ZNF51,BCL6,BCL5,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tZNF51\t0\tENSG00000113916\tENST00000496823\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t187453975\t187463247\t-1\tLAZ3,BCL5,BCL6,ZBTB27,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tLAZ3\t0\tENSG00000113916\tENST00000496823\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t187453975\t187463247\t-1\tBCL5,ZNF51,BCL6,ZBTB27,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tBCL5\t0\tENSG00000113916\tENST00000496823\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t187453975\t187463247\t-1\tLAZ3,ZNF51,BCL6,ZBTB27,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tBCL6A\t0\tENSG00000113916\tENST00000496823\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t187453975\t187463247\t-1\tLAZ3,ZNF51,BCL6,BCL5,ZBTB27\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376
,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tBCL6\t1\tENSG00000113916\tENST00000496823\tprocessed_transcript\tPUTATIVE\tNone\t1001\tNone\tNone\t187453975\t187463247\t-1\tLAZ3,ZNF51,ZBTB27,BCL5,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n+chr3\tZBTB27\t0\tENSG00000113916\tENST00000496823\tprocessed_transcript\tPUTATIVE\tNone\tNone\tNone\tNone\t187453975\t187463247\t-1\tLAZ3,ZNF51,BCL6,BCL5,BCL6A\t23.5727766\t604\tMP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz
b
Binary file test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi
b
Binary file test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz
b
Binary file test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi
b
Binary file test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.CpG.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.CpG.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.cytoband.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.cytoband.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.dgv.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.dgv.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gwas.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.gwas.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.rmsk.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.rmsk.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.segdup.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.segdup.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz
b
Binary file test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hprd_interaction_edges.gz
b
Binary file test-data/test-cache/gemini/data/hprd_interaction_edges.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl66
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl66 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,30 @@
+B8PSA7 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None
+C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+F5H2J0 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion
+Q5HYM1 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None
+Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None
+Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None
+Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None
+Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000392472 None None
+Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000541811 None None
+none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl67
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl67 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,30 @@
+B8PSA7 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None
+C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+F5H2J0 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion
+Q5HYM1 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None
+Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None
+Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None
+Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None
+Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000392472 None None
+Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000541811 None None
+none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl68
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl68 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,26 @@
+C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion
+Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None
+Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None
+none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl69
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl69 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,26 @@
+C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion
+Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None
+Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None
+none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl70
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl70 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,25 @@
+C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion
+Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None
+Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None
+none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl71
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl71 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,26 @@
+C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None
+C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None
+C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None
+C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None
+F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades
+P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction
+P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion
+Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None
+Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None
+none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz
b
Binary file test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/summary_gene_table_v75
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/gemini/data/summary_gene_table_v75 Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,23 @@
+Chromosome Gene_name Is_hgnc Ensembl_gene_id HGNC_id Synonyms Rvis_pct Strand Transcript_min_start Transcript_max_end Mammalian_phenotype_id
+chr3 None 0 ENSG00000239093 None None None 1 187141103 187141207 None
+chr3 None 0 ENSG00000228952 None None None 1 187166633 187167238 None
+chr3 None 0 ENSG00000223401 None None None 1 187461474 187463208 None
+chr3 MASP 0 ENSG00000127241 None PRSS5,MASP1,CRARF 16.8141071 -1 186935942 187009810 None
+chr3 PRSS5 0 ENSG00000127241 None MASP1,CRARF,MASP 16.8141071 -1 186935942 187009810 None
+chr3 MASP1 1 ENSG00000127241 6901 PRSS5,CRARF,MASP 16.8141071 -1 186935942 187009810 None
+chr3 CRARF 0 ENSG00000127241 None PRSS5,MASP1,MASP 16.8141071 -1 186935942 187009810 None
+chr3 IFRG28 0 ENSG00000136514 None RTP4,Z3CXXC4 94.35008257 1 187086120 187089864 None
+chr3 RTP4 1 ENSG00000136514 23992 IFRG28,Z3CXXC4 94.35008257 1 187086120 187089864 None
+chr3 Z3CXXC4 0 ENSG00000136514 None IFRG28,RTP4 94.35008257 1 187086120 187089864 None
+chr3 SST 1 ENSG00000157005 11329 SMST 78.16112291 -1 187386694 187388187 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378
+chr3 SMST 0 ENSG00000157005 None SST 78.16112291 -1 187386694 187388187 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378
+chr3 Z3CXXC2 0 ENSG00000198471 None RTP2,MGC78665 69.20853975 -1 187416047 187420345 MP:0005389
+chr3 RTP2 1 ENSG00000198471 32486 Z3CXXC2,MGC78665 69.20853975 -1 187416047 187420345 MP:0005389
+chr3 MGC78665 0 ENSG00000198471 None Z3CXXC2,RTP2 69.20853975 -1 187416047 187420345 MP:0005389
+chr3 None 0 ENSG00000228804 None None None 1 187420101 187451637 None
+chr3 ZNF51 0 ENSG00000113916 None LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3 LAZ3 0 ENSG00000113916 None BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3 BCL5 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3 BCL6A 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3 BCL6 1 ENSG00000113916 1001 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
+chr3 ZBTB27 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz
b
Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi
b
Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/README.rst Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,25 @@
+Prepare Gemini annotation files and test databases for tool tests
+=================================================================
+
+Each version of GEMINI is tied to a particular set of annotation files and to
+a particular database version.
+
+The ``build-gemini-testdata.sh`` script in this folder should be used to
+regenerate the annotation files and the test databases whenever the GEMINI
+version required by the tool wrappers gets upgraded.
+
+The script requires a working GEMINI installation at the targeted version and
+a folder with GEMINI's original annotation files, and can be executed with::
+
+  sh build-gemini-testdata.sh path/to/gemini/annotation/files
+  
+It will regenerate the annotation files inside
+``test-data/test-cache/gemini/data`` and rebuild the ``*.db`` files in
+``test-data``.
+
+.. Note::
+
+   If the version of GEMINI that you are upgrading to uses a
+   ``gemini-config.yaml`` file that is different from the one found in
+   ``test-data/test-cache``, you will have to update this file manually (make
+   sure you leave the line ``annotation_dir: gemini/data`` unchanged in the
+   process).
+
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/anno.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/anno.bed Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,3 @@
+chr3 187000000 187150000
+chr3 187150000 187300000
+chr3 187300000 187450000
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/anno.bed.gz
b
Binary file test-data/util/build-data/anno.bed.gz has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/anno.bed.gz.tbi
b
Binary file test-data/util/build-data/anno.bed.gz.tbi has changed
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/gemini_load_input.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/gemini_load_input.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER||'..b'=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55164\trs3091274\tC\tA\t100.0\tPASS\tA
N=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55416\trs193242050\tG\tA\t100.0\tPAS
S\tAA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_dom.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_dom.ped Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,9 @@
+1 1_dad 0 0 -1 1
+1 1_mom 0 0 -1 1
+1 1_kid 1_dad 1_mom -1 2
+2 2_dad 0 0 -1 1
+2 2_mom 0 0 -1 2
+2 2_kid 2_dad 2_mom -1 2
+3 3_dad 0 0 -1 2
+3 3_mom 0 0 -1 -9
+3 3_kid 3_dad 3_mom -1 2
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_dom.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_dom.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,127 @@\n+##fileformat=VCFv4.1\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">\n+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">\n+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">\n+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher\'s exact test to detect strand bias">\n+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">\n+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">\n+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">\n+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">\n+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">\n+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">\n+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">\n+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias">\n+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding 
name='..b'IGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566)\tGT:AD:DP:GQ:PL\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t1/1:0,24:24:66.14:729,66,0\t0/1:1,37:59:87.16:940,87,0\t0/1:0,29:49:78.20:899,78,0\t1/1:0,24:64:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t48003992\t.\tC\tT\t1047.87\t.\tAC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)\tGT:AD:DP:GQ:PL\t0/0:1,37:39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/0:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:97.16:940,87,0\t0/0:0,29:29:98.20:899,78,0\t0/1:0,24:24:96.14:729,66,0\n+chr10\t48004992\t.\tC\tT\t1047.87\t.\tAC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056)\tGT:AD:DP:GQ:PL\t0/0:1,37:39:87.16:940,87,
0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/0:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t135336656\t.\tG\tA\t38.34\t.\tAC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)\tGT:AD:DP:GQ:PL\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:3
9:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_rec.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_rec.ped Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,10 @@
+#family_id sample_id paternal_id maternal_id sex phenotype
+1 1_dad 0 0 -1 1
+1 1_mom 0 0 -1 1
+1 1_kid 1_dad 1_mom -1 2
+2 2_dad 0 0 -1 1
+2 2_mom 0 0 -1 1
+2 2_kid 2_dad 2_mom -1 2
+3 3_dad 0 0 -1 1
+3 3_mom 0 0 -1 1
+3 3_kid 3_dad 3_mom -1 2
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_rec.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.auto_rec.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,127 @@\n+##fileformat=VCFv4.1\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">\n+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">\n+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">\n+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher\'s exact test to detect strand bias">\n+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">\n+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">\n+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">\n+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">\n+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">\n+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">\n+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">\n+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias">\n+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding 
name='..b'29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t135336656\t.\tG\tA\t38.34\t.\tAC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)\tGT:AD:DP:GQ:PL\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t135369532\t.\tT\tC\t122.62\t.\tAC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=
5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551)\tGT:AD:DP:GQ:PL\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t1/1:0,24:
24:66.14:729,66,0\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.comp_het.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.comp_het.ped Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,13 @@
+#Family_ID Individual_ID Paternal_ID Maternal_ID Sex Phenotype ethnicity
+1 child_1 dad_1 mom_1 1 2 caucasian
+2 child_2 dad_2 mom_2 1 2 caucasian
+2 dad_2 -9 -9 1 1 caucasian
+2 mom_2 -9 -9 2 1 caucasian
+1 dad_1 -9 -9 1 1 caucasian
+1 mom_1 -9 -9 2 1 caucasian
+3 child_3 dad_3 mom_3 1 2 caucasian
+3 dad_3 -9 -9 1 1 caucasian
+3 mom_3 -9 -9 2 1 caucasian
+4 child_4 dad_4 mom_4 1 2 caucasianNEuropean
+4 dad_4 -9 -9 1 1 caucasianNEuropean
+4 mom_4 -9 -9 2 1 caucasianNEuropean
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.comp_het.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.comp_het.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,120 @@\n+##fileformat=VCFv4.1\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">\n+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">\n+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">\n+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher\'s exact test to detect strand bias">\n+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">\n+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">\n+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">\n+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">\n+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">\n+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">\n+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">\n+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias">\n+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/all.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=20 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub 
sit'..b'e|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1)\tGT:AD:DP:GQ:PL\t0/0:247,3:250:51.15:0,51,628\t0/0:250,0:250:60.18:0,60,746\t0/0:250,0:250:54.17:0,54,685\t0/0:249,1:250:54.15:0,54,644\t0/0:233,0:233:81.25:0,81,1014\t0/0:235,4:239:45.13:0,45,549\t0/0:249,0:250:45.13:0,45,514\t0/0:249,1:250:78.20:0,78,853\t0/0:247,0:247:90.24:0,90,1012\t0/1:227,17:244:0.06:0,0,670\t0/1:214,11:225:4.62:5,0,542\t0/1:236,13:249:71.41:71,0,448\n+1\t17730\t.\tC\tA\t102.87\t.\tAC=5;AF=0.208;AN=24;BaseQRankSum=-11.508;DP=2968;DS;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=1.0610;InbreedingCoeff=-0.2498;MQ=13.12;MQ0=24;MQRankSum=-4.433;QD=0.08;ReadPosRankSum=1.952;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1),INTRON(MODIFIER|||||W
ASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000430492|7|1)\tGT:AD:DP:GQ:PL\t0/0:246,4:250:36.11:0,36,464\t0/0:244,5:249:57.17:0,57,727\t0/0:244,6:250:48.15:0,48,628\t0/0:247,3:250:51.13:0,51,621\t0/1:242,8:250:26.26:26,0,830\t0/0:246,4:250:48.14:0,48,601\t0/1:238,11:250:48.54:49,0,386\t0/0:244,5:249:42.12:0,42,491\t0/0:239,3:243:23.74:0,24,609\t0/1:221,13:234:13.85:14,0,482\t0/1:232,11:243:7:7,0,501\t0/1:238,12:250:73.18:73,0,368\n+1\t17746\t.\tA\tG\t607.7\t.\tAC=8;AF=0.333;AN=24;BaseQRankSum=13.191;DP=2993;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=0.4155;InbreedingCoeff=-0.5280;MQ=11.87;MQ0=5;MQRankSum=-4.672;QD=0.30;ReadPosRankSum=3.574;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|3|1)\tGT:AD:DP:GQ:PL\t0/0:206,44:250:7.45:0,7,373\t0/0:221,29:250:5.12:0,5,468\t0/0:224,26:250:3.61:0,4,382\t0/0:216,34:250:9.49:0,9,405\t0/1:243,7:250:43.06:43,0,781\t0/1:195,55:250:99:113,0,232\t0/1:218,32:250:44.30:44,0,212\t0/1:219,31:250:0.03:0,0,330\t0/1:211,39:250:74.42:74,0,236\t0/1:209,34:243:68.40:68,0,
298\t0/1:172,77:249:99:193,0,137\t0/1:219,30:250:99:137,0,197\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.de_novo.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.de_novo.ped Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,9 @@
+1 1_dad 0 0 -1 1
+1 1_mom 0 0 -1 1
+1 1_kid 1_dad 1_mom -1 2
+2 2_dad 0 0 -1 1
+2 2_mom 0 0 -1 1
+2 2_kid 2_dad 2_mom -1 2
+3 3_dad 0 0 -1 1
+3 3_mom 0 0 -1 1
+3 3_kid 3_dad 3_mom -1 2
\ No newline at end of file
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.de_novo.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-data/test.de_novo.vcf Fri Jan 11 17:50:55 2019 -0500
[
b'@@ -0,0 +1,127 @@\n+##fileformat=VCFv4.1\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">\n+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">\n+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">\n+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher\'s exact test to detect strand bias">\n+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">\n+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">\n+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">\n+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">\n+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">\n+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">\n+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">\n+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. 
Ref read position bias">\n+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding 
name='..b':39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,23:24:66.14:729,66,0\n+chr10\t135336656\t.\tG\tA\t38.34\t.\tAC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|)\tGT:AD:DP:GQ:PL\t0/0:1,37:39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/0:1,37:39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\t0/1:1,37:39:87.16:940,87,0\t0/1:0,29:29:78.20:899,78,0\t0/1:0,24:24:66.14:729,66,0\n+chr10\t135369532\t.\tT\tC\t122.62\t.\tAC=2;AF=0.25;AN=8;BaseQRankS
um=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551)\tGT:AD:DP:GQ:PL\t0/0:1,37:39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,22:24:66.14:729,66,0\t0/0:1,37:39:87.16:940,87,0\t0/0:0,29:29:78.20:899,78,0\t0/1:0,21:24:66.14:729,66,0\t0/0:1,37:50:87.16:940,87,0\t0/0:0,29
:50:78.20:899,78,0\t0/1:0,24:50:66.14:729,66,0\n'
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-gemini-testdata.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/build-gemini-testdata.sh Fri Jan 11 17:50:55 2019 -0500
[
@@ -0,0 +1,72 @@
+cd "$(dirname "$0")"
+
+export GEMINI_CONFIG=../test-cache
+OUT_PTH=$GEMINI_CONFIG/gemini/data
+GENOMIC_REGION=3:187000000-187500000
+
+
+if [ -n "$1" ]; then
+
+IN_PTH="$1"
+# downsample all vcf and bed annotation files to the region of interest and reindex
+for vcf in `ls $IN_PTH/*.gz | grep -v hprd_interaction_edges.gz -`
+do
+    python ./shrink_tabix.py $vcf -r $GENOMIC_REGION -o $OUT_PTH/`basename $vcf`
+done
+
+# downsample gene_table files to the region of interest
+echo "$IN_PTH/summary_gene_table_v75 -> $OUT_PTH/summary_gene_table_v75"
+python ./shrink_simple_tab.py $IN_PTH/summary_gene_table_v75 -r chr$GENOMIC_REGION -c 0 8 9 -n 1 -o $OUT_PTH/summary_gene_table_v75
+
+echo "$IN_PTH/detailed_gene_table_v75 -> $OUT_PTH/detailed_gene_table_v75"
+python ./shrink_simple_tab.py $IN_PTH/detailed_gene_table_v75 -r chr$GENOMIC_REGION -c 0 11 12 -n 1 -o $OUT_PTH/detailed_gene_table_v75
+
+# filter kegg_pathway files to retain only records of the genes listed
+# in the downsampled summary_gene_table
+for kegg in `ls $IN_PTH/kegg_pathways_*`
+do
+    echo "$kegg -> $OUT_PTH/`basename $kegg`"
+    cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $kegg > $OUT_PTH/`basename $kegg`
+done
+
+# filter hprd_interaction file to retain only records of the genes listed
+# in the downsampled summary_gene_table
+echo "$IN_PTH/hprd_interaction_edges.gz -> $OUT_PTH/hprd_interaction_edges.gz"
+bgzip -dc $IN_PTH/hprd_interaction_edges.gz > $OUT_PTH/hprd_interaction_edges
+cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Ff - $OUT_PTH/hprd_interaction_edges | bgzip > $OUT_PTH/hprd_interaction_edges.gz
+rm $OUT_PTH/hprd_interaction_edges
+
+# filter cancer_gene_census file to retain only records of the genes listed
+# in the downsampled summary_gene_table;
+# TO DO: make the filter stricter by looking for matches only in the first
+# column of the cancer_gene_census file (but the file is relatively small anyway)
+echo "$IN_PTH/cancer_gene_census.20140120.tsv -> $OUT_PTH/cancer_gene_census.20140120.tsv"
+cut -f2 $OUT_PTH/summary_gene_table_v75 | grep -Fv None | grep -Fwf - $IN_PTH/cancer_gene_census.20140120.tsv > $OUT_PTH/cancer_gene_census.20140120.tsv
+
+else
+    echo "no path to gemini annotation files provided - only building test databases"
+fi
+
+
+# now use gemini load to build the test databases
+echo "Building gemini test databases"
+echo "Test databases for gemini_load"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff ../gemini_load_result1.db
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff --skip-gene-tables --no-load-genotypes ../gemini_load_result2.db
+echo "Test database for gemini_amend"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -t snpEff ../gemini_amend_input.db
+echo "Test database for gemini_annotate"
+bgzip -c build-data anno.bed > build-data/anno.bed.gz
+tabix --force -p bed build-data/anno.bed.gz
+cp ../gemini_load_result1.db ../gemini_annotate_result.db
+gemini --annotation-dir $OUT_PTH annotate -f build-data/anno.bed.gz -c anno5 -a count ../gemini_annotate_result.db
+echo "Test database for gemini_set_somatic"
+cp ../gemini_load_result1.db ../gemini_is_somatic_result.db
+gemini set_somatic --min-somatic-score 5.65 ../gemini_is_somatic_result.db
+echo "Test database for gemini_de_novo and gemini_mendel_errors"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.de_novo.vcf -p build-data/test.de_novo.ped -t snpEff ../gemini_de_novo_input.db
+echo "Test database for gemini_comp_hets"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.comp_het.vcf -p build-data/test.comp_het.ped -t snpEff ../gemini_comphets_input.db
+echo "Test databases for gemini_autosomal"
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -p build-data/test.auto_rec.ped -t snpEff ../gemini_auto_rec_input.db
+gemini --annotation-dir $OUT_PTH load --skip-cadd --skip-gerp-bp -v build-data/test.auto_dom.vcf -p build-data/test.auto_dom.ped -t snpEff ../gemini_auto_dom_input.db
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/shrink_simple_tab.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/shrink_simple_tab.py Fri Jan 11 17:50:55 2019 -0500
[
@@ -0,0 +1,61 @@
+from __future__ import print_function
+
+import argparse
+from functools import partial
+
+
+def keep_line(line, pos_cols, region):  # True if a tab-separated record (bytes) has its start or stop inside region; otherwise falls through and returns None
+    fields = line.rstrip().split(b'\t')  # line must be bytes: the separator is a bytes literal
+    if fields[pos_cols[0]] == region[0]:  # same chromosome
+        if (
+            region[1] < int(fields[pos_cols[1]]) < region[2]  # start column strictly inside (region[1], region[2]); int() raises ValueError on non-numeric fields
+        ) or (
+            region[1] < int(fields[pos_cols[2]]) < region[2]  # stop column strictly inside the same open interval
+        ):
+            return True  # NOTE(review): a record spanning the whole region (start before, stop after) is not kept - confirm this is intended
+
+
+def main(infile, ofile, num_header_lines):  # copy the first num_header_lines lines plus every line accepted by keep_line from infile to ofile
+    print(infile, '->', ofile)
+    with open(infile, 'rb') as i:  # binary mode: lines are read and written as bytes
+        with open(ofile, 'wb') as o:
+            # copy header lines
+            for c in range(num_header_lines):
+                o.write(next(i))  # next() raises StopIteration if the input has fewer lines than num_header_lines
+            for line in i:
+                if keep_line(line):  # one-argument call: relies on the module-level keep_line having been rebound to a partial in the __main__ block
+                    o.write(line)
+
+
+if __name__ == '__main__':  # command-line entry point: parse arguments, bind the line filter, run main()
+    p = argparse.ArgumentParser()
+    p.add_argument('infile')
+    p.add_argument(
+        '-r', '--region',
+        required=True,
+        help='the region of the input file to rewrite'
+    )
+    p.add_argument(
+        '-o', '--ofile',
+        required=True,
+        help="the name of the output file"
+    )
+    p.add_argument(
+        '-c', '--cols',
+        nargs=3, type=int, required=True,  # three integers, used directly as zero-based list indices in keep_line
+        help="the columns of the input file specifying chrom, start and stop, "
+             "respectively"
+    )
+    p.add_argument(
+        '-n', '--num-header-lines',
+        type=int, default=0,
+        help='the number of header lines present in the input; These will '
+             'always be copied over to the new file.'
+    )
+    args = vars(p.parse_args())  # plain dict keyed by argparse dest names
+
+    chrom, reg = args['region'].split(':')  # expects exactly one ':' (CHROM:START-END); otherwise the unpacking raises ValueError
+    region = [chrom.encode()] + [int(x) for x in reg.split('-')]  # chromosome as bytes (fields in keep_line are bytes) followed by the integer bounds
+    keep_line = partial(keep_line, pos_cols=args['cols'], region=region)  # rebind the global name so main() can call keep_line(line)
+
+    main(args['infile'], args['ofile'], args['num_header_lines'])
b
diff -r 000000000000 -r 3123ce7acd0e test-data/util/shrink_tabix.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/util/shrink_tabix.py Fri Jan 11 17:50:55 2019 -0500
[
@@ -0,0 +1,42 @@
+from __future__ import print_function
+
+import argparse
+
+import pysam
+
+
+def main(infile, ofile, region):  # write the header and the records of region from a tabix-indexed infile to ofile, then call pysam.tabix_index on the result
+    print(infile, '->', ofile)
+    with pysam.Tabixfile(infile) as i:
+        fformat = i.format.lower()  # lower-cased format name, later passed as the tabix_index preset
+        if fformat == 'sam':  # NOTE(review): presumably generic/bed-like tables are reported as 'sam' here - confirm against pysam
+            fformat = 'bed'
+        if ofile[-3:] == '.gz':  # drop a trailing '.gz': plain text is written to this path below
+            ofile = ofile[:-3]
+        with open(ofile, 'w') as o:
+            try:
+                region_it = i.fetch(region=region)
+            except ValueError:  # retry with a 'chr' prefix on the region - presumably for files using chr-style contig names; confirm
+                region_it = i.fetch(region='chr' + region)
+            for line in i.header:  # header lines first, then the records of the region
+                o.write(line + '\n')
+            for line in region_it:
+                o.write(str(line) + '\n')
+    pysam.tabix_index(ofile, preset=fformat, force=True)  # NOTE(review): expected to compress ofile and recreate the .gz plus index - confirm in pysam docs
+
+
+if __name__ == '__main__':  # command-line entry point: parse arguments and forward them to main()
+    p = argparse.ArgumentParser()
+    p.add_argument('infile')
+    p.add_argument(
+        '-r', '--region',
+        required=True,
+        help='the region of the input file to rewrite'
+    )
+    p.add_argument(
+        '-o', '--ofile',
+        required=True,
+        help="the name of the output file"
+    )
+    args = vars(p.parse_args())  # dict with keys infile, region, ofile
+    main(**args)  # the argparse dest names match main()'s parameter names, so the dict unpacks directly
b
diff -r 000000000000 -r 3123ce7acd0e tool-data/gemini_versioned_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_versioned_databases.loc.sample Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,3 @@
+## GEMINI versioned databases
+#DownloadDate dbkey DBversion Description Path
+#2018-07-08 hg19 181 GEMINI annotations (2018-07-08 snapshot) /path/to/data
b
diff -r 000000000000 -r 3123ce7acd0e tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,7 @@
+<tables>
+    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="tool-data/gemini_versioned_databases.loc" />
+    </table>
+</tables>
+
b
diff -r 000000000000 -r 3123ce7acd0e tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Fri Jan 11 17:50:55 2019 -0500
b
@@ -0,0 +1,8 @@
+<tables>
+    <!-- Location of gemini annotation files for testing -->
+    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="${__HERE__}/test-data/gemini_versioned_databases.loc" />
+    </table>
+</tables>
+