# HG changeset patch # User iuc # Date 1547247055 18000 # Node ID 3123ce7acd0eda8baf1dc32503420596db7ad08e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59 diff -r 000000000000 -r 3123ce7acd0e gemini_inheritance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gemini_inheritance.xml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,486 @@ + + based identification of candidate genes + + gemini_macros.xml + + + + + + + + + 0: + --min-kindreds ${family_wise.min_kindreds} + #end if + + #if str($family_wise.families).strip(): + #set $families = ','.join([f.strip() for f in $family_wise.families.split(',')]) + --families "$families" + #end if + + #if int($family_wise.per_variant_selection.min_dp) > 0: + -d ${family_wise.per_variant_selection.min_dp} + #end if + + #if int($family_wise.per_variant_selection.min_gq) > 0: + --min-gq ${family_wise.per_variant_selection.min_gq} + #end if + + #if int($family_wise.per_variant_selection.max_pl) > -1: + --gt-pl-max ${family_wise.per_variant_selection.max_pl} + #end if + + #set $report = $oformat.report + @COLUMN_SELECT@ + + "${ infile }" + > "${ outfile }" +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+ + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + +
+ + + + +
+ + + + + +
+ + + + + +
+ + + + +
+ + + + + +
+ + + + + + + +
+ + + + +
+ + + + + +
+ + + + + +
+ + + + +
+ + + + + +
+
+ + + + +
diff -r 000000000000 -r 3123ce7acd0e gemini_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gemini_macros.xml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,254 @@ + + + 0.20.1 + + 200 + + + + gemini + + + + + + gemini --version + + + + + + + + + + + + + + + 10.1371/journal.pcbi.1003153 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value.strip() + + + + + + + + + + value.strip() + + + + + + + + + + + + + + + + + value.strip() + + + not value or value.isdigit() + + + not value or value.isdigit() + + + + + + + + #set $sql_expr = str($multiline_sql_expr).strip() + #if str($sql_expr): + #set $sql_expr = $sql_expr.replace('\r\n', '\n') + #set $sql_expr = $sql_expr.replace('\r', '\n') + #set $sql_expr = $sql_expr.replace('\\\n', ' ') + $cmdln_param '$sql_expr' + #end if + + + + #if str($report.report_selector) == 'full': + #set cols = "*" + #else: + #if $report.columns and str($report.columns) != '': + #set $cols = str($report.columns) + #else + #set $cols = '' + #end if + #if str($report.extra_cols).strip(): + #if $cols: + #set $cols = $cols + ', ' + str($report.extra_cols) + #else: + #set $cols = str($report.extra_cols) + #end if + #end if + #if not $cols: + #set $cols = "variant_id, gene" + #end if + #end if + + + + @SET_COLS@ + #if $cols != "*" + --columns '$cols' + #end if + + + = %d" % int($r.start)) + #end if + #if str($r.stop).strip(): + #silent $r_elements.append("end <= %d" % int($r.stop)) + #end if + #silent $region_elements.append("(%s)" % " AND ".join($r_elements)) + #end for + ]]> + + diff -r 000000000000 -r 3123ce7acd0e readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,65 @@ +========================= +Galaxy wrapper for GEMINI +========================= + + +GEMINI: a flexible framework for exploring genome variation + +GEMINI 
(GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of +the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, +and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very +powerful system for exploring genetic variation for disease and population genetics. + +Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically +annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, +OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows +one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an +enhanced SQL engine. + +Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_. + + +============ +Installation +============ + +It is recommended to install this wrapper via the `Galaxy Tool Shed`. + +.. 
_`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini + + +======= +History +======= +- 0.9.1: Initial public release + + +==================== +Detailed description +==================== + +View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html + + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. + +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. 
+ diff -r 000000000000 -r 3123ce7acd0e repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff -r 000000000000 -r 3123ce7acd0e static/images/gemini_mendel_errors.png Binary file static/images/gemini_mendel_errors.png has changed diff -r 000000000000 -r 3123ce7acd0e test-data/anno.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/anno.bed Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +chr3 187000000 187150000 +chr3 187150000 187300000 +chr3 187300000 187450000 diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_actionable_mutations_result.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_actionable_mutations_result.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,1 @@ +tum_name chrom start end ref alt gene impact is_somatic in_cosmic_census dgidb_info diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_amend.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_amend.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,10 @@ +#family_id sample_id paternal_id maternal_id sex phenotype +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 1 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 1 +3 3_mom 0 0 -1 1 +3 3_kid 3_dad 3_mom -1 2 diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_amend.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_amend.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, 
bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO= +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . 
AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:59:87.16:940,87,0 0/1:0,29:49:78.20:899,78,0 1/1:0,24:64:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48004992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST000004
88261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135369532 . T C 122.62 . AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|
ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 \ No newline at end of file diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_amend_input.db Binary file test-data/gemini_amend_input.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_annotate_result.db Binary file test-data/gemini_annotate_result.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_auto_dom_input.db Binary file test-data/gemini_auto_dom_input.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_auto_rec_input.db Binary file test-data/gemini_auto_rec_input.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_burden_calpha_template.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_calpha_template.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,4 @@ +gene T c Z p_value +SYCE1 .+ .+ .+ .+ +WDR37 .+ .+ .+ .+ +ASAH2C .+ .+ .+ .+ diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_burden_count_highimpact_result.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_count_highimpact_result.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,2 @@ +gene 1_kid 3_kid +WDR37 1 2 diff -r 000000000000 -r 3123ce7acd0e 
test-data/gemini_burden_count_nonsynonymous_result.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_burden_count_nonsynonymous_result.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,5 @@ +gene 1_dad 1_kid 1_mom 2_dad 2_kid 2_mom 3_dad 3_kid 3_mom +SYCE1 0 1 0 0 1 0 0 1 0 +SPRN 0 1 0 0 1 0 1 1 1 +WDR37 0 1 0 0 0 0 0 2 0 +ASAH2C 2 3 2 1 3 1 1 2 1 diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_comphets_input.db Binary file test-data/gemini_comphets_input.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_de_novo_input.db Binary file test-data/gemini_de_novo_input.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_fusions_result.tabular diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_is_somatic_result.db Binary file test-data/gemini_is_somatic_result.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_load_input.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_load_input.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,70 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=GRCh37 +##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani" +##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 10583 rs58108140 G A 100.0 PASS 
AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 10611 rs189107123 C G 100.0 PASS AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13302 rs180734498 C T 100.0 PASS 
THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13327 rs144762171 G C 100.0 PASS 
AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13957 . 
TC T 28.0 PASS AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 13980 rs151276478 T C 100.0 PASS 
AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 30923 rs140337953 G T 100.0 PASS 
AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|) +1 46402 . C CTGT 31.0 PASS AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 47190 . 
G GA 192.0 PASS AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||) +1 51476 rs187298206 T C 100.0 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 51479 rs116400033 T A 100.0 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||) +1 51914 rs190452223 T G 100.0 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 51935 rs181754315 C T 100.0 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||) +1 51954 rs185832753 G C 100.0 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52058 rs62637813 G C 100.0 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||) +1 52144 rs190291950 T A 100.0 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52185 . 
TTAA T 244.0 PASS AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 52238 rs150021059 T G 100.0 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||) +1 53234 . CAT C 227.0 PASS AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54353 rs140052487 C A 100.0 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) +1 54421 rs146477069 A G 100.0 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54490 rs141149254 G A 100.0 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||) +1 54676 rs2462492 C T 100.0 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||) +1 54753 rs143174675 T G 100.0 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||) +1 55164 rs3091274 C A 100.0 PASS 
AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||) +1 55249 . C CTATGG 443.0 PASS AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55299 rs10399749 C T 100.0 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||) +1 55313 rs182462964 A T 100.0 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55326 rs3107975 T C 100.0 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55330 rs185215913 G A 100.0 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55367 rs190850374 G A 100.0 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55388 rs182711216 C T 100.0 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 55394 rs2949420 T A 100.0 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55416 rs193242050 G A 100.0 PASS 
AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55427 rs183189405 T C 100.0 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55816 rs187434873 G A 100.0 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55850 rs191890754 C G 100.0 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55852 rs184233019 G C 100.0 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_load_result1.db Binary file test-data/gemini_load_result1.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_load_result2.db Binary file test-data/gemini_load_result2.db has changed diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_versioned_databases.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_versioned_databases.loc Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +## GEMINI versioned databases +#DownloadDate dbkey DBversion Description Path +1999-01-01 hg19 200 GEMINI annotations (test snapshot) ${__HERE__}/test-cache diff -r 000000000000 -r 3123ce7acd0e test-data/gemini_windower_template.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_windower_template.tabular Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,142 @@ +chr1 0 50000 . +chr1 50000000 50050000 . +chr1 100000000 100050000 . +chr1 150000000 150050000 . +chr1 200000000 200050000 . +chr10 0 50000 . 
+chr10 50000000 50050000 . +chr10 100000000 100050000 . +chr11 0 50000 . +chr11 50000000 50050000 . +chr11 100000000 100050000 . +chr11_gl000202_random 0 40103 . +chr12 0 50000 . +chr12 50000000 50050000 . +chr12 100000000 100050000 . +chr13 0 50000 . +chr13 50000000 50050000 . +chr13 100000000 100050000 . +chr14 0 50000 . +chr14 50000000 50050000 . +chr14 100000000 100050000 . +chr15 0 50000 . +chr15 50000000 50050000 . +chr15 100000000 100050000 . +chr16 0 50000 . +chr16 50000000 50050000 . +chr17 0 50000 . +chr17 50000000 50050000 . +chr17_ctg5_hap1 0 50000 . +chr17_gl000203_random 0 37498 . +chr17_gl000204_random 0 50000 . +chr17_gl000205_random 0 50000 . +chr17_gl000206_random 0 41001 . +chr18 0 50000 . +chr18 50000000 50050000 . +chr18_gl000207_random 0 4262 . +chr19 0 50000 . +chr19 50000000 50050000 . +chr19_gl000208_random 0 50000 . +chr19_gl000209_random 0 50000 . +chr1_gl000191_random 0 50000 . +chr1_gl000192_random 0 50000 . +chr2 0 50000 . +chr2 50000000 50050000 . +chr2 100000000 100050000 . +chr2 150000000 150050000 . +chr2 200000000 200050000 . +chr20 0 50000 . +chr20 50000000 50050000 . +chr21 0 50000 . +chr21_gl000210_random 0 27682 . +chr22 0 50000 . +chr22 50000000 50050000 . +chr3 0 50000 . +chr3 50000000 50050000 . +chr3 100000000 100050000 . +chr3 150000000 150050000 . +chr4 0 50000 . +chr4 50000000 50050000 . +chr4 100000000 100050000 . +chr4 150000000 150050000 . +chr4_ctg9_hap1 0 50000 . +chr4_gl000193_random 0 50000 . +chr4_gl000194_random 0 50000 . +chr5 0 50000 . +chr5 50000000 50050000 . +chr5 100000000 100050000 . +chr5 150000000 150050000 . +chr6 0 50000 . +chr6 50000000 50050000 . +chr6 100000000 100050000 . +chr6 150000000 150050000 . +chr6_apd_hap1 0 50000 . +chr6_cox_hap2 0 50000 . +chr6_dbb_hap3 0 50000 . +chr6_mann_hap4 0 50000 . +chr6_mcf_hap5 0 50000 . +chr6_qbl_hap6 0 50000 . +chr6_ssto_hap7 0 50000 . +chr7 0 50000 . +chr7 50000000 50050000 . +chr7 100000000 100050000 . +chr7 150000000 150050000 . 
+chr7_gl000195_random 0 50000 . +chr8 0 50000 . +chr8 50000000 50050000 . +chr8 100000000 100050000 . +chr8_gl000196_random 0 38914 . +chr8_gl000197_random 0 37175 . +chr9 0 50000 . +chr9 50000000 50050000 . +chr9 100000000 100050000 . +chr9_gl000198_random 0 50000 . +chr9_gl000199_random 0 50000 . +chr9_gl000200_random 0 50000 . +chr9_gl000201_random 0 36148 . +chrM 0 16571 . +chrUn_gl000211 0 50000 . +chrUn_gl000212 0 50000 . +chrUn_gl000213 0 50000 . +chrUn_gl000214 0 50000 . +chrUn_gl000215 0 50000 . +chrUn_gl000216 0 50000 . +chrUn_gl000217 0 50000 . +chrUn_gl000218 0 50000 . +chrUn_gl000219 0 50000 . +chrUn_gl000220 0 50000 . +chrUn_gl000221 0 50000 . +chrUn_gl000222 0 50000 . +chrUn_gl000223 0 50000 . +chrUn_gl000224 0 50000 . +chrUn_gl000225 0 50000 . +chrUn_gl000226 0 15008 . +chrUn_gl000227 0 50000 . +chrUn_gl000228 0 50000 . +chrUn_gl000229 0 19913 . +chrUn_gl000230 0 43691 . +chrUn_gl000231 0 27386 . +chrUn_gl000232 0 40652 . +chrUn_gl000233 0 45941 . +chrUn_gl000234 0 40531 . +chrUn_gl000235 0 34474 . +chrUn_gl000236 0 41934 . +chrUn_gl000237 0 45867 . +chrUn_gl000238 0 39939 . +chrUn_gl000239 0 33824 . +chrUn_gl000240 0 41933 . +chrUn_gl000241 0 42152 . +chrUn_gl000242 0 43523 . +chrUn_gl000243 0 43341 . +chrUn_gl000244 0 39929 . +chrUn_gl000245 0 36651 . +chrUn_gl000246 0 38154 . +chrUn_gl000247 0 36422 . +chrUn_gl000248 0 39786 . +chrUn_gl000249 0 38502 . +chrX 0 50000 . +chrX 50000000 50050000 . +chrX 100000000 100050000 . +chrX 150000000 150050000 . +chrY 0 50000 . +chrY 50000000 50050000 . 
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini-config.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini-config.yaml Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,15 @@ +annotation_dir: gemini/data +versions: + ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4 + ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2 + ExAC.r0.3.sites.vep.tidy.vcf.gz: 4 + GRCh37-gms-mappability.vcf.gz: 2 + clinvar_20170130.tidy.vcf.gz: 5 + cosmic-v68-GRCh37.tidy.vcf.gz: 3 + dbsnp.b147.20160601.tidy.vcf.gz: 1 + detailed_gene_table_v75: 2 + geno2mp.variants.tidy.vcf.gz: 1 + gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2 + hg19.rmsk.bed.gz: 2 + summary_gene_table_v75: 2 + whole_genome_SNVs.tsv.compressed.gz: 2 diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi Binary file test-data/test-cache/gemini/data/29way_pi_lods_elements_12mers.chr_specific.fdr_0.1_with_scores.txt.hg19.merged.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e 
test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/ESP6500SI.all.snps_indels.tidy.v2.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/ExAC.r0.3.sites.vep.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz Binary file test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi Binary file test-data/test-cache/gemini/data/GRC_patch_regions.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz Binary file test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/GRCh37-gms-mappability.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/cancer_gene_census.20140120.tsv Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,19 @@ +ARHH "RAS homolog gene family, member H (TTF)" 399 4 4p13 yes NHL L Dom T BCL6 +BCL5 B-cell CLL/lymphoma 5 603 17 17q22 yes CLL L Dom T MYC +BCL6 B-cell CLL/lymphoma 6 604 3 3q27 yes "NHL, CLL" L Dom "T, Mis" 
"IG loci, ZNFN1A1, LCP1, PIM1, TFRC, CIITA, NACA, HSPCB, HSPCA, HIST1H4I, IL21R, POU2AF1, ARHH, EIF4A2, SFRS3" +BCOR BCL6 corepressor 54880 X Xp11.4 yes "retinoblastoma, AML, APL (translocation)" Rec "F, N, S, T" RARA yes oculo-facio-cardio-dental genetic +CIITA "class II, major histocompatibility complex, transactivator" 4261 16 16p13 yes "PMBL, Hodgkin lymphoma" L Dom T "FLJ27352, CD274, CD273, RALGDS, RUNDC2A, C16orf75, BCL6" +EIF4A2 "eukaryotic translation initiation factor 4A, isoform 2" 1974 3 3q27.3 yes NHL L Dom T BCL6 +HIST1H4I "histone 1, H4i (H4FM)" 8294 6 6p21.3 yes NHL L Dom T BCL6 +HSPCA "heat shock 90kDa protein 1, alpha" 3320 14 14q32.31 yes NHL L Dom T BCL6 +HSPCB "heat shock 90kDa protein 1, beta" 3326 6 6p12 yes NHL L Dom T BCL6 +IGH@ immunoglobulin heavy locus 3492 14 14q32.33 yes "MM, Burkitt lymphoma, NHL, CLL, B-ALL, MALT, MLCLS" L Dom T "MYC, FGFR3,PAX5, IRTA1, IRF4, CCND1, BCL9, BCL8, BCL6, BCL2, BCL3, BCL10, BCL11A. LHX4, DDX6, NFKB2, PAFAH1B2, PCSK7, CRLF2" +IKZF1 IKAROS family zinc finger 1 10320 7 7p12.2 yes "ALL, DLBCL" L "Rec,Dom" "D,T" BCL6 +IL21R interleukin 21 receptor 50615 16 16p11 yes NHL L Dom T BCL6 +LCP1 lymphocyte cytosolic protein 1 (L-plastin) 3936 13 13q14.1-q14.3 yes NHL L Dom T BCL6 +MYC v-myc myelocytomatosis viral oncogene homolog (avian) 4609 8 8q24.12-q24.13 yes "Burkitt lymphoma, amplified in other cancers, B-CLL" "L, E" Dom "A, T" "IGK@, BCL5, BCL7A , BTG1, TRA@, IGH@" +NACA nascent-polypeptide-associated complex alpha polypeptide 4666 12 12q23-q24.1 yes NHL L Dom T BCL6 +PIM1 pim-1 oncogene 5292 6 6p21.2 yes NHL L Dom T BCL6 +POU2AF1 "POU domain, class 2, associating factor 1 (OBF1)" 5450 11 11q23.1 yes NHL L Dom T BCL6 +SFRS3 "splicing factor, arginine/serine-rich 3" 6428 6 6p21 yes follicular lymphoma L Dom T BCL6 +TFRC "transferrin receptor (p90, CD71)" 7037 3 3q29 yes NHL L Dom T BCL6 diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz Binary file 
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/cosmic-v68-GRCh37.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz Binary file test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi Binary file test-data/test-cache/gemini/data/cse-hiseq-8_4-2013-02-20.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/detailed_gene_table_v75 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/detailed_gene_table_v75 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,114 @@ +Chromosome Gene_name Is_hgnc Ensembl_gene_id Ensembl_transcript_id Biotype Transcript_status CCDS_id HGNC_id CDS_length Protein_length Transcript_start Transcript_end strand Synonyms Rvis_pct entrez_gene_id mammalian_phenotype_id +chr3 None 0 ENSG00000239093 ENST00000459452 snoRNA KNOWN None None None None 187141103 
187141207 1 None None None None +chr3 None 0 ENSG00000228952 ENST00000440726 lincRNA KNOWN None None None None 187166633 187167238 1 None None None None +chr3 None 0 ENSG00000223401 ENST00000450760 lincRNA KNOWN None None None None 187461474 187463208 1 None None None None +chr3 MASP 0 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 None 2100 699 186935942 187009810 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 None 2100 699 186935942 187009810 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 6901 2100 699 186935942 187009810 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000337774 protein_coding KNOWN CCDS33907 None 2100 699 186935942 187009810 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 None 2187 728 186951870 187009646 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 None 2187 728 186951870 187009646 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 6901 2187 728 186951870 187009646 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000296280 protein_coding KNOWN CCDS33908 None 2187 728 186951870 187009646 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000392472 protein_coding PUTATIVE None None 1848 615 186951872 187009765 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000392472 protein_coding PUTATIVE None None 1848 615 186951872 187009765 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000392472 protein_coding PUTATIVE None 6901 1848 615 186951872 187009765 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000392472 
protein_coding PUTATIVE None None 1848 615 186951872 187009765 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None None None None 186953655 187009542 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None None None None 186953655 187009542 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None 6901 None None 186953655 187009542 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000495249 processed_transcript PUTATIVE None None None None 186953655 187009542 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 None 1143 380 186964149 187009745 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 None 1143 380 186964149 187009745 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 6901 1143 380 186964149 187009745 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000169293 protein_coding KNOWN CCDS33909 None 1143 380 186964149 187009745 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None None 1065 354 186964947 187009670 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None None 1065 354 186964947 187009670 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None 6901 1065 354 186964947 187009670 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000392470 protein_coding PUTATIVE None None 1065 354 186964947 187009670 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000460839 
retained_intron KNOWN None None None None 186974373 187003796 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000460839 retained_intron KNOWN None None None None 186974373 187003796 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000460839 retained_intron KNOWN None 6901 None None 186974373 187003796 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000460839 retained_intron KNOWN None None None None 186974373 187003796 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000392475 protein_coding NOVEL None None 614 203 186974603 187009768 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000392475 protein_coding NOVEL None None 614 203 186974603 187009768 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000392475 protein_coding NOVEL None 6901 614 203 186974603 187009768 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000392475 protein_coding NOVEL None None 614 203 186974603 187009768 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None None 355 117 186980469 187009746 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None None 355 117 186980469 187009746 -1 MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None 6901 355 117 186980469 187009746 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000439271 protein_coding PUTATIVE None None 355 117 186980469 187009746 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 MASP 0 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None None 166 54 186980502 187009485 -1 PRSS5,MASP1,CRARF 16.8141071 5648 None +chr3 PRSS5 0 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None None 166 54 186980502 187009485 -1 
MASP1,CRARF,MASP 16.8141071 5648 None +chr3 MASP1 1 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None 6901 166 54 186980502 187009485 -1 PRSS5,CRARF,MASP 16.8141071 5648 None +chr3 CRARF 0 ENSG00000127241 ENST00000425937 protein_coding PUTATIVE None None 166 54 186980502 187009485 -1 PRSS5,MASP1,MASP 16.8141071 5648 None +chr3 IFRG28 0 ENSG00000136514 ENST00000259030 protein_coding KNOWN CCDS33910 None 741 246 187086120 187089864 1 RTP4,Z3CXXC4 94.35008257 64108 None +chr3 RTP4 1 ENSG00000136514 ENST00000259030 protein_coding KNOWN CCDS33910 23992 741 246 187086120 187089864 1 IFRG28,Z3CXXC4 94.35008257 64108 None +chr3 Z3CXXC4 0 ENSG00000136514 ENST00000259030 protein_coding KNOWN CCDS33910 None 741 246 187086120 187089864 1 IFRG28,RTP4 94.35008257 64108 None +chr3 SST 1 ENSG00000157005 ENST00000287641 protein_coding KNOWN CCDS3288 11329 351 116 187386694 187388187 -1 SMST 78.16112291 6750 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 SMST 0 ENSG00000157005 ENST00000287641 protein_coding KNOWN CCDS3288 None 351 116 187386694 187388187 -1 SST 78.16112291 6750 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 Z3CXXC2 0 ENSG00000198471 ENST00000358241 protein_coding KNOWN CCDS33911 None 678 225 187416047 187420345 -1 RTP2,MGC78665 69.20853975 344892 MP:0005389 +chr3 RTP2 1 ENSG00000198471 ENST00000358241 protein_coding KNOWN CCDS33911 32486 678 225 187416047 187420345 -1 Z3CXXC2,MGC78665 69.20853975 344892 MP:0005389 +chr3 MGC78665 0 ENSG00000198471 ENST00000358241 protein_coding KNOWN CCDS33911 None 678 225 187416047 187420345 -1 Z3CXXC2,RTP2 69.20853975 344892 MP:0005389 +chr3 None 0 ENSG00000228804 ENST00000449623 protein_coding PUTATIVE None None 390 129 187420101 187451637 1 None None None None +chr3 None 0 ENSG00000228804 ENST00000437407 protein_coding PUTATIVE None None 153 50 187420154 187450203 1 None None None None +chr3 ZNF51 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 
706 187439165 187463515 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 1001 2121 706 187439165 187463515 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000406870 protein_coding KNOWN CCDS3289 None 2121 706 187439165 187463515 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000419510 
nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None 1001 168 55 187439175 187454876 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000419510 nonsense_mediated_decay KNOWN None None 168 55 187439175 187454876 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 
ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 1001 2121 706 187440186 187454357 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000232014 protein_coding KNOWN CCDS3289 None 2121 706 187440186 187454357 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 
MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 1001 1953 650 187440220 187452670 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000450123 protein_coding NOVEL CCDS46975 None 1953 650 187440220 187452670 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 
23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000479110 retained_intron KNOWN None 1001 None None 187442357 187443411 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000479110 retained_intron KNOWN None None None None 187442357 187443411 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000430339 protein_coding KNOWN None 1001 365 120 187449515 187452735 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 
604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000430339 protein_coding KNOWN None None 365 120 187449515 187452735 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None 1001 None None 187449553 187463225 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000480458 processed_transcript KNOWN None None None None 187449553 187463225 -1 
LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000438077 protein_coding KNOWN None 1001 312 103 187449568 187455732 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000438077 protein_coding KNOWN None None 312 103 187449568 187455732 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 
LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000470319 retained_intron KNOWN None 1001 None None 187452233 187463260 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000470319 retained_intron KNOWN None None None None 187452233 187463260 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZNF51 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 
187453975 187463247 -1 BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None 1001 None None 187453975 187463247 -1 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 ENST00000496823 processed_transcript PUTATIVE None None None None 187453975 187463247 -1 LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 604 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz Binary file test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi Binary file test-data/test-cache/gemini/data/encode.6celltypes.consensus.bedg.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz Binary file test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz has changed diff -r 000000000000 -r 
3123ce7acd0e test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi Binary file test-data/test-cache/gemini/data/genetic_map_HapMapII_GRCh37.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/geno2mp.variants.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.CpG.bed.gz Binary file test-data/test-cache/gemini/data/hg19.CpG.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.CpG.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.cytoband.bed.gz Binary file test-data/test-cache/gemini/data/hg19.cytoband.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.cytoband.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.dgv.bed.gz Binary file test-data/test-cache/gemini/data/hg19.dgv.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi Binary file 
test-data/test-cache/gemini/data/hg19.dgv.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz Binary file test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.gerp.elements.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gwas.bed.gz Binary file test-data/test-cache/gemini/data/hg19.gwas.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.gwas.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz Binary file test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.pfam.ucscgenes.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.rmsk.bed.gz Binary file test-data/test-cache/gemini/data/hg19.rmsk.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.rmsk.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.segdup.bed.gz Binary file test-data/test-cache/gemini/data/hg19.segdup.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.segdup.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz Binary file test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz has changed 
diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19.vista.enhancers.20131108.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz Binary file test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi Binary file test-data/test-cache/gemini/data/hg19_fitcons_fc-i6-0_V1-01.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/hprd_interaction_edges.gz Binary file test-data/test-cache/gemini/data/hprd_interaction_edges.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl66 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl66 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,30 @@ +B8PSA7 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F5H2J0 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +F8W876 MASP1 MASP1 ENSG00000127241 
ENST00000169293 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5HYM1 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000392472 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000541811 None None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl67 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl67 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,30 @@ +B8PSA7 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000450123 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +C9JLU5 MASP1 MASP1 
ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F5H2J0 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000169293 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5HYM1 MASP1 MASP1 ENSG00000127241 ENST00000541896 None None +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000296280 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000392472 None None +Q9NSY8 MASP1 MASP1 ENSG00000127241 ENST00000541811 None None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl68 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl68 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,26 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 
BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 
ENST00000419510 None None diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl69 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl69 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,26 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 
MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl70 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl70 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,25 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 
ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/kegg_pathways_ensembl71 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/kegg_pathways_ensembl71 Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,26 @@ +C9J1C7 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JCS5 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000430339 None None +C9JL16 BCL6 BCL6 ENSG00000113916 ENST00000438077 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000425937 None None +C9JLU5 MASP1 MASP1 ENSG00000127241 ENST00000439271 None None +C9JMA2 MASP1 MASP1 ENSG00000127241 ENST00000392475 None None +F8W876 MASP1 MASP1 ENSG00000127241 ENST00000392470 None None +P41182 BCL6 BCL6 ENSG00000113916 ENST00000232014 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 BCL6 BCL6 ENSG00000113916 ENST00000406870 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P41182 
BCL6 BCL6 ENSG00000113916 ENST00000450123 hsa:604 path:hsa05202;Transcriptional_misregulation_in_cancer +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000169293 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000296280 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000337774 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa04610;Complement_and_coagulation_cascades +P48740 MASP1 MASP1 ENSG00000127241 ENST00000392472 hsa:5648 path:hsa05150;Staphylococcus_aureus_infection +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04080;Neuroactive_ligand_receptor_interaction +P61278 SST SST ENSG00000157005 ENST00000287641 hsa:6750 path:hsa04971;Gastric_acid_secretion +Q5QGT7 RTP2 RTP2 ENSG00000198471 ENST00000358241 hsa:344892 None +Q96DX8 RTP4 RTP4 ENSG00000136514 ENST00000259030 hsa:64108 None +none BCL6 BCL6 ENSG00000113916 ENST00000419510 None None diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz Binary file test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi Binary file test-data/test-cache/gemini/data/stam.125cells.dnaseI.hg19.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/summary_gene_table_v75 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/gemini/data/summary_gene_table_v75 Fri Jan 11 17:50:55 2019 -0500 @@ 
-0,0 +1,23 @@ +Chromosome Gene_name Is_hgnc Ensembl_gene_id HGNC_id Synonyms Rvis_pct Strand Transcript_min_start Transcript_max_end Mammalian_phenotype_id +chr3 None 0 ENSG00000239093 None None None 1 187141103 187141207 None +chr3 None 0 ENSG00000228952 None None None 1 187166633 187167238 None +chr3 None 0 ENSG00000223401 None None None 1 187461474 187463208 None +chr3 MASP 0 ENSG00000127241 None PRSS5,MASP1,CRARF 16.8141071 -1 186935942 187009810 None +chr3 PRSS5 0 ENSG00000127241 None MASP1,CRARF,MASP 16.8141071 -1 186935942 187009810 None +chr3 MASP1 1 ENSG00000127241 6901 PRSS5,CRARF,MASP 16.8141071 -1 186935942 187009810 None +chr3 CRARF 0 ENSG00000127241 None PRSS5,MASP1,MASP 16.8141071 -1 186935942 187009810 None +chr3 IFRG28 0 ENSG00000136514 None RTP4,Z3CXXC4 94.35008257 1 187086120 187089864 None +chr3 RTP4 1 ENSG00000136514 23992 IFRG28,Z3CXXC4 94.35008257 1 187086120 187089864 None +chr3 Z3CXXC4 0 ENSG00000136514 None IFRG28,RTP4 94.35008257 1 187086120 187089864 None +chr3 SST 1 ENSG00000157005 11329 SMST 78.16112291 -1 187386694 187388187 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 SMST 0 ENSG00000157005 None SST 78.16112291 -1 187386694 187388187 MP:0002873,MP:0005386,MP:0005376,MP:0005381,MP:0003631,MP:0005378 +chr3 Z3CXXC2 0 ENSG00000198471 None RTP2,MGC78665 69.20853975 -1 187416047 187420345 MP:0005389 +chr3 RTP2 1 ENSG00000198471 32486 Z3CXXC2,MGC78665 69.20853975 -1 187416047 187420345 MP:0005389 +chr3 MGC78665 0 ENSG00000198471 None Z3CXXC2,RTP2 69.20853975 -1 187416047 187420345 MP:0005389 +chr3 None 0 ENSG00000228804 None None None 1 187420101 187451637 None +chr3 ZNF51 0 ENSG00000113916 None LAZ3,BCL5,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 LAZ3 0 ENSG00000113916 None BCL5,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 
MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL5 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,ZBTB27,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6A 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,BCL5,ZBTB27 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 BCL6 1 ENSG00000113916 1001 LAZ3,ZNF51,ZBTB27,BCL5,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 +chr3 ZBTB27 0 ENSG00000113916 None LAZ3,ZNF51,BCL6,BCL5,BCL6A 23.5727766 -1 187439165 187463515 MP:0005369,MP:0005384,MP:0005376,MP:0005388,MP:0005370,MP:0010768,MP:0002873,MP:0005389,MP:0005378,MP:0005397,MP:0005385,MP:0005387 diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi Binary file test-data/test-cache/gemini/data/wgEncodeRegTfbsClusteredV2.cell_count.20130213.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/util/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/README.rst Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,25 @@ +Prepare Gemini annotation files and test databases for tool tests +================================================================= + +Each version of GEMINI is tied to a particular set of annotation files and +database version. 
+ +The ``build-gemini-testdata.sh`` script in this folder should be used to +regenerate the annotation files and the test databases whenever the GEMINI +version required by the tool wrappers gets upgraded. + +The script requires a working GEMINI installation at the targeted version and +a folder with GEMINI's original annotation files, and can be executed with:: + + sh build-gemini-testdata.sh path/to/gemini/annotation/files + +It will regenerate the annotation files inside test-data/test-cache/gemini/data +and rebuild the *.db files in test-data. + +.. Note:: + + If the version of GEMINI that you are upgrading to uses a gemini-config.yaml + file that is different from the one found in test-data/test-cache you will + have to upgrade this file manually (make sure you leave the line + ``annotation_dir: gemini/data`` unchanged in the process). + diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/anno.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/anno.bed Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +chr3 187000000 187150000 +chr3 187150000 187300000 +chr3 187300000 187450000 diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/anno.bed.gz Binary file test-data/util/build-data/anno.bed.gz has changed diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/anno.bed.gz.tbi Binary file test-data/util/build-data/anno.bed.gz.tbi has changed diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/gemini_load_input.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/gemini_load_input.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,70 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=GRCh37 +##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo 
Cingolani" +##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 10583 rs58108140 G A 100.0 PASS AVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 10611 rs189107123 C G 100.0 PASS 
AN=2184;THETA=0.0077;VT=SNP;AA=.;AC=41;ERATE=0.0048;SNPSOURCE=LOWCOV;AVGPOST=0.9330;LDAF=0.0479;RSQ=0.3475;AF=0.02;ASN_AF=0.01;AMR_AF=0.03;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13302 rs180734498 C T 100.0 PASS THETA=0.0048;AN=2184;AC=249;VT=SNP;AA=.;RSQ=0.6281;LDAF=0.1573;SNPSOURCE=LOWCOV;AVGPOST=0.8895;ERATE=0.0058;AF=0.11;ASN_AF=0.02;AMR_AF=0.08;AFR_AF=0.21;EUR_AF=0.14;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON
(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13327 rs144762171 G C 100.0 PASS AVGPOST=0.9698;AN=2184;VT=SNP;AA=.;RSQ=0.6482;AC=59;SNPSOURCE=LOWCOV;ERATE=0.0012;LDAF=0.0359;THETA=0.0204;AF=0.03;ASN_AF=0.02;AMR_AF=0.03;AFR_AF=0.02;EUR_AF=0.04;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||209|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),INTRON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|) +1 13957 . 
TC T 28.0 PASS AA=TC;AC=35;AN=2184;VT=INDEL;AVGPOST=0.8711;RSQ=0.2501;LDAF=0.0788;THETA=0.0100;ERATE=0.0065;AF=0.02;ASN_AF=0.01;AMR_AF=0.02;AFR_AF=0.02;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 13980 rs151276478 T C 100.0 PASS 
AN=2184;AC=45;ERATE=0.0034;THETA=0.0139;RSQ=0.3603;LDAF=0.0525;VT=SNP;AA=.;AVGPOST=0.9221;SNPSOURCE=LOWCOV;AF=0.02;ASN_AF=0.02;AMR_AF=0.02;AFR_AF=0.01;EUR_AF=0.02;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),EXON(MODIFIER||||493|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655|),EXON(MODIFIER||||550|DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242|),EXON(MODIFIER||||551|DDX11L1|processed_transcript|NON_CODING|ENST00000456328|) +1 30923 rs140337953 G T 100.0 PASS 
AC=1584;AA=T;AN=2184;RSQ=0.5481;VT=SNP;THETA=0.0162;SNPSOURCE=LOWCOV;ERATE=0.0183;LDAF=0.6576;AVGPOST=0.7335;AF=0.73;ASN_AF=0.89;AMR_AF=0.80;AFR_AF=0.48;EUR_AF=0.73;EFF=DOWNSTREAM(MODIFIER|||||FAM138A|processed_transcript|CODING|ENST00000461467|),DOWNSTREAM(MODIFIER|||||FAM138A|protein_coding|CODING|ENST00000417324|),DOWNSTREAM(MODIFIER|||||MIR1302-10|miRNA|NON_CODING|ENST00000408384|),INTRON(MODIFIER||||177|MIR1302-10|antisense|NON_CODING|ENST00000469289|),INTRON(MODIFIER||||236|MIR1302-10|antisense|NON_CODING|ENST00000473358|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),UPSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|) +1 46402 . C CTGT 31.0 PASS AA=.;RSQ=0.0960;AN=2184;AC=8;VT=INDEL;AVGPOST=0.8325;THETA=0.0121;ERATE=0.0072;LDAF=0.0903;AF=0.0037;ASN_AF=0.0017;AFR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 47190 . 
G GA 192.0 PASS AA=G;AVGPOST=0.9041;AN=2184;AC=29;VT=INDEL;LDAF=0.0628;THETA=0.0153;RSQ=0.2883;ERATE=0.0041;AF=0.01;AMR_AF=0.0028;AFR_AF=0.06;EFF=INTERGENIC(MODIFIER|||||||||) +1 51476 rs187298206 T C 100.0 PASS ERATE=0.0021;AA=C;AC=18;AN=2184;VT=SNP;THETA=0.0103;LDAF=0.0157;SNPSOURCE=LOWCOV;AVGPOST=0.9819;RSQ=0.5258;AF=0.01;ASN_AF=0.01;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 51479 rs116400033 T A 100.0 PASS RSQ=0.7414;AVGPOST=0.9085;AA=T;AN=2184;THETA=0.0131;AC=235;VT=SNP;LDAF=0.1404;SNPSOURCE=LOWCOV;ERATE=0.0012;AF=0.11;ASN_AF=0.0035;AMR_AF=0.16;AFR_AF=0.03;EUR_AF=0.22;EFF=INTERGENIC(MODIFIER|||||||||) +1 51914 rs190452223 T G 100.0 PASS ERATE=0.0004;AVGPOST=0.9985;THETA=0.0159;AA=T;AN=2184;VT=SNP;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4089;LDAF=0.0012;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 51935 rs181754315 C T 100.0 PASS THETA=0.0126;AA=C;AN=2184;RSQ=0.1888;AVGPOST=0.9972;LDAF=0.0015;VT=SNP;AC=0;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0;EFF=INTERGENIC(MODIFIER|||||||||) +1 51954 rs185832753 G C 100.0 PASS LDAF=0.0021;AA=G;AN=2184;RSQ=0.4692;AVGPOST=0.9975;VT=SNP;SNPSOURCE=LOWCOV;THETA=0.0029;ERATE=0.0006;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52058 rs62637813 G C 100.0 PASS AA=C;ERATE=0.0057;AN=2184;AVGPOST=0.9264;VT=SNP;RSQ=0.4882;AC=64;SNPSOURCE=LOWCOV;LDAF=0.0620;THETA=0.0069;AF=0.03;ASN_AF=0.0017;AMR_AF=0.04;AFR_AF=0.02;EUR_AF=0.05;EFF=INTERGENIC(MODIFIER|||||||||) +1 52144 rs190291950 T A 100.0 PASS THETA=0.0093;ERATE=0.0013;LDAF=0.0156;AA=T;AN=2184;VT=SNP;RSQ=0.5220;AVGPOST=0.9811;SNPSOURCE=LOWCOV;AC=21;AF=0.01;ASN_AF=0.0035;AMR_AF=0.01;AFR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 52185 . 
TTAA T 244.0 PASS AA=.;AN=2184;LDAF=0.0124;VT=INDEL;AC=10;THETA=0.0232;RSQ=0.4271;AVGPOST=0.9840;ERATE=0.0037;AF=0.0046;ASN_AF=0.0035;AMR_AF=0.02;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 52238 rs150021059 T G 100.0 PASS THETA=0.0132;AA=G;AN=2184;RSQ=0.6256;VT=SNP;ERATE=0.0026;AVGPOST=0.8617;SNPSOURCE=LOWCOV;AC=1941;LDAF=0.8423;AF=0.89;ASN_AF=0.99;AMR_AF=0.93;AFR_AF=0.64;EUR_AF=0.95;EFF=INTERGENIC(MODIFIER|||||||||) +1 53234 . CAT C 227.0 PASS AA=CAT;AVGPOST=0.9936;AN=2184;VT=INDEL;THETA=0.0119;AC=10;LDAF=0.0074;RSQ=0.6237;ERATE=0.0007;AF=0.0046;AMR_AF=0.0028;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54353 rs140052487 C A 100.0 PASS THETA=0.0026;AA=C;AN=2184;AC=16;VT=SNP;RSQ=0.5074;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) +1 54421 rs146477069 A G 100.0 PASS ERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 54490 rs141149254 G A 100.0 PASS ERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||) +1 54676 rs2462492 C T 100.0 PASS LDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||) +1 54753 rs143174675 T G 100.0 PASS AA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||) +1 55164 rs3091274 C A 100.0 PASS 
AN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||) +1 55249 . C CTATGG 443.0 PASS AA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55299 rs10399749 C T 100.0 PASS RSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||) +1 55313 rs182462964 A T 100.0 PASS ERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55326 rs3107975 T C 100.0 PASS AA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55330 rs185215913 G A 100.0 PASS ERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55367 rs190850374 G A 100.0 PASS ERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55388 rs182711216 C T 100.0 PASS THETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||) +1 55394 rs2949420 T A 100.0 PASS AC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55416 rs193242050 G A 100.0 PASS 
AA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||) +1 55427 rs183189405 T C 100.0 PASS THETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||) +1 55816 rs187434873 G A 100.0 PASS AN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55850 rs191890754 C G 100.0 PASS AVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||) +1 55852 rs184233019 G C 100.0 PASS THETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||) diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_dom.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_dom.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,9 @@ +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 2 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 2 +3 3_mom 0 0 -1 -9 +3 3_kid 3_dad 3_mom -1 2 diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_dom.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_dom.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, 
bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO= +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . 
AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 1142209 . T C 3404.3 . AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:59:87.16:940,87,0 0/1:0,29:49:78.20:899,78,0 1/1:0,24:64:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:97.16:940,87,0 0/0:0,29:29:98.20:899,78,0 0/1:0,24:24:96.14:729,66,0 +chr10 48004992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . 
AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST00000488261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_rec.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_rec.ped Fri Jan 11 17:50:55 2019 
-0500 @@ -0,0 +1,10 @@ +#family_id sample_id paternal_id maternal_id sex phenotype +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 1 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 1 +3 3_mom 0 0 -1 1 +3 3_kid 3_dad 3_mom -1 2 diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.auto_rec.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.auto_rec.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= 
source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO= +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 
3_mom 3_kid +chr10 1142208 . T C 3404.3 . AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:59:87.16:940,87,0 0/1:0,29:49:78.20:899,78,0 1/1:0,24:64:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 48004992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST000004
88261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135369532 . T C 122.62 . AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|
ENST00000541506|),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000368517|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K147R|282|SYCE1|protein_coding|CODING|ENST00000432597|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|318|SYCE1|protein_coding|CODING|ENST00000303903|exon_10_135369485_135369551),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aAg/aGg|K183R|351|SYCE1|protein_coding|CODING|ENST00000343131|exon_10_135369485_135369551) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 \ No newline at end of file diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.comp_het.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.comp_het.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,13 @@ +#Family_ID Individual_ID Paternal_ID Maternal_ID Sex Phenotype ethnicity +1 child_1 dad_1 mom_1 1 2 caucasian +2 child_2 dad_2 mom_2 1 2 caucasian +2 dad_2 -9 -9 1 1 caucasian +2 mom_2 -9 -9 2 1 caucasian +1 dad_1 -9 -9 1 1 caucasian +1 mom_1 -9 -9 2 1 caucasian +3 child_3 dad_3 mom_3 1 2 caucasian +3 dad_3 -9 -9 1 1 caucasian +3 mom_3 -9 -9 2 1 caucasian +4 child_4 dad_4 mom_4 1 2 caucasianNEuropean +4 dad_4 -9 -9 1 1 caucasianNEuropean +4 mom_4 -9 -9 2 1 caucasianNEuropean diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.comp_het.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.comp_het.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,120 @@ +##fileformat=VCFv4.1 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/all.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=20 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/human_g1k_v37.fasta +##SnpEffVersion="SnpEff 3.2 (build 2013-03-14), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 /if2/arq5x/cphg-quinlan/projects/sms-elsea/varCalling/all.vcf " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT child_1 child_2 dad_2 mom_2 dad_1 mom_1 child_3 dad_3 mom_3 child_4 dad_4 mom_4 +1 16977 . G A 2022.88 . 
AC=9;AF=0.375;AN=24;BaseQRankSum=-25.424;DP=2999;DS;Dels=0.00;FS=4.077;HRun=0;HaplotypeScore=1.6017;InbreedingCoeff=-0.5953;MQ=10.20;MQ0=124;MQRankSum=0.084;QD=0.90;ReadPosRankSum=0.727;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|7|1) GT:AD:DP:GQ:PL 0/1:181,69:250:4.24:4,0,401 0/0:190,60:250:5.21:0,5,708 0/1:167,83:250:72.66:73,0,721 0/0:187,63:250:63.12:0,63,633 0/0:221,29:250:96.57:0,97,1729 0/1:160,90:250:99:175,0,537 0/1:177,73:250:99:412,0,723 0/1:183,67:250:99:209,0,838 0/1:174,76:250:99:284,0,844 0/1:194,53:248:26.15:26,0,756 0/1:149,101:250:99:681,0,496 0/1:184,66:250:99:229,0,561 +1 17222 . A G 225.47 . 
AC=4;AF=0.167;AN=24;BaseQRankSum=-1.725;DP=2985;DS;Dels=0.00;FS=6.073;HRun=0;HaplotypeScore=1.1157;InbreedingCoeff=-0.2212;MQ=14.57;MQ0=605;MQRankSum=-3.790;QD=0.23;ReadPosRankSum=1.860;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|8|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1) GT:AD:DP:GQ:PL 0/0:224,25:250:44.94:0,45,987 0/1:190,44:241:55.80:56,0,776 0/0:230,17:250:90.16:0,90,940 0/1:212,34:250:36.11:36,0,754 0/0:191,55:248:18.04:0,18,1324 0/0:232,17:250:84.90:0,85,1270 0/0:223,27:250:99:0,129,1563 0/0:237,13:250:99:0,107,1537 0/1:207,42:249:99:106,0,1096 0/1:204,43:247:91.42:91,0,1280 0/0:229,20:249:99:0,113,1404 0/0:221,29:250:3.04:0,3,1183 +1 17363 . TTCT T 628.85 . 
AC=2;AF=0.083;AN=24;BaseQRankSum=4.577;DP=2951;DS;FS=10.112;HRun=0;HaplotypeScore=316.7300;InbreedingCoeff=-0.0909;MQ=22.73;MQ0=27;MQRankSum=1.681;QD=1.33;ReadPosRankSum=0.329;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|8|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|5|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000438504|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|6|1) GT:AD:DP:GQ:PL 0/0:250,0:70:99:0,169,3371 0/0:250,0:78:99:0,199,4105 0/0:250,0:70:99:0,187,3984 0/0:250,0:63:99:0,166,3406 0/0:243,0:89:99:0,262,5364 0/0:250,0:62:99:0,172,3575 0/0:250,0:79:99:0,223,4726 0/0:249,1:80:99:0,181,4749 0/0:234,1:85:99:0,241,5015 0/1:205,27:73:99:371,0,3406 0/1:225,16:74:99:318,0,3528 0/0:250,0:64:99:0,193,3988 +1 17563 . G A 79.88 . 
AC=1;AF=0.042;AN=24;BaseQRankSum=3.375;DP=3000;DS;Dels=0.00;FS=9.603;HRun=0;HaplotypeScore=0.9909;InbreedingCoeff=-0.0518;MQ=30.15;MQ0=97;MQRankSum=-1.386;QD=0.32;ReadPosRankSum=-0.446;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1) GT:AD:DP:GQ:PL 0/0:243,7:250:99:0,202,2559 0/0:246,4:250:99:0,187,2479 0/0:250,0:250:99:0,256,3232 0/0:249,1:250:99:0,239,3309 0/0:231,18:250:21.61:0,22,2802 0/0:248,2:250:99:0,214,2657 0/1:232,18:250:99:121,0,1801 0/0:249,1:250:99:0,263,3184 0/0:238,12:250:6.72:0,7,2591 0/0:250,0:250:99:0,343,3875 0/0:247,3:250:99:0,280,3428 0/0:250,0:250:99:0,301,3416 +1 17697 . G C 255.3 . 
AC=4;AF=0.167;AN=24;BaseQRankSum=-1.815;DP=2999;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=1.7379;InbreedingCoeff=-0.2256;MQ=15.66;MQ0=86;MQRankSum=2.999;QD=0.26;ReadPosRankSum=-2.160;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1) GT:AD:DP:GQ:PL 0/1:214,36:250:99:132,0,595 0/0:249,1:250:78.22:0,78,967 0/0:248,1:250:81.24:0,81,1022 0/1:216,34:250:91.94:92,0,404 0/0:226,24:250:56.86:0,57,1099 0/0:245,4:249:23.87:0,24,704 0/0:225,25:250:2.97:0,3,746 0/1:216,33:250:80.80:81,0,810 0/0:245,4:250:73.97:0,74,1201 0/1:222,28:250:12.96:13,0,920 0/0:249,1:250:87.24:0,87,1000 0/0:182,66:250:8.40:0,8,703 +1 17722 . A G 32.03 . 
AC=3;AF=0.125;AN=24;BaseQRankSum=0.923;DP=2937;DS;Dels=0.00;FS=0.000;HRun=2;HaplotypeScore=1.9343;InbreedingCoeff=-0.1033;MQ=14.33;MQ0=62;MQRankSum=-4.474;QD=0.04;ReadPosRankSum=0.750;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|7|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1) GT:AD:DP:GQ:PL 0/0:247,3:250:51.15:0,51,628 0/0:250,0:250:60.18:0,60,746 0/0:250,0:250:54.17:0,54,685 0/0:249,1:250:54.15:0,54,644 0/0:233,0:233:81.25:0,81,1014 0/0:235,4:239:45.13:0,45,549 0/0:249,0:250:45.13:0,45,514 0/0:249,1:250:78.20:0,78,853 0/0:247,0:247:90.24:0,90,1012 0/1:227,17:244:0.06:0,0,670 0/1:214,11:225:4.62:5,0,542 0/1:236,13:249:71.41:71,0,448 +1 17730 . C A 102.87 . 
AC=5;AF=0.208;AN=24;BaseQRankSum=-11.508;DP=2968;DS;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=1.0610;InbreedingCoeff=-0.2498;MQ=13.12;MQ0=24;MQRankSum=-4.433;QD=0.08;ReadPosRankSum=1.952;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|4|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|5|1),EXON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|6|1),SPLICE_SITE_ACCEPTOR(HIGH|||||WASH7P|protein_coding|NON_CODING|ENST00000430492|7|1) GT:AD:DP:GQ:PL 0/0:246,4:250:36.11:0,36,464 0/0:244,5:249:57.17:0,57,727 0/0:244,6:250:48.15:0,48,628 0/0:247,3:250:51.13:0,51,621 0/1:242,8:250:26.26:26,0,830 0/0:246,4:250:48.14:0,48,601 0/1:238,11:250:48.54:49,0,386 0/0:244,5:249:42.12:0,42,491 0/0:239,3:243:23.74:0,24,609 0/1:221,13:234:13.85:14,0,482 0/1:232,11:243:7:7,0,501 0/1:238,12:250:73.18:73,0,368 +1 17746 . A G 607.7 . 
AC=8;AF=0.333;AN=24;BaseQRankSum=13.191;DP=2993;DS;Dels=0.00;FS=0.000;HRun=1;HaplotypeScore=0.4155;InbreedingCoeff=-0.5280;MQ=11.87;MQ0=5;MQRankSum=-4.672;QD=0.30;ReadPosRankSum=3.574;EFF=DOWNSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000515242||1),DOWNSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000518655||1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000430492|5|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000537342|3|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|4|1),INTRON(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|3|1) GT:AD:DP:GQ:PL 0/0:206,44:250:7.45:0,7,373 0/0:221,29:250:5.12:0,5,468 0/0:224,26:250:3.61:0,4,382 0/0:216,34:250:9.49:0,9,405 0/1:243,7:250:43.06:43,0,781 0/1:195,55:250:99:113,0,232 0/1:218,32:250:44.30:44,0,212 0/1:219,31:250:0.03:0,0,330 0/1:211,39:250:74.42:74,0,236 0/1:209,34:243:68.40:68,0,298 0/1:172,77:249:99:193,0,137 0/1:219,30:250:99:137,0,197 diff -r 000000000000 -r 3123ce7acd0e test-data/util/build-data/test.de_novo.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.de_novo.ped Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,9 @@ +1 1_dad 0 0 -1 1 +1 1_mom 0 0 -1 1 +1 1_kid 1_dad 1_mom -1 2 +2 2_dad 0 0 -1 1 +2 2_mom 0 0 -1 1 +2 2_kid 2_dad 2_mom -1 2 +3 3_dad 0 0 -1 1 +3 3_mom 0 0 -1 1 +3 3_kid 3_dad 3_mom -1 2 \ No newline at end of file diff -r 000000000000 -r 3123ce7acd0e 
test-data/util/build-data/test.de_novo.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/util/build-data/test.de_novo.vcf Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,127 @@ +##fileformat=VCFv4.1 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[bam/M10478.conc.on.pos.realigned.bam, bam/M10475.conc.on.pos.realigned.bam, bam/M10500.conc.on.pos.realigned.bam, bam/M128215.conc.on.pos.realigned.bam] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=10 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT heterozygosity=0.0010 pcr_error_rate=1.0E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 computeSLOD=false alleles=(RodBinding name= source=UNBOUND) min_base_quality_score=17 max_deletion_fraction=0.05 multiallelic=false max_alternate_alleles=5 min_indel_count_for_genotyping=5 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10.0 indelGapOpenPenalty=45.0 indelHaplotypeSize=80 bandedIndel=false indelDebug=false ignoreSNPAlleles=false 
dbsnp=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///m/cphg-quinlan/cphg-quinlan/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa +##INFO= +##SnpEffVersion="SnpEff 3.0g (build 2012-08-31), by Pablo Cingolani" +##SnpEffCmd="SnpEff -i vcf -o vcf GRCh37.66 test4.vep.vcf " +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_dad 1_mom 1_kid 2_dad 2_mom 2_kid 3_dad 3_mom 3_kid +chr10 1142208 . T C 3404.3 . 
AC=8;AF=1.00;AN=8;DP=122;Dels=0.00;FS=0.000;HRun=0;HaplotypeScore=2.6747;MQ=36.00;MQ0=0;QD=27.90;CSQ=intron_variant|||ENSG00000047056|WDR37|ENST00000263150|||,downstream_gene_variant|||ENSG00000047056|WDR37|ENST00000436154|||,intron_variant|||ENSG00000047056|WDR37|ENST00000358220|||,stop_lost|Tga/Cga|*/R|ENSG00000047056|WDR37|ENST00000381329|9/9||;EFF=DOWNSTREAM(MODIFIER||||208|WDR37|protein_coding|CODING|ENST00000436154|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000263150|),INTRON(MODIFIER||||494|WDR37|protein_coding|CODING|ENST00000358220|),STOP_LOST(HIGH|MISSENSE|Tga/Cga|*250R|249|WDR37|protein_coding|CODING|ENST00000381329|exon_10_1142110_1142566) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 0/0:1,37:59:87.16:940,87,0 0/0:0,29:49:78.20:899,78,0 0/0:0,22:64:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 1/1:0,24:24:66.14:729,66,0 +chr10 48003992 . C T 1047.87 . AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,23:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 +chr10 48004992 . C T 1047.87 . 
AC=4;AF=0.50;AN=8;BaseQRankSum=-0.053;DP=165;Dels=0.00;FS=6.377;HRun=0;HaplotypeScore=4.3830;MQ=20.94;MQ0=0;MQRankSum=-0.368;QD=9.53;ReadPosRankSum=1.346;CSQ=missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000420079|16/17|benign(0)|tolerated(1),missense_variant|tGt/tAt|C/Y|ENSG00000072444|ASAH2C|ENST00000426610|17/18|benign(0)|tolerated(1);EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C540Y|610|ASAH2C|protein_coding|CODING|ENST00000420079|exon_10_48003968_48004056),NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|tGt/tAt|C552Y|622|ASAH2C|protein_coding|CODING|ENST00000426610|exon_10_48003968_48004056) GT:AD:DP:GQ:PL 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 1/1:0,23:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,23:24:66.14:729,66,0 +chr10 135336656 . G A 38.34 . AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS=0.000;HRun=4;HaplotypeScore=0.0000;MQ=37.00;MQ0=0;QD=19.17;CSQ=upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000418356|||,intron_variant&nc_transcript_variant|||ENSG00000214279||ENST00000488261|||,intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541261|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000421586|||,intron_variant|||ENSG00000130649|CYP2E1|ENST00000463117|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000252945|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000541080|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000477500|||,upstream_gene_variant|||ENSG00000130649|CYP2E1|ENST00000480558|||,intron_variant|||ENSG00000214279||ENST00000356567|||;EFF=INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|ENST00000541506|),INTRON(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000463117|),INTRON(MODIFIER||||693|RP11-108K14.4.1|protein_coding|CODING|ENST00000356567|),INTRON(MODIFIER|||||RP11-108K14.4.1|retained_intron|CODING|ENST000004
88261|),UPSTREAM(MODIFIER||||305|CYP2E1|protein_coding|CODING|ENST00000418356|),UPSTREAM(MODIFIER||||355|CYP2E1|protein_coding|CODING|ENST00000421586|),UPSTREAM(MODIFIER||||493|CYP2E1|protein_coding|CODING|ENST00000252945|),UPSTREAM(MODIFIER||||85|CYP2E1|protein_coding|CODING|ENST00000541261|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000477500|),UPSTREAM(MODIFIER|||||CYP2E1|processed_transcript|CODING|ENST00000480558|) GT:AD:DP:GQ:PL 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/0:1,37:39:87.16:940,87,0 0/0:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 0/1:1,37:39:87.16:940,87,0 0/1:0,29:29:78.20:899,78,0 0/1:0,24:24:66.14:729,66,0 +chr10 135369532 . T C 122.62 . AC=2;AF=0.25;AN=8;BaseQRankSum=2.118;DP=239;Dels=0.00;FS=5.194;HRun=2;HaplotypeScore=5.7141;MQ=36.02;MQ0=0;MQRankSum=0.082;QD=2.31;ReadPosRankSum=-0.695;CSQ=missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000303903|9/13|benign(0.001)|tolerated(1),missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000432597|10/14|benign(0)|tolerated(1),downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000460441|||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000343131|9/13|benign(0.001)|tolerated(1),intron_variant|||ENSG00000203772|SPRN|ENST00000541506|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000171772|SYCE1|ENST00000479535|6/10||,downstream_gene_variant|||ENSG00000171772|SYCE1|ENST00000482127|||,non_coding_exon_variant&nc_transcript_variant|||ENSG00000130649|CYP2E1|ENST00000368520|6/6||,missense_variant|aAg/aGg|K/R|ENSG00000171772|SYCE1|ENST00000368517|9/13|benign(0)|tolerated(1);EFF=DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000460441|),DOWNSTREAM(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000482127|),EXON(MODIFIER|||||CYP2E1|retained_intron|CODING|ENST00000368520|),EXON(MODIFIER|||||SYCE1|processed_transcript|CODING|ENST00000479535|),INTRON(MODIFIER||||151|SPRN|protein_coding|CODING|
# Build the test data for the Galaxy GEMINI tool wrappers.
#
# Usage: build-gemini-testdata.sh [GEMINI_ANNOTATION_DIR]
#
# When GEMINI_ANNOTATION_DIR is given, the annotation files found there are
# first downsampled to GENOMIC_REGION and written to OUT_PTH.  In either case
# the gemini test databases are then (re)built from the files in build-data/.

# Every relative path below is resolved against the directory of this script,
# so do not continue if we cannot change into it.
cd "$(dirname "$0")" || exit 1

export GEMINI_CONFIG=../test-cache
OUT_PTH="$GEMINI_CONFIG/gemini/data"
GENOMIC_REGION=3:187000000-187500000

# Print the second column of the downsampled summary gene table, dropping
# every line that contains "None".  Used as the pattern list for the grep
# filters below, so it must only be called after that table has been written.
region_genes() {
    cut -f2 "$OUT_PTH/summary_gene_table_v75" | grep -Fv None
}


if [ -n "$1" ]; then

    IN_PTH="$1"
    # downsample all vcf and bed annotation files to the region of interest
    # and reindex; hprd_interaction_edges.gz is handled separately below
    for vcf in "$IN_PTH"/*.gz
    do
        # an unmatched glob is left as a literal pattern - skip it
        [ -e "$vcf" ] || continue
        vcf_name="$(basename "$vcf")"
        case "$vcf_name" in
            hprd_interaction_edges.gz) continue ;;
        esac
        python ./shrink_tabix.py "$vcf" -r "$GENOMIC_REGION" -o "$OUT_PTH/$vcf_name"
    done

    # downsample gene_table files to the region of interest
    echo "$IN_PTH/summary_gene_table_v75 -> $OUT_PTH/summary_gene_table_v75"
    python ./shrink_simple_tab.py "$IN_PTH/summary_gene_table_v75" -r "chr$GENOMIC_REGION" -c 0 8 9 -n 1 -o "$OUT_PTH/summary_gene_table_v75"

    echo "$IN_PTH/detailed_gene_table_v75 -> $OUT_PTH/detailed_gene_table_v75"
    python ./shrink_simple_tab.py "$IN_PTH/detailed_gene_table_v75" -r "chr$GENOMIC_REGION" -c 0 11 12 -n 1 -o "$OUT_PTH/detailed_gene_table_v75"

    # filter kegg_pathway files to retain only records of the genes listed
    # in the downsampled summary_gene_table
    for kegg in "$IN_PTH"/kegg_pathways_*
    do
        [ -e "$kegg" ] || continue
        kegg_name="$(basename "$kegg")"
        echo "$kegg -> $OUT_PTH/$kegg_name"
        region_genes | grep -Fwf - "$kegg" > "$OUT_PTH/$kegg_name"
    done

    # filter hprd_interaction file to retain only records of the genes listed
    # in the downsampled summary_gene_table
    echo "$IN_PTH/hprd_interaction_edges.gz -> $OUT_PTH/hprd_interaction_edges.gz"
    bgzip -dc "$IN_PTH/hprd_interaction_edges.gz" > "$OUT_PTH/hprd_interaction_edges"
    region_genes | grep -Ff - "$OUT_PTH/hprd_interaction_edges" | bgzip > "$OUT_PTH/hprd_interaction_edges.gz"
    rm "$OUT_PTH/hprd_interaction_edges"

    # filter cancer_gene_census file to retain only records of the genes listed
    # in the downsampled summary_gene_table;
    # TO DO: make the filter stricter by looking for matches only in the first
    # column of the cancer_gene_census file (but the file is relatively small anyway)
    echo "$IN_PTH/cancer_gene_census.20140120.tsv -> $OUT_PTH/cancer_gene_census.20140120.tsv"
    region_genes | grep -Fwf - "$IN_PTH/cancer_gene_census.20140120.tsv" > "$OUT_PTH/cancer_gene_census.20140120.tsv"

else
    echo "no path to gemini annotation files provided - only building test databases"
fi


# now use gemini load to build the test databases
echo "Building gemini test databases"
echo "Test databases for gemini_load"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff ../gemini_load_result1.db
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/gemini_load_input.vcf -t snpEff --skip-gene-tables --no-load-genotypes ../gemini_load_result2.db
echo "Test database for gemini_amend"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -t snpEff ../gemini_amend_input.db
echo "Test database for gemini_annotate"
# compress the file that tabix indexes on the next line
bgzip -c build-data/anno.bed > build-data/anno.bed.gz
tabix --force -p bed build-data/anno.bed.gz
cp ../gemini_load_result1.db ../gemini_annotate_result.db
gemini --annotation-dir "$OUT_PTH" annotate -f build-data/anno.bed.gz -c anno5 -a count ../gemini_annotate_result.db
echo "Test database for gemini_set_somatic"
cp ../gemini_load_result1.db ../gemini_is_somatic_result.db
gemini set_somatic --min-somatic-score 5.65 ../gemini_is_somatic_result.db
echo "Test database for gemini_de_novo and gemini_mendel_errors"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.de_novo.vcf -p build-data/test.de_novo.ped -t snpEff ../gemini_de_novo_input.db
echo "Test database for gemini_comp_hets"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.comp_het.vcf -p build-data/test.comp_het.ped -t snpEff ../gemini_comphets_input.db
echo "Test databases for gemini_autosomal"
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.auto_rec.vcf -p build-data/test.auto_rec.ped -t snpEff ../gemini_auto_rec_input.db
gemini --annotation-dir "$OUT_PTH" load --skip-cadd --skip-gerp-bp -v build-data/test.auto_dom.vcf -p build-data/test.auto_dom.ped -t snpEff ../gemini_auto_dom_input.db
def keep_line(line, pos_cols, region):
    """Return True if the tab-separated record *line* overlaps *region*.

    Args:
        line: one record of the input file as bytes.
        pos_cols: indices of the chromosome, start and stop columns
            (in that order).
        region: ``[chrom, start, stop]`` with *chrom* as bytes and the
            coordinates as ints.

    Returns:
        True when the record is on the region's chromosome and its interval
        overlaps the open interval (start, stop) of the region, else False.

    Raises:
        ValueError: if a coordinate column of a record on the region's
            chromosome is not an integer.
        IndexError: if the record has fewer columns than *pos_cols* needs.
    """
    fields = line.rstrip().split(b'\t')
    if fields[pos_cols[0]] != region[0]:
        # different chromosome
        return False
    first = int(fields[pos_cols[1]])
    second = int(fields[pos_cols[2]])
    lo, hi = min(first, second), max(first, second)
    # A plain interval-overlap test: besides records that start or end inside
    # the region, this also keeps records that span the whole region, which a
    # check of the two end points alone would miss.  The comparisons are
    # strict, so a record that merely touches a region boundary is still
    # excluded.
    return lo < region[2] and hi > region[1]


def main(infile, ofile, num_header_lines, line_filter=None):
    """Copy the header and the selected records of *infile* to *ofile*.

    Args:
        infile: path of the tab-separated input file.
        ofile: path of the file to write.
        num_header_lines: number of leading lines copied unconditionally.
            If the input has fewer lines, all of them are copied.
        line_filter: callable taking one line (bytes) and returning a truthy
            value for lines to keep.  When omitted, the module-level
            ``keep_line`` is called with the line as its only argument, so
            it must have been bound to a region beforehand.
    """
    if line_filter is None:
        line_filter = keep_line
    print(infile, '->', ofile)
    with open(infile, 'rb') as src, open(ofile, 'wb') as dst:
        # copy header lines
        for _ in range(num_header_lines):
            header = next(src, None)
            if header is None:
                # input is shorter than the announced header
                break
            dst.write(header)
        for line in src:
            if line_filter(line):
                dst.write(line)


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('infile')
    p.add_argument(
        '-r', '--region',
        required=True,
        help='the region of the input file to rewrite'
    )
    p.add_argument(
        '-o', '--ofile',
        required=True,
        help="the name of the output file"
    )
    p.add_argument(
        '-c', '--cols',
        nargs=3, type=int, required=True,
        help="the columns of the input file specifying chrom, start and stop, "
             "respectively"
    )
    p.add_argument(
        '-n', '--num-header-lines',
        type=int, default=0,
        help='the number of header lines present in the input; These will '
             'always be copied over to the new file.'
    )
    args = vars(p.parse_args())

    # Parse CHROM:START-STOP up front so that a malformed region is reported
    # as a usage error instead of failing later on the first data line.
    try:
        chrom, reg = args['region'].split(':')
        start, stop = (int(x) for x in reg.split('-'))
    except ValueError:
        p.error(
            '--region must have the form CHROM:START-STOP, got %r'
            % args['region']
        )
    region = [chrom.encode(), start, stop]
    line_filter = partial(keep_line, pos_cols=args['cols'], region=region)

    main(args['infile'], args['ofile'], args['num_header_lines'], line_filter)
rewrite' + ) + p.add_argument( + '-o', '--ofile', + required=True, + help="the name of the output file" + ) + args = vars(p.parse_args()) + main(**args) diff -r 000000000000 -r 3123ce7acd0e tool-data/gemini_versioned_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gemini_versioned_databases.loc.sample Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,3 @@ +## GEMINI versioned databases +#DownloadDate dbkey DBversion Description Path +#2018-07-08 hg19 181 GEMINI annotations (2018-07-08 snapshot) /path/to/data diff -r 000000000000 -r 3123ce7acd0e tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,7 @@ + + + value, dbkey, version, name, path + +
+
+ diff -r 000000000000 -r 3123ce7acd0e tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Jan 11 17:50:55 2019 -0500 @@ -0,0 +1,8 @@ + + + + value, dbkey, version, name, path + +
+
+