changeset 1:ad130eaa3a05 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/custom_pro_db commit e025f5b4d590c44537cf0702e2fb040a28f98fec
author galaxyp
date Fri, 12 May 2017 13:17:40 -0400
parents 8ccfff69dd57
children 2cba79e6037e
files README.md customProDB.R customProDB.xml test-data/exon_anno.RData test-data/hg19/cosmic.RData test-data/hg19/dbsnpinCoding.RData test-data/hg19/exon_anno.RData test-data/hg19/ids.RData test-data/hg19/junctions1.bed test-data/hg19/procodingseq.RData test-data/hg19/proseq.RData test-data/hg19/splicemax.RData test-data/hg19/test1.vcf test-data/hg19/test1_sort.bam test-data/hg19/test1_sort.bam.bai test-data/hg19/txdb.sqlite test-data/hg19_dbsnp_snv.fasta test-data/hg19_dbsnp_variant_annotation.rdata test-data/hg19_dbsnp_variant_annotation.sqlite test-data/hg19_genomic_mapping.sqlite test-data/hg19_indel.fasta test-data/hg19_rpkm0.fasta test-data/hg19_rpkm1.fasta test-data/hg19_rpkm1000.fasta test-data/hg19_snv.fasta test-data/hg19_variant_annotation.rdata test-data/hg19_variant_annotation.sqlite test-data/ids.RData test-data/procodingseq.RData test-data/proseq.RData test-data/test1.vcf test-data/test1_sort.bam test-data/test1_sort.bam.bai test-data/test_indel.fasta test-data/test_rpkm.fasta test-data/test_snv.fasta
diffstat 36 files changed, 394 insertions(+), 143 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Tue Mar 14 14:14:38 2017 -0400
+++ b/README.md	Fri May 12 13:17:40 2017 -0400
@@ -48,7 +48,6 @@
 Authors and contributors:
 
 * Matt Chambers <matt.chambers42@gmail.com>
-  Vanderbilt University Medical Center
 
-* Xiaojing Wang
-  Vanderbilt University Medical Center
+* Xiaojing Wang <xiaojing.wang@bcm.edu>
+  Baylor College of Medicine
--- a/customProDB.R	Tue Mar 14 14:14:38 2017 -0400
+++ b/customProDB.R	Fri May 12 13:17:40 2017 -0400
@@ -27,64 +27,146 @@
 option_list$cosmic <- make_option('--cosmic', type='character')
 option_list$annotationFromHistory <- make_option('--annotationFromHistory', type='logical', action="store_true", default=FALSE)
 option_list$rpkmCutoff <- make_option('--rpkmCutoff', type='character')
-#option_list$outputIndels <- make_option('--outputIndels', type='logical', action="store_true", default=FALSE)
+option_list$outputIndels <- make_option('--outputIndels', type='logical', action="store_true", default=FALSE)
 #option_list$outputNovelJunctions <- make_option('--outputNovelJunctions', type='logical', action="store_true", default=FALSE)
-option_list$outputFile <- make_option('--outputFile', type='character')
+#option_list$bedFile <- make_option('--bedFile', type='character')
+#option_list$bsGenome <- make_option('--bsGenome', type='character')
+option_list$outputRData <- make_option('--outputRData', type='logical', action="store_true", default=FALSE)
+option_list$outputSQLite <- make_option('--outputSQLite', type='logical', action="store_true", default=FALSE)
 
 
 opt <- parse_args(OptionParser(option_list=option_list))
 
 
 customProDB <- function(
-	bam_file = GalaxyInputFile(required=TRUE), 
-	bai_file = GalaxyInputFile(required=TRUE), 
-	vcf_file = GalaxyInputFile(required=TRUE), 
-	exon_anno_file = GalaxyInputFile(required=TRUE),
-	proteinseq_file = GalaxyInputFile(required=TRUE),
-	procodingseq_file = GalaxyInputFile(required=TRUE),
-	ids_file = GalaxyInputFile(required=TRUE),
-	dbsnpinCoding_file = GalaxyInputFile(required=FALSE),
-	cosmic_file = GalaxyInputFile(required=FALSE),
-	annotationFromHistory = GalaxyLogicalParam(required=FALSE),
-	rpkmCutoff = GalaxyNumericParam(required=TRUE),
-	#outputIndels = GalaxyLogicalParam(required=FALSE),
-	#outputNovelJunctions = GalaxyLogicalParam(required=FALSE),
-	outputFile = GalaxyOutput("FASTA","fasta"))
+    bam_file = GalaxyInputFile(required=TRUE), 
+    bai_file = GalaxyInputFile(required=TRUE), 
+    vcf_file = GalaxyInputFile(required=TRUE), 
+    exon_anno_file = GalaxyInputFile(required=TRUE),
+    proteinseq_file = GalaxyInputFile(required=TRUE),
+    procodingseq_file = GalaxyInputFile(required=TRUE),
+    ids_file = GalaxyInputFile(required=TRUE),
+    dbsnpinCoding_file = GalaxyInputFile(required=FALSE),
+    cosmic_file = GalaxyInputFile(required=FALSE),
+    annotationFromHistory = GalaxyLogicalParam(required=FALSE),
+    rpkmCutoff = GalaxyNumericParam(required=TRUE),
+    outputIndels = GalaxyLogicalParam(required=FALSE),
+    outputRData = GalaxyLogicalParam(required=FALSE),
+    outputSQLite = GalaxyLogicalParam(required=FALSE)
+    #,outputNovelJunctions = GalaxyLogicalParam(required=FALSE)
+    #,bedFile = GalaxyInputFile(required=FALSE)
+    #,bsGenome = GalaxyCharacterParam(required=FALSE)
+    )
 {
+    old <- options(stringsAsFactors = FALSE, gsubfn.engine = "R")
+    on.exit(options(old), add = TRUE)
+
     file.symlink(exon_anno_file, paste(getwd(), "exon_anno.RData", sep="/"))
     file.symlink(proteinseq_file, paste(getwd(), "proseq.RData", sep="/"))
     file.symlink(procodingseq_file, paste(getwd(), "procodingseq.RData", sep="/"))
     file.symlink(ids_file, paste(getwd(), "ids.RData", sep="/"))
 
+    load(exon_anno_file)
+    load(proteinseq_file)
+    load(procodingseq_file)
+    load(ids_file)
+
     if (length(dbsnpinCoding_file) > 0)
     {
         file.symlink(dbsnpinCoding_file, paste(getwd(), "dbsnpinCoding.RData", sep="/"))
-        labelrsid = T
+        labelrsid = TRUE
+        load(dbsnpinCoding_file)
     }
     else
     {
-        labelrsid = F
+        dbsnpinCoding = NULL
+        labelrsid = FALSE
     }
 
     if (length(cosmic_file) > 0)
     {
         file.symlink(cosmic_file, paste(getwd(), "cosmic.RData", sep="/"))
-        cosmic = T
+        use_cosmic = TRUE
+        load(cosmic_file)
     }
     else
     {
-        cosmic = F
+        cosmic = NULL
+        use_cosmic = FALSE
     }
 
     bamLink = "input.bam"
     file.symlink(bam_file, bamLink)
     file.symlink(bai_file, paste(bamLink, ".bai", sep=""))
 
-    suppressPackageStartupMessages(library(customProDB))
+    # load sqldf from GitHub until a conda package is available
+    download.file("https://github.com/ggrothendieck/sqldf/archive/master.zip", "sqldf.zip", quiet=TRUE)
+    unzip("sqldf.zip")
+    devtools::load_all("sqldf-master")
+
+    # load customProDB from GitHub (NOTE: downloading the zip is faster than cloning the repo with git2r or devtools::install_github)
+    download.file("https://github.com/chambm/customProDB/archive/master.zip", "customProDB.zip", quiet=TRUE)
+    unzip("customProDB.zip")
+    devtools::load_all("customProDB-master")
 
     easyRun(bamFile=bamLink, vcfFile=vcf_file, annotation_path=getwd(),
             rpkm_cutoff=rpkmCutoff, outfile_path=".", outfile_name="output",
-            nov_junction=F, INDEL=T, lablersid=labelrsid, COSMIC=cosmic)
+            nov_junction=FALSE, INDEL=outputIndels,
+            lablersid=labelrsid, COSMIC=use_cosmic)
+
+    # save variant annotations to an RData file (needed by proBAMr)
+    if (outputRData || outputSQLite)
+    {
+        variantAnnotation = getVariantAnnotation(vcf_file, ids, exon, proteinseq, procodingseq, dbsnpinCoding, cosmic)
+        if (outputRData) save(variantAnnotation, file="output.rdata")
+    }
+
+    if (outputSQLite)
+    {
+        # create protein-centric variant annotation table (needed by Galaxy-P viewer MVP)
+        varproseq = unique(rbind(variantAnnotation$snvproseq, variantAnnotation$indelproseq))
+        ref_vs_var_seq = sqldf::sqldf("SELECT reference.pro_name, variant.pro_name AS var_pro_name, reference.peptide AS ref_seq, variant.peptide AS var_seq
+                                       FROM proteinseq reference, varproseq variant
+                                       WHERE reference.tx_name=variant.tx_name
+                                       GROUP BY variant.pro_name")
+        getCigarishString = function(ref, var)
+        {
+            a = Biostrings::pairwiseAlignment(ref, var)
+            d = gsub("[A-Z]", "=", Biostrings::compareStrings(a@pattern, a@subject))
+            r = rle(strsplit(d, "")[[1]])
+            gsub("-", "D", gsub("\\+", "I", gsub("\\?", "X", paste0(r$lengths, r$values, collapse=""))))
+        }
+        ref_vs_var_seq$cigar =  mapply(FUN=getCigarishString, ref_vs_var_seq$ref_seq, ref_vs_var_seq$var_seq, USE.NAMES=FALSE)
+        ref_vs_var_seq$annotation = substring(ref_vs_var_seq$var_pro_name, stringr::str_length(ref_vs_var_seq$pro_name)+2)
+
+        variant_annotation_sqlite = dbConnect(RSQLite::SQLite(), "output_variant_annotation.sqlite")
+        dbWriteTable(variant_annotation_sqlite,
+                     "variant_annotation",
+                     sqldf::sqldf("SELECT var_pro_name, pro_name, cigar, annotation FROM ref_vs_var_seq"))
+        DBI::dbExecute(variant_annotation_sqlite, "CREATE INDEX variant_annotation_var_pro_name ON variant_annotation (var_pro_name)")
+
+        # save genomic mapping to a SQLite file (needed by Galaxy-P viewer MVP)
+        exon$cds_start = as.integer(exon$cds_start)
+        exon$cds_end = as.integer(exon$cds_end)
+        genomic_mapping_sqlite = dbConnect(RSQLite::SQLite(), "output_genomic_mapping.sqlite")
+        varprocoding = unique(rbind(variantAnnotation$snvprocoding, variantAnnotation$indelprocoding))
+        dbWriteTable(genomic_mapping_sqlite,
+                     "genomic_mapping",
+                     sqldf::sqldf("SELECT exon.gene_name, exon.tx_name, varprocoding.pro_name, cds_start, cds_end,
+                                          chromosome_name AS chr_name, cds_chr_start, cds_chr_end, exon.strand
+                                  FROM exon, varprocoding
+                                  WHERE exon.tx_id=varprocoding.tx_id AND cds_chr_start > 0
+                                  GROUP BY exon.tx_id, rank
+                                  UNION
+                                  SELECT gene_name, tx_name, pro_name, cds_start, cds_end,
+                                         chromosome_name AS chr_name, cds_chr_start, cds_chr_end, exon.strand
+                                  FROM exon
+                                  WHERE cds_chr_start > 0
+                                  GROUP BY tx_id, rank"))
+        DBI::dbExecute(genomic_mapping_sqlite, "CREATE INDEX genomic_mapping_pro_name ON genomic_mapping (pro_name)")
+    }
+
+    invisible(NULL)
 }
 
 
--- a/customProDB.xml	Tue Mar 14 14:14:38 2017 -0400
+++ b/customProDB.xml	Fri May 12 13:17:40 2017 -0400
@@ -1,8 +1,24 @@
-<tool id="custom_pro_db" name="CustomProDB" version="1.14.0">
+<tool id="custom_pro_db" name="CustomProDB" version="1.16.0">
   <description>Generate protein FASTAs from exosome or transcriptome data</description>
   <requirements>
-    <requirement type="package" version="1.14.0">bioconductor-customprodb</requirement>
+    <requirement type="package" version="3.3.1">r-base</requirement>
+    <!--<requirement type="package" version="1.14.0">bioconductor-customprodb</requirement>-->
     <requirement type="package" version="1.18.0">bioconductor-rgalaxy</requirement>
+    <requirement type="package" version="1.21.0">bioconductor-biocinstaller</requirement>
+    <requirement type="package" version="1.20.3">bioconductor-variantannotation</requirement>
+    <requirement type="package" version="1.11.1">r-devtools</requirement>
+    <requirement type="package" version="3.98_1.4">r-xml</requirement>
+    <requirement type="package" version="0.10.11">r-rmysql</requirement>
+    <requirement type="package" version="1.0.2">r-testthat</requirement>
+    <requirement type="package" version="0.1.0">r-getoptlong</requirement>
+    <requirement type="package" version="1.1.2">r-stringi</requirement>
+    <requirement type="package" version="1.1.0">r-stringr</requirement>
+    <requirement type="package" version="1.10.0">r-data.table</requirement>
+    <!--<requirement type="package" version="0.4_10">r-sqldf</requirement>-->
+    <requirement type="package" version="0.6_6">r-gsubfn</requirement>
+    <requirement type="package" version="2.3_47">r-chron</requirement>
+    <requirement type="package" version="0.3.10">r-proto</requirement>
+    <requirement type="package" version="1.8.4">r-plyr</requirement>
   </requirements>
   <stdio>
     <exit_code range="1:" level="fatal" description="Job Failed" />
@@ -13,7 +29,9 @@
        --bai='${genome_annotation.bamInput.metadata.bam_index}'
        --vcf='$genome_annotation.vcfInput'
        --rpkmCutoff=$rpkmCutoff
-       --outputFile='${output_rpkm}'
+       $outputIndels
+       $outputSQLite
+       $outputRData
 
        #if str($genome_annotation.source) == 'history':
             --exon_anno='$genome_annotation.exonAnno'
@@ -24,7 +42,7 @@
                 --dbsnpinCoding='$genome_annotation.dbsnpInCoding'
             #end if
             #if str($genome_annotation.cosmic) != 'None':
-                --cosmic='$genome_annotation.cosmic"
+                --cosmic='$genome_annotation.cosmic'
             #end if
        #else:
             #set index_path = $genome_annotation.builtin.fields.path
@@ -87,24 +105,77 @@
       </when>
     </conditional>
     <param name="rpkmCutoff" type="float" value="1" min="0" label="Transcript Expression Cutoff (RPKM)" help="If non-zero, if a transcript does not meet this expression cutoff (based on RPKM) then it will not be included in the output database." />
+    <param name="outputIndels" type="boolean" truevalue="--outputIndels" falsevalue="" label="Create a variant FASTA for short insertions and deletions" />
+    <param name="outputSQLite" type="boolean" truevalue="--outputSQLite" falsevalue="" label="Create SQLite files for mapping proteins to genome and summarizing variant proteins" />
+    <param name="outputRData" type="boolean" truevalue="--outputRData" falsevalue="" label="Create RData file of variant protein coding sequences" help="The PSM2SAM tool needs this to map variant proteins to genomic locations" />
   </inputs>
   <outputs>
     <data format="fasta" name="output_rpkm" from_work_dir="output_rpkm.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_rpkm.fasta"/>
     <data format="fasta" name="output_snv" from_work_dir="output_snv.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_snv.fasta"/>
-    <data format="fasta" name="output_indel" from_work_dir="output_indel.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_indel.fasta"/>
+    <data format="fasta" name="output_indel" from_work_dir="output_indel.fasta" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_indel.fasta">
+      <filter>outputIndels is True</filter>
+    </data>
+    <data format="rdata" name="output_variant_annotation_rdata" from_work_dir="output.rdata" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_variantAnnotation.RData">
+      <filter>outputRData is True</filter>
+    </data>
+    <data format="sqlite" name="output_genomic_mapping_sqlite" from_work_dir="output_genomic_mapping.sqlite" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_genomicMapping.sqlite">
+      <filter>outputSQLite is True</filter>
+    </data>
+    <data format="sqlite" name="output_variant_annotation_sqlite" from_work_dir="output_variant_annotation.sqlite" label="${genome_annotation.bamInput.name.rsplit('.',1)[0]}_variantAnnotation.sqlite">
+      <filter>outputSQLite is True</filter>
+    </data>
   </outputs>
   <tests>
-    <test>
-      <param name="bamInput" value="test1_sort.bam" dbkey="hg19" />
-      <param name="vcfInput" value="test1.vcf" dbkey="hg19" />
+    <test expect_num_outputs="5">
+      <param name="bamInput" value="hg19/test1_sort.bam" dbkey="hg19" />
+      <param name="vcfInput" value="hg19/test1.vcf" dbkey="hg19" />
+      <param name="source" value="history" />
+      <param name="exonAnno" value="hg19/exon_anno.RData" />
+      <param name="proteinSeq" value="hg19/proseq.RData" />
+      <param name="proCodingSeq" value="hg19/procodingseq.RData" />
+      <param name="ids" value="hg19/ids.RData" />
+      <param name="rpkmCutoff" value="1" />
+      <param name="outputIndels" value="" />
+      <param name="outputRData" value="--outputRData" />
+      <param name="outputSQLite" value="--outputSQLite" />
+      <output name="output_rpkm" file="hg19_rpkm1.fasta" />
+      <output name="output_snv" file="hg19_snv.fasta" />
+      <output name="output_variant_annotation_rdata" file="hg19_variant_annotation.rdata" />
+      <output name="output_genomic_mapping_sqlite" file="hg19_genomic_mapping.sqlite" />
+      <output name="output_variant_annotation_sqlite" file="hg19_variant_annotation.sqlite" />
+    </test>
+    <test expect_num_outputs="5">
+      <param name="bamInput" value="hg19/test1_sort.bam" dbkey="hg19" />
+      <param name="vcfInput" value="hg19/test1.vcf" dbkey="hg19" />
       <param name="source" value="history" />
-      <param name="exonAnno" value="exon_anno.RData" />
-      <param name="proteinSeq" value="proseq.RData" />
-      <param name="proCodingSeq" value="procodingseq.RData" />
-      <param name="ids" value="ids.RData" />
-      <output name="output_rpkm" file="test_rpkm.fasta" />
-      <output name="output_snv" file="test_snv.fasta" />
-      <output name="output_indel" file="test_indel.fasta" />
+      <param name="exonAnno" value="hg19/exon_anno.RData" />
+      <param name="proteinSeq" value="hg19/proseq.RData" />
+      <param name="proCodingSeq" value="hg19/procodingseq.RData" />
+      <param name="ids" value="hg19/ids.RData" />
+      <param name="dbsnpInCoding" value="hg19/dbsnpinCoding.RData" />
+      <param name="cosmic" value="hg19/cosmic.RData" />
+      <param name="rpkmCutoff" value="0" />
+      <param name="outputIndels" value="--outputIndels" />
+      <param name="outputSQLite" value="--outputSQLite" />
+      <output name="output_rpkm" file="hg19_rpkm0.fasta" />
+      <output name="output_snv" file="hg19_dbsnp_snv.fasta" />
+      <output name="output_indel" file="hg19_indel.fasta" />
+      <output name="output_variant_annotation_rdata" file="hg19_dbsnp_variant_annotation.rdata" />
+      <output name="output_variant_annotation_sqlite" file="hg19_dbsnp_variant_annotation.sqlite" />
+    </test>
+    <test expect_num_outputs="3">
+      <param name="bamInput" value="hg19/test1_sort.bam" dbkey="hg19" />
+      <param name="vcfInput" value="hg19/test1.vcf" dbkey="hg19" />
+      <param name="source" value="history" />
+      <param name="exonAnno" value="hg19/exon_anno.RData" />
+      <param name="proteinSeq" value="hg19/proseq.RData" />
+      <param name="proCodingSeq" value="hg19/procodingseq.RData" />
+      <param name="ids" value="hg19/ids.RData" />
+      <param name="rpkmCutoff" value="1000" />
+      <param name="outputIndels" value="--outputIndels" />
+      <output name="output_rpkm" file="hg19_rpkm1000.fasta" />
+      <output name="output_snv" file="hg19_snv.fasta" />
+      <output name="output_indel" file="hg19_indel.fasta" />
     </test>
   </tests>
   <help>
@@ -113,8 +184,7 @@
 Generate protein FASTAs from exosome or transcriptome data (in the form of BAM files). </help>
   <citations>
     <citation type="doi">10.1093/bioinformatics/btt543</citation>
-    <citation type="bibtex">@misc{toolsGalaxyP, author = {Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub 
-repository},
+    <citation type="bibtex">@misc{toolsGalaxyP, author = {Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
                                   year = {2017}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = 
 {$sha1$}" -->
   </citations>
Binary file test-data/exon_anno.RData has changed
Binary file test-data/hg19/cosmic.RData has changed
Binary file test-data/hg19/dbsnpinCoding.RData has changed
Binary file test-data/hg19/exon_anno.RData has changed
Binary file test-data/hg19/ids.RData has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19/junctions1.bed	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,76 @@
+track name=junctions description="TopHat junctions"
+chr1	32479909	32495942	JUNC00002865	8	+	32479909	32495942	255,0,0	2	69,44	0,15989
+chr1	32495950	32497196	JUNC00002866	13	+	32495950	32497196	255,0,0	2	73,72	0,1174
+chr1	32496010	32498851	JUNC00002867	3	+	32496010	32498851	255,0,0	2	13,63	0,2778
+chr1	32497175	32498854	JUNC00002868	20	+	32497175	32498854	255,0,0	2	66,66	0,1613
+chr1	32498867	32502584	JUNC00002869	29	+	32498867	32502584	255,0,0	2	68,74	0,3643
+chr1	32502572	32503560	JUNC00002870	1	+	32502572	32503560	255,0,0	2	22,54	0,934
+chr1	32502571	32503507	JUNC00002871	48	+	32502571	32503507	255,0,0	2	73,72	0,864
+chr1	32503572	32504220	JUNC00002872	45	+	32503572	32504220	255,0,0	2	65,68	0,580
+chr1	32503695	32504219	JUNC00002873	3	+	32503695	32504219	255,0,0	2	25,67	0,457
+chr1	32504152	32505174	JUNC00002874	41	+	32504152	32505174	255,0,0	2	68,59	0,963
+chr1	32505115	32508202	JUNC00002875	43	+	32505115	32508202	255,0,0	2	59,75	0,3012
+chr1	32508245	32510977	JUNC00002876	4	+	32508245	32510977	255,0,0	2	75,46	0,2686
+chr2	48010576	48018138	JUNC00057360	5	+	48010576	48018138	255,0,0	2	56,73	0,7489
+chr2	48028222	48030628	JUNC00057361	15	+	48028222	48030628	255,0,0	2	72,70	0,2336
+chr2	48030789	48032107	JUNC00057362	5	+	48030789	48032107	255,0,0	2	35,59	0,1259
+chr2	48032104	48032810	JUNC00057363	3	+	48032104	48032810	255,0,0	2	62,54	0,652
+chr2	48032781	48033370	JUNC00057364	12	+	48032781	48033370	255,0,0	2	65,28	0,561
+chr2	48033454	48033654	JUNC00057365	10	+	48033454	48033654	255,0,0	2	43,64	0,136
+chr2	48033733	48033987	JUNC00057366	2	+	48033733	48033987	255,0,0	2	57,70	0,184
+chr2	48035311	48035520	JUNC00057367	9	-	48035311	48035520	255,0,0	2	75,53	0,156
+chr5	112197081	112198267	JUNC00080003	4	+	112197081	112198267	255,0,0	2	33,63	0,1123
+chr5	112198248	112200197	JUNC00080004	1	+	112198248	112200197	255,0,0	2	32,44	0,1905
+chr5	112200165	112200377	JUNC00080005	3	+	112200165	112200377	255,0,0	2	60,60	0,152
+chr5	112200186	112227336	JUNC00080006	1	+	112200186	112227336	255,0,0	2	39,37	0,27113
+chr5	112200355	112203167	JUNC00080007	23	+	112200355	112203167	255,0,0	2	74,67	0,2745
+chr7	140439692	140449108	JUNC00096155	1	-	140439692	140449108	255,0,0	2	54,22	0,9394
+chr7	140482938	140487384	JUNC00096156	2	-	140482938	140487384	255,0,0	2	19,37	0,4409
+chr7	140487347	140494120	JUNC00096157	2	-	140487347	140494120	255,0,0	2	37,13	0,6760
+chr7	140494238	140500208	JUNC00096158	1	-	140494238	140500208	255,0,0	2	29,47	0,5923
+chr7	140706282	140710284	JUNC00096159	15	-	140706282	140710284	255,0,0	2	53,66	0,3936
+chr9	86584235	86585148	JUNC00101237	14	-	86584235	86585148	255,0,0	2	60,72	0,841
+chr9	86584288	86585148	JUNC00101238	5	-	86584288	86585148	255,0,0	2	67,72	0,788
+chr9	86585177	86585724	JUNC00101239	171	-	86585177	86585724	255,0,0	2	69,73	0,474
+chr9	86585666	86585827	JUNC00101240	80	-	86585666	86585827	255,0,0	2	68,16	0,145
+chr9	86585811	86586262	JUNC00101241	121	-	86585811	86586262	255,0,0	2	16,75	0,376
+chr17	37856490	37863316	JUNC00043382	57	+	37856490	37863316	255,0,0	2	74,74	0,6752
+chr17	37863341	37864648	JUNC00043383	28	+	37863341	37864648	255,0,0	2	53,75	0,1232
+chr17	37864380	37864622	JUNC00043384	1	+	37864380	37864622	255,0,0	2	27,49	0,193
+chr17	37864713	37865643	JUNC00043385	20	+	37864713	37865643	255,0,0	2	74,73	0,857
+chr17	37865631	37866134	JUNC00043386	29	+	37865631	37866134	255,0,0	2	74,69	0,434
+chr17	37866065	37866413	JUNC00043387	29	+	37866065	37866413	255,0,0	2	69,75	0,273
+chr17	37866380	37866667	JUNC00043388	56	+	37866380	37866667	255,0,0	2	74,75	0,212
+chr17	37866659	37868249	JUNC00043389	68	+	37866659	37868249	255,0,0	2	75,69	0,1521
+chr17	37868237	37868649	JUNC00043390	36	+	37868237	37868649	255,0,0	2	63,75	0,337
+chr17	37868627	37871602	JUNC00043391	24	+	37868627	37871602	255,0,0	2	74,64	0,2911
+chr17	37871542	37871773	JUNC00043392	76	+	37871542	37871773	255,0,0	2	70,75	0,156
+chr17	37871718	37872065	JUNC00043393	56	+	37871718	37872065	255,0,0	2	71,73	0,274
+chr17	37872121	37872628	JUNC00043394	51	+	37872121	37872628	255,0,0	2	71,75	0,432
+chr17	37872629	37872839	JUNC00043395	38	+	37872629	37872839	255,0,0	2	57,72	0,138
+chr17	37872783	37873647	JUNC00043396	93	+	37872783	37873647	255,0,0	2	75,75	0,789
+chr17	37873658	37876087	JUNC00043397	55	+	37873658	37876087	255,0,0	2	75,48	0,2381
+chr17	37876039	37879645	JUNC00043398	31	+	37876039	37879645	255,0,0	2	48,74	0,3532
+chr17	37879639	37879863	JUNC00043399	72	+	37879639	37879863	255,0,0	2	71,73	0,151
+chr17	37879822	37880201	JUNC00043400	5	+	37879822	37880201	255,0,0	2	49,37	0,342
+chr17	37879841	37880239	JUNC00043401	37	+	37879841	37880239	255,0,0	2	72,75	0,323
+chr17	37880190	37881051	JUNC00043402	94	+	37880190	37881051	255,0,0	2	73,73	0,788
+chr17	37881089	37881375	JUNC00043403	50	+	37881089	37881375	255,0,0	2	75,74	0,212
+chr17	37881384	37881651	JUNC00043404	101	+	37881384	37881651	255,0,0	2	73,72	0,195
+chr17	37881584	37882026	JUNC00043405	117	+	37881584	37882026	255,0,0	2	71,67	0,375
+chr17	37882031	37882885	JUNC00043406	117	+	37882031	37882885	255,0,0	2	75,71	0,783
+chr17	37882838	37883141	JUNC00043407	80	+	37882838	37883141	255,0,0	2	74,74	0,229
+chr17	37883186	37883619	JUNC00043408	43	+	37883186	37883619	255,0,0	2	70,72	0,361
+chr17	37883725	37884015	JUNC00043409	279	+	37883725	37884015	255,0,0	2	75,74	0,216
+chr17	37885789	37885996	JUNC00043410	16	-	37885789	37885996	255,0,0	2	69,59	0,148
+chr17	7572938	7573998	JUNC00041578	29	-	7572938	7573998	255,0,0	2	70,72	0,988
+chr17	7573960	7576926	JUNC00041579	28	-	7573960	7576926	255,0,0	2	73,74	0,2892
+chr17	7576852	7577086	JUNC00041580	35	-	7576852	7577086	255,0,0	2	74,68	0,166
+chr17	7577084	7577572	JUNC00041581	49	-	7577084	7577572	255,0,0	2	71,74	0,414
+chr17	7577534	7578239	JUNC00041582	30	-	7577534	7578239	255,0,0	2	74,63	0,642
+chr17	7578221	7578441	JUNC00041583	18	-	7578221	7578441	255,0,0	2	68,71	0,149
+chr17	7578480	7579369	JUNC00041584	19	-	7578480	7579369	255,0,0	2	74,58	0,831
+chr17	7579515	7579724	JUNC00041585	35	-	7579515	7579724	255,0,0	2	75,25	0,184
+chr17	7579699	7579894	JUNC00041586	25	-	7579699	7579894	255,0,0	2	22,56	0,139
+chr17	7579874	7590761	JUNC00041587	29	-	7579874	7590761	255,0,0	2	66,67	0,10820
+chr17	7591825	7592027	JUNC00041588	6	+	7591825	7592027	255,0,0	2	54,62	0,140
Binary file test-data/hg19/procodingseq.RData has changed
Binary file test-data/hg19/proseq.RData has changed
Binary file test-data/hg19/splicemax.RData has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19/test1.vcf	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,87 @@
+##fileformat=VCFv4.1
+##samtoolsVersion=0.1.17 (r973:277)
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
+##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">
+##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele frequency (assuming HWE)">
+##INFO=<ID=AC1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele count (no HWE assumption)">
+##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">
+##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">
+##INFO=<ID=CLR,Number=1,Type=Integer,Description="Log ratio of genotype likelihoods with and without the constraint">
+##INFO=<ID=UGT,Number=1,Type=String,Description="The most probable unconstrained genotype configuration in the trio">
+##INFO=<ID=CGT,Number=1,Type=String,Description="The most probable constrained genotype configuration in the trio">
+##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">
+##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">
+##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">
+##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
+##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">
+##FORMAT=<ID=PL,Number=-1,Type=Integer,Description="List of Phred-scaled genotype likelihoods, number of values is (#ALT+1)*(#ALT+2)/2">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	test
+chr1	32386425	.	T	C	24	.	DP=3;AF1=1;AC1=2;DP4=0,0,0,3;MQ=50;FQ=-36	GT:PL:GQ	1/1:56,9,0:15
+chr1	32507666	.	G	T	6.2	.	DP=5;AF1=0.4999;AC1=1;DP4=3,0,2,0;MQ=50;FQ=8.65;PV4=1,0.062,1,0.36	GT:PL:GQ	0/1:35,0,78:36
+chr1	32524459	.	A	C	3.54	.	DP=5;AF1=0.4998;AC1=1;DP4=1,2,0,2;MQ=50;FQ=5.47;PV4=1,0.0021,1,1	GT:PL:GQ	0/1:31,0,98:30
+chr1	32622505	.	G	A	101	.	DP=18;AF1=0.5;AC1=1;DP4=10,0,5,2;MQ=50;FQ=104;PV4=0.15,0.0055,1,0.0075	GT:PL:GQ	0/1:131,0,162:99
+chr12	25357574	.	CAA	C	109	.	INDEL;DP=5;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-46.5	GT:PL:GQ	1/1:149,12,0:21
+chr12	25357628	.	TA	T	53.4	.	INDEL;DP=3;AF1=1;AC1=2;DP4=0,0,3,0;MQ=50;FQ=-43.5	GT:PL:GQ	1/1:93,9,0:16
+chr12	25358650	.	A	T	73	.	DP=38;AF1=1;AC1=2;DP4=0,0,15,0;MQ=50;FQ=-72	GT:PL:GQ	1/1:106,45,0:87
+chr12	25358662	.	CTTTTTTTT	CTTTTTT,CTTTTTTT	31.6	.	INDEL;DP=38;AF1=1;AC1=2;DP4=0,0,15,0;MQ=50;FQ=-52.5	GT:PL:GQ	1/1:96,42,24,91,0,64:33
+chr12	25358943	.	T	C	67.1	.	DP=7;AF1=1;AC1=2;DP4=0,0,7,0;MQ=50;FQ=-48	GT:PL:GQ	1/1:100,21,0:39
+chr12	25358969	.	T	G	36.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:69,12,0:21
+chr12	25359352	.	G	A	172	.	DP=16;AF1=1;AC1=2;DP4=0,0,14,0;MQ=50;FQ=-69	GT:PL:GQ	1/1:205,42,0:81
+chr12	25359464	.	TAA	TA	123	.	INDEL;DP=26;AF1=1;AC1=2;DP4=0,0,26,0;MQ=50;FQ=-113	GT:PL:GQ	1/1:164,78,0:99
+chr12	25360138	.	T	C	169	.	DP=21;AF1=1;AC1=2;DP4=0,0,19,0;MQ=50;FQ=-84	GT:PL:GQ	1/1:202,57,0:99
+chr12	25361091	.	T	C	93	.	DP=15;AF1=1;AC1=2;DP4=0,0,14,0;MQ=50;FQ=-69	GT:PL:GQ	1/1:126,42,0:81
+chr12	25362217	.	A	G	179	.	DP=20;AF1=1;AC1=2;DP4=0,0,20,0;MQ=50;FQ=-87	GT:PL:GQ	1/1:212,60,0:99
+chr12	25362465	.	G	A	38.3	.	DP=18;AF1=1;AC1=2;DP4=0,0,5,0;MQ=50;FQ=-42	GT:PL:GQ	1/1:71,15,0:27
+chr12	25362552	.	A	C	156	.	DP=10;AF1=1;AC1=2;DP4=0,0,10,0;MQ=50;FQ=-57	GT:PL:GQ	1/1:189,30,0:57
+chr12	25364387	.	T	C	21	.	DP=7;AF1=0.5;AC1=1;DP4=3,1,0,2;MQ=50;FQ=24;PV4=0.4,1,1,1	GT:PL:GQ	0/1:51,0,112:54
+chr12	25368462	.	C	T	112	.	DP=7;AF1=1;AC1=2;DP4=0,0,7,0;MQ=50;FQ=-48	GT:PL:GQ	1/1:145,21,0:39
+chr17	37866082	.	G	A	141	.	DP=45;AF1=0.5;AC1=1;DP4=25,0,20,0;MQ=50;FQ=144;PV4=1,1,1,1	GT:PL:GQ	0/1:171,0,180:99
+chr17	37870047	.	A	G	30	.	DP=3;AF1=0.5008;AC1=1;DP4=0,1,2,0;MQ=50;FQ=-4.12;PV4=0.33,1,1,1	GT:PL:GQ	0/1:60,0,25:28
+chr17	37879466	.	G	A	7.8	.	DP=3;AF1=0.5001;AC1=1;DP4=1,0,1,1;MQ=50;FQ=4.79;PV4=1,0.064,1,1	GT:PL:GQ	0/1:37,0,31:34
+chr17	37885332	.	G	A	83.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,0,4;MQ=50;FQ=-39	GT:PL:GQ	1/1:116,12,0:21
+chr17	37898543	.	T	C	165	.	DP=26;AF1=1;AC1=2;DP4=0,0,21,0;MQ=50;FQ=-90	GT:PL:GQ	1/1:198,63,0:99
+chr17	7530271	.	C	T	143	.	DP=71;AF1=0.5;AC1=1;DP4=32,2,34,0;MQ=50;FQ=146;PV4=0.49,4.1e-06,1,1	GT:PL:GQ	0/1:173,0,238:99
+chr17	7572657	.	G	T	225	.	DP=122;AF1=0.5;AC1=1;DP4=59,0,58,3;MQ=50;FQ=163;PV4=0.24,0.06,1,0.27	GT:PL:GQ	0/1:255,0,190:99
+chr17	7591866	.	G	T	45	.	DP=14;AF1=0.5;AC1=1;DP4=10,0,4,0;MQ=50;FQ=48;PV4=1,0.062,1,0.01	GT:PL:GQ	0/1:75,0,162:78
+chr17	7606153	.	C	T	74	.	DP=16;AF1=0.5;AC1=1;DP4=10,0,5,0;MQ=50;FQ=77;PV4=1,0.12,1,1	GT:PL:GQ	0/1:104,0,165:99
+chr2	48010558	.	C	A	12.3	.	DP=7;AF1=0.5002;AC1=1;DP4=1,0,2,0;MQ=50;FQ=5.23;PV4=1,0.065,1,1	GT:PL:GQ	0/1:42,0,31:34
+chr2	48016554	.	T	C	32	.	DP=4;AF1=0.5;AC1=1;DP4=1,1,2,0;MQ=50;FQ=20.9;PV4=1,1,1,0.21	GT:PL:GQ	0/1:62,0,48:51
+chr2	48018081	.	A	G	77	.	DP=6;AF1=0.501;AC1=1;DP4=1,0,4,0;MQ=50;FQ=-4.75;PV4=1,1,1,0.34	GT:PL:GQ	0/1:107,0,24:27
+chr2	48018221	.	C	T	22	.	DP=7;AF1=0.5;AC1=1;DP4=5,0,2,0;MQ=50;FQ=25;PV4=1,1,1,1	GT:PL:GQ	0/1:52,0,116:55
+chr2	48027990	.	G	T	122	.	DP=17;AF1=0.5;AC1=1;DP4=7,0,9,0;MQ=50;FQ=104;PV4=1,1,1,0.039	GT:PL:GQ	0/1:152,0,131:99
+chr2	48030458	.	G	C	105	.	DP=4;AF1=1;AC1=2;DP4=0,0,3,1;MQ=50;FQ=-39	GT:PL:GQ	1/1:137,12,0:21
+chr5	112154737	.	CT	C	29	.	INDEL;DP=5;AF1=0.5;AC1=1;DP4=3,0,2,0;MQ=50;FQ=32;PV4=1,1,1,0.0012	GT:PL:GQ	0/1:59,0,85:62
+chr5	112162854	.	T	C	60	.	DP=3;AF1=1;AC1=2;DP4=0,0,3,0;MQ=50;FQ=-36	GT:PL:GQ	1/1:92,9,0:16
+chr5	112164561	.	G	A	87.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:120,12,0:21
+chr5	112175639	.	C	T	31	.	DP=4;AF1=0.5;AC1=1;DP4=2,0,2,0;MQ=50;FQ=31.5;PV4=1,0.21,1,1	GT:PL:GQ	0/1:61,0,62:61
+chr5	112175897	.	GAA	GA	7.8	.	INDEL;DP=7;AF1=0.5;AC1=1;DP4=4,0,3,0;MQ=50;FQ=10.4;PV4=1,0.0018,1,0.33	GT:PL:GQ	0/1:37,0,97:39
+chr5	112176559	.	T	G	72	.	DP=11;AF1=1;AC1=2;DP4=0,0,8,0;MQ=50;FQ=-51	GT:PL:GQ	1/1:105,24,0:45
+chr5	112176756	.	T	A	143	.	DP=10;AF1=1;AC1=2;DP4=0,0,9,0;MQ=50;FQ=-54	GT:PL:GQ	1/1:176,27,0:51
+chr5	112180015	.	C	A	123	.	DP=11;AF1=0.5;AC1=1;DP4=3,0,8,0;MQ=50;FQ=40;PV4=1,1,1,1	GT:PL:GQ	0/1:153,0,67:70
+chr5	112204170	.	G	A	112	.	DP=5;AF1=1;AC1=2;DP4=0,0,2,2;MQ=50;FQ=-39	GT:PL:GQ	1/1:144,12,0:21
+chr7	140043303	.	C	T	88	.	DP=18;AF1=0.5;AC1=1;DP4=11,0,7,0;MQ=50;FQ=91;PV4=1,0.00034,1,0.42	GT:PL:GQ	0/1:118,0,167:99
+chr7	140065806	.	T	C	38.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:71,12,0:21
+chr7	140065845	.	C	A	44.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:77,12,0:21
+chr7	140152904	.	CAAAA	CAAAAA	41.5	.	INDEL;DP=42;AF1=0.5;AC1=1;DP4=23,0,16,0;MQ=50;FQ=44.2;PV4=1,1,1,1	GT:PL:GQ	0/1:79,0,91:82
+chr7	140153495	.	G	T	140	.	DP=52;AF1=0.5;AC1=1;DP4=28,0,22,0;MQ=50;FQ=143;PV4=1,0.082,1,0.00038	GT:PL:GQ	0/1:170,0,184:99
+chr7	140158851	.	C	G	153	.	DP=36;AF1=1;AC1=2;DP4=1,0,33,0;MQ=50;FQ=-102;PV4=1,1,1,1	GT:PL:GQ	1/1:186,75,0:99
+chr7	140244560	.	C	T	78	.	DP=6;AF1=0.5013;AC1=1;DP4=1,0,4,0;MQ=50;FQ=-5.45;PV4=1,1,1,1	GT:PL:GQ	0/1:108,0,23:26
+chr7	140406430	.	T	A	8.64	.	DP=27;AF1=0.5;AC1=1;DP4=19,0,6,0;MQ=50;FQ=11.3;PV4=1,1,1,0.00021	GT:PL:GQ	0/1:38,0,167:40
+chr7	140406436	.	A	G	4.77	.	DP=14;AF1=0.4999;AC1=1;DP4=12,0,2,0;MQ=50;FQ=6.99;PV4=1,1,1,0.08	GT:PL:GQ	0/1:33,0,170:33
+chr7	140424582	.	G	C	18.1	.	DP=5;AF1=0.5;AC1=1;DP4=0,2,1,2;MQ=50;FQ=20.4;PV4=1,0.00086,1,1	GT:PL:GQ	0/1:48,0,56:50
+chr7	140426098	.	G	A	10.2	.	DP=3;AF1=1;AC1=2;DP4=0,0,2,0;MQ=50;FQ=-33	GT:PL:GQ	1/1:41,6,0:8
+chr7	140702871	.	G	A	77.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:110,12,0:21
+chr7	140706061	.	G	T	119	.	DP=74;AF1=0.5;AC1=1;DP4=45,0,22,0;MQ=50;FQ=122;PV4=1,1,1,1	GT:PL:GQ	0/1:149,0,178:99
+chr7	140706157	.	G	T	42	.	DP=25;AF1=0.5;AC1=1;DP4=13,0,9,0;MQ=50;FQ=45;PV4=1,5.7e-11,1,0.013	GT:PL:GQ	0/1:72,0,170:75
+chr9	86583076	.	C	T	64	.	DP=17;AF1=0.5;AC1=1;DP4=5,0,11,0;MQ=50;FQ=66;PV4=1,1.4e-08,1,1	GT:PL:GQ	0/1:94,0,100:96
+chr9	86593314	.	G	C	186	.	DP=203;AF1=0.5;AC1=1;DP4=100,0,99,0;MQ=50;FQ=186;PV4=1,1,1,0.072	GT:PL:GQ	0/1:216,0,216:99
+chr9	86595070	.	C	T	140	.	DP=93;AF1=0.5;AC1=1;DP4=53,0,38,0;MQ=50;FQ=143;PV4=1,0.43,1,1	GT:PL:GQ	0/1:170,0,188:99
+chr9	86595498	.	G	A	66	.	DP=128;AF1=0.5;AC1=1;DP4=50,2,76,0;MQ=50;FQ=69;PV4=0.16,6e-81,1,1	GT:PL:GQ	0/1:96,0,225:99
Binary file test-data/hg19/test1_sort.bam has changed
Binary file test-data/hg19/test1_sort.bam.bai has changed
Binary file test-data/hg19/txdb.sqlite has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_dbsnp_snv.fasta	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,4 @@
+>NP_000170_T139I,E956D |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
+MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPIRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLDKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
+>NP_001120983_rs121913332:R1432*,rs459552:V1804D |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTK
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_dbsnp_variant_annotation.rdata	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,4 @@
+>NP_000170_T139I,E956D |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
+MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPIRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLDKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
+>NP_001120983_rs121913332:R1432*,rs459552:V1804D |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTK
Binary file test-data/hg19_dbsnp_variant_annotation.sqlite has changed
Binary file test-data/hg19_genomic_mapping.sqlite has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_indel.fasta	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,4 @@
+>NP_001120983_954:CT>C |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a|
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLACLAPKTAVYPCDSLDVFLSSSSFYMAMTKTLYCWEIPGAVKRLGPGPVQHSTTSFTHSLMTREAGVKSESFIFWNRYALTVKPVGSGRKLMNQAWTRTKIQCQLLLNIRSVLLCVF
+>NP_001120983_4552:GAA>GA |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a|
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTKREVPKNKAPTAEKRESGPKQAAVNAAVQRVQVLPDADTLLHFATESTPDGFSCSSSLSALSLDEPFIQKDVELRIMPPVQENDNGNEQNQSSLKNQMKTKRKRQKKLLILKRTY
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_rpkm0.fasta	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,12 @@
+>NP_004439 |121102.4845|NM_004448|ERBB2|receptor tyrosine-protein kinase erbB-2 isoform a precursor
+MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV
+>NP_000170 |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
+MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPTRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLEKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
+>NP_001120983 |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTKREVPKNKAPTAEKRESGPKQAAVNAAVQRVQVLPDADTLLHFATESTPDGFSCSSSLSALSLDEPFIQKDVELRIMPPVQENDNGNETESEQPKESNENQEKEAEKTIDSEKDLLDDSDDDDIEILEECIISAMPTKSSRKAKKPAQTASKLPPPVARKPSQLPVYKLLPSQNRLQPQKHVSFTPGDDMPRVYCVEGTPINFSTATSLSDLTIESPPNELAAGEGVRGGAQSGEFEKRDTIPTEGRSTDEAQGGKTSSVTIPELDDNKAEEGDILAECINSAMPKGKSHKPFRVKKIMDQVQQASASSSAPNKNQLDGKKKKPTSPVKPIPQNTEYRTRVRKNADSKNNLNAERVFSDNKDSKKQNLKNNSKVFNDKLPNNEDRVRGSFAFDSPHHYTPIEGTPYCFSRNDSLSSLDFDDDDVDLSREKAELRKAKENKESEAKVTSHTELTSNQQSANKTQAIAKQPINRGQPKPILQKQSTFPQSSKDIPDRGAATDEKLQNFAIENTPVCFSHNSSLSSLSDIDQENNNKENEPIKETEPPDSQGEPSKPQASGYAPKSFHVEDT
PVCFSRNSSLSSLSIDSEDDLLQECISSAMPKKKKPSRLKGDNEKHSPRNMGGILGEDLTLDLKDIQRPDSEHGLSPDSENFDWKAIQEGANSIVSSLHQAAAAACLSRQASSDSDSILSLKSGISLGSPFHLTPDQEEKPFTSNKGPRILKPGEKSTLETKKIESESKGIKGGKKVYKSLITGKVRSNSEISGQMKQPLQANMPSISRGRTMIHIPGVRNSSSSTSPVSKKGPPLKTPASKSPSEGQTATTSPRGAKPSVKSELSPVARQTSQIGGSSKAPSRSGSRDSTPSRPAQQPLSRPIQSPGRNSISPGRNGISPPNKLSQLPRTSSPSTASTKSSGSGKMSYTSPGRQMSQQNLTKQTGLSKNASSIPRSESASKGLNQMNNGNGANKKVELSRMSSTKSSGSESDRSERPVLVRQSTFIKEAPSPTLRRKLEESASFESLSPSSRPASPTRSQAQTPVLSPSLPDMSLSTHSSVQAGGWRKLPPNLSPTIEYNDGRPAKRHDIARSHSESPSRLPINRSGTWKREHSKHSSSLPRVSTWRRTGSSSSILSASSESSEKAKSEDEKHVNSISGTKQSKENQVSAKGTWRKIKENEFSPTNSTSQTVSSGATNGAESKTLIYQMAPAVSKTEDVWVRIEDCPINNPRSGRSPTGNTPPVIDSVSEKANPNIKDSKDNQAKQNVGNGSVPMRTVGLENRLNSFIQVDAPDQKGTEIKPGQNNPVPVSETNESSIVERTPFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSSADSTSARPSQIPTPVNNNTKKRDSKTDSTESSGTQSPKRHSGSYLVTSV
+>NP_001119584 |0|NM_001126112|TP53|cellular tumor antigen p53 isoform a
+MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD
+>NP_004324 |0|NM_004333|BRAF|serine/threonine-protein kinase B-raf
+MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEHIEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTVTSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDSLKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRKTFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPIPQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQRDRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSPGPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDVAVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHHLHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATVKSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNINNRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARSLPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH
+>NP_203524 |0|NM_033360|KRAS|GTPase KRas isoform a precursor
+MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_rpkm1.fasta	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,6 @@
+>NP_004439 |121102.4845|NM_004448|ERBB2|receptor tyrosine-protein kinase erbB-2 isoform a precursor
+MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV
+>NP_000170 |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
+MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPTRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLEKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
+>NP_001120983 |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTKREVPKNKAPTAEKRESGPKQAAVNAAVQRVQVLPDADTLLHFATESTPDGFSCSSSLSALSLDEPFIQKDVELRIMPPVQENDNGNETESEQPKESNENQEKEAEKTIDSEKDLLDDSDDDDIEILEECIISAMPTKSSRKAKKPAQTASKLPPPVARKPSQLPVYKLLPSQNRLQPQKHVSFTPGDDMPRVYCVEGTPINFSTATSLSDLTIESPPNELAAGEGVRGGAQSGEFEKRDTIPTEGRSTDEAQGGKTSSVTIPELDDNKAEEGDILAECINSAMPKGKSHKPFRVKKIMDQVQQASASSSAPNKNQLDGKKKKPTSPVKPIPQNTEYRTRVRKNADSKNNLNAERVFSDNKDSKKQNLKNNSKVFNDKLPNNEDRVRGSFAFDSPHHYTPIEGTPYCFSRNDSLSSLDFDDDDVDLSREKAELRKAKENKESEAKVTSHTELTSNQQSANKTQAIAKQPINRGQPKPILQKQSTFPQSSKDIPDRGAATDEKLQNFAIENTPVCFSHNSSLSSLSDIDQENNNKENEPIKETEPPDSQGEPSKPQASGYAPKSFHVEDT
PVCFSRNSSLSSLSIDSEDDLLQECISSAMPKKKKPSRLKGDNEKHSPRNMGGILGEDLTLDLKDIQRPDSEHGLSPDSENFDWKAIQEGANSIVSSLHQAAAAACLSRQASSDSDSILSLKSGISLGSPFHLTPDQEEKPFTSNKGPRILKPGEKSTLETKKIESESKGIKGGKKVYKSLITGKVRSNSEISGQMKQPLQANMPSISRGRTMIHIPGVRNSSSSTSPVSKKGPPLKTPASKSPSEGQTATTSPRGAKPSVKSELSPVARQTSQIGGSSKAPSRSGSRDSTPSRPAQQPLSRPIQSPGRNSISPGRNGISPPNKLSQLPRTSSPSTASTKSSGSGKMSYTSPGRQMSQQNLTKQTGLSKNASSIPRSESASKGLNQMNNGNGANKKVELSRMSSTKSSGSESDRSERPVLVRQSTFIKEAPSPTLRRKLEESASFESLSPSSRPASPTRSQAQTPVLSPSLPDMSLSTHSSVQAGGWRKLPPNLSPTIEYNDGRPAKRHDIARSHSESPSRLPINRSGTWKREHSKHSSSLPRVSTWRRTGSSSSILSASSESSEKAKSEDEKHVNSISGTKQSKENQVSAKGTWRKIKENEFSPTNSTSQTVSSGATNGAESKTLIYQMAPAVSKTEDVWVRIEDCPINNPRSGRSPTGNTPPVIDSVSEKANPNIKDSKDNQAKQNVGNGSVPMRTVGLENRLNSFIQVDAPDQKGTEIKPGQNNPVPVSETNESSIVERTPFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSSADSTSARPSQIPTPVNNNTKKRDSKTDSTESSGTQSPKRHSGSYLVTSV
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_rpkm1000.fasta	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,4 @@
+>NP_004439 |121102.48|NM_004448|ERBB2|receptor tyrosine-protein kinase erbB-2 isoform a precursor
+MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV
+>NP_000170 |18647.78|NM_000179|MSH6|DNA mismatch repair protein Msh6
+MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPTRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLEKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_snv.fasta	Fri May 12 13:17:40 2017 -0400
@@ -0,0 +1,4 @@
+>NP_000170_T139I,E956D |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
+MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPIRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLDKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
+>NP_001120983_R1432*,V1804D |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
+MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTK
Binary file test-data/hg19_variant_annotation.rdata has changed
Binary file test-data/hg19_variant_annotation.sqlite has changed
Binary file test-data/ids.RData has changed
Binary file test-data/procodingseq.RData has changed
Binary file test-data/proseq.RData has changed
--- a/test-data/test1.vcf	Tue Mar 14 14:14:38 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-##fileformat=VCFv4.1
-##samtoolsVersion=0.1.17 (r973:277)
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
-##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
-##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
-##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">
-##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele frequency (assuming HWE)">
-##INFO=<ID=AC1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele count (no HWE assumption)">
-##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">
-##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">
-##INFO=<ID=CLR,Number=1,Type=Integer,Description="Log ratio of genotype likelihoods with and without the constraint">
-##INFO=<ID=UGT,Number=1,Type=String,Description="The most probable unconstrained genotype configuration in the trio">
-##INFO=<ID=CGT,Number=1,Type=String,Description="The most probable constrained genotype configuration in the trio">
-##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
-##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
-##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">
-##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">
-##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">
-##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
-##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">
-##FORMAT=<ID=PL,Number=-1,Type=Integer,Description="List of Phred-scaled genotype likelihoods, number of values is (#ALT+1)*(#ALT+2)/2">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	test
-chr1	32386425	.	T	C	24	.	DP=3;AF1=1;AC1=2;DP4=0,0,0,3;MQ=50;FQ=-36	GT:PL:GQ	1/1:56,9,0:15
-chr1	32507666	.	G	T	6.2	.	DP=5;AF1=0.4999;AC1=1;DP4=3,0,2,0;MQ=50;FQ=8.65;PV4=1,0.062,1,0.36	GT:PL:GQ	0/1:35,0,78:36
-chr1	32524459	.	A	C	3.54	.	DP=5;AF1=0.4998;AC1=1;DP4=1,2,0,2;MQ=50;FQ=5.47;PV4=1,0.0021,1,1	GT:PL:GQ	0/1:31,0,98:30
-chr1	32622505	.	G	A	101	.	DP=18;AF1=0.5;AC1=1;DP4=10,0,5,2;MQ=50;FQ=104;PV4=0.15,0.0055,1,0.0075	GT:PL:GQ	0/1:131,0,162:99
-chr12	25357574	.	CAA	C	109	.	INDEL;DP=5;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-46.5	GT:PL:GQ	1/1:149,12,0:21
-chr12	25357628	.	TA	T	53.4	.	INDEL;DP=3;AF1=1;AC1=2;DP4=0,0,3,0;MQ=50;FQ=-43.5	GT:PL:GQ	1/1:93,9,0:16
-chr12	25358650	.	A	T	73	.	DP=38;AF1=1;AC1=2;DP4=0,0,15,0;MQ=50;FQ=-72	GT:PL:GQ	1/1:106,45,0:87
-chr12	25358662	.	CTTTTTTTT	CTTTTTT,CTTTTTTT	31.6	.	INDEL;DP=38;AF1=1;AC1=2;DP4=0,0,15,0;MQ=50;FQ=-52.5	GT:PL:GQ	1/1:96,42,24,91,0,64:33
-chr12	25358943	.	T	C	67.1	.	DP=7;AF1=1;AC1=2;DP4=0,0,7,0;MQ=50;FQ=-48	GT:PL:GQ	1/1:100,21,0:39
-chr12	25358969	.	T	G	36.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:69,12,0:21
-chr12	25359352	.	G	A	172	.	DP=16;AF1=1;AC1=2;DP4=0,0,14,0;MQ=50;FQ=-69	GT:PL:GQ	1/1:205,42,0:81
-chr12	25359464	.	TAA	TA	123	.	INDEL;DP=26;AF1=1;AC1=2;DP4=0,0,26,0;MQ=50;FQ=-113	GT:PL:GQ	1/1:164,78,0:99
-chr12	25360138	.	T	C	169	.	DP=21;AF1=1;AC1=2;DP4=0,0,19,0;MQ=50;FQ=-84	GT:PL:GQ	1/1:202,57,0:99
-chr12	25361091	.	T	C	93	.	DP=15;AF1=1;AC1=2;DP4=0,0,14,0;MQ=50;FQ=-69	GT:PL:GQ	1/1:126,42,0:81
-chr12	25362217	.	A	G	179	.	DP=20;AF1=1;AC1=2;DP4=0,0,20,0;MQ=50;FQ=-87	GT:PL:GQ	1/1:212,60,0:99
-chr12	25362465	.	G	A	38.3	.	DP=18;AF1=1;AC1=2;DP4=0,0,5,0;MQ=50;FQ=-42	GT:PL:GQ	1/1:71,15,0:27
-chr12	25362552	.	A	C	156	.	DP=10;AF1=1;AC1=2;DP4=0,0,10,0;MQ=50;FQ=-57	GT:PL:GQ	1/1:189,30,0:57
-chr12	25364387	.	T	C	21	.	DP=7;AF1=0.5;AC1=1;DP4=3,1,0,2;MQ=50;FQ=24;PV4=0.4,1,1,1	GT:PL:GQ	0/1:51,0,112:54
-chr12	25368462	.	C	T	112	.	DP=7;AF1=1;AC1=2;DP4=0,0,7,0;MQ=50;FQ=-48	GT:PL:GQ	1/1:145,21,0:39
-chr17	37866082	.	G	A	141	.	DP=45;AF1=0.5;AC1=1;DP4=25,0,20,0;MQ=50;FQ=144;PV4=1,1,1,1	GT:PL:GQ	0/1:171,0,180:99
-chr17	37870047	.	A	G	30	.	DP=3;AF1=0.5008;AC1=1;DP4=0,1,2,0;MQ=50;FQ=-4.12;PV4=0.33,1,1,1	GT:PL:GQ	0/1:60,0,25:28
-chr17	37879466	.	G	A	7.8	.	DP=3;AF1=0.5001;AC1=1;DP4=1,0,1,1;MQ=50;FQ=4.79;PV4=1,0.064,1,1	GT:PL:GQ	0/1:37,0,31:34
-chr17	37885332	.	G	A	83.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,0,4;MQ=50;FQ=-39	GT:PL:GQ	1/1:116,12,0:21
-chr17	37898543	.	T	C	165	.	DP=26;AF1=1;AC1=2;DP4=0,0,21,0;MQ=50;FQ=-90	GT:PL:GQ	1/1:198,63,0:99
-chr17	7530271	.	C	T	143	.	DP=71;AF1=0.5;AC1=1;DP4=32,2,34,0;MQ=50;FQ=146;PV4=0.49,4.1e-06,1,1	GT:PL:GQ	0/1:173,0,238:99
-chr17	7572657	.	G	T	225	.	DP=122;AF1=0.5;AC1=1;DP4=59,0,58,3;MQ=50;FQ=163;PV4=0.24,0.06,1,0.27	GT:PL:GQ	0/1:255,0,190:99
-chr17	7591866	.	G	T	45	.	DP=14;AF1=0.5;AC1=1;DP4=10,0,4,0;MQ=50;FQ=48;PV4=1,0.062,1,0.01	GT:PL:GQ	0/1:75,0,162:78
-chr17	7606153	.	C	T	74	.	DP=16;AF1=0.5;AC1=1;DP4=10,0,5,0;MQ=50;FQ=77;PV4=1,0.12,1,1	GT:PL:GQ	0/1:104,0,165:99
-chr2	48010558	.	C	A	12.3	.	DP=7;AF1=0.5002;AC1=1;DP4=1,0,2,0;MQ=50;FQ=5.23;PV4=1,0.065,1,1	GT:PL:GQ	0/1:42,0,31:34
-chr2	48016554	.	T	C	32	.	DP=4;AF1=0.5;AC1=1;DP4=1,1,2,0;MQ=50;FQ=20.9;PV4=1,1,1,0.21	GT:PL:GQ	0/1:62,0,48:51
-chr2	48018081	.	A	G	77	.	DP=6;AF1=0.501;AC1=1;DP4=1,0,4,0;MQ=50;FQ=-4.75;PV4=1,1,1,0.34	GT:PL:GQ	0/1:107,0,24:27
-chr2	48018221	.	C	T	22	.	DP=7;AF1=0.5;AC1=1;DP4=5,0,2,0;MQ=50;FQ=25;PV4=1,1,1,1	GT:PL:GQ	0/1:52,0,116:55
-chr2	48027990	.	G	T	122	.	DP=17;AF1=0.5;AC1=1;DP4=7,0,9,0;MQ=50;FQ=104;PV4=1,1,1,0.039	GT:PL:GQ	0/1:152,0,131:99
-chr2	48030458	.	G	C	105	.	DP=4;AF1=1;AC1=2;DP4=0,0,3,1;MQ=50;FQ=-39	GT:PL:GQ	1/1:137,12,0:21
-chr5	112154737	.	CT	C	29	.	INDEL;DP=5;AF1=0.5;AC1=1;DP4=3,0,2,0;MQ=50;FQ=32;PV4=1,1,1,0.0012	GT:PL:GQ	0/1:59,0,85:62
-chr5	112162854	.	T	C	60	.	DP=3;AF1=1;AC1=2;DP4=0,0,3,0;MQ=50;FQ=-36	GT:PL:GQ	1/1:92,9,0:16
-chr5	112164561	.	G	A	87.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:120,12,0:21
-chr5	112175639	.	C	T	31	.	DP=4;AF1=0.5;AC1=1;DP4=2,0,2,0;MQ=50;FQ=31.5;PV4=1,0.21,1,1	GT:PL:GQ	0/1:61,0,62:61
-chr5	112175897	.	GAA	GA	7.8	.	INDEL;DP=7;AF1=0.5;AC1=1;DP4=4,0,3,0;MQ=50;FQ=10.4;PV4=1,0.0018,1,0.33	GT:PL:GQ	0/1:37,0,97:39
-chr5	112176559	.	T	G	72	.	DP=11;AF1=1;AC1=2;DP4=0,0,8,0;MQ=50;FQ=-51	GT:PL:GQ	1/1:105,24,0:45
-chr5	112176756	.	T	A	143	.	DP=10;AF1=1;AC1=2;DP4=0,0,9,0;MQ=50;FQ=-54	GT:PL:GQ	1/1:176,27,0:51
-chr5	112180015	.	C	A	123	.	DP=11;AF1=0.5;AC1=1;DP4=3,0,8,0;MQ=50;FQ=40;PV4=1,1,1,1	GT:PL:GQ	0/1:153,0,67:70
-chr5	112204170	.	G	A	112	.	DP=5;AF1=1;AC1=2;DP4=0,0,2,2;MQ=50;FQ=-39	GT:PL:GQ	1/1:144,12,0:21
-chr7	140043303	.	C	T	88	.	DP=18;AF1=0.5;AC1=1;DP4=11,0,7,0;MQ=50;FQ=91;PV4=1,0.00034,1,0.42	GT:PL:GQ	0/1:118,0,167:99
-chr7	140065806	.	T	C	38.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:71,12,0:21
-chr7	140065845	.	C	A	44.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:77,12,0:21
-chr7	140152904	.	CAAAA	CAAAAA	41.5	.	INDEL;DP=42;AF1=0.5;AC1=1;DP4=23,0,16,0;MQ=50;FQ=44.2;PV4=1,1,1,1	GT:PL:GQ	0/1:79,0,91:82
-chr7	140153495	.	G	T	140	.	DP=52;AF1=0.5;AC1=1;DP4=28,0,22,0;MQ=50;FQ=143;PV4=1,0.082,1,0.00038	GT:PL:GQ	0/1:170,0,184:99
-chr7	140158851	.	C	G	153	.	DP=36;AF1=1;AC1=2;DP4=1,0,33,0;MQ=50;FQ=-102;PV4=1,1,1,1	GT:PL:GQ	1/1:186,75,0:99
-chr7	140244560	.	C	T	78	.	DP=6;AF1=0.5013;AC1=1;DP4=1,0,4,0;MQ=50;FQ=-5.45;PV4=1,1,1,1	GT:PL:GQ	0/1:108,0,23:26
-chr7	140406430	.	T	A	8.64	.	DP=27;AF1=0.5;AC1=1;DP4=19,0,6,0;MQ=50;FQ=11.3;PV4=1,1,1,0.00021	GT:PL:GQ	0/1:38,0,167:40
-chr7	140406436	.	A	G	4.77	.	DP=14;AF1=0.4999;AC1=1;DP4=12,0,2,0;MQ=50;FQ=6.99;PV4=1,1,1,0.08	GT:PL:GQ	0/1:33,0,170:33
-chr7	140424582	.	G	C	18.1	.	DP=5;AF1=0.5;AC1=1;DP4=0,2,1,2;MQ=50;FQ=20.4;PV4=1,0.00086,1,1	GT:PL:GQ	0/1:48,0,56:50
-chr7	140426098	.	G	A	10.2	.	DP=3;AF1=1;AC1=2;DP4=0,0,2,0;MQ=50;FQ=-33	GT:PL:GQ	1/1:41,6,0:8
-chr7	140702871	.	G	A	77.5	.	DP=4;AF1=1;AC1=2;DP4=0,0,4,0;MQ=50;FQ=-39	GT:PL:GQ	1/1:110,12,0:21
-chr7	140706061	.	G	T	119	.	DP=74;AF1=0.5;AC1=1;DP4=45,0,22,0;MQ=50;FQ=122;PV4=1,1,1,1	GT:PL:GQ	0/1:149,0,178:99
-chr7	140706157	.	G	T	42	.	DP=25;AF1=0.5;AC1=1;DP4=13,0,9,0;MQ=50;FQ=45;PV4=1,5.7e-11,1,0.013	GT:PL:GQ	0/1:72,0,170:75
-chr9	86583076	.	C	T	64	.	DP=17;AF1=0.5;AC1=1;DP4=5,0,11,0;MQ=50;FQ=66;PV4=1,1.4e-08,1,1	GT:PL:GQ	0/1:94,0,100:96
-chr9	86593314	.	G	C	186	.	DP=203;AF1=0.5;AC1=1;DP4=100,0,99,0;MQ=50;FQ=186;PV4=1,1,1,0.072	GT:PL:GQ	0/1:216,0,216:99
-chr9	86595070	.	C	T	140	.	DP=93;AF1=0.5;AC1=1;DP4=53,0,38,0;MQ=50;FQ=143;PV4=1,0.43,1,1	GT:PL:GQ	0/1:170,0,188:99
-chr9	86595498	.	G	A	66	.	DP=128;AF1=0.5;AC1=1;DP4=50,2,76,0;MQ=50;FQ=69;PV4=0.16,6e-81,1,1	GT:PL:GQ	0/1:96,0,225:99
Binary file test-data/test1_sort.bam has changed
Binary file test-data/test1_sort.bam.bai has changed
--- a/test-data/test_indel.fasta	Tue Mar 14 14:14:38 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
->NP_001120983_954:CT>C |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a|
-MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLACLAPKTAVYPCDSLDVFLSSSSFYMAMTKTLYCWEIPGAVKRLGPGPVQHSTTSFTHSLMTREAGVKSESFIFWNRYALTVKPVGSGRKLMNQAWTRTKIQCQLLLNIRSVLLCVF
->NP_001120983_4552:GAA>GA |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a|
-MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTKREVPKNKAPTAEKRESGPKQAAVNAAVQRVQVLPDADTLLHFATESTPDGFSCSSSLSALSLDEPFIQKDVELRIMPPVQENDNGNEQNQSSLKNQMKTKRKRQKKLLILKRTY
--- a/test-data/test_rpkm.fasta	Tue Mar 14 14:14:38 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
->NP_004439 |121102.4845|NM_004448|ERBB2|receptor tyrosine-protein kinase erbB-2 isoform a precursor
-MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV
->NP_000170 |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
-MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPTRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLEKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
->NP_001120983 |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
-MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTKREVPKNKAPTAEKRESGPKQAAVNAAVQRVQVLPDADTLLHFATESTPDGFSCSSSLSALSLDEPFIQKDVELRIMPPVQENDNGNETESEQPKESNENQEKEAEKTIDSEKDLLDDSDDDDIEILEECIISAMPTKSSRKAKKPAQTASKLPPPVARKPSQLPVYKLLPSQNRLQPQKHVSFTPGDDMPRVYCVEGTPINFSTATSLSDLTIESPPNELAAGEGVRGGAQSGEFEKRDTIPTEGRSTDEAQGGKTSSVTIPELDDNKAEEGDILAECINSAMPKGKSHKPFRVKKIMDQVQQASASSSAPNKNQLDGKKKKPTSPVKPIPQNTEYRTRVRKNADSKNNLNAERVFSDNKDSKKQNLKNNSKVFNDKLPNNEDRVRGSFAFDSPHHYTPIEGTPYCFSRNDSLSSLDFDDDDVDLSREKAELRKAKENKESEAKVTSHTELTSNQQSANKTQAIAKQPINRGQPKPILQKQSTFPQSSKDIPDRGAATDEKLQNFAIENTPVCFSHNSSLSSLSDIDQENNNKENEPIKETEPPDSQGEPSKPQASGYAPKSFHVEDT
PVCFSRNSSLSSLSIDSEDDLLQECISSAMPKKKKPSRLKGDNEKHSPRNMGGILGEDLTLDLKDIQRPDSEHGLSPDSENFDWKAIQEGANSIVSSLHQAAAAACLSRQASSDSDSILSLKSGISLGSPFHLTPDQEEKPFTSNKGPRILKPGEKSTLETKKIESESKGIKGGKKVYKSLITGKVRSNSEISGQMKQPLQANMPSISRGRTMIHIPGVRNSSSSTSPVSKKGPPLKTPASKSPSEGQTATTSPRGAKPSVKSELSPVARQTSQIGGSSKAPSRSGSRDSTPSRPAQQPLSRPIQSPGRNSISPGRNGISPPNKLSQLPRTSSPSTASTKSSGSGKMSYTSPGRQMSQQNLTKQTGLSKNASSIPRSESASKGLNQMNNGNGANKKVELSRMSSTKSSGSESDRSERPVLVRQSTFIKEAPSPTLRRKLEESASFESLSPSSRPASPTRSQAQTPVLSPSLPDMSLSTHSSVQAGGWRKLPPNLSPTIEYNDGRPAKRHDIARSHSESPSRLPINRSGTWKREHSKHSSSLPRVSTWRRTGSSSSILSASSESSEKAKSEDEKHVNSISGTKQSKENQVSAKGTWRKIKENEFSPTNSTSQTVSSGATNGAESKTLIYQMAPAVSKTEDVWVRIEDCPINNPRSGRSPTGNTPPVIDSVSEKANPNIKDSKDNQAKQNVGNGSVPMRTVGLENRLNSFIQVDAPDQKGTEIKPGQNNPVPVSETNESSIVERTPFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSSADSTSARPSQIPTPVNNNTKKRDSKTDSTESSGTQSPKRHSGSYLVTSV
--- a/test-data/test_snv.fasta	Tue Mar 14 14:14:38 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
->NP_000170_T139I,E956D |18647.7757|NM_000179|MSH6|DNA mismatch repair protein Msh6
-MSRQSTLYSFFPKSPALSDANKASARASREGGRAAAAPGASPSPGGDAAWSEAGPGPRPLARSASPPKAKNLNGGLRRSVAPAAPTSCDFSPGDLVWAKMEGYPWWPCLVYNHPFDGTFIREKGKSVRVHVQFFDDSPIRGWVSKRLLKPYTGSKSKEAQKGGHFYSAKPEILRAMQRADEALNKDKIKRLELAVCDEPSEPEEEEEMEVGTTYVTDKSEEDNEIESEEEVQPKTQGSRRSSRQIKKRRVISDSESDIGGSDVEFKPDTKEEGSSDEISSGVGDSESEGLNSPVKVARKRKRMVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLDKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL
->NP_001120983_R1432*,V1804D |665.1509|NM_001127511|APC|adenomatous polyposis coli protein isoform a
-MYASLGSGPVAPLPASVPPSVLGSWSTGGSRSCVRQETKSPGGARTSGHWASVWQEVLKQLQGSIEDEAMASSGQIDLLERLKELNLDSSNFPGVKLRSKMSLRSYGSREGSVSSRSGECSPVPMGSFPRRGFVNGSRESTGYLEELEKERSLLLADLDKEEKEKDWYYAQLQNLTKRIDSLPLTENFSLQTDMTRRQLEYEARQIRVAMEEQLGTCQDMEKRAQRSSQNKHETGSHDAERQNEGQGVGEINMATSGNGQGSTTRMDHETASVLSSSSTHSAPRRLTSHLGTKVEMVYSLLSMLGTHDKDDMSRTLLAMSSSQDSCISMRQSGCLPLLIQLLHGNDKDSVLLGNSRGSKEARARASAALHNIIHSQPDDKRGRREIRVLHLLEQIRAYCETCWEWQEAHEPGMDQDKNPMPAPVEHQICPAVCVLMKLSFDEEHRHAMNELGGLQAIAELLQVDCEMYGLTNDHYSITLRRYAGMALTNLTFGDVANKATLCSMKGCMRALVAQLKSESEDLQQVIASVLRNLSWRADVNSKKTLREVGSVKALMECALEVKKESTLKSVLSALWNLSAHCTENKADICAVDGALAFLVGTLTYRSQTNTLAIIESGGGILRNVSSLIATNEDHRQILRENNCLQTLLQHLKSHSLTIVSNACGTLWNLSARNPKDQEALWDMGAVSMLKNLIHSKHKMIAMGSAAALRNLMANRPAKYKDANIMSPGSSLPSLHVRKQKALEAELDAQHLSETFDNIDNLSPKASHRSKQRHKQSLYGDYVFDTNRHDDNRSDNFNTGNMTVLSPYLNTTVLPSSSSSRGSLDSSRSEKDRSLERERGIGLGNYHPATENPGTSSKRGLQISTTAAQIAKVMEEVSAIHTSQEDRSSGSTTELHCVTDERNALRRSSAAHTHSNTYNFTKSENSNRTCSMPYAKLEYKRSSNDSLNSVSSSDGYGKRGQMKPSIESYSEDDESKFCSYGQYPADLAHKIHSANHMDDNDGELDTPINYSLKYSDEQLNSGRQSPSQNERWARPKHIIEDEIKQSEQRQSRNQSTTYPVYTESTDDKHLKFQPHFGQQECVSPYRSRGANGSETNRVGSNHGINQNVSQSLCQEDDYEDDKPTNYSERYSEEEQHEEEERPTNYSIKYNEEKRHVDQPIDYSLKYATDIPSSQKQSFSFSKSSSGQSSKTEHMSSSSENTSTPSSNAKRQNQLHPSSAQSRSGQPQKAATCKVSSINQETIQTYCVEDTPICFSRCSSLSSLSSAEDEIGCNQTTQEADSANTLQIAEIKEKIGTRSAEDPVSEVPAVSQHPRTKSSRLQGSSLSSESARHKAVEFSSGAKSPSKSGAQTPKSPPEHYVQETPLMFSRCTSVSSLDSFESRSIASSVQSEPCSGMVSGIISPSDLPDSPGQTMPPSRSKTPPPPPQTAQTK