Repository 'deepvariant'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/deepvariant

Changeset 0:7608209110d3 (2021-09-06)
Next changeset 1:b778a18bd878 (2021-10-01)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deepvariant commit e46feb5432b28a9360a1d4e8a6618e6ed91008fe"
added:
deepvariant.xml
macros.xml
test-data/fasta_indexes.loc
test-data/output.g.vcf
test-data/output.vcf
test-data/reads.bam
test-data/reads.cram
test-data/reference.fasta
test-data/region.bed
test-data/report.html
test-data/test-cache/reference.fasta
test-data/test-cache/reference.fasta.fai
tool-data/fasta_indexes.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 7608209110d3 deepvariant.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deepvariant.xml Mon Sep 06 17:34:08 2021 +0000
[
b'@@ -0,0 +1,279 @@\n+<tool id=\'deepvariant\' name=\'DeepVariant\' version=\'@TOOL_VERSION@+galaxy@SUFFIX_VERSION@\' profile=\'20.01\'>\n+    <description>deep learning-based variant caller</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro=\'edam_ontology\' />\n+    <expand macro=\'requirements\' />\n+    <command detect_errors=\'exit_code\'><![CDATA[\n+        ln -s \'${reads}\' reads_alignment.bam\n+        && ln -s \'${reads.metadata.bam_index}\' reads_alignment.bam.bai\n+        #if $regions_conditional.regions_option == \'bed\'\n+            && ln -s \'${regions_conditional.bed_file}\' region.bed\n+        #end if\n+        #if $reference_genome.source == \'history\':\n+            #set $ref_genome = \'reference.fasta\'\n+            && ln -s -f \'${reference_genome.history_item}\' $ref_genome\n+            && samtools faidx $ref_genome\n+        #else:\n+            #set $ref_genome = $reference_genome.index.fields.path\n+        #end if\n+        && run_deepvariant\n+        --model_type=$model_type\n+        --ref=$ref_genome\n+        --reads=reads_alignment.bam\n+        --output_vcf=\'./output.vcf.gz\'\n+        #if $output_gvcf\n+            --output_gvcf=\'./output.g.vcf.gz\'\n+        #end if\n+        #if $regions_conditional.regions_option == \'region\'\n+            --regions $regions_conditional.region_literal\n+        #else if $regions_conditional.regions_option == \'bed\'\n+            --regions region.bed\n+        #end if\n+        --call_variants_extra_args="use_openvino=true" ## Setting this will use OpenVINO on Intel CPUs, which empirically reduces call_variants runtime by 15%-25%.\n+        --num_shards=\\${GALAXY_SLOTS:-2}\n+        && gunzip \'./output.vcf.gz\'\n+        #if $output_gvcf\n+            && gunzip \'./output.g.vcf.gz\'\n+        #end if\n+    ]]>    </command>\n+    <inputs>\n+        <conditional name="reference_genome">\n+            <param name="source" type="select" 
label="Source for the reference genome" help="Built-in references were created using default options.">\n+                <option value="indexed" selected="true">Use a built-in genome</option>\n+                <option value="history">Use a genome from history</option>\n+            </param>\n+            <when value="indexed">\n+                <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team.">\n+                    <options from_data_table="fasta_indexes">\n+                        <filter type="sort_by" column="2" />\n+                        <validator type="no_options" message="No genomes are available for the selected input dataset" />\n+                    </options>\n+                </param>\n+            </when>\n+            <when value="history">\n+                <param name="history_item" type="data" format="fasta" label="Reference genome" help="A reference genome in FASTA format" />\n+            </when>\n+        </conditional>\n+        <param argument="--reads" type="data" format="BAM" label="BAM file" help="An aligned reads files for child and one or two parents in BAM format. The reads must be aligned to the reference genome" />\n+        <param argument="--model_type" type="select" label="Model type" help="Type of model to use for variant calling">\n+            <option value="WGS">WGS: Illumina whole genome sequencing</option>\n+            <option value="WES">WES: Illumina whole exome sequencing</option>\n+            <option value="PACBIO">PacBio HiFi</option>\n+            <option value="HYBRID_PACBIO_ILLUMINA">Hybrid PacBio HiFi-Illumina</option>\n+        </param>\n+        <conditional name="regions_conditional">\n+            <param name="regions_option" type="select" label="Select specific regions to process" help="Restrict the analysis to specific regions. A space-separated list of chromosome regions to process. 
Individual elements can be region literals, such as chr20:10-20 or paths to BED files.">\n+                <option value="disabled" selected="T'..b'alue="reads.cram"/>\n+            <param name="model_type" value="WGS"/>\n+            <conditional name="regions_conditional">\n+                <param name="regions_option" value="disabled"/>\n+            </conditional>\n+            <output name="vcf_file" ftype="vcf">\n+                <assert_contents>\n+                    <has_text text="##fileformat=VCFv4.2"/>\n+                    <has_size value="2473"/>\n+                </assert_contents>\n+            </output>\n+            <output name="html_report" ftype="html">\n+                <assert_contents>\n+                    <has_size value="19287" delta="100"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <!-- Test indexed reference format input-->\n+        <test expect_num_outputs="2">\n+            <conditional name="reference_genome">\n+                <param name="source" value="indexed"/>\n+                <param name="index" value="phix174"/>\n+            </conditional>\n+            <param name="reads" value="reads.bam"/>\n+            <param name="model_type" value="WGS"/>\n+            <conditional name="regions_conditional">\n+                <param name="regions_option" value="disabled"/>\n+            </conditional>\n+            <output name="vcf_file" ftype="vcf">\n+                <assert_contents>\n+                    <has_text text="##fileformat=VCFv4.2"/>\n+                    <has_size value="2473"/>\n+                </assert_contents>\n+            </output>\n+            <output name="html_report" ftype="html">\n+                <assert_contents>\n+                    <has_size value="19287" delta="100"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+\n+.. 
class:: infomark\n+\n+**Purpose**\n+\n+DeepVariant is a deep learning-based variant caller that takes aligned reads (in BAM or CRAM format), produces pileup image tensors from them, classifies each tensor using a convolutional neural network, and finally reports the results in a standard VCF or gVCF file.\n+\n+DeepVariant supports germline variant-calling in diploid organisms.\n+\n+- NGS (Illumina) data for either a `whole genome <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-case-study.md>`_ or `whole exome <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-exome-case-study.md>`_.\n+- PacBio HiFi data, see the `PacBio case study <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-pacbio-model-case-study.md>`_.\n+- Hybrid PacBio HiFi + Illumina WGS, see the `hybrid case study <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-hybrid-case-study.md>`_.\n+\n+Please also note:\n+\n+For somatic data or any other samples where the genotypes go beyond two copies of DNA, DeepVariant will not work out of the box because the only genotypes supported are hom-alt, het, and hom-ref.\n+\n+The models included with DeepVariant are only trained on human data. For other organisms, see the blog post on `non-human variant-calling <https://google.github.io/deepvariant/posts/2018-12-05-improved-non-human-variant-calling-using-species-specific-deepvariant-models/>`_ for some possible pitfalls and how to handle them.\n+\n+----\n+\n+.. class:: infomark\n+\n+**How DeepVariant works**\n+\n+DeepVariant relies on `Nucleus <https://github.com/google/nucleus>`_, a library of Python and C++ code for reading and writing data in common genomics file formats (like SAM and VCF) designed for painless integration with the `TensorFlow <https://www.tensorflow.org/>`_ machine learning framework. 
Nucleus was built with DeepVariant in mind and open-sourced separately so it can be used by anyone in the genomics research community for other projects. See this blog post on `Using Nucleus and TensorFlow for DNA Sequencing Error Correction <https://google.github.io/deepvariant/posts/2019-01-31-using-nucleus-and-tensorflow-for-dna-sequencing-error-correction/>`_.\n+\n+\n+]]>    </help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 7608209110d3 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,22 @@
+<macros> <!-- Shared tokens and XML macros imported by deepvariant.xml. -->
+    <token name="@TOOL_VERSION@">1.2.0</token> <!-- Used in the tool's version attribute and as the Docker image tag below. -->
+    <token name="@SUFFIX_VERSION@">0</token> <!-- Follows "+galaxy" in the tool's version attribute. -->
+    <xml name="edam_ontology"> <!-- EDAM topic and operation annotations, expanded by deepvariant.xml. -->
+        <edam_topics>                                                                                  
+            <edam_topic>topic_0199</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_3227</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="requirements"> <!-- Runtime environment: a single Docker container, no other packages declared. -->
+        <requirements>
+            <container type="docker">google/deepvariant:@TOOL_VERSION@</container> <!-- NOTE(review): the tool command also calls "samtools faidx"; presumably this image provides samtools, confirm. -->
+        </requirements>
+    </xml>
+    <xml name="citations"> <!-- DOI citation expanded at the end of deepvariant.xml. -->
+        <citations>
+            <citation type="doi">10.1038/nbt.4235</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 7608209110d3 test-data/fasta_indexes.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,1 @@
+phix174 phiX174 PhiX174 bacteriophage ${__HERE__}/test-cache/reference.fasta
b
diff -r 000000000000 -r 7608209110d3 test-data/output.g.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.g.vcf Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,43 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##FILTER=<ID=RefCall,Description="Genotyping model thinks this site is reference.">
+##FILTER=<ID=LowQual,Description="Confidence in this variant being real is below calling threshold.">
+##FILTER=<ID=NoCall,Description="Site has depth=0 resulting in no call.">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position (for use with symbolic alleles)">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block.">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
+##FORMAT=<ID=VAF,Number=A,Type=Float,Description="Variant allele fractions.">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods rounded to the closest integer">
+##FORMAT=<ID=MED_DP,Number=1,Type=Integer,Description="Median DP observed within the GVCF block rounded to the nearest integer.">
+##DeepVariant_version=1.2.0
+##contig=<ID=K03455,length=9719>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR8525881
+K03455 1 . T <*> 0 . END=1000 GT:GQ:MIN_DP:PL 0/0:1:0:0,0,0
+K03455 1001 . G <*> 0 . END=2000 GT:GQ:MIN_DP:PL 0/0:1:0:0,0,0
+K03455 2001 . T <*> 0 . END=2597 GT:GQ:MIN_DP:PL 0/0:1:0:0,0,0
+K03455 2598 . G <*> 0 . END=2598 GT:GQ:MIN_DP:PL ./.:0:2:59,6,0
+K03455 2599 . A <*> 0 . END=2603 GT:GQ:MIN_DP:PL 0/0:6:2:0,6,59
+K03455 2604 . CCA C,<*> 5.5 PASS . GT:GQ:DP:AD:VAF:PL 1/1:5:2:0,2,0:1,0:3,15,0,990,990,990
+K03455 2607 . A <*> 0 . END=2607 GT:GQ:MIN_DP:PL 0/0:6:2:0,6,59
+K03455 2608 . A T,<*> 7.8 PASS . GT:GQ:DP:AD:VAF:PL 1/1:7:2:0,2,0:1,0:6,14,0,990,990,990
+K03455 2609 . A T,<*> 4.8 PASS . GT:GQ:DP:AD:VAF:PL 1/1:4:2:0,2,0:1,0:2,14,0,990,990,990
+K03455 2610 . G <*> 0 . END=2612 GT:GQ:MIN_DP:PL 0/0:6:2:0,6,59
+K03455 2613 . A T,<*> 1.1 RefCall . GT:GQ:DP:AD:VAF:PL ./.:7:2:0,2,0:1,0:0,19,5,990,990,990
+K03455 2614 . A <*> 0 . END=2614 GT:GQ:MIN_DP:PL 0/0:6:2:0,6,59
+K03455 2615 . A <*> 0 . END=2615 GT:GQ:MIN_DP:PL 0/0:15:5:0,15,149
+K03455 2616 . C <*> 0 . END=2616 GT:GQ:MIN_DP:PL ./.:0:7:8,0,158
+K03455 2617 . A <*> 0 . END=2617 GT:GQ:MIN_DP:PL ./.:0:6:11,0,131
+K03455 2618 . A <*> 0 . END=2618 GT:GQ:MIN_DP:PL 0/0:18:6:0,18,179
+K03455 2619 . T <*> 0 . END=2619 GT:GQ:MIN_DP:PL 0/0:21:7:0,21,209
+K03455 2620 . G <*> 0 . END=2620 GT:GQ:MIN_DP:PL 0/0:39:13:0,39,389
+K03455 2621 . G <*> 0 . END=2651 GT:GQ:MIN_DP:PL 0/0:50:49:0,147,1469
+K03455 2652 . G A,<*> 20.1 PASS . GT:GQ:DP:AD:VAF:PL 1/1:20:201:0,201,0:1,0:20,47,0,990,990,990
+K03455 2653 . T <*> 0 . END=2659 GT:GQ:MIN_DP:PL 0/0:50:201:0,300,2999
+K03455 2660 . T C,<*> 25.5 PASS . GT:GQ:DP:AD:VAF:PL 1/1:25:213:0,213,0:1,0:25,45,0,990,990,990
+K03455 2661 . T <*> 0 . END=2663 GT:GQ:MIN_DP:PL 0/0:50:213:0,300,2999
+K03455 2664 . A G,<*> 21.3 PASS . GT:GQ:DP:AD:VAF:PL 1/1:21:238:0,238,0:1,0:21,45,0,990,990,990
+K03455 2665 . C <*> 0 . END=2668 GT:GQ:MIN_DP:PL 0/0:50:240:0,300,2999
+K03455 2669 . G A,<*> 20.6 PASS . GT:GQ:DP:AD:VAF:PL 1/1:21:250:0,250,0:1,0:20,42,0,990,990,990
b
diff -r 000000000000 -r 7608209110d3 test-data/output.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.vcf Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##FILTER=<ID=RefCall,Description="Genotyping model thinks this site is reference.">
+##FILTER=<ID=LowQual,Description="Confidence in this variant being real is below calling threshold.">
+##FILTER=<ID=NoCall,Description="Site has depth=0 resulting in no call.">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position (for use with symbolic alleles)">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block.">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele">
+##FORMAT=<ID=VAF,Number=A,Type=Float,Description="Variant allele fractions.">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods rounded to the closest integer">
+##FORMAT=<ID=MED_DP,Number=1,Type=Integer,Description="Median DP observed within the GVCF block rounded to the nearest integer.">
+##DeepVariant_version=1.2.0
+##contig=<ID=K03455,length=9719>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR8525881
+K03455 2652 . G A 23.7 PASS . GT:GQ:DP:AD:VAF:PL 1/1:24:201:0,200:0.995025:23,45,0
+K03455 2660 . T C 25.3 PASS . GT:GQ:DP:AD:VAF:PL 1/1:25:213:0,213:1:25,45,0
+K03455 2664 . A G 21.4 PASS . GT:GQ:DP:AD:VAF:PL 1/1:21:238:0,238:1:21,45,0
+K03455 2669 . G A 20.3 PASS . GT:GQ:DP:AD:VAF:PL 1/1:20:258:0,250:0.968992:20,43,0
+K03455 2720 . T C 31.9 PASS . GT:GQ:DP:AD:VAF:PL 1/1:32:384:0,380:0.989583:31,47,0
+K03455 2797 . G A 39.4 PASS . GT:GQ:DP:AD:VAF:PL 1/1:39:669:0,666:0.995516:39,54,0
+K03455 2848 . T TA 41.3 PASS . GT:GQ:DP:AD:VAF:PL 1/1:39:820:85,704:0.858537:41,43,0
+K03455 2874 . C T 32 PASS . GT:GQ:DP:AD:VAF:PL 1/1:32:777:1,756:0.972973:32,45,0
+K03455 2882 . G T 33.2 PASS . GT:GQ:DP:AD:VAF:PL 1/1:33:737:9,721:0.97829:33,49,0
+K03455 2894 . T C 28.2 PASS . GT:GQ:DP:AD:VAF:PL 1/1:28:728:1,717:0.98489:28,51,0
+K03455 2906 . C T 23.7 PASS . GT:GQ:DP:AD:VAF:PL 1/1:24:653:2,650:0.995406:23,53,0
+K03455 2913 . G A 27.4 PASS . GT:GQ:DP:AD:VAF:PL 1/1:27:639:1,638:0.998435:27,53,0
+K03455 2987 . C T 16.5 PASS . GT:GQ:DP:AD:VAF:PL 1/1:16:235:0,234:0.995745:16,38,0
+K03455 3020 . A G 3.5 PASS . GT:GQ:DP:AD:VAF:PL 1/1:3:100:0,100:1:0,24,0
+K03455 3042 . A C 0 RefCall . GT:GQ:DP:AD:VAF:PL 0/0:37:12:9,3:0.25:0,40,39
b
diff -r 000000000000 -r 7608209110d3 test-data/reads.bam
b
Binary file test-data/reads.bam has changed
b
diff -r 000000000000 -r 7608209110d3 test-data/reads.cram
b
Binary file test-data/reads.cram has changed
b
diff -r 000000000000 -r 7608209110d3 test-data/reference.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fasta Mon Sep 06 17:34:08 2021 +0000
b
b'@@ -0,0 +1,2 @@\n+>K03455\n+TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGAGAAGTTAGAAGAAGCCAACAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAA
TTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAA
TCA'..b'CAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAGTAGTACATGTAACGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAG
CACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCT
CTAGCA\n'
b
diff -r 000000000000 -r 7608209110d3 test-data/region.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/region.bed Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,1 @@
+K03455 1 2669
b
diff -r 000000000000 -r 7608209110d3 test-data/report.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report.html Mon Sep 06 17:34:08 2021 +0000
[
b'@@ -0,0 +1,35 @@\n+<!DOCTYPE html>\n+<html>\n+<head>\n+  <style>\n+    .error {\n+        color: red;\n+    }\n+  </style>\n+  <script type="text/javascript" src="https://storage.googleapis.com/deepvariant/lib/vega/vega@5"></script>\n+  <script type="text/javascript" src="https://storage.googleapis.com/deepvariant/lib/vega/vega-lite@3.4.0"></script>\n+  <script type="text/javascript" src="https://storage.googleapis.com/deepvariant/lib/vega/vega-embed@4"></script>\n+</head>\n+<body>\n+  <div id="vis"></div>\n+  <script>\n+    (function(vegaEmbed) {\n+      var spec = {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}, "header": {"labelFontSize": 16, "titleFontSize": 20}, "title": {"fontSize": 20}}, "vconcat": [{"hconcat": [{"layer": [{"mark": "bar", "encoding": {"color": {"type": "nominal", "field": "label", "legend": null, "scale": {"domain": ["Biallelic_Insertion", "Biallelic_Deletion", "Biallelic_SNP", "Biallelic_MNP", "Multiallelic_Insertion", "Multiallelic_Deletion", "Multiallelic_SNP", "Multiallelic_Complex", "RefCall"], "scheme": "set1"}}, "tooltip": {"type": "quantitative", "field": "value", "format": ".4s"}, "x": {"type": "nominal", "axis": {"labelAngle": -45}, "field": "label", "sort": ["Biallelic_Insertion", "Biallelic_Deletion", "Biallelic_SNP", "Biallelic_MNP", "Multiallelic_Insertion", "Multiallelic_Deletion", "Multiallelic_SNP", "Multiallelic_Complex", "RefCall"], "title": null}, "y": {"type": "quantitative", "axis": {"format": "s", "title": "Count"}, "field": "value"}}}, {"mark": {"type": "text", "dy": -5}, "encoding": {"color": {"type": "nominal", "field": "label", "legend": null, "scale": {"domain": ["Biallelic_Insertion", "Biallelic_Deletion", "Biallelic_SNP", "Biallelic_MNP", "Multiallelic_Insertion", "Multiallelic_Deletion", "Multiallelic_SNP", "Multiallelic_Complex", "RefCall"], "scheme": "set1"}}, "text": {"type": "quantitative", "field": "value", "format": ".4s"}, "tooltip": {"type": "quantitative", "field": "value", 
"format": ".4s"}, "x": {"type": "nominal", "axis": {"labelAngle": -45}, "field": "label", "sort": ["Biallelic_Insertion", "Biallelic_Deletion", "Biallelic_SNP", "Biallelic_MNP", "Multiallelic_Insertion", "Multiallelic_Deletion", "Multiallelic_SNP", "Multiallelic_Complex", "RefCall"], "title": null}, "y": {"type": "quantitative", "axis": {"format": "s", "title": "Count"}, "field": "value"}}}], "data": {"name": "data-937b261a5c3b87122763fad938b49170"}, "height": 200, "title": "Variant types", "width": 400}, {"data": {"name": "data-b2d3f7df703b341956fcd700e0a250a9"}, "mark": {"type": "bar", "color": "#4a1486"}, "encoding": {"x": {"type": "quantitative", "field": "s", "title": "Depth"}, "x2": {"field": "e"}, "y": {"type": "quantitative", "axis": {"format": "s"}, "field": "c", "stack": true, "title": "Count"}}, "height": 200, "selection": {"selector001": {"type": "interval", "bind": "scales", "encodings": ["x"]}}, "title": "Depth", "width": 200}, {"data": {"name": "data-1fdc69f1e562b859232e2c7c67fa0330"}, "mark": {"type": "bar", "color": "#0c2c84"}, "encoding": {"x": {"type": "quantitative", "field": "s", "scale": {"domain": [0, 150]}, "title": "QUAL"}, "x2": {"field": "e"}, "y": {"type": "quantitative", "axis": {"format": "s"}, "field": "c", "stack": true, "title": "Count"}}, "height": 200, "selection": {"selector002": {"type": "interval", "bind": "scales", "encodings": ["x"]}}, "title": "Quality score", "width": 200}, {"data": {"name": "data-2eb03df08bc1de8ab65cb5fa9b73d2ca"}, "mark": {"type": "bar", "color": "#0c2c84"}, "encoding": {"x": {"type": "quantitative", "field": "s", "scale": {"domain": [0, 150]}, "title": "GQ"}, "x2": {"field": "e"}, "y": {"type": "quantitative", "axis": {"format": "s"}, "field": "c", "stack": true, "title": "Count"}}, "height": 200, "selection": {"selector003": {"type": "interval", "bind": "scales", "encodings": ["x"]}}, "title": "Genotype quality", "width": 200}], "resolve": {"scale": {"color": "independent"}}}, {"hconcat": [{"data": 
{"name": "data-3d9c2f5348720'..b' 0.36, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.36, "e": 0.38, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.38, "e": 0.4, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.4, "e": 0.42, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.42, "e": 0.44, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.44, "e": 0.46, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.46, "e": 0.48, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.48, "e": 0.5, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.5, "e": 0.52, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.52, "e": 0.54, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.54, "e": 0.56, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.56, "e": 0.58, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.58, "e": 0.6, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.6, "e": 0.62, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.62, "e": 0.64, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.64, "e": 0.66, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.66, "e": 0.68, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.68, "e": 0.7, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.7, "e": 0.72, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.72, "e": 0.74, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.74, "e": 0.76, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.76, "e": 0.78, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.78, "e": 0.8, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.8, "e": 0.82, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.82, "e": 0.84, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.84, "e": 0.86, "c": 1, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.86, "e": 0.88, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 
1.0}, {"s": 0.88, "e": 0.9, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.9, "e": 0.92, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.92, "e": 0.94, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.94, "e": 0.96, "c": 0, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.96, "e": 0.98, "c": 3, "GT": "Hom (x/x)", "g": "main", "l": 1.0}, {"s": 0.98, "e": 1.0, "c": 10, "GT": "Hom (x/x)", "g": "main", "l": 1.0}], "data-c27c47333ad35eb954c1fa8cd921aa87": [{"s": 0.0, "e": 0.5, "c": 0, "GT": "Uncalled (./.)", "g": "others", "l": null}, {"s": 0.5, "e": 1.0, "c": 0, "GT": "Uncalled (./.)", "g": "others", "l": null}, {"s": 0.0, "e": 0.5, "c": 0, "GT": "Het - two variants (x/y)", "g": "others", "l": null}, {"s": 0.5, "e": 1.0, "c": 0, "GT": "Het - two variants (x/y)", "g": "others", "l": null}], "data-da9c8bf99f8c1b3dd20c5e4c1ba410e4": [{"ref": "G", "alt": "A", "count": 4}, {"ref": "T", "alt": "C", "count": 3}, {"ref": "A", "alt": "G", "count": 2}, {"ref": "C", "alt": "T", "count": 3}, {"ref": "G", "alt": "T", "count": 1}], "data-6b58572b26c468e413c4d8f4a9e8509b": [{"label": "Transition", "value": 12}, {"label": "Transversion", "value": 1}], "data-e694159267ddba70ae3dd0d73276928e": [{"c": 1, "s": 0.5, "e": 1.5, "type": "Insertion"}]}};\n+      var embedOpt = {"downloadFileName": "output.visual_report", "mode": "vega-lite"};\n+\n+      function showError(el, error){\n+          el.innerHTML = (\'<div class="error" style="color:red;">\'\n+                          + \'<p>JavaScript Error: \' + error.message + \'</p>\'\n+                          + "<p>This usually means there\'s a typo in your chart specification. 
"\n+                          + "See the javascript console for the full traceback.</p>"\n+                          + \'</div>\');\n+          throw error;\n+      }\n+      const el = document.getElementById(\'vis\');\n+      vegaEmbed("#vis", spec, embedOpt)\n+        .catch(error => showError(el, error));\n+    })(vegaEmbed);\n+\n+  </script>\n+</body>\n+</html>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 7608209110d3 test-data/test-cache/reference.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/reference.fasta Mon Sep 06 17:34:08 2021 +0000
b
b'@@ -0,0 +1,2 @@\n+>K03455\n+TGGAAGGGCTAATTCACTCCCAACGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGAGAAGTTAGAAGAAGCCAACAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACATGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCAGTGGCGCCCGAACAGGGACCTGAAAGCGAAAGGGAAACCAGAGGAGCTCTCTCGACGCAGGACTCGGCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGCGGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAA
TTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAA
TCA'..b'CAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAGTAAGTAGTACATGTAACGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAG
CACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTAGCAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAGGGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATCCTGCATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCT
CTAGCA\n'
b
diff -r 000000000000 -r 7608209110d3 test-data/test-cache/reference.fasta.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/reference.fasta.fai Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,1 @@
+K03455 9719 8 9719 9720
b
diff -r 000000000000 -r 7608209110d3 tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
b
diff -r 000000000000 -r 7608209110d3 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Location of SAMTools indexes for FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 7608209110d3 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Sep 06 17:34:08 2021 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of FASTA index files for testing -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>