Repository 'manta'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/manta

Changeset 0:42ba283a0fe2 (2020-05-13)
Next changeset 1:c35d9902100e (2020-05-13)
Commit message:
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit e6c5d87dcd848fc4910af968e73adc481c811d15"
added:
README.rst
configManta.py.ini
customized.ini
manta.xml
manta_macros.xml
test-data/HCC1954_normal.bai
test-data/HCC1954_normal.bam
test-data/HCC1954_tumor.bai
test-data/HCC1954_tumor.bam
test-data/all_fasta.loc
test-data/cached_locally/all_fasta.loc
test-data/cached_locally/cached_region.fa
test-data/cached_locally/cached_region.fa.fai
test-data/candidateSV.vcf.gz
test-data/candidateSmallIndels.vcf.gz
test-data/hg19_region.fa
test-data/hg19_region.fa.fai
tool-data/all_fasta.loc.sample
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r 42ba283a0fe2 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,10 @@
+# Wrapper of the variant caller 'MANTA', for use as a Galaxy-based tool:
+
+Run the following commands in a terminal:
+
+planemo s
+
+Open in your browser:
+
+http://127.0.0.1:9090/
+
b
diff -r 000000000000 -r 42ba283a0fe2 configManta.py.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/configManta.py.ini Wed May 13 15:15:07 2020 -0400
[
@@ -0,0 +1,58 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow,
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size
+# Separate option (to provide different default) used for runs in RNA-mode
+minCandidateVariantSize = 8
+rnaMinCandidateVariantSize = 1000
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
+# Set to 0 to turn this filtration off
+graphNodeMaxEdgeCount = 10
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 50
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
+
+# Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote
+# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
+# can help to fully assemble longer insertions. Under certain circumstances this feature can add a very large runtime
+# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
+# retrieval process can be unpredictable. For this reason the feature is disabled by default for somatic variant calling.
+# This feature can be enabled/disabled separately for germline and cancer calling below.
+#
+# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
+# all other calling modes.
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+
+# Set if an overlapping read pair will be considered as evidence
+# Set to 0 to skip overlapping read pairs
+useOverlapPairEvidence = 0
b
diff -r 000000000000 -r 42ba283a0fe2 customized.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/customized.ini Wed May 13 15:15:07 2020 -0400
[
@@ -0,0 +1,58 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow,
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size
+# Separate option (to provide different default) used for runs in RNA-mode
+minCandidateVariantSize = 8
+rnaMinCandidateVariantSize = 1000
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
+# Set to 0 to turn this filtration off
+graphNodeMaxEdgeCount = 10
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 50
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
+
+# Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote
+# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
+# can help to fully assemble longer insertions. Under certain circumstances this feature can add a very large runtime
+# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
+# retrieval process can be unpredictable. For this reason the feature is disabled by default for somatic variant calling.
+# This feature can be enabled/disabled separately for germline and cancer calling below.
+#
+# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
+# all other calling modes.
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+
+# Set if an overlapping read pair will be considered as evidence
+# Set to 0 to skip overlapping read pairs
+useOverlapPairEvidence = 0
b
diff -r 000000000000 -r 42ba283a0fe2 manta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/manta.xml Wed May 13 15:15:07 2020 -0400
[
b'@@ -0,0 +1,324 @@\n+\xef\xbb\xbf<tool id="manta" name="Manta" version="@WRAPPER_VERSION@">\n+\n+    <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description>\n+\n+    <macros>\n+        <import>manta_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    <expand macro="stdio"/>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+    @VERSION@\n+    @pipefail@\n+    @set_reference_fasta_filename@\n+\n+    #import os\n+    #import random\n+    #set job_dir=os.getcwd()\n+    #set run_dir = job_dir + \'/MantaWorkflow_\' + (\' \' + str(random.randint(1,100000))).strip()\n+    #set config_file = $__tool_directory__ + \'/configManta.py.ini\'\n+    #set config_file_custom = $__tool_directory__ + \'/customized.ini\'   \n+    #set $input_normal = \'normal.bam\'\n+    #set $input_tumor = \'tumor.bam\'\n+\n+    #if str( $bam_input.bam_input_selector ) == "not_tumor_bam":\n+    ln -s \'$bam_input.normal_bam_file\' $input_normal &&\n+    ln -s \'$bam_input.normal_bam_file.metadata.bam_index\' normal.bai &&\n+    #else if str( $bam_input.bam_input_selector ) == "tumor_bam":\n+    ln -s \'$bam_input.normal_bam_file\' $input_normal &&\n+    ln -s \'$bam_input.normal_bam_file.metadata.bam_index\' normal.bai &&\n+    ln -s \'$bam_input.tumor_bam_file\' $input_tumor &&\n+    ln -s \'$bam_input.tumor_bam_file.metadata.bam_index\' tumor.bai &&\n+    #end if\n+\n+    cp ${config_file} ${config_file_custom} &&\n+\n+    #if str( $set_configuration.set_configuration_switch ) == "Customized":\n+    sed -i \'s/minCandidateVariantSize = 8/minCandidateVariantSize = $set_configuration.minCandidateVariantSize/\' ${config_file_custom} &&\n+    sed -i \'s/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = $set_configuration.rnaMinCandidateVariantSize/\' ${config_file_custom} &&\n+    sed -i \'s/minEdgeObservations = 3/minEdgeObservations = $set_configuration.minEdgeObservations/\' 
${config_file_custom} &&\n+    sed -i \'s/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = $set_configuration.graphNodeMaxEdgeCount/\' ${config_file_custom} &&\n+    sed -i \'s/minCandidateSpanningCount = 3/minCandidateSpanningCount = $set_configuration.minCandidateSpanningCount/\' ${config_file_custom} &&\n+    sed -i \'s/minScoredVariantSize = 50/minScoredVariantSize = $set_configuration.minScoredVariantSize/\' ${config_file_custom} &&\n+    sed -i \'s/minDiploidVariantScore = 10/minDiploidVariantScore = $set_configuration.minDiploidVariantScore/\' ${config_file_custom} &&\n+    sed -i \'s/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = $set_configuration.minPassDiploidVariantScore/\' ${config_file_custom} &&\n+    sed -i \'s/minPassDiploidGTScore = 15/minPassDiploidGTScore = $set_configuration.minPassDiploidGTScore/\' ${config_file_custom} &&\n+    sed -i \'s/minSomaticScore = 10/minSomaticScore = $set_configuration.minSomaticScore/\' ${config_file_custom} &&\n+    sed -i \'s/minPassSomaticScore = 30/minPassSomaticScore = $set_configuration.minPassSomaticScore/\' ${config_file_custom} &&\n+    sed -i \'s/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes/\' ${config_file_custom} &&\n+    sed -i \'s/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes/\' ${config_file_custom} &&\n+    sed -i \'s/useOverlapPairEvidence = 0/useOverlapPairEvidence = $set_configuration.useOverlapPairEvidence/\' ${config_file_custom} &&\n+    #end if\n+\n+    configManta.py\n+    --referenceFasta=\'${reference_fasta_filename}\'\n+\n+    #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file":\n+    #set config_file = 
$set_configuration.CustomConfigFile\n+    #else if str( $set_configuration.set_configuration_switch ) == "Customized":\n+    #set config_file = config_file_custom\n+    #end i'..b'e inputs will be treated as each BAM\n+                        file representing a different sample. [optional] (no\n+                        default)\n+    --tumorBam=FILE, --tumourBam=FILE\n+                        Tumor sample BAM or CRAM file. Only up to one tumor\n+                        bam file accepted. [optional] (no default)\n+    --exome             Set options for WES input: turn off depth filters\n+    --rna               Set options for RNA-Seq input. Must specify exactly\n+                        one bam input file\n+    --unstrandedRNA     Set if RNA-Seq input is unstranded: Allows splice-\n+                        junctions on either strand\n+    --referenceFasta=FILE\n+                        samtools-indexed reference fasta file [required]\n+    --runDir=DIR        Name of directory to be created where all workflow\n+                        scripts and output will be written. Each analysis\n+                        requires a separate directory. (default:\n+                        MantaWorkflow)\n+    --callRegions=FILE  Optionally provide a bgzip-compressed/tabix-indexed\n+                        BED file containing the set of regions to call. No VCF\n+                        output will be provided outside of these regions. The\n+                        full genome will still be used to estimate statistics\n+                        from the input (such as expected fragment size\n+                        distribution). 
Only one BED file may be specified.\n+                        (default: call the entire genome)\n+**Extended options**\n+    These options are either unlikely to be reset after initial site\n+    configuration or only of interest for workflow development/debugging.\n+    They will not be printed here if a default exists unless --allHelp is\n+    specified\n+    --existingAlignStatsFile=FILE\n+                        Pre-calculated alignment statistics file. Skips\n+                        alignment stats calculation.\n+    --useExistingChromDepths\n+                        Use pre-calculated chromosome depths.\n+    --candidateBins=candidateBins\n+                        Provide the total number of tasks which candidate\n+                        generation  will be sub-divided into. (default: 256)\n+    --retainTempFiles   Keep all temporary files (for workflow debugging)\n+    --generateEvidenceBam\n+                        Generate a bam of supporting reads for all SVs\n+    --outputContig      Output assembled contig sequences in VCF file\n+    --scanSizeMb=INT    Maximum sequence region size (in megabases) scanned by\n+                        each task during SV Locus graph generation. (default:\n+                        12)\n+    --region=REGION     Limit the analysis to a region of the genome for\n+                        debugging purposes. If this argument is provided\n+                        multiple times all specified regions will be analyzed\n+                        together. All regions must be non-overlapping to get a\n+                        meaningful result. Examples: \'--region chr20\' (whole\n+                        chromosome), \'--region chr2:100-2000 --region\n+                        chr3:2500-3000\' (two regions)\'. 
If this option is\n+                        specified (one or more times) together with the\n+                        --callRegions BED file, then all region arguments will\n+                        be intersected with the callRegions BED track.\n+    --callMemMb=INT     Set default task memory requirement (in megabytes) for\n+                        common tasks. This may benefit an analysis of unusual\n+                        depth, chimera rate, etc.. \'Common\' tasks refers to\n+                        most compute intensive scatter-phase tasks of graph\n+                        creation and candidate generation.\n+\n+    For further info see: https://github.com/Illumina/manta\n+\n+    ]]></help>\n+\n+    <citations>\n+        <citation type="doi">10.1093/bioinformatics/btv710</citation>\n+    </citations>\n+\n+</tool>\n'
b
diff -r 000000000000 -r 42ba283a0fe2 manta_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/manta_macros.xml Wed May 13 15:15:07 2020 -0400
[
@@ -0,0 +1,96 @@
+<macros>
+
+    <token name="@VERSION@">1.6</token>
+    <token name="@WRAPPER_VERSION@">@VERSION@+galaxy2</token>
+    <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token>
+
+    <token name="@set_reference_fasta_filename@"><![CDATA[
+    #set $reference_fasta_filename = "localref.fa"
+
+    #if str( $reference_source.reference_source_selector ) == "history":
+    ln -s -f '${reference_source.ref_file}' '${reference_fasta_filename}' &&
+    samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for Manta" >&2 &&
+    #else:
+    #set $reference_fasta_filename = str( $reference_source.index.fields.path )
+    #end if
+    ]]></token>
+
+    <token name="@set_configuration_file@"><![CDATA[
+    #if str( $configuration.configuration_switch ) == "Custom_config_file":
+    #set $config_file = '$configuration.CustomConfigFile'
+    #else if str( $configuration.configuration_switch )== "Customized":
+    #set $config_file = '$configuration.Customized'
+    #else:
+    #set $config_file = $__tool_directory__ + '/configManta.py.ini'
+    #end if
+    ]]></token>
+
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.7">samtools</requirement>
+            <requirement type="package" version="@VERSION@">manta</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+            <regex match="\[bns_restore_core\] Parse error reading" />
+        </stdio>
+    </xml>
+
+    <macro name="reference_source_conditional">
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
+                <option value="cached">Use a built-in genome index</option>
+                <option value="history">Use a genome from history and build index</option>
+            </param>
+            <when value="cached">
+                <param name="index" type="select" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No indexes are available" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence"
+                    help="You can upload a FASTA sequence to the history and use it as reference" />
+            </when>
+        </conditional>
+    </macro>
+
+    <macro name="manta_configuration">
+        <conditional name="configuration">
+            <param name="configuration_switch" type="select" label="How do you want to configure manta?">
+                <option value="Custom_config_file">Upload a different config file</option>
+                <option value="Customized">Customize the options</option>
+            </param>
+            <when value="Custom_config_file">
+                <param format="ini" name="CustomConfigFile" type="data" label="config file"/>
+            </when>
+            <when value="Customized">
+                <param name="minCandidateVariantSize" type="integer" value="8" label="minCandidateVariantSize" help="Run discovery and candidate reporting for all SVs/indels at or above this size."/>
+                <param name="rnaMinCandidateVariantSize" type="integer" value="1000" label="rnaMinCandidateVariantSize" help="Separate option (to provide different default) used for runs in RNA-mode."/>
+                <param name="minEdgeObservations" type="integer" value="3" label="minEdgeObservations" help="Remove all edges from the graph unless they're supported by this many 'observations'."/>
+                <param name="graphNodeMaxEdgeCount" type="integer" value="10" label="graphNodeMaxEdgeCount" help="If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge."/>
+                <param name="minCandidateSpanningCount" type="integer" value="3" label="minCandidateSpanningCount" help="Run discovery and candidate reporting for all SVs/indels with at least this many spanning support observations."/>
+                <param name="minScoredVariantSize" type="integer" value="50" label="minScoredVariantSize" help="After candidate identification, only score and report SVs/indels at or above this size."/>
+                <param name="minDiploidVariantScore" type="integer" value="10" label="minDiploidVariantScore" help="Minimum VCF 'QUAL' score for a variant to be included in the diploid vcf."/>
+                <param name="minPassDiploidVariantScore" type="integer" value="20" label="minPassDiploidVariantScore" help="VCF 'QUAL' score below which a variant is marked as filtered in the diploid vcf."/>
+                <param name="minPassDiploidGTScore" type="integer" value="15" label="minPassDiploidGTScore" help="Minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf."/>
+                <param name="minSomaticScore" type="integer" value="10" label="minSomaticScore" help="Somatic quality scores below this level are not included in the somatic vcf."/>
+                <param name="minPassSomaticScore" type="integer" value="30" label="minPassSomaticScore" help="Somatic quality scores below this level are filtered in the somatic vcf."/>
+                <param name="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" type="integer" value="1" label="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" help="Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote locations with poor mapping quality. This feature can be enabled/disabled separately for germline and cancer calling below."/>
+                <param name="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" type="integer" value="0" label="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" help="Here 'CancerCallingModes' includes tumor-normal subtraction and tumor-only calling. 'GermlineCallingModes' includes all other calling modes."/>
+                <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
+            </when>
+        </conditional>
+    </macro>
+
+</macros>
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/HCC1954_normal.bai
b
Binary file test-data/HCC1954_normal.bai has changed
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/HCC1954_normal.bam
b
Binary file test-data/HCC1954_normal.bam has changed
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/HCC1954_tumor.bai
b
Binary file test-data/HCC1954_tumor.bai has changed
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/HCC1954_tumor.bam
b
Binary file test-data/HCC1954_tumor.bam has changed
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,16 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). 
+#This file has the format (white space characters are TAB characters):
+#
+#<unique_build_id>      <dbkey>         <display_name>  <file_path>
+#
+#So, it could look something like this:
+#
+#hg19canon      hg19            Human (Homo sapiens): hg19 Canonical            /path/to/genome/hg19/hg19canon.fa
+#hg19full       hg19            Human (Homo sapiens): hg19 Full                 /path/to/genome/hg19/hg19full.fa
+#
+#Your .loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+hg19 hg19 Human hg19 ${__HERE__}/cached_locally/cached_region.fa
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/cached_locally/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/all_fasta.loc Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,1 @@
+hg19 hg19 Human hg19 ${__HERE__}/cached_region.fa
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/cached_locally/cached_region.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/cached_region.fa Wed May 13 15:15:07 2020 -0400
b
b'@@ -0,0 +1,13426 @@\n+>1 dna:chromosome chromosome:GRCh37:1:1:249250621:1\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNN'..b'AATTGGTT\n+GAAAGAGTTATTATTAGGCCGGGCACGGTGGCTCACGCCTGTCATCCCAGCACTTTGGGA\n+AGCCAAGGCGGGCGGATCACCTGAAGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG\n+AGACCCCCGTTGCTACCAAAAATACAAAAAATTAGCTGGGAATGGTGGCAAGTGCTTGTA\n+ATCCCAGCTGTGCTGGAGGCTGAGGCAGGCGAATAGCTTGAATCCAGGAGGCGGAGGCTG\n+CAGTAAGCTGAGATCATGACACTGCACACCAGTCTGCGCAACAGAGCGAGACCCTGTCTC\n+TGAAAAAAAAAAAAAGAAGAAAAAAAGAGTTATTAGTAGAAAGGAATATCTGCATTAAGA\n+TAAGAGGATGTGGAGACGAAGGTTTTTTGTTTTTGAACGGGAGTCTCACTCTGTCTCCCA\n+GGCTGGAGTGCAATGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCG\n+ATTCTCCTGCCTCAGCCTCCCAGTAGCTGGGACTACAGGCGCGCGCCAGCACGGCTAAAT\n+GATTTTTGTATTTTTACCAGTGATGGGGTTTTGCCATGTTGGCCAGGCCGGTTTCGAACT\n+CCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGTTGAAGTGCTCGAATTACA\n+GGCGTGAGCCAGCGGGCCCCGCCCAGACCTGCATTTTAACCTCCCCTCCACCCCGCGGCC\n+CCGGGACCCTGGGCATCCGGAGGCTCACAGCGGCCCTGCTGGGATGCTCCAGGCAGATCA\n+CTGCACAGCCCTGCAGGCAGAGGGGAGGCCGTGCAGGAGGAGGGGAGGCCGTGCAGGGGG\n+AAGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAAGGGGGGCCGTGCAGGGGG\n+AAGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCAGAGGGGAAGACGTGCAGGGGG\n+AGGGGAGGCCGTGCAGGAGGAGGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCGG\n+AGGGGGGTACGTGCAGGGGGCAGCGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGG\n+GAAGGGGGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCG\n+GAGGGGGGTACGTGCAGGGGGCAGCGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGG\n+GGAGGGGAGGCCGTGCAGGGGGAAGGGGGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGG\n+GGAAGGGAGGCCGTGCAGGCGGAGGGGAAGACGTGCAGGGGGAGGGGAGGCCGTGCAGGG\n+GGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGG\n+GGAAGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCGGAGGGGGGTACGTGCAGGG\n+GGCAGCGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAAGGGGGGCCGTGCAGG\n+GGGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGG\n+GGGAGGGGGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGG\n+GGAGGGGGGGCCGTGCAGGCGGAGGGGGGCCGTGCAGGGGGAGGGGGGGCCGTGCAGGCG\n+GAGCGGGGGGCGTGCAGGGGGAGGGGAAGCCGTCCTGGGCCTTTTCCAGCTGGCTGCAGA\n+GAAGGGGCCAGCTCCCTCCTGGGGACCCGGAGCCGCGGTACAGGTGTGGTTGCTTCTCTT\n+GGAGAAAGAGGCTGAGCTGACATCCCCC
GGGCTGATAAAGAATGGGCTCCTCCTCCTGGG\n+CCCAGCAGGCTCCCGGGACCCTCCCTCCCTCCCTCCCTCCCTCCCTCCTTCCGGGCAGCA\n+GGGAAGATCTGAGTTCATGTAGCTGGTGTTGGCTTAGGGTCTGGGAGGAAGGCTTTTGGG\n+AAGATGTAAATAAGAACAAAATCTGCAGCCACCTGGGAAGCCTGGCCTCAGTGTGGAAGA\n+GAAGGCAGCAGGATTATTACAGAACCTTGTGAAGCCAACGCGGGCAGCCGCCAGGAGCTG\n+CAGACCGAGAGGATCTCGTCCTTTCTTGCGGCCCAGGGAGACCAGGCCTTTCATTCTGGG\n+CTCGAGACCAACAATTCGAATTCCGAACTCCCCCTGCGTGTGGGACTCAAGGTGGGTTTG\n+CAGTTTGCAGGCAGCTGAAGTTTGTCTCTTCTCCAGGAGGCCGGGGCTTCTTCCCTTCCT\n+CTCTGTCCCATTTCTTTTTTCTTGAGACAGAGTCTCACTTTGTCACCCAGGCTGGAGTGC\n+AATGGTGTGATTGTGGCTCACTACAGCCCCCGCCTCCCGGGTTCAAGCCTCAGCCTCCTG\n+AGTAGCTGGGATTACAGGCGTGCGCCACCACGGCCGGCTACTTTTTGTACTTTTAGTAGA\n+AATGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACCCCTGACCTCAGGTGATCCACC\n+CACCTCAGCCTCCCAAAGTGATGGGATGACAGGCGTGAGCCACCGTGCCCGGCCCCTCCA\n+GGTCTCATTTCTAAGAGGAGGCCTCAGGTCCACCAGGAAACATTCCTCAGATGTGAAACT\n+GTCAACAGGCTGATTTCTGGGCTCAAGATCAACAATTCTAATTGATTTGATTAAATCAAT\n+TAGATCTAATGATTTTAATCTAATCAGTTTTAATCTAAATGATTAAAAATCTTACATACA\n+TTGCCGGGCGTGGTGGCGGACGCCTGTAATCCCAGCTACTCTGGAGGCTGAGGCAGGAGA\n+ATTGCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATTGCATCATTGCACTCCAG\n+CCTGGGTAACAAGAGTGAAACCCTGTCTTTAAAAAAAACAAAACAAAACAAAAAAAAAAC\n+CGTACATACAGCTGGGCACCGTGGCTCACGCCAGTAATCCCAGCACTTTGGGAGGCCGAG\n+GCAGGCAGATCACCTGAGGTCAGGAGTTCAAGACTAGCCTGACCAAGATAGTGAAACCCC\n+GTCTCTACCAAAAATACAAAAATTAAGCAGGTGTGGTGGCGGGCGCCTGTAATCCCAGCT\n+ACTCTGGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCC\n+GCGATCGCGCCATTGCAGTCCAGCCTGGGCAACGAGAGGGAAACTGTGTCAAAAAAAAAA\n+AAAAAAGACCAACCAAAAAAGTTATATACACTTCAGAGGCAGAGAAAGAATTTACAAGTT\n+GTCTAAAATGTCCTTATGGAAAGGGTCACTTCCCTTATTTTCAACAGTATATTATATATA\n+TATACTTATATATGTATATATAGTGATGTATATATGTATATATGTTATGTATGTGTTATA\n+TATGTCTATATTATATATGTATATATGTTATACATGTATGTTATATATATATTATATATA\n+TATTATATATGTATATATTATATGTATAATATATATTATATGTATATATTATATATGTTA\n+TATATATGTTATGTATATAATATGTATATATGTATATATTCTGTTATGTGTGTATGTGTG\n+TGTGTGTGTGTGTGTGTAGGCGACACGGACACACGTGGAGTGGTTTTAAGGAGCGGAGAG\n+TTTAATAGGAAAGAAGGGAGGTCCGGGCAGTGGCTCACGCCTGTAATCCCAGCACCGCGG\n+AGGTTGCGGTGA
GCCGAGATCGCGCCATTTCACTGCAGCCTGGGCAACAAGAGCGAAACT\n+GCGTCTCAAAAAAAAAAAAACCAAGGCGAGAAGGCAGAAAGAAGTGGCTCCCCTGGACTG\n+AGACAGAGGGACGGGGGCTC\n'
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/cached_locally/cached_region.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/cached_region.fa.fai Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,6 @@
+1 200000 52 60 61
+2 200000 203438 60 61
+3 200000 406824 60 61
+8 1282 610161 60 61
+11 3696 611469 60 61
+X 200000 615279 60 61
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/candidateSV.vcf.gz
b
Binary file test-data/candidateSV.vcf.gz has changed
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/candidateSmallIndels.vcf.gz
b
Binary file test-data/candidateSmallIndels.vcf.gz has changed
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/hg19_region.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_region.fa Wed May 13 15:15:07 2020 -0400
b
b'@@ -0,0 +1,13426 @@\n+>1 dna:chromosome chromosome:GRCh37:1:1:249250621:1\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNN'..b'AATTGGTT\n+GAAAGAGTTATTATTAGGCCGGGCACGGTGGCTCACGCCTGTCATCCCAGCACTTTGGGA\n+AGCCAAGGCGGGCGGATCACCTGAAGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG\n+AGACCCCCGTTGCTACCAAAAATACAAAAAATTAGCTGGGAATGGTGGCAAGTGCTTGTA\n+ATCCCAGCTGTGCTGGAGGCTGAGGCAGGCGAATAGCTTGAATCCAGGAGGCGGAGGCTG\n+CAGTAAGCTGAGATCATGACACTGCACACCAGTCTGCGCAACAGAGCGAGACCCTGTCTC\n+TGAAAAAAAAAAAAAGAAGAAAAAAAGAGTTATTAGTAGAAAGGAATATCTGCATTAAGA\n+TAAGAGGATGTGGAGACGAAGGTTTTTTGTTTTTGAACGGGAGTCTCACTCTGTCTCCCA\n+GGCTGGAGTGCAATGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCG\n+ATTCTCCTGCCTCAGCCTCCCAGTAGCTGGGACTACAGGCGCGCGCCAGCACGGCTAAAT\n+GATTTTTGTATTTTTACCAGTGATGGGGTTTTGCCATGTTGGCCAGGCCGGTTTCGAACT\n+CCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGTTGAAGTGCTCGAATTACA\n+GGCGTGAGCCAGCGGGCCCCGCCCAGACCTGCATTTTAACCTCCCCTCCACCCCGCGGCC\n+CCGGGACCCTGGGCATCCGGAGGCTCACAGCGGCCCTGCTGGGATGCTCCAGGCAGATCA\n+CTGCACAGCCCTGCAGGCAGAGGGGAGGCCGTGCAGGAGGAGGGGAGGCCGTGCAGGGGG\n+AAGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAAGGGGGGCCGTGCAGGGGG\n+AAGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCAGAGGGGAAGACGTGCAGGGGG\n+AGGGGAGGCCGTGCAGGAGGAGGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCGG\n+AGGGGGGTACGTGCAGGGGGCAGCGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGG\n+GAAGGGGGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCG\n+GAGGGGGGTACGTGCAGGGGGCAGCGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGG\n+GGAGGGGAGGCCGTGCAGGGGGAAGGGGGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGG\n+GGAAGGGAGGCCGTGCAGGCGGAGGGGAAGACGTGCAGGGGGAGGGGAGGCCGTGCAGGG\n+GGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGG\n+GGAAGGGAGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGCGGAGGGGGGTACGTGCAGGG\n+GGCAGCGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAAGGGGGGCCGTGCAGG\n+GGGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGG\n+GGGAGGGGGGCCGTGCAGGGGGAAGGGAGGCCGTGCAGGGGGAGGGGAGGCCGTGCAGGG\n+GGAGGGGGGGCCGTGCAGGCGGAGGGGGGCCGTGCAGGGGGAGGGGGGGCCGTGCAGGCG\n+GAGCGGGGGGCGTGCAGGGGGAGGGGAAGCCGTCCTGGGCCTTTTCCAGCTGGCTGCAGA\n+GAAGGGGCCAGCTCCCTCCTGGGGACCCGGAGCCGCGGTACAGGTGTGGTTGCTTCTCTT\n+GGAGAAAGAGGCTGAGCTGACATCCCCC
GGGCTGATAAAGAATGGGCTCCTCCTCCTGGG\n+CCCAGCAGGCTCCCGGGACCCTCCCTCCCTCCCTCCCTCCCTCCCTCCTTCCGGGCAGCA\n+GGGAAGATCTGAGTTCATGTAGCTGGTGTTGGCTTAGGGTCTGGGAGGAAGGCTTTTGGG\n+AAGATGTAAATAAGAACAAAATCTGCAGCCACCTGGGAAGCCTGGCCTCAGTGTGGAAGA\n+GAAGGCAGCAGGATTATTACAGAACCTTGTGAAGCCAACGCGGGCAGCCGCCAGGAGCTG\n+CAGACCGAGAGGATCTCGTCCTTTCTTGCGGCCCAGGGAGACCAGGCCTTTCATTCTGGG\n+CTCGAGACCAACAATTCGAATTCCGAACTCCCCCTGCGTGTGGGACTCAAGGTGGGTTTG\n+CAGTTTGCAGGCAGCTGAAGTTTGTCTCTTCTCCAGGAGGCCGGGGCTTCTTCCCTTCCT\n+CTCTGTCCCATTTCTTTTTTCTTGAGACAGAGTCTCACTTTGTCACCCAGGCTGGAGTGC\n+AATGGTGTGATTGTGGCTCACTACAGCCCCCGCCTCCCGGGTTCAAGCCTCAGCCTCCTG\n+AGTAGCTGGGATTACAGGCGTGCGCCACCACGGCCGGCTACTTTTTGTACTTTTAGTAGA\n+AATGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACCCCTGACCTCAGGTGATCCACC\n+CACCTCAGCCTCCCAAAGTGATGGGATGACAGGCGTGAGCCACCGTGCCCGGCCCCTCCA\n+GGTCTCATTTCTAAGAGGAGGCCTCAGGTCCACCAGGAAACATTCCTCAGATGTGAAACT\n+GTCAACAGGCTGATTTCTGGGCTCAAGATCAACAATTCTAATTGATTTGATTAAATCAAT\n+TAGATCTAATGATTTTAATCTAATCAGTTTTAATCTAAATGATTAAAAATCTTACATACA\n+TTGCCGGGCGTGGTGGCGGACGCCTGTAATCCCAGCTACTCTGGAGGCTGAGGCAGGAGA\n+ATTGCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATTGCATCATTGCACTCCAG\n+CCTGGGTAACAAGAGTGAAACCCTGTCTTTAAAAAAAACAAAACAAAACAAAAAAAAAAC\n+CGTACATACAGCTGGGCACCGTGGCTCACGCCAGTAATCCCAGCACTTTGGGAGGCCGAG\n+GCAGGCAGATCACCTGAGGTCAGGAGTTCAAGACTAGCCTGACCAAGATAGTGAAACCCC\n+GTCTCTACCAAAAATACAAAAATTAAGCAGGTGTGGTGGCGGGCGCCTGTAATCCCAGCT\n+ACTCTGGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCC\n+GCGATCGCGCCATTGCAGTCCAGCCTGGGCAACGAGAGGGAAACTGTGTCAAAAAAAAAA\n+AAAAAAGACCAACCAAAAAAGTTATATACACTTCAGAGGCAGAGAAAGAATTTACAAGTT\n+GTCTAAAATGTCCTTATGGAAAGGGTCACTTCCCTTATTTTCAACAGTATATTATATATA\n+TATACTTATATATGTATATATAGTGATGTATATATGTATATATGTTATGTATGTGTTATA\n+TATGTCTATATTATATATGTATATATGTTATACATGTATGTTATATATATATTATATATA\n+TATTATATATGTATATATTATATGTATAATATATATTATATGTATATATTATATATGTTA\n+TATATATGTTATGTATATAATATGTATATATGTATATATTCTGTTATGTGTGTATGTGTG\n+TGTGTGTGTGTGTGTGTAGGCGACACGGACACACGTGGAGTGGTTTTAAGGAGCGGAGAG\n+TTTAATAGGAAAGAAGGGAGGTCCGGGCAGTGGCTCACGCCTGTAATCCCAGCACCGCGG\n+AGGTTGCGGTGA
GCCGAGATCGCGCCATTTCACTGCAGCCTGGGCAACAAGAGCGAAACT\n+GCGTCTCAAAAAAAAAAAAACCAAGGCGAGAAGGCAGAAAGAAGTGGCTCCCCTGGACTG\n+AGACAGAGGGACGGGGGCTC\n'
b
diff -r 000000000000 -r 42ba283a0fe2 test-data/hg19_region.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_region.fa.fai Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,6 @@
+1 200000 52 60 61
+2 200000 203438 60 61
+3 200000 406824 60 61
+8 1282 610161 60 61
+11 3696 611469 60 61
+X 200000 615279 60 61
b
diff -r 000000000000 -r 42ba283a0fe2 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,15 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). 
+#This file has the format (white space characters are TAB characters):
+#
+#<unique_build_id>      <dbkey>         <display_name>  <file_path>
+#
+#So, it could look something like this:
+#
+#hg19canon      hg19            Human (Homo sapiens): hg19 Canonical            /path/to/genome/hg19/hg19canon.fa
+#hg19full       hg19            Human (Homo sapiens): hg19 Full                 /path/to/genome/hg19/hg19full.fa
+#
+#Your .loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
b
diff -r 000000000000 -r 42ba283a0fe2 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 13 15:15:07 2020 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>