Repository 'strelka_germline'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/strelka_germline

Changeset 0:1fbe84e8a740 (2021-01-27)
Next changeset 1:19481653a22f (2021-03-02)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
added:
macros.xml
strelka_germline.xml
test-data/genome_test1.vcf
test-data/hg98.fa
test-data/hg98.fa.fai
test-data/indels_test2.vcf.gz
test-data/sample1.bam
test-data/sample1.cram
test-data/sample2.bam
test-data/sample2.cram
test-data/sample3.bam
test-data/snvs_test2.vcf.gz
test-data/test_fasta_indexes.loc
test-data/variants_test2.vcf
tool-data/fasta_indexes.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 1fbe84e8a740 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jan 27 14:48:23 2021 +0000
[
b'@@ -0,0 +1,218 @@\n+<?xml version="1.0"?>\n+<macros>\n+    <token name="@TOOL_VERSION@">2.9.10</token>\n+    <token name="@GALAXY_VERSION@">galaxy0</token>\n+    <token name="@DESCRIPTION@">small variant caller</token>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@TOOL_VERSION@">strelka</requirement>\n+            <requirement type="package" version="1.9">samtools</requirement>\n+        </requirements>\n+    </xml>\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.1038/s41592-018-0051-x</citation>\n+        </citations>\n+    </xml>\n+\n+    <!-- \n+        command\n+    -->\n+\n+    <token name="@INIT@"><![CDATA[\n+        ##ln -s \'$referenceFasta\' \'./input_ref.fasta\' &&\n+        ##samtools faidx \'./input_ref.fasta\' &&\n+\n+        ## Make all optional regions files available\n+        ## Note: all of these must be bgzipped and tabix-indexed; plain vcf/bed inputs are compressed and indexed here\n+        #set $reg_options = []\n+        #for $i, $sites in enumerate($forced_regions):\n+            #set $target_file = \'input_forcedgt_%d.vcf.gz\' % $i\n+            #if $sites.whitelist.ext == \'vcf\':\n+                bgzip -c \'${sites.whitelist}\' > $target_file &&\n+                tabix -p vcf $target_file &&\n+            #else:\n+                ln -s \'${sites.whitelist}\' $target_file &&\n+                ln -s \'${sites.whitelist.metadata.tabix_index}\' ${target_file}.tbi &&\n+            #end if\n+            #if str($sites.use_whitelist_as) == \'indel_candidates\':\n+                #silent $reg_options.extend([\'--indelCandidates\', $target_file])\n+            #else:\n+                #silent $reg_options.extend([\'--forcedGT\', $target_file])\n+            #end if\n+        #end for\n+        #if str($regions.restrict_to_region) == \'regions_from_file\':\n+            #silent $reg_options.append(\'--callRegions\')\n+            #set $target_file = \'input_callregions.bed.gz\'\n+            #if 
$regions.callRegions.ext == \'bed\':\n+                bgzip -c \'$regions.callRegions\' > $target_file &&\n+                tabix -p bed $target_file &&\n+            #else:\n+                ln -s \'$regions.callRegions\' $target_file &&\n+                ln -s \'$regions.callRegions.metadata.tabix_index\' ${target_file}.tbi &&\n+            #end if\n+            #silent $reg_options.append($target_file)\n+        #end if\n+        #set $region_spec = \' \'.join($reg_options)\n+        #if str($ref_cond.ref_sel) == \'history\':\n+            #set $reference_fasta_fn = \'input_ref.fasta\'\n+            ln -s \'$ref_cond.ref\' $reference_fasta_fn &&\n+            samtools faidx $reference_fasta_fn &&\n+        #else\n+            #set $reference_fasta_fn = str($ref_cond.ref.fields.path)\n+        #end if\n+    ]]></token>\n+    <token name="@CREATE@"><![CDATA[\n+        --config=\'$config_file\'\n+        $optimization\n+        #if str($expert_settings.evs.selector) == "disableEVS"\n+            --disableEVS\n+        #else\n+            #if $expert_settings.evs.snvScoringModelFile\n+                --snvScoringModelFile \'$expert_settings.evs.snvScoringModelFile\'\n+            #end if\n+            #if $expert_settings.evs.indelScoringModelFile\n+                --indelScoringModelFile \'$expert_settings.evs.indelScoringModelFile\'\n+            #end if\n+            $expert_settings.evs.reportEVSFeatures\n+        #end if\n+        $region_spec\n+        --referenceFasta \'${reference_fasta_fn}\'\n+        --runDir results &&\n+    ]]></token>\n+    <token name="@RUN@"><![CDATA[\n+        results/runWorkflow.py\n+            -m local\n+            -j \\${GALAXY_SLOTS:-2}\n+            -g \\${GALAXY_MEMORY_MB:-8192}\n+    ]]></token>\n+\n+    <!-- \n+        configfile - parser cannot handle indents\n+    -->\n+\n+    <token name="@CONFIG@"><![CDATA[\n+maxIndelSize = $strelka.maxIndelSize\n+isWriteRealignedBam = 0 ## not implemented\n+extraVariantCallerArguments = ## not 
implemented\n+    ]]></token>\n+\n+    <!--\n+        input \n+    -->\n+    \n+    <xml name="input_required" token_ref="normalBam">\n+        <conditional name="ref_cond">\n+       '..b'e default model" />\n+                    <param argument="--indelScoringModelFile" type="data" format="json" optional="true"\n+                    label="Optional indel scoring model to overwrite default model" />\n+                    <param argument="--reportEVSFeatures" type="boolean" truevalue="--reportEVSFeatures" falsevalue=""\n+                    label="Report all empirical variant scoring features in VCF output"\n+                    help="WARNING: Do not use this feature with Strelka Germline and more than one input sample or the tool run will fail!" />\n+                </when>\n+            </conditional>\n+        </section>\n+    </xml>\n+    <xml name="input_output">\n+        <param name="vcf_type" type="boolean" truevalue="compressed" falsevalue="decompressed"\n+        label="Generate compressed variants output (vcf.gz)"\n+        help="Default is uncompressed vcf" />\n+    </xml>\n+    <xml name="input_strelka">\n+        <param argument="maxIndelSize" name="maxIndelSize" type="integer" value="49" label="Set maximum reported indel size" help=""/>\n+    </xml>\n+\n+    <!--\n+        Help\n+    -->\n+\n+    <token name="@HELP_INPUT@">\n+*Sequencing Data*\n+\n+The input sequencing reads are expected to come from a paired-end sequencing assay. Any input other than paired-end reads are ignored by default except to double-check for putative somatic variant evidence in the normal sample during somatic variant analysis. Read lengths above ~400 bases are not tested.\n+\n+*Alignment Files*\n+\n+All input sequencing reads should be mapped by an external tool and provided as input in `BAM &lt;https://samtools.github.io/hts-specs/SAMv1.pdf&gt;`_. 
or `CRAM &lt;https://samtools.github.io/hts-specs/CRAMv3.pdf&gt;`_ format.\n+\n+The following limitations apply to the input BAM/CRAM alignment records:\n+\n+- Alignments cannot contain the "=" character in the SEQ field.\n+- RG (read group) tags are ignored -- each alignment file must represent one sample.\n+- Alignments with basecall quality values greater than 70 will trigger a runtime error (these are not supported on the assumption that the high basecall quality indicates an offset error)\n+\n+*VCF Files*\n+\n+Input `VCF &lt;http://samtools.github.io/hts-specs/VCFv4.1.pdf&gt;`_ files are accepted for a number of roles as described below. All input VCF records are checked for compatibility with the given reference genome, in addition to role-specific checks described below. If any VCF record\'s REF field is not compatible with the reference genome a runtime error will be triggered. \'Compatible with the reference genome\' means that each VCF record\'s REF base either (1) matches the corresponding reference genome base or the VCF record\'s REF base is \'N\' or the reference genome base is any ambiguous IUPAC base code (all ambiguous base codes are converted to \'N\' while importing the reference).\n+    </token>\n+    <token name="@HELP_STRELKA@">\n+Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts (Strelka Germline) and somatic variation in tumor/normal sample pairs (Strelka Somatic).\n+\n+Strelka accepts input read mappings from BAM or CRAM files, and optionally candidate and/or forced-call alleles from VCF. It reports all small variant predictions in VCF 4.1 format. Germline variant reporting uses the gVCF conventions to represent both variant and reference call confidence. For best somatic indel performance, Strelka is designed to be run with the Manta structural variant and indel caller, which provides additional indel candidates up to a given maximum indel size (by default this is 49). 
By design, Manta and Strelka run together with default settings provide complete coverage over all indel sizes (in addition to all SVs and SNVs) for clinical somatic and germline analysis scenarios.\n+    </token>\n+    <token name="@HELP_REFERENCES@"><![CDATA[\n+More information is available on `github <https://github.com/Illumina/strelka>`_.\n+    ]]></token>\n+</macros>\n'
b
diff -r 000000000000 -r 1fbe84e8a740 strelka_germline.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka_germline.xml Wed Jan 27 14:48:23 2021 +0000
[
b'@@ -0,0 +1,250 @@\n+<?xml version="1.0"?>\n+<tool id="strelka_germline" name="Strelka Germline" version="@TOOL_VERSION@+@GALAXY_VERSION@">\n+    <description>@DESCRIPTION@ for germline variation in small cohorts</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+    ## sanity check\n+    #if len($bam) > 1 and str($expert_settings.evs.selector) == "enableEVS" and $expert_settings.evs.reportEVSFeatures\n+        echo "Reporting of EVS features can only be used with a single input sample" 1>&2; exit 1\n+    #else\n+        ## initialize\n+        #set $bam_inputs = []\n+        #for $i, $s in enumerate($bam):\n+            #set $target_file = \'input_sample_%d.%s\' % ($i, $s.ext)\n+            ln -s \'$s\' $target_file &&\n+            #if $s.is_of_type(\'bam\')\n+                ln -s \'$s.metadata.bam_index\' ${target_file}.bai &&\n+            #elif $s.is_of_type(\'cram\')\n+                ln -s \'$s.metadata.cram_index\' ${target_file}.crai &&\n+            #end if\n+            #silent $bam_inputs.extend([\'--bam\', $target_file])\n+        #end for\n+        #set $bam_spec = \' \'.join($bam_inputs)\n+        ## Strelka requires both the --ploidy vcf and the --noCompress bed\n+        ## to be bgzipped and tabixed.\n+        ## Same for the --callRegions bed, but that\'s handled inside\n+        ## the shared INIT code.\n+        #if $pl.ploidy\n+            #if $pl.ploidy.ext == \'vcf\'\n+                ln -s \'$pl.ploidy\' input_ploidy.vcf &&\n+                bgzip -c input_ploidy.vcf > input_ploidy.vcf.gz &&\n+                tabix -p vcf input_ploidy.vcf.gz &&\n+            #else\n+                ## File is bgzipped and tabixed already\n+                ## -> just symlink data and index\n+                ln -s \'$pl.ploidy\' input_ploidy.vcf.gz &&\n+                ln -s \'$pl.ploidy.metadata.tabix_index\' input_ploidy.vcf.gz.tbi 
&&\n+            #end if\n+        #end if\n+        #if $oo.gvcf.emit_gvcfs == \'yes\' and $oo.gvcf.noCompress\n+            ln -s \'$oo.gvcf.noCompress\' input_nocompress.bed &&\n+            bgzip -c input_nocompress.bed > input_nocompress.bed.gz &&\n+            tabix -p bed input_nocompress.bed.gz &&\n+        #end if\n+        @INIT@\n+\n+        ## create workflow\n+        configureStrelkaGermlineWorkflow.py\n+            $bam_spec\n+            #if str($pl.callContinuousVf)\n+                --callContinuousVf \'$pl.callContinuousVf\'\n+            #end if\n+            #if $pl.ploidy\n+                --ploidy input_ploidy.vcf.gz\n+            #end if\n+            #if $oo.gvcf.emit_gvcfs == \'yes\' and $oo.gvcf.noCompress\n+                --noCompress input_nocompress.bed.gz\n+            #end if\n+            $expert_settings.s_e_e\n+            @CREATE@\n+\n+        ## run workflow\n+        @RUN@\n+\n+        ## decompress results\n+        #if $oo.vcf_type == "decompressed"\n+            ## we decompress just the main variants file\n+            ## per-sample gvcf files are always emitted as a collection of\n+            ## compressed files.\n+            && bgzip -d results/results/variants/variants.vcf.gz\n+            && mv results/results/variants/variants.vcf results/results/variants/variants_out\n+        #else\n+            && mv results/results/variants/variants.vcf.gz results/results/variants/variants_out\n+        #end if\n+    #end if\n+    ]]></command>\n+    \n+    <configfiles>\n+        <configfile name="config_file">\n+## parser cannot handle indents\n+[StrelkaGermline]\n+minMapq = $strelka.minMapq\n+@CONFIG@\n+        </configfile>\n+    </configfiles>\n+\n+    <inputs>\n+        <param argument="--bam" type="data" format="bam,cram" multiple="true" label="Select sample file(s)" help=""/>\n+        <expand macro="input_required" ref="bam"/>\n+        <expand macro="calling_model">\n+            <option value="--rna">RNA sequencing 
data (--rna)</option>\n+        </expand>\n+        <expand macro="calling_model_expert">\n+            <param name="s_e_e" type="select"'..b'ts>\n+            </output>\n+            <output_collection name="out_genome" type="list" count="3">\n+                <element name="S1" ftype="vcf_bgzip" file="genome_test1.vcf" decompress="true" compare="diff" lines_diff="8" />\n+            </output_collection>\n+        </test>\n+        <!-- #2; input cram, compressed -->\n+        <test expect_num_outputs="1">\n+            <param name="bam" value="sample1.cram,sample2.cram"/>\n+            <conditional name="ref_cond">\n+                <param name="ref_sel" value="history"/>\n+                <param name="ref" value="hg98.fa" ftype="fasta"/>\n+            </conditional>\n+            <section name="expert_settings">\n+                <param name="s_e_e" value="--disableSequenceErrorEstimation" />\n+            </section>\n+            <section name="oo">\n+                <param name="vcf_type" value="compressed"/>\n+            </section>\n+            <output name="out_variants" ftype="vcf_bgzip" file="variants_test2.vcf" decompress="true" compare="diff" lines_diff="8" />\n+        </test>\n+        <!-- #3; input cram, non-default options: RNA mode, decompressed variants plus per-sample gVCFs, continuous-VF calling on Chr1, custom minMapq/maxIndelSize -->\n+        <test expect_num_outputs="2">\n+            <param name="bam" value="sample1.cram,sample2.cram"/>\n+            <conditional name="ref_cond">\n+                <param name="ref_sel" value="history"/>\n+                <param name="ref" value="hg98.fa" ftype="fasta"/>\n+            </conditional>\n+            <param name="optimization" value="--rna" />\n+            <section name="oo">\n+                <param name="vcf_type" value="decompressed"/>\n+                <conditional name="gvcf">\n+                    <param name="emit_gvcfs" value="yes" />\n+                </conditional>\n+            </section>\n+            <section name="pl">\n+                <param name="callContinuousVf" value="Chr1"/>\n+            
</section>\n+            <section name="strelka">\n+                <param name="minMapq" value="21"/>\n+                <param name="maxIndelSize" value="51"/>\n+            </section>\n+            <output name="out_variants" ftype="vcf">\n+                <assert_contents>\n+                    <has_n_lines n="81"/>\n+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>\n+                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>\n+                </assert_contents>\n+            </output>\n+            <output_collection name="out_genome" type="list" count="2">\n+                <element name="S1" ftype="vcf_bgzip">\n+                    <assert_contents>\n+                        <has_n_lines n="219"/>\n+                        <has_line_matching expression="demo20&#009;4101&#009;.+"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+.. class:: infomark\n+\n+**What it does**\n+\n+@HELP_STRELKA@\n+\n+The germline caller employs an efficient tiered haplotype model to improve accuracy and provide read-backed phasing, adaptively selecting between assembly and a faster alignment-based haplotyping approach at each variant locus. The germline caller also analyzes input sequencing data using a mixture-model indel error estimation method to improve robustness to indel noise.\n+\n+**Input**\n+\n+@HELP_INPUT@\n+\n+**Output**\n+\n+*Variants*\n+\n+This describes all potential variant loci across all samples. Note this file includes non-variant loci if they have a non-trivial level of variant evidence or contain one or more alleles for which genotyping has been forced. Please see the multi-sample variants VCF section below for additional details on interpreting this file.\n+\n+*Genome*\n+\n+This is the genome VCF output for sample N, which includes both variant records and compressed non-variant blocks. 
The sample index, N is 1-indexed and corresponds to the input order of alignment files on the configuration command-line.\n+\n+.. class:: infomark\n+\n+**References**\n+\n+@HELP_REFERENCES@\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/genome_test1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_test1.vcf Wed Jan 27 14:48:23 2021 +0000
[
b'@@ -0,0 +1,95 @@\n+##fileformat=VCFv4.1\n+##fileDate=.\n+##source=strelka\n+##source_version=2.9.10\n+##startTime=.\n+##cmdline=./configureStrelkaGermlineWorkflow.py --bam input_sample_0.bam --bam input_sample_1.bam --bam input_sample_2.bam --disableSequenceErrorEstimation --config=/tmp/tmpmywmzdlj/job_working_directory/000/7/configs/tmpzw3187cr --referenceFasta input_ref.fasta --runDir results\n+##reference=file:///tmp/tmpmywmzdlj/job_working_directory/000/7/working/input_ref.fasta\n+##contig=<ID=demo20,length=5000>\n+##content=strelka germline small-variant calls\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">\n+##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">\n+##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">\n+##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">\n+##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. 
RUs are not reported if longer than 20 bases">\n+##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">\n+##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">\n+##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">\n+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. 
For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">\n+##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">\n+##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">\n+##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, \'PASS\' indicates that all filters have passed for this sample">\n+##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">\n+##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">\n+##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">\n+##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">\n+##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">\n+##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a 
s'..b'670\t.\tC\t.\t.\tPASS\tEND=1705;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:45:19:0:16\n+demo20\t1706\t.\tC\tT\t608\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:54:30:19:0:0,19:0,8:0,11:-35.5:PASS:342,57,0\n+demo20\t1707\t.\tG\t.\t.\tPASS\tEND=1743;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:54:21:0:19\n+demo20\t1744\t.\tC\tT\t312\tPASS\tSNVHPOL=3;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:159:30:21:0:9,12:5,6:4,6:-20.7:PASS:191,0,156\n+demo20\t1745\t.\tG\t.\t.\tPASS\tEND=1845;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:40:21:1:16\n+demo20\t1846\t.\tC\tT\t165\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:116:30:24:1:16,8:13,5:3,3:-12.4:PASS:117,0,224\n+demo20\t1847\t.\tG\t.\t.\tPASS\tEND=1872;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:60:23:1:21\n+demo20\t1873\t.\tC\tT\t122\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/0:60:60:21:0:21,0:15,0:6,0:0.0:PASS:0,63,360\n+demo20\t1874\t.\tC\t.\t.\tPASS\tEND=2073;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:45:21:0:16\n+demo20\t2074\t.\tT\tC\t246\tPASS\tSNVHPOL=2;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:156:30:24:1:13,11:4,8:9,3:-9.7:PASS:158,0,191\n+demo20\t2075\t.\tA\t.\t.\tPASS\tEND=2198;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:63:31:1:22\n+demo20\t2199\t.\tG\tA\t297\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:181:30:28:1:14,14:12,5:2,9:-14.3:PASS:183,0,189\n+demo20\t2200\t.\tC\t.\t.\tPASS\tEND=2300;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:60:30:1:26\n+demo20\t2301\t.\tG\tT\t369\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:161:22:29:1:12,17:6,11:6,6:-21.0:PASS:219,0,158\n+demo20\t2302\t.\tT\t.\t.\tPASS\tEND=2454;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:72:30:0:25\n+demo20\t2455\t.\tT\tC\t889\tPASS\tSNVHPOL=2;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:90:30:31:2:0,31:0,14:0,17:-51.4:PASS:370,93,0\n+demo20\t2456\t.\tG\t.\t.\tPASS\tEND=2511;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t
0/0:87:36:2:30\n+demo20\t2512\t.\tA\tG\t531\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:151:22:39:1:13,26:9,11:4,15:-28.4:PASS:300,0,148\n+demo20\t2513\t.\tT\t.\t.\tPASS\tEND=2639;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:81:37:1:28\n+demo20\t2640\t.\tC\tT\t751\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:81:30:28:0:0,28:0,14:0,14:-47.3:PASS:370,84,0\n+demo20\t2641\t.\tT\t.\t.\tPASS\tEND=2659;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:60:23:0:21\n+demo20\t2660\t.\tG\tT\t567\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:60:30:21:1:0,21:0,11:0,10:-36.2:PASS:321,63,0\n+demo20\t2661\t.\tG\t.\t.\tPASS\tEND=3037;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:41:27:1:18\n+demo20\t3038\t.\tC\t.\t.\tPASS\tEND=3053;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:27:20:1:17\n+demo20\t3054\t.\tG\tC\t214\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:140:22:20:0:10,10:6,6:4,4:-12.8:PASS:142,0,153\n+demo20\t3055\t.\tC\t.\t.\tPASS\tEND=3365;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:51:25:1:18\n+demo20\t3366\t.\tG\tT\t753\tPASS\tSNVHPOL=4;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:75:30:26:0:0,26:0,15:0,11:-42.1:PASS:370,78,0\n+demo20\t3367\t.\tG\t.\t.\tPASS\tEND=3536;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:60:28:0:21\n+demo20\t3537\t.\tC\tT\t191\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:128:22:31:1:21,10:8,6:13,4:-11.3:PASS:130,0,256\n+demo20\t3538\t.\tT\t.\t.\tPASS\tEND=3664;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:54:29:1:19\n+demo20\t3664\t.\tTC\tT\t572\tPASS\tCIGAR=1M1D;RU=C;REFREP=4;IDREP=3;MQ=59\tGT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL\t0/1:249:27:41:18,20:10,10:8,10:PASS:322,0,246\n+demo20\t3665\t.\tC\t.\t.\tPASS\t.\tGT:GQX:DP:DPF:MIN_DP\t0:249:19:0:19\n+demo20\t3666\t.\tC\t.\t.\tPASS\tEND=4019;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:48:32:1:17\n+demo20\t4020\t.\tC\t.\t.\tPASS\tEND=4059;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:24:12:1:9\n+demo20\t4060\
t.\tC\t.\t.\tPASS\tEND=4072;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:15:6:0:6\n+demo20\t4073\t.\tC\t.\t.\tLowGQX\tEND=4091;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:7:4:0:3\n+demo20\t4092\t.\tT\t.\t.\tLowGQX;LowDepth\tEND=4100;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t0/0:3:1:0:1\n+demo20\t4101\t.\tT\t.\t.\tLowGQX\tEND=5000;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF:MIN_DP\t.:.:0:0:0\n'
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/hg98.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg98.fa Wed Jan 27 14:48:23 2021 +0000
b
@@ -0,0 +1,85 @@
+>demo20
+TGCCTGCTTTGTGCCAGGTTCTGGGTTGGGAGGTGCTGGGGACAGGGAGATGAGTCAGAC
+CTgggaagatttcgtagaggaggtgacagtaagctggaacctgtgtaatgagcaggagtt
+gcccagtggagaaggggaaggtgttccaggcggaagaaacagcatatgcaaaggccccaa
+ggtaggaagggccctagtgtgtgcagaggacagggcatggggaggggaactaaggctgag
+gccaaggagaggaaatgactcacaccgtgagagaggagttgagaccagggaggCTGCTTG
+CTGTATGATGCAACTGAGAGGGTAGAACAAGGCTGGCACAGAGAAGGTGGGGAAGGAAAA
+GGAGAGACGAAGCTGAGATTTCAGCAGGGCCAAGTCAGCCGTGAGTGCCAGGCTGCGGAG
+CCCAGATTCTCTGGGCTGagaaagagcactctgtccagagtgtggaggggggcctggagg
+ggatgagactcaaagctgggaggccagagaggaggctgctagagttttctgggagagagt
+tactggggcctgaacTCCAGTGAGGCActtcccatttcacagaccaggaaactgaggccc
+aagagtgaggcaactggcccaagggcacacagccaggtaaggcagaacCTTCCTTCTTTG
+GAGCTCCCTTGGGTGGGAAGCTGTGGGCTCCCCTTCATAGCCCACCCTTTTGGCTGTATC
+TCCCTGCTGCCCTGGGCATATGCTCCCTTATTCTGTCCTCCCTTGAAGCTGACTGCTGGC
+CTTAAAGGGCCCCTGTTTCTTCCCTCTGGACTACATGAGATCGGGATAGTATTAATGACT
+AAAACCTACCAGGGGTTTCTAGGCCTGGCCTGGAAAAAGTGACTGTTGACAAACAAAGTG
+CAGAGATTTAAAATCTCCTCTGTCTCAATTAGTGGAATCCAGTTAGAGGTTTGAACTATG
+ATTCTACCAGAATCCAATCTCTCTGGGTAGCCAGGTACCCAGGATGGGGCTAAAATTCCA
+GATGGATAGGTTGTCAACACCAGTGAGGAACCAGGAGGCTACCACAGGGTGGGACTTCCT
+GGTTTGGCTTTACATCTGAACTTCAGGGATCCCAGATCCTGGATCTGGGGCACTTGTCCA
+GAGAAGGCTATTGCTCTCATGTCACAAATGAGATGACTAAGACCCCCAAATCAATTCCAG
+TTCACTCACAAGCATTTCCTGGGCAGTGGAGACCCCTGCCCCACCTGTTGGCACCCCCTC
+AGCTCCCCACAGGGAATTGGAGTCCAGCCAAGCATGAGGAGGCTGTTGGCCTCAAGGTGA
+GCAGGGATGGGCTGAACCTCACCCAGTAAGGCAAGGACAGAGCCAGGGTTGGCCTGAGAT
+TTCCAGCCACCCTTTCCAAGGCTCTGCTCACTGTTATTTTCCTTAGTCTacaacaatatc
+aataacaataacaacaataatatcaacaCAAAAAGTGAAATACTCACCATGTATTGTAGT
+GTTTCCAAGGTGTCATGTAATGCCAGGGGTAGTTTGGGGCCAGGAAAAATATTTTTGGGA
+GGCATAAGAATAGGATGGACTGATATTGATATGCAACAGTTTGATCTGGTCCTCCTCTGA
+ATATCTGGGCTGGTAATTTGTACCAGTTTCCCTCGCTTTtgtgcataggcactgtgctga
+acccttttgtatgcatgaactcatccgattctctgtgcaagaactctatgagattattat
+tcccgttttacaagtaagaaaaattgaggctctgagaagttaaataaatgacttgtatga
+agttccagtgctaattaataagtgaaggagccagggcttgaactccggcccatctgactg
+caaagccagtgcccttcctcctacacATCTTCCTTTGGATTTCCACCACTGAGCATATGT
+AAGGTTGGGCAAACAGCCTGCATGAACAATCGCTGCTTTTATATCATGCACAAGTTTGGT
+CTTTTCTGCCTGTGCCCATGTCCTTGTAACCTTCTGAACCAAACTCCCCAGTGCCTGGGA
+ACATCAGAAGACTTGACTCTCTTCTCCTTTCACTAGCCTCCACCTGACTGGGACAAAGCC
+ATGCAGAGAGCTAGTGCTCCCTTCCTGCTAGACTTCAAGGATGCCTGGTTCCTGTGCCCC
+ATCTCCATCCAGCCCTCTCTTCTACACCTGGTGACTGAGCCTCTCCTTCAGTTTCTCCAT
+CCAGAAGGGGGTGAAAGCAACTGCCTAGTGTCCTTCCCTGGTGATAGTGGAGCACGGGGG
+ACAGGGTGTTTGGGCAAAAGGTGCCCAAGGTGAGGTGCCCAACACAACCTCCTACTCAGA
+CGATTGAGCAGACATTCAGCCTCATCTGGGGACTGGGTTACCAGTGGGTTAGTGGGAGGC
+ATTGGGCCCAGGCCCTGTGCCTTGGGCTGAGCTACAAGAAACCCACACATGGGATGAATT
+CAGGCAGCTCAAGGCCAGGTCTGTGCATACGCCAGTAATAGGTTCAGGTTAATCCACATG
+TCGCGATTTGGAAGGTGTCTACTTTTCCTACCTGTAGCTTCCTTAGGCCTCAAACCCCTA
+CTCAGCTGGGTCTGCCAGACTGAGATGGAGCCAGGGTGGAATCTTCTGCCCTCAAATCCC
+TGTCAGCCCTGGTGGTGCCGGGAGCGCCATCACTATTGGGTCTTAAAGGCTTTCCAGCCT
+TCCACTATGGATCCAGGAGCAGCAGTAGCCCCTTTGGTCTTTCTCTCTCATCAGGACATC
+TCCACTCATGGTTCCAGTCAGAGCTTCTTGAAAGTAGTCCCACTCTGTTCAAAAGCCTCC
+CATGCCCCCTGCTAGCCTCAGGCTAAGAGCCCTTCTCCTTCGCACAGCCTTTGGACCTGT
+CTATTTTTATGGTCTGGAAACTTCAGGAACACTGATAGCTGAGCATCTGGCACATATTAT
+GCACTCAAAAACCATGTATTTCTTTCTCCTTCCCTTTGGGACCCGTAAACCAGGGactgg
+acatttttgcaagagacaggagctgtgactgtgcattcactgctgtatccccagcaccca
+gcactgggcctgccacacagtaagtgcttagtaaatgtttgttgactgagtgaTTGCAGC
+TGGGGCCAAGAATGCCTTGGACACCCCAAGTAGGCCGTGTTAGAAGGAGTCAGTGAGAGC
+CTGGGAGCCCAGCCCAGAATTGTTTTCTTGACCCAGAAGCCAGGGCCAGGGATGCCTCTT
+CACTTCTGTTTGGCCCTCTTGGGCTTAGGGGCAGGGGCATTAAGATGAgagaggtccttg
+gggtgcattgagtctaacctcccagttcctcccattctacagccaggaaaactgaggccc
+agggaggggtaggacaagcccaagAAAGTGGGGCTGGAGAATGAATCCTGGAGACCAAAC
+TTGTCAGTCTGGATTGCTGTTGCCCTCATCCTCGCCTCCAAAACCCatgggtaaactgag
+gccaagagaggggcaggggcatgcccaaggtcacccatggaatcaggggacagggcctgg
+attgggattgttgttgacgccattattactgtttattgttgtttctatttcacAGATGGT
+CGGGGAGGGTGGGGCCCGCAATGGCTCCCAGGCGCCCAGAGACCCTGGAGGGTGAGCAGG
+GTCTCCCCTCCCCTCTCCTGCCCGTCTTTAGCCACACTGGGGCGCACACCGCTCACTCAC
+CCGGGGCCGAGGCGTTAGCCCTTTCTTGCACCAGGTGCCGCAACAACACCAGCAGCTGGC
+GCAGGCTGTGCTGCTGGTCCTGCAGGAGGCTGGAGTTGTGCCTGACACCGCGCAGGCCGC
+GCTCGATGTTGGTGAGGGCGGCGCTCTGGCGGCTCAGCGTGTTCAGCAGCTTCGCCTTCT
+TGCTGAGGATGCTGGCCAGCTCCTCCTGCTGCTTGGTCTCCAGGGCCTGCAACCGCTTCT
+CGAGCGCGCTGCGGGGTAGGGGGCGCACAGAGGTGAGCCTGGCATCCTCGCGAAGCACGC
+ACCCCCGCGCGCCTCCCCGGCCCTGGAGTCCCTGCAGCCCGACGATGAGACTCAAGTGTG
+GTGGAACGTCCTGTGCCCACTGTAGGCACAGATTGAGGAGGGGAGAAAAGAGATACCCGG
+CCCTGGAGTAATATAGATTGAGGTTTAGTGGAAGAAAGAGGTGGTGTGGGAGGGACACCA
+GCAACTGGGTAGCTATTATCAAATCCCAACTGTGCTTGCTTTTTGACCCAGCAGTCTACT
+CTCAGGAGTTATCCTCTAGGGAGCATGGTCAAGAATGGCCCTGGGGACTTGCTTGTAATA
+GAAAAACAAACTAACCAACCAACCAAAAAAGATATAGCCTAGATGCCCAAAAGCCAGGAC
+TGGCTGAATCCGTTGCGTTTTGGCGTCCCTGGAAATGTTCCGCAGTCATGAAGGAGGAGG
+CATTTGCACAGAATTGGAAAGATGCCCAGGACTTGGGGCACATCAAGCCTAACCCCATGT
+GTGGCAAGAGAAGAAAGAAAGTATTAATGTAAATAAAGAGAAATGGGGTGAACATATAGG
+AGAAGGCTGGAAAGACCGCAGTGGTGCCTGTGTTTGGGAAGAATATGAAAGAAATTCCCT
+CAAGTGCTGTGACTTCTGCAGAGCAGGTTTGAGTGGATGAAGATGGAGAGGAGGAAGATG
+GGGGCAGGATGGAGGGCCCAACTTTCACTTTTATTTTGTACAGGTTCCTGTTGTCCGATG
+ATATTATAATAATCAAGAGACATTTTTTGTAATGGATTTAGAAGCAAAGAGGAGTTTTTC
+AAAAGAAAGCCTTAGACTCAGCTCTTTCTTTTTGGACATTTTATCCTCCAGATTTACTca
+catgtgtgtgaaatgagatatggaaatgttactcatcgtatcactggttggattagtaaa
+aggctggaagcaacctcaatatccattaactggggactggaggaataaaagcagggacca
+catatggtggagcattataa
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/hg98.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg98.fa.fai Wed Jan 27 14:48:23 2021 +0000
b
@@ -0,0 +1,1 @@
+demo20 5000 8 60 61
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/indels_test2.vcf.gz
b
Binary file test-data/indels_test2.vcf.gz has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/sample1.bam
b
Binary file test-data/sample1.bam has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/sample1.cram
b
Binary file test-data/sample1.cram has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/sample2.bam
b
Binary file test-data/sample2.bam has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/sample2.cram
b
Binary file test-data/sample2.cram has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/sample3.bam
b
Binary file test-data/sample3.bam has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/snvs_test2.vcf.gz
b
Binary file test-data/snvs_test2.vcf.gz has changed
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/test_fasta_indexes.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_fasta_indexes.loc Wed Jan 27 14:48:23 2021 +0000
b
@@ -0,0 +1,1 @@
+hg19 hg19 hg19 ${__HERE__}/hg98.fa
\ No newline at end of file
b
diff -r 000000000000 -r 1fbe84e8a740 test-data/variants_test2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/variants_test2.vcf Wed Jan 27 14:48:23 2021 +0000
[
b'@@ -0,0 +1,62 @@\n+##fileformat=VCFv4.1\n+##fileDate=.\n+##source=strelka\n+##source_version=2.9.10\n+##startTime=.\n+##cmdline=./configureStrelkaGermlineWorkflow.py --bam input_sample_0.cram --bam input_sample_1.cram --disableSequenceErrorEstimation --config=/tmp/tmpmxn8erma/job_working_directory/000/4/configs/tmpx1j1a_0u --referenceFasta input_ref.fasta --runDir results\n+##reference=file:///tmp/tmpmxn8erma/job_working_directory/000/4/working/input_ref.fasta\n+##contig=<ID=demo20,length=5000>\n+##content=strelka germline small-variant calls\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">\n+##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">\n+##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">\n+##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">\n+##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. 
RUs are not reported if longer than 20 bases">\n+##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">\n+##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">\n+##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">\n+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. 
For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">\n+##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">\n+##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">\n+##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, \'PASS\' indicates that all filters have passed for this sample">\n+##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">\n+##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">\n+##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">\n+##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">\n+##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">\n+##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.4'..b'ILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">\n+##Depth_demo20=53.00\n+##FILTER=<ID=LowDepth,Description="Locus depth is below 3">\n+##FILTER=<ID=NotGenotyped,Description="Locus contains forcedGT input alleles which could not be genotyped">\n+##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">\n+##FILTER=<ID=NoPassedVariantGTs,Description="No samples at this locus pass all sample filters and have a variant 
genotype">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12891\tNA12892\n+demo20\t991\t.\tC\tG\t38\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:71:9:9:1:5,4:1,4:4,0:2.8:PASS:72,0,123\t0/0:33:33:12:0:12,0:9,0:3,0:0.0:PASS:0,36,258\n+demo20\t1148\t.\tC\tCTAT\t72\tPASS\tCIGAR=1M3I;RU=TAT;REFREP=1;IDREP=2;MQ=60\tGT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL\t0/1:114:27:20:11,8:5,3:6,5:PASS:111,0,147\t0/0:69:69:28:24,0:12,0:12,0:PASS:0,72,443\n+demo20\t1271\t.\tA\tG\t134\tPASS\tSNVHPOL=4;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:126:30:18:0:8,10:7,6:1,4:-18.6:PASS:169,0,123\t0/0:75:75:26:0:26,0:18,0:8,0:0.0:PASS:0,78,370\n+demo20\t1508\t.\tA\tG\t156\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:172:30:22:1:10,12:4,6:6,6:-21.5:PASS:191,0,169\t0/0:108:108:37:2:37,0:19,0:18,0:0.0:PASS:0,111,370\n+demo20\t1706\t.\tC\tT\t304\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:54:30:19:0:0,19:0,8:0,11:-35.5:PASS:342,57,0\t0/0:90:90:31:2:31,0:7,0:24,0:0.0:PASS:0,93,370\n+demo20\t1744\t.\tC\tT\t156\tPASS\tSNVHPOL=3;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:159:30:21:0:9,12:5,6:4,6:-20.7:PASS:191,0,156\t0/0:78:78:27:0:27,0:6,0:21,0:0.0:PASS:0,81,370\n+demo20\t1846\t.\tC\tT\t83\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:116:30:24:1:16,8:13,5:3,3:-12.4:PASS:117,0,224\t0/0:60:60:21:0:21,0:14,0:7,0:0.0:PASS:0,63,370\n+demo20\t1873\t.\tC\tT\t122\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/0:60:60:21:0:21,0:15,0:6,0:0.0:PASS:0,63,360\t0/1:155:30:23:0:13,10:8,7:5,3:-14.9:PASS:157,0,195\n+demo20\t2074\t.\tT\tC\t123\tPASS\tSNVHPOL=2;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:156:30:24:1:13,11:4,8:9,3:-9.7:PASS:158,0,191\t0/0:75:75:26:0:26,0:14,0:12,0:0.0:PASS:0,78,370\n+demo20\t2199\t.\tG\tA\t149\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:181:30:28:1:14,14:12,5:2,9:-14.3:PASS:183,0,189\t0/0:96:96:33:0:33,0:17,0:16,0:0.0:PASS:0,99,370\n+demo20\t2301\t.\
tG\tT\t184\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:161:22:29:1:12,17:6,11:6,6:-21.0:PASS:219,0,158\t0/0:75:75:26:1:26,0:15,0:11,0:0.0:PASS:0,78,370\n+demo20\t2455\t.\tT\tC\t445\tPASS\tSNVHPOL=2;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:90:30:31:2:0,31:0,14:0,17:-51.4:PASS:370,93,0\t0/0:78:78:27:1:27,0:11,0:16,0:0.0:PASS:0,81,370\n+demo20\t2512\t.\tA\tG\t266\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:151:22:39:1:13,26:9,11:4,15:-28.4:PASS:300,0,148\t0/0:69:69:24:2:24,0:8,0:16,0:0.0:PASS:0,72,370\n+demo20\t2640\t.\tC\tT\t375\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:81:30:28:0:0,28:0,14:0,14:-47.3:PASS:370,84,0\t0/0:102:102:35:0:35,0:17,0:18,0:0.0:PASS:0,105,370\n+demo20\t2660\t.\tG\tT\t283\tPASS\tSNVHPOL=3;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:60:30:21:1:0,21:0,11:0,10:-36.2:PASS:321,63,0\t0/0:87:87:30:0:30,0:15,0:15,0:0.0:PASS:0,90,370\n+demo20\t3054\t.\tG\tC\t107\tPASS\tSNVHPOL=2;MQ=58\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:140:22:20:0:10,10:6,6:4,4:-12.8:PASS:142,0,153\t0/0:24:24:9:2:9,0:4,0:5,0:0.0:PASS:0,27,201\n+demo20\t3366\t.\tG\tT\t377\tPASS\tSNVHPOL=4;MQ=60\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t1/1:75:30:26:0:0,26:0,15:0,11:-42.1:PASS:370,78,0\t0/0:75:75:26:0:26,0:13,0:13,0:0.0:PASS:0,78,370\n+demo20\t3537\t.\tC\tT\t95\tPASS\tSNVHPOL=2;MQ=59\tGT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL\t0/1:128:22:31:1:21,10:8,6:13,4:-11.3:PASS:130,0,256\t0/0:84:84:29:1:29,0:10,0:19,0:0.0:PASS:0,87,370\n+demo20\t3664\t.\tTC\tT\t286\tPASS\tCIGAR=1M1D;RU=C;REFREP=4;IDREP=3;MQ=59\tGT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL\t0/1:249:27:41:18,20:10,10:8,10:PASS:322,0,246\t0/0:70:70:25:25,0:10,0:15,0:PASS:0,73,493\n'
b
diff -r 000000000000 -r 1fbe84e8a740 tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Wed Jan 27 14:48:23 2021 +0000
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had the indexed hg19 Canonical sequence stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should contain one line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
\ No newline at end of file
b
diff -r 000000000000 -r 1fbe84e8a740 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jan 27 14:48:23 2021 +0000
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 1fbe84e8a740 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jan 27 14:48:23 2021 +0000
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/test_fasta_indexes.loc"/>
+    </table>
+</tables>
\ No newline at end of file