changeset 0:edbdbc64b397 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
author iuc
date Wed, 27 Jan 2021 14:47:52 +0000
parents
children
files macros.xml strelka_somatic.xml test-data/genome_test1.vcf test-data/hg98.fa test-data/hg98.fa.fai test-data/indels_test2.vcf.gz test-data/sample1.bam test-data/sample1.cram test-data/sample2.bam test-data/sample2.cram test-data/sample3.bam test-data/snvs_test2.vcf.gz test-data/test_fasta_indexes.loc test-data/variants_test2.vcf tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 17 files changed, 788 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,218 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Version/description tokens; the tool version is also used as the version of the strelka package requirement -->
+    <token name="@TOOL_VERSION@">2.9.10</token>
+    <token name="@GALAXY_VERSION@">galaxy0</token>
+    <token name="@DESCRIPTION@">small variant caller</token>
+    <!-- Packages needed at runtime: strelka itself plus samtools (used to index a history reference with faidx) -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">strelka</requirement>
+            <requirement type="package" version="1.9">samtools</requirement>
+        </requirements>
+    </xml>
+    <!-- Publication to cite for this tool, given as a DOI -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-018-0051-x</citation>
+        </citations>
+    </xml>
+
+    <!-- 
+        command
+    -->
+
+    <token name="@INIT@"><![CDATA[
+        ## Command preamble shared by the Strelka wrappers.
+        ## Stages the optional region/whitelist files (all must end up bgzipped
+        ## and tabix-indexed) and the reference FASTA in the working directory.
+        ## Leaves two variables for the configure step:
+        ##   region_spec        - command line options for the staged region files
+        ##   reference_fasta_fn - path of the reference FASTA to use
+
+        ## Make all optional regions files available
+        ## Note: all of these must be tabixed
+        #set $reg_options = []
+        #for $i, $sites in enumerate($forced_regions):
+            #set $target_file = 'input_forcedgt_%d.vcf.gz' % $i
+            #if $sites.whitelist.ext == 'vcf':
+                ## plain VCF: compress and index it ourselves
+                bgzip -c '${sites.whitelist}' > $target_file &&
+                tabix -p vcf $target_file &&
+            #else:
+                ## any other datatype is assumed to be bgzipped already and to
+                ## carry its tabix index as dataset metadata
+                ln -s '${sites.whitelist}' $target_file &&
+                ln -s '${sites.whitelist.metadata.tabix_index}' ${target_file}.tbi &&
+            #end if
+            #if str($sites.use_whitelist_as) == 'indel_candidates':
+                #silent $reg_options.extend(['--indelCandidates', $target_file])
+            #else:
+                #silent $reg_options.extend(['--forcedGT', $target_file])
+            #end if
+        #end for
+        #if str($regions.restrict_to_region) == 'regions_from_file':
+            #silent $reg_options.append('--callRegions')
+            #set $target_file = 'input_callregions.bed.gz'
+            #if $regions.callRegions.ext == 'bed':
+                ## plain BED: compress into the target file, then index it
+                bgzip -c '$regions.callRegions' > $target_file &&
+                tabix -p bed $target_file &&
+            #else:
+                ## any other datatype is assumed to be bgzipped already and to
+                ## carry its tabix index as dataset metadata
+                ln -s '$regions.callRegions' $target_file &&
+                ln -s '$regions.callRegions.metadata.tabix_index' ${target_file}.tbi &&
+            #end if
+            #silent $reg_options.append($target_file)
+        #end if
+        #set $region_spec = ' '.join($reg_options)
+        #if str($ref_cond.ref_sel) == 'history':
+            ## a history FASTA has no index yet: link it and build the .fai here
+            #set $reference_fasta_fn = 'input_ref.fasta'
+            ln -s '$ref_cond.ref' $reference_fasta_fn &&
+            samtools faidx $reference_fasta_fn &&
+        #else
+            ## cached genome: use the path column of the selected data table entry
+            #set $reference_fasta_fn = str($ref_cond.ref.fields.path)
+        #end if
+    ]]></token>
+    <token name="@CREATE@"><![CDATA[
+        ## Options appended to the configure script call of the importing tool.
+        ## Relies on the region_spec and reference_fasta_fn variables that the
+        ## init token sets earlier in the same command.
+        --config='$config_file'
+        $optimization
+        #if str($expert_settings.evs.selector) == "disableEVS"
+            --disableEVS
+        #else
+            ## custom scoring models are optional; omit the option when no dataset is selected
+            #if $expert_settings.evs.snvScoringModelFile
+                --snvScoringModelFile '$expert_settings.evs.snvScoringModelFile'
+            #end if
+            #if $expert_settings.evs.indelScoringModelFile
+                --indelScoringModelFile '$expert_settings.evs.indelScoringModelFile'
+            #end if
+            $expert_settings.evs.reportEVSFeatures
+        #end if
+        $region_spec
+        --referenceFasta '${reference_fasta_fn}'
+        ## the trailing && chains the run step that the tool places after this token
+        --runDir results &&
+    ]]></token>
+    <token name="@RUN@"><![CDATA[
+        ## Run the workflow created by the configure step on the local node.
+        ## Dollar signs are backslash-escaped so that the shell, not Cheetah,
+        ## expands the variables at job run time.
+        ## The -g option of runWorkflow.py takes gigabytes, whereas
+        ## GALAXY_MEMORY_MB is in megabytes: convert, and never pass less
+        ## than 1 GB (integer division would otherwise yield 0).
+        results/runWorkflow.py
+            -m local
+            -j \${GALAXY_SLOTS:-2}
+            -g \$(( \${GALAXY_MEMORY_MB:-8192} < 1024 ? 1 : \${GALAXY_MEMORY_MB:-8192} / 1024 ))
+    ]]></token>
+
+    <!-- 
+        configfile - parser cannot handle indents
+    -->
+
+    <!-- Config file entries appended through this token; the two fixed entries are not exposed as tool parameters -->
+    <token name="@CONFIG@"><![CDATA[
+maxIndelSize = $strelka.maxIndelSize
+isWriteRealignedBam = 0 ## not implemented
+extraVariantCallerArguments = ## not implemented
+    ]]></token>
+
+    <!--
+        input 
+    -->
+    
+    <!-- Reference genome selection: a locally cached genome filtered by the dbkey of the
+         dataset named by the ref token (default: normalBam), or a FASTA dataset from the history -->
+    <xml name="input_required" token_ref="normalBam">
+        <conditional name="ref_cond">
+            <param name="ref_sel"
+                   type="select"
+                   label="Choose the source for the reference genome"
+                   help="(--referenceFasta)">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param name="ref"
+                       type="select"
+                       label="Reference genome"
+                       help="">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta"
+                                column="dbkey"
+                                key="dbkey"
+                                ref="@REF@"/>
+                        <validator type="no_options"
+                                   message="A built-in reference genome is not available for the build associated with the selected input file."/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref"
+                       type="data"
+                       format="fasta"
+                       label="Reference sequence"
+                       help="(--referenceFasta)"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Optional restriction of calling to BED regions, plus any number of VCF whitelists
+         used either as indel candidates or as forced genotyping sites -->
+    <xml name="regions_select">
+        <conditional name="regions">
+            <param name="restrict_to_region"
+                   type="select"
+                   label="Call variants across">
+                <option value="genome">Whole reference</option>
+                <option value="regions_from_file">Regions specified in BED</option>
+            </param>
+            <when value="genome"/>
+            <when value="regions_from_file">
+                <param argument="--callRegions"
+                       type="data"
+                       format="bed"
+                       label="BED dataset with regions to examine"
+                       help="Specify a set of regions to call. No VCF output will be provided outside of these regions. Note that the full genome may still be used to calculate certain input statistics (such as expected depth per chromosome)."/>
+            </when>
+        </conditional>
+        <repeat name="forced_regions"
+                title="Whitelists of SNV/indel sites that should always be considered"
+                default="0"
+                min="0"
+                help="Add whitelisted SNVs/indels to list of considered/reported alleles explicitly">
+            <param name="whitelist"
+                   type="data"
+                   format="vcf"
+                   label="Select file with candidate alleles"
+                   help=""/>
+            <param name="use_whitelist_as"
+                   type="select"
+                   display="radio"
+                   label="Use this whitelist as"
+                   help="An indel candidates list is used during the realignment and calling steps to increase the chances of detecting given indels if they exist in any sample. If the indel is NOT found despite these efforts, it will NOT be reported, however. With a list of 'SNV sites and/or indels of interest', on the other hand, indels in the list undergo that same treatment, but listed indels and SNPs are both guaranteed to be reported in the variants output, even if they are judged as not being present in any sample.">
+                <option value="indel_candidates">A list of indel candidates to be considered during realignment/calling (--indelCandidates)</option>
+                <option value="forced_gt_sites">A list of SNV sites/indels of interest that should always be reported (--forcedGT)</option>
+            </param>
+        </repeat>
+    </xml>
+    <!-- Sequencing assay selector; the option value is emitted on the command line as is,
+         and the yield lets an importing tool add further options -->
+    <xml name="calling_model">
+        <param name="optimization"
+               type="select"
+               label="Optimize variant calling for">
+            <option value="">Whole-genome sequencing (WGS) data (default mode)</option>
+            <option value="--exome">Whole-exome sequencing (WES) data (--exome)</option>
+            <yield/>
+        </param>
+    </xml>
+    <!-- Expert section: an importing tool may inject its own parameters through the yield,
+         followed by the empirical variant scoring (EVS) configuration -->
+    <xml name="calling_model_expert">
+        <section name="expert_settings"
+                 title="Expert configuration of calling model"
+                 expanded="false">
+            <yield/>
+            <conditional name="evs">
+                <param name="selector"
+                       type="select"
+                       label="Configure empirical variant scoring (EVS) model">
+                    <option value="disableEVS">Don't use EVS, just simple threshold-based filtering (--disableEVS)</option>
+                    <option value="enableEVS" selected="true">Use EVS models (default)</option>
+                </param>
+                <when value="disableEVS"/>
+                <when value="enableEVS">
+                    <param argument="--snvScoringModelFile"
+                           type="data"
+                           format="json"
+                           optional="true"
+                           label="Optional SNV scoring model to overwrite default model"/>
+                    <param argument="--indelScoringModelFile"
+                           type="data"
+                           format="json"
+                           optional="true"
+                           label="Optional indel scoring model to overwrite default model"/>
+                    <param argument="--reportEVSFeatures"
+                           type="boolean"
+                           truevalue="--reportEVSFeatures"
+                           falsevalue=""
+                           label="Report all empirical variant scoring features in VCF output"
+                           help="WARNING: Do not use this feature with Strelka Germline and more than one input sample or the tool run will fail!"/>
+                </when>
+            </conditional>
+        </section>
+    </xml>
+    <!-- Output compression switch; its values (compressed/decompressed) are what the
+         command template and the output change_format rules compare against -->
+    <xml name="input_output">
+        <param name="vcf_type"
+               type="boolean"
+               truevalue="compressed"
+               falsevalue="decompressed"
+               label="Generate compressed variants output (vcf.gz)"
+               help="Default is uncompressed vcf"/>
+    </xml>
+    <!-- Config file setting offered as a parameter; written to the config file as maxIndelSize -->
+    <xml name="input_strelka">
+        <param argument="maxIndelSize"
+               name="maxIndelSize"
+               type="integer"
+               value="49"
+               label="Set maximum reported indel size"
+               help=""/>
+    </xml>
+
+    <!--
+        Help
+    -->
+
+    <token name="@HELP_INPUT@">
+*Sequencing Data*
+
+The input sequencing reads are expected to come from a paired-end sequencing assay. Any input other than paired-end reads is ignored by default except to double-check for putative somatic variant evidence in the normal sample during somatic variant analysis. Read lengths above ~400 bases are not tested.
+
+*Alignment Files*
+
+All input sequencing reads should be mapped by an external tool and provided as input in `BAM &lt;https://samtools.github.io/hts-specs/SAMv1.pdf&gt;`_ or `CRAM &lt;https://samtools.github.io/hts-specs/CRAMv3.pdf&gt;`_ format.
+
+The following limitations apply to the input BAM/CRAM alignment records:
+
+- Alignments cannot contain the "=" character in the SEQ field.
+- RG (read group) tags are ignored -- each alignment file must represent one sample.
+- Alignments with basecall quality values greater than 70 will trigger a runtime error (these are not supported on the assumption that the high basecall quality indicates an offset error)
+
+*VCF Files*
+
+Input `VCF &lt;http://samtools.github.io/hts-specs/VCFv4.1.pdf&gt;`_ files are accepted for a number of roles as described below. All input VCF records are checked for compatibility with the given reference genome, in addition to role-specific checks described below. If any VCF record's REF field is not compatible with the reference genome a runtime error will be triggered. 'Compatible with the reference genome' means that, for each VCF record, either (1) the REF base matches the corresponding reference genome base, (2) the REF base is 'N', or (3) the reference genome base is any ambiguous IUPAC base code (all ambiguous base codes are converted to 'N' while importing the reference).
+    </token>
+    <token name="@HELP_STRELKA@">
+Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts (Strelka Germline) and somatic variation in tumor/normal sample pairs (Strelka Somatic).
+
+Strelka accepts input read mappings from BAM or CRAM files, and optionally candidate and/or forced-call alleles from VCF. It reports all small variant predictions in VCF 4.1 format. Germline variant reporting uses the gVCF conventions to represent both variant and reference call confidence. For best somatic indel performance, Strelka is designed to be run with the Manta structural variant and indel caller, which provides additional indel candidates up to a given maximum indel size (by default this is 49). By design, Manta and Strelka run together with default settings provide complete coverage over all indel sizes (in addition to all SVs and SNVs) for clinical somatic and germline analysis scenarios.
+    </token>
+    <token name="@HELP_REFERENCES@"><![CDATA[
+More information is available on `GitHub <https://github.com/Illumina/strelka>`_.
+    ]]></token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka_somatic.xml	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,283 @@
+<?xml version="1.0"?>
+<tool id="strelka_somatic" name="Strelka Somatic" version="@TOOL_VERSION@+@GALAXY_VERSION@">
+    <description>@DESCRIPTION@ for somatic variation in tumor/normal sample pairs</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Shell command template (Cheetah): stage inputs, configure the workflow, run it, collect the outputs -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## initialize
+        ## link each alignment under a fixed name with its index next to it
+        ## (bam -> .bam.bai, cram -> .cram.crai)
+        #if $normalBam.is_of_type('bam')
+            ln -s '$normalBam' './input_normal.bam' &&
+            ln -s '$normalBam.metadata.bam_index' './input_normal.bam.bai' &&
+        #elif $normalBam.is_of_type('cram')
+            ln -s '$normalBam' './input_normal.cram' &&
+            ln -s '$normalBam.metadata.cram_index' './input_normal.cram.crai' &&
+        #end if
+        #if $tumorBam.is_of_type('bam')
+            ln -s '$tumorBam' './input_tumor.bam' &&
+            ln -s '$tumorBam.metadata.bam_index' './input_tumor.bam.bai' &&
+        #elif $tumorBam.is_of_type('cram')
+            ln -s '$tumorBam' './input_tumor.cram' &&
+            ln -s '$tumorBam.metadata.cram_index' './input_tumor.cram.crai' &&
+        #end if
+        @INIT@
+
+        ## create workflow
+        ## the file names passed here must match the links created above
+        configureStrelkaSomaticWorkflow.py
+            #if $normalBam.is_of_type('bam')
+                --normalBam ./input_normal.bam
+            #elif $normalBam.is_of_type('cram')
+                --normalBam ./input_normal.cram
+            #end if
+            #if $tumorBam.is_of_type('bam')
+                --tumorBam ./input_tumor.bam
+            #elif $tumorBam.is_of_type('cram')
+                --tumorBam ./input_tumor.cram
+            #end if
+            $oo.outputCallableRegions
+            @CREATE@
+
+        ## run workflow
+        @RUN@
+
+        ## decompress results if needed and move everything to final destinations
+        #if $oo.vcf_type == "decompressed"
+            && bgzip -d results/results/variants/somatic.indels.vcf.gz
+            && bgzip -d results/results/variants/somatic.snvs.vcf.gz
+            && mv results/results/variants/somatic.indels.vcf '$out_indels'
+            && mv results/results/variants/somatic.snvs.vcf '$out_snvs'
+        #else
+            && mv results/results/variants/somatic.indels.vcf.gz '$out_indels'
+            && mv results/results/variants/somatic.snvs.vcf.gz '$out_snvs'
+        #end if
+        ## the callable regions track is always delivered uncompressed (bed)
+        #if $oo.outputCallableRegions
+            && bgzip -d results/results/regions/somatic.callable.regions.bed.gz
+            && mv results/results/regions/somatic.callable.regions.bed '$out_callable'
+        #end if
+
+    ]]></command>
+    <!-- INI-style settings file handed to the configure script through its config option;
+         values come from the strelka section of the inputs -->
+    <configfiles>
+        <configfile name="config_file">
+## parser cannot handle indents
+[StrelkaSomatic]
+depthFilterMultiple = $strelka.depthFilterMultiple
+snvMaxFilteredBasecallFrac = $strelka.snvMaxFilteredBasecallFrac
+snvMaxSpanningDeletionFrac = $strelka.snvMaxSpanningDeletionFrac
+indelMaxWindowFilteredBasecallFrac = $strelka.indelMaxWindowFilteredBasecallFrac
+ssnvPrior = $strelka.ssnvPrior
+sindelPrior = $strelka.sindelPrior
+ssnvNoise = $strelka.ssnvNoise
+sindelNoiseFactor = $strelka.sindelNoiseFactor
+ssnvNoiseStrandBiasFrac = $strelka.ssnvNoiseStrandBiasFrac
+minTier1Mapq = $strelka.minTier1Mapq
+minTier2Mapq = $strelka.minTier2Mapq
+ssnvQuality_LowerBound = $strelka.ssnvQuality_LowerBound
+sindelQuality_LowerBound = $strelka.sindelQuality_LowerBound
+ssnvContamTolerance = $strelka.ssnvContamTolerance
+indelContamTolerance = $strelka.indelContamTolerance
+@CONFIG@
+        </configfile>
+    </configfiles>
+    <inputs>
+        <!-- Normal/tumor alignments, followed by the shared reference, calling model and region macros -->
+        <param argument="--normalBam" type="data" format="bam,cram" multiple="false" label="Select normal sample file" help="In bam or cram format."/>
+        <param argument="--tumorBam" type="data" format="bam,cram" multiple="false" label="Select tumor sample file" help="In bam or cram format."/>
+        <expand macro="input_required"/>
+        <expand macro="calling_model" />
+        <expand macro="calling_model_expert" />
+        <expand macro="regions_select" />
+
+        <!-- Output options: VCF compression and the optional callable regions track -->
+        <section name="oo" title="Output options" expanded="false">
+            <expand macro="input_output"/>
+            <param argument="--outputCallableRegions" type="boolean" checked="false" truevalue="--outputCallableRegions" falsevalue="" label="Generate bed file describing somatic callable regions of the genome" help=""/>
+        </section>
+
+        <!-- Each parameter of this section is written to the config file under the same name -->
+        <section name="strelka" title="Strelka run configuration" expanded="false">
+            <expand macro="input_strelka"/>
+            <param argument="depthFilterMultiple" type="float" value="3.0" label="Set depthFilterMultiple" help="If the depth filter is not skipped, all variants which occur at a depth greater than depthFilterMultiple*chromosome mean depth will be filtered out."/>
+            <param argument="snvMaxFilteredBasecallFrac" type="float" value="0.4" min="0.0" max="1.0" label="Set snvMaxFilteredBasecallFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of basecalls have been removed by the mismatch density filter in either sample."/>
+            <param argument="snvMaxSpanningDeletionFrac" type="float" value="0.75" min="0.0" max="1.0" label="Set snvMaxSpanningDeletionFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of overlapping reads contain deletions which span the SNV call site."/>
+            <param argument="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" min="0.0" max="1.0" label="Set indelMaxWindowFilteredBasecallFrac" help="Somatic indel calls are filtered if greater than this fraction of basecalls in a window extending 50 bases to each side of an indel's call position have been removed by the mismatch density filter."/>
+            <param argument="ssnvPrior" type="float" value="0.0001" min="0.0" label="Set ssnvPrior" help="Prior probability of a somatic snv or indel."/>
+            <param argument="sindelPrior" type="float" value="0.000001" min="0.0" label="Set sindelPrior" help="Prior probability of a somatic snv or indel."/>
+            <param argument="ssnvNoise" type="float" value="0.0000000005" min="0.0" label="Set ssnvNoise" help="Probability of an snv or indel noise allele NB: in the calling model a noise allele is shared in tumor and normal samples, but occurs at any frequency."/>
+            <param argument="sindelNoiseFactor" type="float" value="2.2" label="Set sindelNoiseFactor" help="Somatic indel noise factor."/>
+            <param argument="ssnvNoiseStrandBiasFrac" type="float" value="0.0" min="0.0" max="1.0" label="Set ssnvNoiseStrandBiasFrac" help="Fraction of snv noise attributed to strand-bias. It is not recommended to change this setting. However, if it is essential to turn the strand bias penalization off, the following is recommended: Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, (1) set ssnvNoiseStrandBiasFrac = 0 (2) divide the current ssnvNoise value by 2."/>
+            <param argument="minTier1Mapq" type="integer" value="20" label="Set minTier1Mapq" help="Minimum MAPQ score for reads at tier1."/>
+            <param argument="minTier2Mapq" type="integer" value="0" label="Set minTier2Mapq" help="Minimum MAPQ score for reads at tier2."/>
+            <param argument="ssnvQuality_LowerBound" type="integer" value="15" label="Set ssnvQuality_LowerBound" help="Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are marked as filtered."/>
+            <param argument="sindelQuality_LowerBound" type="integer" value="40" label="Set sindelQuality_LowerBound" help="Somatic quality score (QSI_NT, NT=ref) below which somatic indels are marked as filtered."/>
+            <param argument="ssnvContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set ssnvContamTolerance" help="Tolerance of tumor contamination in the normal sample."/>
+            <param argument="indelContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set indelContamTolerance" help="Tolerance of tumor contamination in the normal sample."/>
+        </section>
+    </inputs>
+    <!-- Indel and SNV calls switch to vcf_bgzip when compressed output is requested;
+         the callable regions track only exists when that option is enabled -->
+    <outputs>
+        <data name="out_indels"
+              format="vcf"
+              label="${tool.name} on ${on_string}, Indels, vcf">
+            <change_format>
+                <when input="oo.vcf_type"
+                      value="compressed"
+                      format="vcf_bgzip"/>
+            </change_format>
+        </data>
+        <data name="out_snvs"
+              format="vcf"
+              label="${tool.name} on ${on_string}, SNVs, vcf">
+            <change_format>
+                <when input="oo.vcf_type"
+                      value="compressed"
+                      format="vcf_bgzip"/>
+            </change_format>
+        </data>
+        <data name="out_callable"
+              format="bed"
+              label="${tool.name} on ${on_string}, Callable regions, bed">
+            <filter>bool(oo['outputCallableRegions'])</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- #1: BAM inputs, reference FASTA from the history, decompressed VCF outputs checked by line count and marker records -->
+        <test expect_num_outputs="2">
+            <param name="normalBam" value="sample1.bam" ftype="bam"/>
+            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
+            <conditional name="ref_cond">
+                <param name="ref_sel" value="history"/>
+                <param name="ref" value="hg98.fa" ftype="fasta"/>
+            </conditional>
+            <section name="oo">
+                <param name="vcf_type" value="decompressed"/>
+            </section>
+            <output name="out_indels" ftype="vcf">
+                <assert_contents>
+                    <has_n_lines n="41"/>
+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
+                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_snvs" ftype="vcf">
+                <assert_contents>
+                    <has_n_lines n="52"/>
+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
+                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2: CRAM inputs, compressed (vcf_bgzip) outputs compared to the expected files by similar size only -->
+        <test expect_num_outputs="2">
+            <param name="normalBam" value="sample1.cram" ftype="cram"/>
+            <param name="tumorBam" value="sample2.cram" ftype="cram"/>
+            <conditional name="ref_cond">
+                <param name="ref_sel" value="history"/>
+                <param name="ref" value="hg98.fa" ftype="fasta"/>
+            </conditional>
+            <section name="oo">
+                <param name="vcf_type" value="compressed"/>
+            </section>
+            <output name="out_indels" file="indels_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/>
+            <output name="out_snvs" file="snvs_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/>
+        </test>
+        <!-- #3: BAM inputs, decompressed outputs, exome mode, callable regions output and every config setting moved off its default -->
+        <test expect_num_outputs="3">
+            <param name="normalBam" value="sample1.bam" ftype="bam"/>
+            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
+            <conditional name="ref_cond">
+                <param name="ref_sel" value="history"/>
+                <param name="ref" value="hg98.fa" ftype="fasta"/>
+            </conditional>
+            <param name="optimization" value="--exome" />
+            <section name="oo">
+                <param name="vcf_type" value="decompressed"/>
+                <param name="outputCallableRegions" value="true"/>
+            </section>
+            <section name="strelka">
+                <param name="depthFilterMultiple" value="2.8"/>
+                <param name="snvMaxFilteredBasecallFrac" value="0.5"/>
+                <param name="snvMaxSpanningDeletionFrac" value="0.76"/>
+                <param name="indelMaxWindowFilteredBasecallFrac" value="0.4"/>
+                <param name="ssnvPrior" value="0.0002"/>
+                <param name="sindelPrior" value="0.000002"/>
+                <param name="ssnvNoise" value="0.0000000004"/>
+                <param name="sindelNoiseFactor" value="2.1"/>
+                <param name="ssnvNoiseStrandBiasFrac" value="0.1"/>
+                <param name="minTier1Mapq" value="21"/>
+                <param name="minTier2Mapq" value="1"/>
+                <param name="ssnvQuality_LowerBound" value="14"/>
+                <param name="sindelQuality_LowerBound" value="41"/>
+                <param name="ssnvContamTolerance" value="0.16"/>
+                <param name="indelContamTolerance" value="0.16"/>
+                <param name="maxIndelSize" value="50"/>
+            </section>
+            <output name="out_indels" ftype="vcf">
+                <assert_contents>
+                    <has_n_lines n="39"/>
+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
+                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_snvs" ftype="vcf">
+                <assert_contents>
+                    <has_n_lines n="51"/>
+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
+                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_callable" ftype="bed">
+                <assert_contents>
+                    <has_n_lines n="136"/>
+                    <has_line_matching expression="demo20&#009;3971&#009;.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #4: BAM inputs with a locally cached reference; the hg19 dbkey on the normal sample is what the cached genome filter matches against -->
+        <test expect_num_outputs="2">
+            <param name="normalBam" dbkey="hg19" value="sample1.bam" ftype="bam"/>
+            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
+            <conditional name="ref_cond">
+                <param name="ref_sel" value="cached"/>
+                <param name="ref" value="hg19"/>
+            </conditional>
+            <section name="oo">
+                <param name="vcf_type" value="decompressed"/>
+            </section>
+            <output name="out_indels" ftype="vcf">
+                <assert_contents>
+                    <has_n_lines n="41"/>
+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
+                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
+                </assert_contents>
+            </output>
+            <output name="out_snvs" ftype="vcf">
+                <assert_contents>
+                    <has_n_lines n="52"/>
+                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
+                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@HELP_STRELKA@
+
+The somatic calling model improves on the original Strelka method for liquid and late-stage tumor analysis by accounting for possible tumor cell contamination in the normal sample. A final empirical variant re-scoring step using random forest models trained on various call quality features has been added to both callers to further improve precision.
+
+**Input**
+
+@HELP_INPUT@
+
+**Output**
+
+*INDEL*
+
+All somatic indels inferred in the tumor sample in VCF format.
+
+*SNVS*
+
+All somatic SNVs inferred in the tumor sample in VCF format. 
+
+*Callability*
+
+The somatic variant caller can be configured with the option --outputCallableRegions, which will extend the somatic SNV quality model calculation to be applied as a test of somatic SNV callability at all positions in the genome. The outcome of this callability calculation will be summarized in a BED-formatted callability track. This BED track contains regions which are determined to be callable, indicating that there is sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater. Both somatic and non-somatic sites are determined to be 'callable' if the somatic or non-somatic quality threshold is at least 15.
+
+.. class:: infomark
+
+**References**
+
+@HELP_REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_test1.vcf	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,95 @@
+##fileformat=VCFv4.1
+##fileDate=.
+##source=strelka
+##source_version=2.9.10
+##startTime=.
+##cmdline=./configureStrelkaGermlineWorkflow.py --bam input_sample_0.bam --bam input_sample_1.bam --bam input_sample_2.bam --disableSequenceErrorEstimation --config=/tmp/tmpmywmzdlj/job_working_directory/000/7/configs/tmpzw3187cr --referenceFasta input_ref.fasta --runDir results
+##reference=file:///tmp/tmpmywmzdlj/job_working_directory/000/7/working/input_ref.fasta
+##contig=<ID=demo20,length=5000>
+##content=strelka germline small-variant calls
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
+##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">
+##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">
+##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. RUs are not reported if longer than 20 bases">
+##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">
+##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
+##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">
+##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">
+##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">
+##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
+##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">
+##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">
+##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">
+##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">
+##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">
+##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.4">
+##FILTER=<ID=HighSNVSB,Description="Sample SNV strand bias value (SB) exceeds 10">
+##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">
+##Depth_demo20=79.00
+##FILTER=<ID=LowDepth,Description="Locus depth is below 3">
+##FILTER=<ID=NotGenotyped,Description="Locus contains forcedGT input alleles which could not be genotyped">
+##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12891
+demo20	1	.	T	.	.	LowGQX	END=899;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	.:.:0:0:0
+demo20	900	.	G	.	.	LowGQX;LowDepth	END=906;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:5:2:0:2
+demo20	907	.	T	.	.	LowGQX	END=931;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:7:4:0:3
+demo20	932	.	G	.	.	PASS	END=990;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:15:8:0:6
+demo20	991	.	C	G	75	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:71:9:9:1:5,4:1,4:4,0:2.8:PASS:72,0,123
+demo20	992	.	C	.	.	PASS	END=1084;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:24:14:0:9
+demo20	1085	.	T	.	.	PASS	.	GT:GQX:DP:DPF:MIN_DP	0/0:22:21:1:21
+demo20	1086	.	G	.	.	PASS	END=1148;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:54:22:1:19
+demo20	1148	.	C	CTAT	144	PASS	CIGAR=1M3I;RU=TAT;REFREP=1;IDREP=2;MQ=59	GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL	0/1:114:27:20:11,8:5,3:6,5:PASS:111,0,147
+demo20	1149	.	T	.	.	PASS	END=1270;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:39:19:0:14
+demo20	1271	.	A	G	268	PASS	SNVHPOL=4;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:126:30:18:0:8,10:7,6:1,4:-18.6:PASS:169,0,123
+demo20	1272	.	G	.	.	PASS	END=1507;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:39:20:1:14
+demo20	1508	.	A	G	312	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:172:30:22:1:10,12:4,6:6,6:-21.5:PASS:191,0,169
+demo20	1509	.	G	.	.	PASS	END=1582;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:27:17:0:10
+demo20	1583	.	A	.	.	PASS	END=1669;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:21:15:0:8
+demo20	1670	.	C	.	.	PASS	END=1705;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:45:19:0:16
+demo20	1706	.	C	T	608	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:54:30:19:0:0,19:0,8:0,11:-35.5:PASS:342,57,0
+demo20	1707	.	G	.	.	PASS	END=1743;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:54:21:0:19
+demo20	1744	.	C	T	312	PASS	SNVHPOL=3;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:159:30:21:0:9,12:5,6:4,6:-20.7:PASS:191,0,156
+demo20	1745	.	G	.	.	PASS	END=1845;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:40:21:1:16
+demo20	1846	.	C	T	165	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:116:30:24:1:16,8:13,5:3,3:-12.4:PASS:117,0,224
+demo20	1847	.	G	.	.	PASS	END=1872;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:60:23:1:21
+demo20	1873	.	C	T	122	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/0:60:60:21:0:21,0:15,0:6,0:0.0:PASS:0,63,360
+demo20	1874	.	C	.	.	PASS	END=2073;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:45:21:0:16
+demo20	2074	.	T	C	246	PASS	SNVHPOL=2;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:156:30:24:1:13,11:4,8:9,3:-9.7:PASS:158,0,191
+demo20	2075	.	A	.	.	PASS	END=2198;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:63:31:1:22
+demo20	2199	.	G	A	297	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:181:30:28:1:14,14:12,5:2,9:-14.3:PASS:183,0,189
+demo20	2200	.	C	.	.	PASS	END=2300;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:60:30:1:26
+demo20	2301	.	G	T	369	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:161:22:29:1:12,17:6,11:6,6:-21.0:PASS:219,0,158
+demo20	2302	.	T	.	.	PASS	END=2454;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:72:30:0:25
+demo20	2455	.	T	C	889	PASS	SNVHPOL=2;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:90:30:31:2:0,31:0,14:0,17:-51.4:PASS:370,93,0
+demo20	2456	.	G	.	.	PASS	END=2511;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:87:36:2:30
+demo20	2512	.	A	G	531	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:151:22:39:1:13,26:9,11:4,15:-28.4:PASS:300,0,148
+demo20	2513	.	T	.	.	PASS	END=2639;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:81:37:1:28
+demo20	2640	.	C	T	751	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:81:30:28:0:0,28:0,14:0,14:-47.3:PASS:370,84,0
+demo20	2641	.	T	.	.	PASS	END=2659;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:60:23:0:21
+demo20	2660	.	G	T	567	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:60:30:21:1:0,21:0,11:0,10:-36.2:PASS:321,63,0
+demo20	2661	.	G	.	.	PASS	END=3037;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:41:27:1:18
+demo20	3038	.	C	.	.	PASS	END=3053;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:27:20:1:17
+demo20	3054	.	G	C	214	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:140:22:20:0:10,10:6,6:4,4:-12.8:PASS:142,0,153
+demo20	3055	.	C	.	.	PASS	END=3365;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:51:25:1:18
+demo20	3366	.	G	T	753	PASS	SNVHPOL=4;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:75:30:26:0:0,26:0,15:0,11:-42.1:PASS:370,78,0
+demo20	3367	.	G	.	.	PASS	END=3536;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:60:28:0:21
+demo20	3537	.	C	T	191	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:128:22:31:1:21,10:8,6:13,4:-11.3:PASS:130,0,256
+demo20	3538	.	T	.	.	PASS	END=3664;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:54:29:1:19
+demo20	3664	.	TC	T	572	PASS	CIGAR=1M1D;RU=C;REFREP=4;IDREP=3;MQ=59	GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL	0/1:249:27:41:18,20:10,10:8,10:PASS:322,0,246
+demo20	3665	.	C	.	.	PASS	.	GT:GQX:DP:DPF:MIN_DP	0:249:19:0:19
+demo20	3666	.	C	.	.	PASS	END=4019;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:48:32:1:17
+demo20	4020	.	C	.	.	PASS	END=4059;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:24:12:1:9
+demo20	4060	.	C	.	.	PASS	END=4072;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:15:6:0:6
+demo20	4073	.	C	.	.	LowGQX	END=4091;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:7:4:0:3
+demo20	4092	.	T	.	.	LowGQX;LowDepth	END=4100;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	0/0:3:1:0:1
+demo20	4101	.	T	.	.	LowGQX	END=5000;BLOCKAVG_min30p3a	GT:GQX:DP:DPF:MIN_DP	.:.:0:0:0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg98.fa	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,85 @@
+>demo20
+TGCCTGCTTTGTGCCAGGTTCTGGGTTGGGAGGTGCTGGGGACAGGGAGATGAGTCAGAC
+CTgggaagatttcgtagaggaggtgacagtaagctggaacctgtgtaatgagcaggagtt
+gcccagtggagaaggggaaggtgttccaggcggaagaaacagcatatgcaaaggccccaa
+ggtaggaagggccctagtgtgtgcagaggacagggcatggggaggggaactaaggctgag
+gccaaggagaggaaatgactcacaccgtgagagaggagttgagaccagggaggCTGCTTG
+CTGTATGATGCAACTGAGAGGGTAGAACAAGGCTGGCACAGAGAAGGTGGGGAAGGAAAA
+GGAGAGACGAAGCTGAGATTTCAGCAGGGCCAAGTCAGCCGTGAGTGCCAGGCTGCGGAG
+CCCAGATTCTCTGGGCTGagaaagagcactctgtccagagtgtggaggggggcctggagg
+ggatgagactcaaagctgggaggccagagaggaggctgctagagttttctgggagagagt
+tactggggcctgaacTCCAGTGAGGCActtcccatttcacagaccaggaaactgaggccc
+aagagtgaggcaactggcccaagggcacacagccaggtaaggcagaacCTTCCTTCTTTG
+GAGCTCCCTTGGGTGGGAAGCTGTGGGCTCCCCTTCATAGCCCACCCTTTTGGCTGTATC
+TCCCTGCTGCCCTGGGCATATGCTCCCTTATTCTGTCCTCCCTTGAAGCTGACTGCTGGC
+CTTAAAGGGCCCCTGTTTCTTCCCTCTGGACTACATGAGATCGGGATAGTATTAATGACT
+AAAACCTACCAGGGGTTTCTAGGCCTGGCCTGGAAAAAGTGACTGTTGACAAACAAAGTG
+CAGAGATTTAAAATCTCCTCTGTCTCAATTAGTGGAATCCAGTTAGAGGTTTGAACTATG
+ATTCTACCAGAATCCAATCTCTCTGGGTAGCCAGGTACCCAGGATGGGGCTAAAATTCCA
+GATGGATAGGTTGTCAACACCAGTGAGGAACCAGGAGGCTACCACAGGGTGGGACTTCCT
+GGTTTGGCTTTACATCTGAACTTCAGGGATCCCAGATCCTGGATCTGGGGCACTTGTCCA
+GAGAAGGCTATTGCTCTCATGTCACAAATGAGATGACTAAGACCCCCAAATCAATTCCAG
+TTCACTCACAAGCATTTCCTGGGCAGTGGAGACCCCTGCCCCACCTGTTGGCACCCCCTC
+AGCTCCCCACAGGGAATTGGAGTCCAGCCAAGCATGAGGAGGCTGTTGGCCTCAAGGTGA
+GCAGGGATGGGCTGAACCTCACCCAGTAAGGCAAGGACAGAGCCAGGGTTGGCCTGAGAT
+TTCCAGCCACCCTTTCCAAGGCTCTGCTCACTGTTATTTTCCTTAGTCTacaacaatatc
+aataacaataacaacaataatatcaacaCAAAAAGTGAAATACTCACCATGTATTGTAGT
+GTTTCCAAGGTGTCATGTAATGCCAGGGGTAGTTTGGGGCCAGGAAAAATATTTTTGGGA
+GGCATAAGAATAGGATGGACTGATATTGATATGCAACAGTTTGATCTGGTCCTCCTCTGA
+ATATCTGGGCTGGTAATTTGTACCAGTTTCCCTCGCTTTtgtgcataggcactgtgctga
+acccttttgtatgcatgaactcatccgattctctgtgcaagaactctatgagattattat
+tcccgttttacaagtaagaaaaattgaggctctgagaagttaaataaatgacttgtatga
+agttccagtgctaattaataagtgaaggagccagggcttgaactccggcccatctgactg
+caaagccagtgcccttcctcctacacATCTTCCTTTGGATTTCCACCACTGAGCATATGT
+AAGGTTGGGCAAACAGCCTGCATGAACAATCGCTGCTTTTATATCATGCACAAGTTTGGT
+CTTTTCTGCCTGTGCCCATGTCCTTGTAACCTTCTGAACCAAACTCCCCAGTGCCTGGGA
+ACATCAGAAGACTTGACTCTCTTCTCCTTTCACTAGCCTCCACCTGACTGGGACAAAGCC
+ATGCAGAGAGCTAGTGCTCCCTTCCTGCTAGACTTCAAGGATGCCTGGTTCCTGTGCCCC
+ATCTCCATCCAGCCCTCTCTTCTACACCTGGTGACTGAGCCTCTCCTTCAGTTTCTCCAT
+CCAGAAGGGGGTGAAAGCAACTGCCTAGTGTCCTTCCCTGGTGATAGTGGAGCACGGGGG
+ACAGGGTGTTTGGGCAAAAGGTGCCCAAGGTGAGGTGCCCAACACAACCTCCTACTCAGA
+CGATTGAGCAGACATTCAGCCTCATCTGGGGACTGGGTTACCAGTGGGTTAGTGGGAGGC
+ATTGGGCCCAGGCCCTGTGCCTTGGGCTGAGCTACAAGAAACCCACACATGGGATGAATT
+CAGGCAGCTCAAGGCCAGGTCTGTGCATACGCCAGTAATAGGTTCAGGTTAATCCACATG
+TCGCGATTTGGAAGGTGTCTACTTTTCCTACCTGTAGCTTCCTTAGGCCTCAAACCCCTA
+CTCAGCTGGGTCTGCCAGACTGAGATGGAGCCAGGGTGGAATCTTCTGCCCTCAAATCCC
+TGTCAGCCCTGGTGGTGCCGGGAGCGCCATCACTATTGGGTCTTAAAGGCTTTCCAGCCT
+TCCACTATGGATCCAGGAGCAGCAGTAGCCCCTTTGGTCTTTCTCTCTCATCAGGACATC
+TCCACTCATGGTTCCAGTCAGAGCTTCTTGAAAGTAGTCCCACTCTGTTCAAAAGCCTCC
+CATGCCCCCTGCTAGCCTCAGGCTAAGAGCCCTTCTCCTTCGCACAGCCTTTGGACCTGT
+CTATTTTTATGGTCTGGAAACTTCAGGAACACTGATAGCTGAGCATCTGGCACATATTAT
+GCACTCAAAAACCATGTATTTCTTTCTCCTTCCCTTTGGGACCCGTAAACCAGGGactgg
+acatttttgcaagagacaggagctgtgactgtgcattcactgctgtatccccagcaccca
+gcactgggcctgccacacagtaagtgcttagtaaatgtttgttgactgagtgaTTGCAGC
+TGGGGCCAAGAATGCCTTGGACACCCCAAGTAGGCCGTGTTAGAAGGAGTCAGTGAGAGC
+CTGGGAGCCCAGCCCAGAATTGTTTTCTTGACCCAGAAGCCAGGGCCAGGGATGCCTCTT
+CACTTCTGTTTGGCCCTCTTGGGCTTAGGGGCAGGGGCATTAAGATGAgagaggtccttg
+gggtgcattgagtctaacctcccagttcctcccattctacagccaggaaaactgaggccc
+agggaggggtaggacaagcccaagAAAGTGGGGCTGGAGAATGAATCCTGGAGACCAAAC
+TTGTCAGTCTGGATTGCTGTTGCCCTCATCCTCGCCTCCAAAACCCatgggtaaactgag
+gccaagagaggggcaggggcatgcccaaggtcacccatggaatcaggggacagggcctgg
+attgggattgttgttgacgccattattactgtttattgttgtttctatttcacAGATGGT
+CGGGGAGGGTGGGGCCCGCAATGGCTCCCAGGCGCCCAGAGACCCTGGAGGGTGAGCAGG
+GTCTCCCCTCCCCTCTCCTGCCCGTCTTTAGCCACACTGGGGCGCACACCGCTCACTCAC
+CCGGGGCCGAGGCGTTAGCCCTTTCTTGCACCAGGTGCCGCAACAACACCAGCAGCTGGC
+GCAGGCTGTGCTGCTGGTCCTGCAGGAGGCTGGAGTTGTGCCTGACACCGCGCAGGCCGC
+GCTCGATGTTGGTGAGGGCGGCGCTCTGGCGGCTCAGCGTGTTCAGCAGCTTCGCCTTCT
+TGCTGAGGATGCTGGCCAGCTCCTCCTGCTGCTTGGTCTCCAGGGCCTGCAACCGCTTCT
+CGAGCGCGCTGCGGGGTAGGGGGCGCACAGAGGTGAGCCTGGCATCCTCGCGAAGCACGC
+ACCCCCGCGCGCCTCCCCGGCCCTGGAGTCCCTGCAGCCCGACGATGAGACTCAAGTGTG
+GTGGAACGTCCTGTGCCCACTGTAGGCACAGATTGAGGAGGGGAGAAAAGAGATACCCGG
+CCCTGGAGTAATATAGATTGAGGTTTAGTGGAAGAAAGAGGTGGTGTGGGAGGGACACCA
+GCAACTGGGTAGCTATTATCAAATCCCAACTGTGCTTGCTTTTTGACCCAGCAGTCTACT
+CTCAGGAGTTATCCTCTAGGGAGCATGGTCAAGAATGGCCCTGGGGACTTGCTTGTAATA
+GAAAAACAAACTAACCAACCAACCAAAAAAGATATAGCCTAGATGCCCAAAAGCCAGGAC
+TGGCTGAATCCGTTGCGTTTTGGCGTCCCTGGAAATGTTCCGCAGTCATGAAGGAGGAGG
+CATTTGCACAGAATTGGAAAGATGCCCAGGACTTGGGGCACATCAAGCCTAACCCCATGT
+GTGGCAAGAGAAGAAAGAAAGTATTAATGTAAATAAAGAGAAATGGGGTGAACATATAGG
+AGAAGGCTGGAAAGACCGCAGTGGTGCCTGTGTTTGGGAAGAATATGAAAGAAATTCCCT
+CAAGTGCTGTGACTTCTGCAGAGCAGGTTTGAGTGGATGAAGATGGAGAGGAGGAAGATG
+GGGGCAGGATGGAGGGCCCAACTTTCACTTTTATTTTGTACAGGTTCCTGTTGTCCGATG
+ATATTATAATAATCAAGAGACATTTTTTGTAATGGATTTAGAAGCAAAGAGGAGTTTTTC
+AAAAGAAAGCCTTAGACTCAGCTCTTTCTTTTTGGACATTTTATCCTCCAGATTTACTca
+catgtgtgtgaaatgagatatggaaatgttactcatcgtatcactggttggattagtaaa
+aggctggaagcaacctcaatatccattaactggggactggaggaataaaagcagggacca
+catatggtggagcattataa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg98.fa.fai	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,1 @@
+demo20	5000	8	60	61
Binary file test-data/indels_test2.vcf.gz has changed
Binary file test-data/sample1.bam has changed
Binary file test-data/sample1.cram has changed
Binary file test-data/sample2.bam has changed
Binary file test-data/sample2.cram has changed
Binary file test-data/sample3.bam has changed
Binary file test-data/snvs_test2.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_fasta_indexes.loc	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,1 @@
+hg19	hg19	hg19	${__HERE__}/hg98.fa
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/variants_test2.vcf	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,62 @@
+##fileformat=VCFv4.1
+##fileDate=.
+##source=strelka
+##source_version=2.9.10
+##startTime=.
+##cmdline=./configureStrelkaGermlineWorkflow.py --bam input_sample_0.cram --bam input_sample_1.cram --disableSequenceErrorEstimation --config=/tmp/tmpmxn8erma/job_working_directory/000/4/configs/tmpx1j1a_0u --referenceFasta input_ref.fasta --runDir results
+##reference=file:///tmp/tmpmxn8erma/job_working_directory/000/4/working/input_ref.fasta
+##contig=<ID=demo20,length=5000>
+##content=strelka germline small-variant calls
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
+##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">
+##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">
+##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. RUs are not reported if longer than 20 bases">
+##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">
+##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">
+##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
+##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">
+##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">
+##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">
+##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
+##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">
+##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">
+##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">
+##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">
+##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">
+##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.4">
+##FILTER=<ID=HighSNVSB,Description="Sample SNV strand bias value (SB) exceeds 10">
+##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">
+##Depth_demo20=53.00
+##FILTER=<ID=LowDepth,Description="Locus depth is below 3">
+##FILTER=<ID=NotGenotyped,Description="Locus contains forcedGT input alleles which could not be genotyped">
+##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">
+##FILTER=<ID=NoPassedVariantGTs,Description="No samples at this locus pass all sample filters and have a variant genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12891	NA12892
+demo20	991	.	C	G	38	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:71:9:9:1:5,4:1,4:4,0:2.8:PASS:72,0,123	0/0:33:33:12:0:12,0:9,0:3,0:0.0:PASS:0,36,258
+demo20	1148	.	C	CTAT	72	PASS	CIGAR=1M3I;RU=TAT;REFREP=1;IDREP=2;MQ=60	GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL	0/1:114:27:20:11,8:5,3:6,5:PASS:111,0,147	0/0:69:69:28:24,0:12,0:12,0:PASS:0,72,443
+demo20	1271	.	A	G	134	PASS	SNVHPOL=4;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:126:30:18:0:8,10:7,6:1,4:-18.6:PASS:169,0,123	0/0:75:75:26:0:26,0:18,0:8,0:0.0:PASS:0,78,370
+demo20	1508	.	A	G	156	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:172:30:22:1:10,12:4,6:6,6:-21.5:PASS:191,0,169	0/0:108:108:37:2:37,0:19,0:18,0:0.0:PASS:0,111,370
+demo20	1706	.	C	T	304	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:54:30:19:0:0,19:0,8:0,11:-35.5:PASS:342,57,0	0/0:90:90:31:2:31,0:7,0:24,0:0.0:PASS:0,93,370
+demo20	1744	.	C	T	156	PASS	SNVHPOL=3;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:159:30:21:0:9,12:5,6:4,6:-20.7:PASS:191,0,156	0/0:78:78:27:0:27,0:6,0:21,0:0.0:PASS:0,81,370
+demo20	1846	.	C	T	83	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:116:30:24:1:16,8:13,5:3,3:-12.4:PASS:117,0,224	0/0:60:60:21:0:21,0:14,0:7,0:0.0:PASS:0,63,370
+demo20	1873	.	C	T	122	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/0:60:60:21:0:21,0:15,0:6,0:0.0:PASS:0,63,360	0/1:155:30:23:0:13,10:8,7:5,3:-14.9:PASS:157,0,195
+demo20	2074	.	T	C	123	PASS	SNVHPOL=2;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:156:30:24:1:13,11:4,8:9,3:-9.7:PASS:158,0,191	0/0:75:75:26:0:26,0:14,0:12,0:0.0:PASS:0,78,370
+demo20	2199	.	G	A	149	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:181:30:28:1:14,14:12,5:2,9:-14.3:PASS:183,0,189	0/0:96:96:33:0:33,0:17,0:16,0:0.0:PASS:0,99,370
+demo20	2301	.	G	T	184	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:161:22:29:1:12,17:6,11:6,6:-21.0:PASS:219,0,158	0/0:75:75:26:1:26,0:15,0:11,0:0.0:PASS:0,78,370
+demo20	2455	.	T	C	445	PASS	SNVHPOL=2;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:90:30:31:2:0,31:0,14:0,17:-51.4:PASS:370,93,0	0/0:78:78:27:1:27,0:11,0:16,0:0.0:PASS:0,81,370
+demo20	2512	.	A	G	266	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:151:22:39:1:13,26:9,11:4,15:-28.4:PASS:300,0,148	0/0:69:69:24:2:24,0:8,0:16,0:0.0:PASS:0,72,370
+demo20	2640	.	C	T	375	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:81:30:28:0:0,28:0,14:0,14:-47.3:PASS:370,84,0	0/0:102:102:35:0:35,0:17,0:18,0:0.0:PASS:0,105,370
+demo20	2660	.	G	T	283	PASS	SNVHPOL=3;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:60:30:21:1:0,21:0,11:0,10:-36.2:PASS:321,63,0	0/0:87:87:30:0:30,0:15,0:15,0:0.0:PASS:0,90,370
+demo20	3054	.	G	C	107	PASS	SNVHPOL=2;MQ=58	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:140:22:20:0:10,10:6,6:4,4:-12.8:PASS:142,0,153	0/0:24:24:9:2:9,0:4,0:5,0:0.0:PASS:0,27,201
+demo20	3366	.	G	T	377	PASS	SNVHPOL=4;MQ=60	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	1/1:75:30:26:0:0,26:0,15:0,11:-42.1:PASS:370,78,0	0/0:75:75:26:0:26,0:13,0:13,0:0.0:PASS:0,78,370
+demo20	3537	.	C	T	95	PASS	SNVHPOL=2;MQ=59	GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL	0/1:128:22:31:1:21,10:8,6:13,4:-11.3:PASS:130,0,256	0/0:84:84:29:1:29,0:10,0:19,0:0.0:PASS:0,87,370
+demo20	3664	.	TC	T	286	PASS	CIGAR=1M1D;RU=C;REFREP=4;IDREP=3;MQ=59	GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL	0/1:249:27:41:18,20:10,10:8,10:PASS:322,0,246	0/0:70:70:25:25,0:10,0:15,0:PASS:0,73,493
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had the indexed hg19 Canonical sequence stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables> <!-- Galaxy tool data tables required by this tool -->
+    <table name="fasta_indexes" comment_char="#"> <!-- '#' marks comment lines in the .loc file -->
+        <columns>value, dbkey, name, path</columns> <!-- TAB-separated .loc columns, in this order -->
+        <file path="tool-data/fasta_indexes.loc" /> <!-- create this file from tool-data/fasta_indexes.loc.sample -->
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables> <!-- data table configuration used when running the tool tests -->
+    <table name="fasta_indexes" comment_char="#"> <!-- same table name and column layout as the .sample configuration -->
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/test_fasta_indexes.loc"/> <!-- NOTE(review): ${__HERE__} is presumably expanded to this file's directory; confirm -->
+    </table>
+</tables>
\ No newline at end of file