diff macros.xml @ 0:edbdbc64b397 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
author iuc
date Wed, 27 Jan 2021 14:47:52 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Jan 27 14:47:52 2021 +0000
@@ -0,0 +1,218 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">2.9.10</token>
+    <token name="@GALAXY_VERSION@">galaxy0</token>
+    <token name="@DESCRIPTION@">small variant caller</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">strelka</requirement>
+            <requirement type="package" version="1.9">samtools</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-018-0051-x</citation>
+        </citations>
+    </xml>
+
+    <!-- 
+        command
+    -->
+
+    <token name="@INIT@"><![CDATA[
+        ##ln -s '$referenceFasta' './input_ref.fasta' &&
+        ##samtools faidx './input_ref.fasta' &&
+
+        ## Make all optional regions files available
+        ## Note: all of these must be tabixed
+        #set $reg_options = []
+        #for $i, $sites in enumerate($forced_regions):
+            #set $target_file = 'input_forcedgt_%d.vcf.gz' % $i
+            #if $sites.whitelist.ext == 'vcf':
+                bgzip -c '${sites.whitelist}' > $target_file &&
+                tabix -p vcf $target_file &&
+            #else:
+                ln -s '${sites.whitelist}' $target_file &&
+                ln -s '${sites.whitelist.metadata.tabix_index}' ${target_file}.tbi' &&
+            #end if
+            #if str($sites.use_whitelist_as) == 'indel_candidates':
+                #silent $reg_options.extend(['--indelCandidates', $target_file])
+            #else:
+                #silent $reg_options.extend(['--forcedGT', $target_file])
+            #end if
+        #end for
+        #if str($regions.restrict_to_region) == 'regions_from_file':
+            #silent $reg_options.append('--callRegions')
+            #set $target_file = 'input_callregions.bed.gz'
+            #if $regions.callRegions.ext == 'bed':
+                bgzip -c '$regions.callRegions' $target_file &&
+                tabix -p bed $target_file &&
+            else:
+                ln -s '$regions.callRegions' $target_file &&
+                ln -s '$regions.callRegions.tabix_index' ${target_file}.tbi &&
+            #end if
+            #silent $reg_options.append($target_file)
+        #end if
+        #set $region_spec = ' '.join($reg_options)
+        #if str($ref_cond.ref_sel) == 'history':
+            #set $reference_fasta_fn = 'input_ref.fasta'
+            ln -s '$ref_cond.ref' $reference_fasta_fn &&
+            samtools faidx $reference_fasta_fn &&
+        #else
+            #set $reference_fasta_fn = str($ref_cond.ref.fields.path)
+        #end if
+    ]]></token>
+    <token name="@CREATE@"><![CDATA[
+        --config='$config_file'
+        $optimization
+        #if str($expert_settings.evs.selector) == "disableEVS"
+            --disableEVS
+        #else
+            #if $expert_settings.evs.snvScoringModelFile
+                --snvScoringModelFile '$expert_settings.evs.snvScoringModelFile'
+            #end if
+            #if $expert_settings.evs.indelScoringModelFile
+                --indelScoringModelFile '$expert_settings.evs.indelScoringModelFile'
+            #end if
+            $expert_settings.evs.reportEVSFeatures
+        #end if
+        $region_spec
+        --referenceFasta '${reference_fasta_fn}'
+        --runDir results &&
+    ]]></token>
+    <token name="@RUN@"><![CDATA[
+        results/runWorkflow.py
+            -m local
+            -j \${GALAXY_SLOTS:-2}
+            -g \${GALAXY_MEMORY_MB:-8192}
+    ]]></token>
+
+    <!-- 
+        configfile - parser cannot handle indents
+    -->
+
+    <token name="@CONFIG@"><![CDATA[
+maxIndelSize = $strelka.maxIndelSize
+isWriteRealignedBam = 0 ## not inplemented
+extraVariantCallerArguments = ## not implemented
+    ]]></token>
+
+    <!--
+        input 
+    -->
+    
+    <xml name="input_required" token_ref="normalBam">
+        <conditional name="ref_cond">
+            <param name="ref_sel" type="select" label="Choose the source for the reference genome" help="(--referenceFasta)">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param name="ref" type="select" label="Reference genome" help="">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" column="dbkey" key="dbkey" ref="@REF@"/>
+                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file."/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref" type="data" format="fasta" label="Reference sequence" help="(--referenceFasta)"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="regions_select">
+        <conditional name="regions">
+            <param name="restrict_to_region" type="select"
+            label="Call variants across">
+                <option value="genome">Whole reference</option>
+                <option value="regions_from_file">Regions specified in BED</option>
+            </param>
+            <when value="genome" />
+            <when value="regions_from_file">
+                <param argument="--callRegions" type="data" format="bed"
+                label="BED dataset with regions to examine"
+                help="Specify a set of regions to call. No VCF output will be provided outside of these regions. Note that the full genome may still be used to calculate certain input statistics (such as expected depth per chromosome)."/>
+            </when>
+        </conditional>
+        <repeat name="forced_regions" title="Whitelists of SNV/indel sites that should always be considered" default="0" min="0"
+        help="Add whitelisted SNVs/indels to list of considered/reported alleles explicitly">
+            <param name="whitelist" type="data" format="vcf"
+            label="Select file with candidate alleles"
+                help="" />
+            <param name="use_whitelist_as" type="select" display="radio"
+            label="Use this whitelist as"
+            help="An indel candidates list is used during the realignment and calling steps to increase the chances of detecting given indels if they exist in any sample. If the indel is NOT found despite these efforts, it will NOT be reported, however. With a list of 'SNV sites and/or indels of interest', on the other hand, indels in the list undergo that same treatment, but listed indels and SNPs are both guaranteed to be reported in the variants output, even if they are judged as not being present in any sample.">
+                <option value="indel_candidates">A list of indel candidates to be considered during realignment/calling (--indelCandidates)</option>
+                <option value="forced_gt_sites">A list of SNV sites/indels of interest that should always be reported (--forcedGT)</option>
+            </param>
+        </repeat>
+    </xml>
+    <xml name="calling_model">
+        <param name="optimization" type="select" label="Optimize variant calling for">
+            <option value="">Whole-genome sequencing (WGS) data (default mode)</option>
+            <option value="--exome">Whole-exome sequencing (WES) data (--exome)</option>
+            <yield />
+        </param>
+    </xml>
+    <xml name="calling_model_expert">
+        <section name="expert_settings" title="Expert configuration of calling model" expanded="false">
+            <yield />
+            <conditional name="evs">
+                <param name="selector" type="select" label="Configure empirical variant scoring (EVS) model">
+                    <option value="disableEVS">Don't use EVS, just simple threshold-based filtering (--disableEVS)</option>
+                    <option value="enableEVS" selected="true">Use EVS models (default)</option>
+                </param>
+                <when value="disableEVS" />
+                <when value="enableEVS">
+                    <param argument="--snvScoringModelFile" type="data" format="json" optional="true"
+                    label="Optional SNV scoring model to overwrite default model" />
+                    <param argument="--indelScoringModelFile" type="data" format="json" optional="true"
+                    label="Optional indel scoring model to overwrite default model" />
+                    <param argument="--reportEVSFeatures" type="boolean" truevalue="--reportEVSFeatures" falsevalue=""
+                    label="Report all empirical variant scoring features in VCF output"
+                    help="WARNING: Do not use this feature with Strelka Germline and more than one input sample or the tool run will fail!" />
+                </when>
+            </conditional>
+        </section>
+    </xml>
+    <xml name="input_output">
+        <param name="vcf_type" type="boolean" truevalue="compressed" falsevalue="decompressed"
+        label="Generate compressed variants output (vcf.gz)"
+        help="Default is uncompressed vcf" />
+    </xml>
+    <xml name="input_strelka">
+        <param argument="maxIndelSize" name="maxIndelSize" type="integer" value="49" label="Set maximum reported indel size" help=""/>
+    </xml>
+
+    <!--
+        Help
+    -->
+
+    <token name="@HELP_INPUT@">
+*Sequencing Data*
+
+The input sequencing reads are expected to come from a paired-end sequencing assay. Any input other than paired-end reads are ignored by default except to double-check for putative somatic variant evidence in the normal sample during somatic variant analysis. Read lengths above ~400 bases are not tested.
+
+*Alignment Files*
+
+All input sequencing reads should be mapped by an external tool and provided as input in `BAM &lt;https://samtools.github.io/hts-specs/SAMv1.pdf&gt;`_. or `CRAM &lt;https://samtools.github.io/hts-specs/CRAMv3.pdf&gt;`_ format.
+
+The following limitations apply to the input BAM/CRAM alignment records:
+
+- Alignments cannot contain the "=" character in the SEQ field.
+- RG (read group) tags are ignored -- each alignment file must represent one sample.
+- Alignments with basecall quality values greater than 70 will trigger a runtime error (these are not supported on the assumption that the high basecall quality indicates an offset error)
+
+*VCF Files*
+
+Input `VCF &lt;http://samtools.github.io/hts-specs/VCFv4.1.pdf&gt;`_ files are accepted for a number of roles as described below. All input VCF records are checked for compatibility with the given reference genome, in additional to role-specific checks described below. If any VCF record's REF field is not compatible with the reference genome a runtime error will be triggered. 'Compatible with the reference genome' means that each VCF record's REF base either (1) matches the corresponding reference genome base or the VCF record's REF base is 'N' or the reference genome base is any ambiguous IUPAC base code (all ambiguous base codes are converted to 'N' while importing the reference).
+    </token>
+    <token name="@HELP_STRELKA@">
+Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts (Strelka Germline) and somatic variation in tumor/normal sample pairs (Strelka Somatic).
+
+Strelka accepts input read mappings from BAM or CRAM files, and optionally candidate and/or forced-call alleles from VCF. It reports all small variant predictions in VCF 4.1 format. Germline variant reporting uses the gVCF conventions to represent both variant and reference call confidence. For best somatic indel performance, Strelka is designed to be run with the Manta structural variant and indel caller, which provides additional indel candidates up to a given maxiumum indel size (by default this is 49). By design, Manta and Strelka run together with default settings provide complete coverage over all indel sizes (in additional to all SVs and SNVs) for clinical somatic and germline analysis scenarios.
+    </token>
+    <token name="@HELP_REFERENCES@"><![CDATA[
+More information are available on `github <https://github.com/Illumina/strelka>`_.
+    ]]></token>
+</macros>