comparison macros.xml @ 0:edbdbc64b397 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
author iuc
date Wed, 27 Jan 2021 14:47:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:edbdbc64b397
1 <?xml version="1.0"?>
2 <macros>
<!--
Shared tokens. @TOOL_VERSION@ is reused below as the version of the strelka
package requirement. @GALAXY_VERSION@ and @DESCRIPTION@ are not referenced
inside this file; presumably the including tools use them for their version
suffix and description (confirm there).
-->
3 <token name="@TOOL_VERSION@">2.9.10</token>
4 <token name="@GALAXY_VERSION@">galaxy0</token>
5 <token name="@DESCRIPTION@">small variant caller</token>
<!--
Macro "requirements": package dependencies of the tools that expand it:
strelka at @TOOL_VERSION@ and samtools 1.9 (the @INIT@ token runs
"samtools faidx" on references taken from the history).
NOTE(review): @INIT@ also calls bgzip and tabix, which are not listed here;
presumably they arrive as a dependency of the samtools package. Confirm.
-->
6 <xml name="requirements">
7 <requirements>
8 <requirement type="package" version="@TOOL_VERSION@">strelka</requirement>
9 <requirement type="package" version="1.9">samtools</requirement>
10 </requirements>
11 </xml>
<!--
Macro "citations": a single DOI citation (presumably the Strelka2
publication; verify the DOI before changing it).
-->
12 <xml name="citations">
13 <citations>
14 <citation type="doi">10.1038/s41592-018-0051-x</citation>
15 </citations>
16 </xml>
17
18 <!--
19 command
20 -->
21
<!--
@INIT@: shell setup plus Cheetah variable definitions. It has to be expanded
before @CREATE@ in the same template, because @CREATE@ reads the two
variables defined here:
  $region_spec         space-joined region options built from the
                       forced_regions repeat and the regions conditional
  $reference_fasta_fn  path of the reference FASTA handed to Strelka
Every emitted shell command ends in "&&" so further commands can follow.
Region inputs are staged in the job directory as bgzip-compressed files with
a tabix index next to them: plain "vcf"/"bed" datasets are compressed and
indexed here, any other extension is symlinked together with the tabix index
stored in the dataset metadata.
-->
22 <token name="@INIT@"><![CDATA[
23 ##ln -s '$referenceFasta' './input_ref.fasta' &&
24 ##samtools faidx './input_ref.fasta' &&
25
26 ## Make all optional regions files available
27 ## Note: all of these must be tabixed
28 #set $reg_options = []
29 #for $i, $sites in enumerate($forced_regions):
30 #set $target_file = 'input_forcedgt_%d.vcf.gz' % $i
31 #if $sites.whitelist.ext == 'vcf':
## Plain VCF: compress and index it inside the job directory
32 bgzip -c '${sites.whitelist}' > $target_file &&
33 tabix -p vcf $target_file &&
34 #else:
## Any other extension: link the dataset and the tabix index from its metadata
35 ln -s '${sites.whitelist}' $target_file &&
36 ln -s '${sites.whitelist.metadata.tabix_index}' ${target_file}.tbi &&
37 #end if
38 #if str($sites.use_whitelist_as) == 'indel_candidates':
39 #silent $reg_options.extend(['--indelCandidates', $target_file])
40 #else:
41 #silent $reg_options.extend(['--forcedGT', $target_file])
42 #end if
43 #end for
44 #if str($regions.restrict_to_region) == 'regions_from_file':
45 #silent $reg_options.append('--callRegions')
46 #set $target_file = 'input_callregions.bed.gz'
47 #if $regions.callRegions.ext == 'bed':
## bgzip -c writes to stdout, so the output has to be redirected into the target
48 bgzip -c '$regions.callRegions' > $target_file &&
49 tabix -p bed $target_file &&
50 #else:
## Same handling as for the whitelists: link data and metadata tabix index
51 ln -s '$regions.callRegions' $target_file &&
52 ln -s '$regions.callRegions.metadata.tabix_index' ${target_file}.tbi &&
53 #end if
54 #silent $reg_options.append($target_file)
55 #end if
56 #set $region_spec = ' '.join($reg_options)
57 #if str($ref_cond.ref_sel) == 'history':
## A history FASTA has no index yet: link it under a fixed name and index it
58 #set $reference_fasta_fn = 'input_ref.fasta'
59 ln -s '$ref_cond.ref' $reference_fasta_fn &&
60 samtools faidx $reference_fasta_fn &&
61 #else
## Cached reference: use the path column of the selected data table entry
62 #set $reference_fasta_fn = str($ref_cond.ref.fields.path)
63 #end if
64 ]]></token>
<!--
@CREATE@: option list ending in "&&"; presumably appended to the Strelka
configure script call in the including tool (the script name itself is not
part of this token). Needs $region_spec and $reference_fasta_fn, which
@INIT@ defines, and writes the workflow into the run directory "results".
-->
65 <token name="@CREATE@"><![CDATA[
66 --config='${config_file}'
67 ${optimization}
## EVS stays enabled unless the user explicitly selected "disableEVS"
68 #if str($expert_settings.evs.selector) != 'disableEVS'
69 #if $expert_settings.evs.snvScoringModelFile
70 --snvScoringModelFile '${expert_settings.evs.snvScoringModelFile}'
71 #end if
72 #if $expert_settings.evs.indelScoringModelFile
73 --indelScoringModelFile '${expert_settings.evs.indelScoringModelFile}'
74 #end if
75 ${expert_settings.evs.reportEVSFeatures}
76 #else
77 --disableEVS
78 #end if
79 ${region_spec}
80 --referenceFasta '${reference_fasta_fn}'
81 --runDir results &&
82 ]]></token>
<!--
@RUN@: executes the runWorkflow.py script found in the run directory
"results" (the directory named by the runDir option in @CREATE@) in local
mode, sized by the resources Galaxy grants the job: GALAXY_SLOTS parallel
jobs (2 if unset) and GALAXY_MEMORY_MB of memory (8192 MB if unset).
-->
83 <token name="@RUN@"><![CDATA[
84 results/runWorkflow.py
85 -m local
86 -j \${GALAXY_SLOTS:-2}
## -g takes gigabytes while Galaxy reports megabytes: convert, never below 1 GB
87 -g \$(( \${GALAXY_MEMORY_MB:-8192} >= 1024 ? \${GALAXY_MEMORY_MB:-8192} / 1024 : 1 ))
88 ]]></token>
89
90 <!--
91 configfile - parser cannot handle indents
92 -->
93
<!--
@CONFIG@: "key = value" settings, presumably rendered into the config file
that @CREATE@ passes via its config option. Only maxIndelSize comes from user
input; the other two keys are fixed. Keep every line unindented, since the
config parser cannot handle indents.
-->
94 <token name="@CONFIG@"><![CDATA[
95 maxIndelSize = $strelka.maxIndelSize
96 isWriteRealignedBam = 0 ## not implemented
97 extraVariantCallerArguments = ## not implemented
98 ]]></token>
99
100 <!--
101 input
102 -->
103
<!--
Macro "input_required": reference genome selection (conditional "ref_cond").
  cached   pick an entry of the fasta_indexes data table, filtered by the
           dbkey metadata of the parameter named by @REF@
  history  pick a FASTA dataset from the history
Both branches call their parameter "ref", so @INIT@ reads $ref_cond.ref in
either case. The token_ref attribute gives @REF@ its default, "normalBam";
an expanding tool can pass a different value.
-->
104 <xml name="input_required" token_ref="normalBam">
105 <conditional name="ref_cond">
106 <param name="ref_sel" type="select" label="Choose the source for the reference genome" help="(--referenceFasta)">
107 <option value="cached">Locally cached</option>
108 <option value="history">History</option>
109 </param>
110 <when value="cached">
111 <param name="ref" type="select" label="Reference genome" help="">
112 <options from_data_table="fasta_indexes">
113 <filter type="data_meta" column="dbkey" key="dbkey" ref="@REF@"/>
114 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file."/>
115 </options>
116 </param>
117 </when>
118 <when value="history">
119 <param name="ref" type="data" format="fasta" label="Reference sequence" help="(--referenceFasta)"/>
120 </when>
121 </conditional>
122 </xml>
<!--
Macro "regions_select": two independent inputs that @INIT@ turns into
command-line options.
  regions         conditional; "regions_from_file" adds a BED dataset that
                  restricts calling to the listed regions
  forced_regions  repeat (zero or more); each entry is a VCF whitelist used
                  either as indel candidates or as forced-genotype sites
NOTE(review): @INIT@ has separate branches for datasets whose extension is
not "bed" / "vcf" (it links them with the tabix index from their metadata),
but the params below only declare format="bed" and format="vcf". Confirm
whether bgzipped, indexed formats should be accepted here as well.
-->
123 <xml name="regions_select">
124 <conditional name="regions">
125 <param name="restrict_to_region" type="select"
126 label="Call variants across">
127 <option value="genome">Whole reference</option>
128 <option value="regions_from_file">Regions specified in BED</option>
129 </param>
130 <when value="genome" />
131 <when value="regions_from_file">
132 <param argument="--callRegions" type="data" format="bed"
133 label="BED dataset with regions to examine"
134 help="Specify a set of regions to call. No VCF output will be provided outside of these regions. Note that the full genome may still be used to calculate certain input statistics (such as expected depth per chromosome)."/>
135 </when>
136 </conditional>
137 <repeat name="forced_regions" title="Whitelists of SNV/indel sites that should always be considered" default="0" min="0"
138 help="Add whitelisted SNVs/indels to list of considered/reported alleles explicitly">
139 <param name="whitelist" type="data" format="vcf"
140 label="Select file with candidate alleles"
141 help="" />
142 <param name="use_whitelist_as" type="select" display="radio"
143 label="Use this whitelist as"
144 help="An indel candidates list is used during the realignment and calling steps to increase the chances of detecting given indels if they exist in any sample. If the indel is NOT found despite these efforts, it will NOT be reported, however. With a list of 'SNV sites and/or indels of interest', on the other hand, indels in the list undergo that same treatment, but listed indels and SNPs are both guaranteed to be reported in the variants output, even if they are judged as not being present in any sample.">
145 <option value="indel_candidates">A list of indel candidates to be considered during realignment/calling (--indelCandidates)</option>
146 <option value="forced_gt_sites">A list of SNV sites/indels of interest that should always be reported (--forcedGT)</option>
147 </param>
148 </repeat>
149 </xml>
<!--
Macro "calling_model": select whose option value is placed verbatim on the
command line by @CREATE@ (empty for WGS, the exome flag for WES). The yield
inside the param lets an expanding tool append further options.
-->
150 <xml name="calling_model">
151 <param name="optimization" type="select" label="Optimize variant calling for">
152 <option value="">Whole-genome sequencing (WGS) data (default mode)</option>
153 <option value="--exome">Whole-exome sequencing (WES) data (--exome)</option>
154 <yield />
155 </param>
156 </xml>
<!--
Macro "calling_model_expert": collapsed section "expert_settings".
The yield lets an expanding tool place its own parameters ahead of the "evs"
conditional. That conditional either disables empirical variant scoring or
(default) keeps it enabled, in which case optional JSON model files for SNVs
and indels and a feature-reporting flag are available. @CREATE@ reads all of
these through $expert_settings.evs.
-->
157 <xml name="calling_model_expert">
158 <section name="expert_settings" title="Expert configuration of calling model" expanded="false">
159 <yield />
160 <conditional name="evs">
161 <param name="selector" type="select" label="Configure empirical variant scoring (EVS) model">
162 <option value="disableEVS">Don't use EVS, just simple threshold-based filtering (--disableEVS)</option>
163 <option value="enableEVS" selected="true">Use EVS models (default)</option>
164 </param>
165 <when value="disableEVS" />
166 <when value="enableEVS">
167 <param argument="--snvScoringModelFile" type="data" format="json" optional="true"
168 label="Optional SNV scoring model to overwrite default model" />
169 <param argument="--indelScoringModelFile" type="data" format="json" optional="true"
170 label="Optional indel scoring model to overwrite default model" />
171 <param argument="--reportEVSFeatures" type="boolean" truevalue="--reportEVSFeatures" falsevalue=""
172 label="Report all empirical variant scoring features in VCF output"
173 help="WARNING: Do not use this feature with Strelka Germline and more than one input sample or the tool run will fail!" />
174 </when>
175 </conditional>
176 </section>
177 </xml>
<!--
Macro "input_output": boolean "vcf_type" whose value is "compressed" or
"decompressed". No token in this file reads it, so it is presumably evaluated
by the including tool when it publishes the variants output (confirm there).
-->
178 <xml name="input_output">
179 <param name="vcf_type" type="boolean" truevalue="compressed" falsevalue="decompressed"
180 label="Generate compressed variants output (vcf.gz)"
181 help="Default is uncompressed vcf" />
182 </xml>
<!--
Macro "input_strelka": integer "maxIndelSize" with default 49. @CONFIG@ reads
it as $strelka.maxIndelSize, so this macro is presumably expanded inside a
section or conditional named "strelka" (confirm in the including tool).
-->
183 <xml name="input_strelka">
184 <param argument="maxIndelSize" name="maxIndelSize" type="integer" value="49" label="Set maximum reported indel size" help=""/>
185 </xml>
186
187 <!--
188 Help
189 -->
190
191 <token name="@HELP_INPUT@">
192 *Sequencing Data*
193
194 The input sequencing reads are expected to come from a paired-end sequencing assay. Any input other than paired-end reads is ignored by default, except to double-check for putative somatic variant evidence in the normal sample during somatic variant analysis. Read lengths above ~400 bases are not tested.
195
196 *Alignment Files*
197
198 All input sequencing reads should be mapped by an external tool and provided as input in `BAM &lt;https://samtools.github.io/hts-specs/SAMv1.pdf&gt;`_ or `CRAM &lt;https://samtools.github.io/hts-specs/CRAMv3.pdf&gt;`_ format.
199
200 The following limitations apply to the input BAM/CRAM alignment records:
201
202 - Alignments cannot contain the "=" character in the SEQ field.
203 - RG (read group) tags are ignored -- each alignment file must represent one sample.
204 - Alignments with basecall quality values greater than 70 will trigger a runtime error (these are not supported on the assumption that the high basecall quality indicates an offset error)
205
206 *VCF Files*
207
208 Input `VCF &lt;http://samtools.github.io/hts-specs/VCFv4.1.pdf&gt;`_ files are accepted for a number of roles as described below. All input VCF records are checked for compatibility with the given reference genome, in addition to role-specific checks described below. If any VCF record's REF field is not compatible with the reference genome, a runtime error will be triggered. 'Compatible with the reference genome' means that, for each VCF record, either (1) the REF base matches the corresponding reference genome base, (2) the REF base is 'N', or (3) the reference genome base is any ambiguous IUPAC base code (all ambiguous base codes are converted to 'N' while importing the reference).
209 </token>
210 <token name="@HELP_STRELKA@">
211 Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts (Strelka Germline) and somatic variation in tumor/normal sample pairs (Strelka Somatic).
212
213 Strelka accepts input read mappings from BAM or CRAM files, and optionally candidate and/or forced-call alleles from VCF. It reports all small variant predictions in VCF 4.1 format. Germline variant reporting uses the gVCF conventions to represent both variant and reference call confidence. For best somatic indel performance, Strelka is designed to be run with the Manta structural variant and indel caller, which provides additional indel candidates up to a given maximum indel size (by default this is 49). By design, Manta and Strelka run together with default settings provide complete coverage over all indel sizes (in addition to all SVs and SNVs) for clinical somatic and germline analysis scenarios.
214 </token>
215 <token name="@HELP_REFERENCES@"><![CDATA[
216 More information is available on `GitHub <https://github.com/Illumina/strelka>`_.
217 ]]></token>
218 </macros>