comparison manta.xml @ 0:42ba283a0fe2 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit e6c5d87dcd848fc4910af968e73adc481c811d15"
author artbio
date Wed, 13 May 2020 15:15:07 -0400
parents
children d648e40c6da9
comparison
equal deleted inserted replaced
-1:000000000000 0:42ba283a0fe2
1 <tool id="manta" name="Manta" version="@WRAPPER_VERSION@">
2
3 <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description>
4
5 <macros>
6 <import>manta_macros.xml</import>
7 </macros>
8 <expand macro="requirements"/>
9 <expand macro="stdio"/>
10
11 <command detect_errors="exit_code"><![CDATA[
12 ## Stage the BAM inputs, prepare the Manta config, configure the workflow, run it and link the results onto the outputs.
13 @VERSION@
14 @pipefail@
15 @set_reference_fasta_filename@
16 #import os
17 #import random
18 #set job_dir=os.getcwd()
19 #set run_dir = job_dir + '/MantaWorkflow_' + str(random.randint(1,100000))
20 #set config_file = $__tool_directory__ + '/configManta.py.ini'
21 ## The editable copy lives in the job directory: the tool directory may be read-only and is shared by concurrent jobs.
22 #set config_file_custom = job_dir + '/customized.ini'
23 #set $input_normal = 'normal.bam'
24 #set $input_tumor = 'tumor.bam'
25 ln -s '$bam_input.normal_bam_file' $input_normal &&
26 ln -s '$bam_input.normal_bam_file.metadata.bam_index' normal.bai &&
27 #if str( $bam_input.bam_input_selector ) == "tumor_bam":
28 ln -s '$bam_input.tumor_bam_file' $input_tumor &&
29 ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai &&
30 #end if
31 cp '${config_file}' '${config_file_custom}' &&
32 #if str( $set_configuration.set_configuration_switch ) == "Customized":
33 sed -i 's/minCandidateVariantSize = 8/minCandidateVariantSize = $set_configuration.minCandidateVariantSize/' '${config_file_custom}' &&
34 sed -i 's/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = $set_configuration.rnaMinCandidateVariantSize/' '${config_file_custom}' &&
35 sed -i 's/minEdgeObservations = 3/minEdgeObservations = $set_configuration.minEdgeObservations/' '${config_file_custom}' &&
36 sed -i 's/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = $set_configuration.graphNodeMaxEdgeCount/' '${config_file_custom}' &&
37 sed -i 's/minCandidateSpanningCount = 3/minCandidateSpanningCount = $set_configuration.minCandidateSpanningCount/' '${config_file_custom}' &&
38 sed -i 's/minScoredVariantSize = 50/minScoredVariantSize = $set_configuration.minScoredVariantSize/' '${config_file_custom}' &&
39 sed -i 's/minDiploidVariantScore = 10/minDiploidVariantScore = $set_configuration.minDiploidVariantScore/' '${config_file_custom}' &&
40 sed -i 's/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = $set_configuration.minPassDiploidVariantScore/' '${config_file_custom}' &&
41 sed -i 's/minPassDiploidGTScore = 15/minPassDiploidGTScore = $set_configuration.minPassDiploidGTScore/' '${config_file_custom}' &&
42 sed -i 's/minSomaticScore = 10/minSomaticScore = $set_configuration.minSomaticScore/' '${config_file_custom}' &&
43 sed -i 's/minPassSomaticScore = 30/minPassSomaticScore = $set_configuration.minPassSomaticScore/' '${config_file_custom}' &&
44 sed -i 's/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes/' '${config_file_custom}' &&
45 sed -i 's/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes/' '${config_file_custom}' &&
46 sed -i 's/useOverlapPairEvidence = 0/useOverlapPairEvidence = $set_configuration.useOverlapPairEvidence/' '${config_file_custom}' &&
47 #end if
48
49 configManta.py
50 --referenceFasta='${reference_fasta_filename}'
51 #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file":
52 #set config_file = $set_configuration.CustomConfigFile
53 #else if str( $set_configuration.set_configuration_switch ) == "Customized":
54 #set config_file = config_file_custom
55 #end if
56 --config='${config_file}'
57 --bam=$input_normal
58 #if str( $bam_input.bam_input_selector ) == "tumor_bam":
59 --tumorBam=$input_tumor
60 #end if
61 ## Forward every ticked switch (exome, rna, unstrandedRNA) as a configManta.py flag; an empty selection adds nothing.
62 #if str( $additional_param ) not in ('', 'None'):
63 #for $flag in str( $additional_param ).split(','):
64 --${flag}
65 #end for
66 #end if
67 #if str( $advanced.retainTempFiles ) != '':
68 --retainTempFiles
69 #end if
70 #if str( $advanced.generateEvidenceBam ) != '':
71 --generateEvidenceBam
72 #end if
73 --runDir='${run_dir}'
74 --scanSizeMb=${advanced.scanSizeMb}
75 --callMemMb=${advanced.callMemMb} &&
76 ln -s -f '${run_dir}/runWorkflow.py' '${run_manta_workflow}' &&
77 ln -s -f '${config_file}' '${set_conf_file}' &&
78 ## Run with the number of cores Galaxy allocated to the job (4 when unset) rather than a fixed 8.
79 python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-4} &&
80 ln -s -f '${run_dir}/results/variants/candidateSV.vcf.gz' '${out_vcf1}' &&
81 ln -s -f '${run_dir}/results/variants/diploidSV.vcf.gz' '${out_vcf2}' &&
82 ln -s -f '${run_dir}/results/variants/candidateSmallIndels.vcf.gz' '${out_vcf3}'
83 ]]></command>
84
85 <inputs>
86 <expand macro="reference_source_conditional" />
87 <!-- Sample input: either a single normal BAM or a normal/tumor BAM pair. -->
88 <conditional name="bam_input">
89 <param name="bam_input_selector" type="select" label="Just 'normal' BAM file or 'normal' + 'tumor' BAM files" help="Select between a single normal BAM file or a pair of normal / tumor BAM files">
90 <option value="not_tumor_bam">Normal</option>
91 <option value="tumor_bam">Normal + Tumor</option>
92 </param>
93
94 <when value="not_tumor_bam">
95 <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
96 </when>
97
98 <when value="tumor_bam">
99 <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
100 <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)." />
101 </when>
102 </conditional>
103
104 <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional options" help="Optional Manta switches for exome (WES) or RNA-Seq input.">
105 <option value="exome">Set options for WES input: turn off depth filters</option>
106 <option value="rna">Set options for RNA-Seq input. Must specify exactly one bam input file</option>
107 <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option>
108 </param>
109
110 <section name="advanced" title="Advanced options" expanded="false">
111 <!-- The two boolean switches carry the Manta flag they stand for as their true value. -->
112 <param name="callMemMb" type="integer" value="8000" label="Set default task memory requirement (MB)" help="Default task memory requirement, in megabytes, for common tasks." />
113 <param name="scanSizeMb" type="integer" value="12" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)" />
114 <param name="retainTempFiles" type="boolean" checked="False" truevalue="--retainTempFiles" falsevalue="" label="Keep all temporary files" help="Click yes so all temporary files (for workflow debugging) will be kept."/>
115 <param name="generateEvidenceBam" type="boolean" checked="False" truevalue="--generateEvidenceBam" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/>
116
117 </section>
118
119 <!-- <expand macro="manta_configuration"/> -->
120
121 <conditional name="set_configuration">
122 <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?">
123 <option value="Default_config_file">Default</option>
124 <option value="Custom_config_file">Upload a different config file</option>
125 <option value="Customized">Customize the options</option>
126 </param>
127 <when value="Default_config_file">
128 </when>
129 <when value="Custom_config_file">
130 <param format="ini" name="CustomConfigFile" type="data" label="config file"/>
131 </when>
132 <when value="Customized">
133 <param name="minCandidateVariantSize" type="integer" value="8" label="minCandidateVariantSize" help="Run discovery and candidate reporting for all SVs/indels at or above this size."/>
134 <param name="rnaMinCandidateVariantSize" type="integer" value="1000" label="rnaMinCandidateVariantSize" help="Separate option (to provide different default) used for runs in RNA-mode."/>
135 <param name="minEdgeObservations" type="integer" value="3" label="minEdgeObservations" help="Remove all edges from the graph unless they're supported by this many 'observations'."/>
136 <param name="graphNodeMaxEdgeCount" type="integer" value="10" label="graphNodeMaxEdgeCount" help="If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge."/>
137 <param name="minCandidateSpanningCount" type="integer" value="3" label="minCandidateSpanningCount" help="Run discovery and candidate reporting for all SVs/indels with at least this many spanning support observations."/>
138 <param name="minScoredVariantSize" type="integer" value="50" label="minScoredVariantSize" help="After candidate identification, only score and report SVs/indels at or above this size."/>
139 <param name="minDiploidVariantScore" type="integer" value="10" label="minDiploidVariantScore" help="Minimum VCF 'QUAL' score for a variant to be included in the diploid vcf."/>
140 <param name="minPassDiploidVariantScore" type="integer" value="20" label="minPassDiploidVariantScore" help="VCF 'QUAL' score below which a variant is marked as filtered in the diploid vcf."/>
141 <param name="minPassDiploidGTScore" type="integer" value="15" label="minPassDiploidGTScore" help="Minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf."/>
142 <param name="minSomaticScore" type="integer" value="10" label="minSomaticScore" help="Somatic quality scores below this level are not included in the somatic vcf."/>
143 <param name="minPassSomaticScore" type="integer" value="30" label="minPassSomaticScore" help="Somatic quality scores below this level are filtered in the somatic vcf."/>
144 <param name="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" type="integer" value="1" label="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" help="Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote locations with poor mapping quality. This feature can be enabled/disabled separately for germline and cancer calling below."/>
145 <param name="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" type="integer" value="0" label="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" help="Here 'CancerCallingModes' includes tumor-normal subtraction and tumor-only calling. 'GermlineCallingModes' includes all other calling modes."/>
146 <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
147 </when>
148 </conditional>
149 <!-- Checkboxes that decide which result files are added to the history. -->
150 <param name="runworkflow_file_check" type="boolean" label="output manta run_workflow file" checked="False" help="Show run_workflow file on history"/>
151 <param name="config_file_check" type="boolean" label="output conf file" checked="False" help="Show configuration file on history"/>
152 <param name="O1_check" type="boolean" label="output candidate SVs (candidateSV.vcf.gz)" checked="False" help="Show the candidateSV VCF on history"/>
153 <param name="O2_check" type="boolean" label="output diploid SVs (diploidSV.vcf.gz)" checked="False" help="Show the diploidSV VCF on history"/>
154 <param name="O3_check" type="boolean" label="output candidate small indels (candidateSmallIndels.vcf.gz)" checked="False" help="Show the candidateSmallIndels VCF on history"/>
155
156 </inputs>
157
158 <outputs>
159 <!-- Each output appears only when its checkbox is ticked. NOTE(review): from_work_dir names MantaWorkflow/, while the command uses a MantaWorkflow_N run directory and links the results onto the outputs itself; confirm from_work_dir is still wanted. -->
160 <data name="run_manta_workflow" format="txt" label="Parameters for running Manta">
161 <filter>runworkflow_file_check == True</filter>
162 </data>
163
164 <data name="set_conf_file" format="txt" label="conf_file.ini">
165 <filter>config_file_check == True</filter>
166 </data>
167 <data name="out_vcf1" format="vcf_bgzip" label="${tool.name} on ${on_string} (Generating the candidateSV.vcf file)" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
168 <filter>O1_check == True</filter>
169 </data>
170 <data name="out_vcf2" format="vcf_bgzip" label="${tool.name} on ${on_string} (Generating the diploidSV.vcf file)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz">
171 <filter>O2_check == True</filter>
172 </data>
173 <data name="out_vcf3" format="vcf_bgzip" label="${tool.name} on ${on_string} (Generating the candidateSmallIndels.vcf file)" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz">
174 <filter>O3_check == True</filter>
175 </data>
176 </outputs>
177
178 <tests>
179 <test>
180 <conditional name="reference_source">
181 <param name="reference_source_selector" value="history"/>
182 <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
183 </conditional>
184 <!-- Normal/tumor pair against a history FASTA; checks candidateSV.vcf.gz. -->
185 <conditional name="bam_input">
186 <param name="bam_input_selector" value="tumor_bam"/>
187 <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
188 <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
189 </conditional>
190
191 <conditional name="set_configuration">
192 <param name="set_configuration_switch" value="Default_config_file"/>
193 </conditional>
194 <param name="callMemMb" value="1000"/>
195 <param name="O1_check" value="True"/>
196 <output name="out_vcf1" file="candidateSV.vcf.gz" decompress="true" lines_diff="4"/>
197 </test>
198 <test>
199 <conditional name="reference_source">
200 <param name="reference_source_selector" value="history"/>
201 <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
202 </conditional>
203 <!-- Normal/tumor pair against a history FASTA; checks candidateSmallIndels.vcf.gz. -->
204 <conditional name="bam_input">
205 <param name="bam_input_selector" value="tumor_bam"/>
206 <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
207 <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
208 </conditional>
209
210 <conditional name="set_configuration">
211 <param name="set_configuration_switch" value="Default_config_file"/>
212 </conditional>
213 <param name="callMemMb" value="1000"/>
214 <param name="O3_check" value="True"/>
215 <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
216 </test>
217 <test>
218 <conditional name="reference_source">
219 <param name="reference_source_selector" value="cached"/>
220 <param name="index" value="hg19"/>
221 </conditional>
222 <!-- Same pair against the cached hg19 reference; dbkey is set on the BAM datasets, not on the selector. -->
223 <conditional name="bam_input">
224 <param name="bam_input_selector" value="tumor_bam"/>
225 <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam" dbkey="hg19"/>
226 <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam" dbkey="hg19"/>
227 </conditional>
228
229 <conditional name="set_configuration">
230 <param name="set_configuration_switch" value="Default_config_file"/>
231 </conditional>
232 <param name="callMemMb" value="1000"/>
233 <param name="O3_check" value="True"/>
234 <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
235 </test>
236 </tests>
237
238 <help><![CDATA[
239 **Manta**
240 This script configures the Manta SV analysis pipeline.
241 You must specify a BAM or CRAM file for at least one sample.
242 Configuration will produce a workflow run script which
243 can execute the workflow on a single node or through
244 sge and resume any interrupted execution.
245
246 **Options**
247 --version show program's version number and exit
248 -h, --help show this help message and exit
249 --config=FILE provide a configuration file to override defaults in
250 global config file (configManta.py.ini in the bin
251 directory of the Manta installation)
252 --allHelp show all extended/hidden options
253 **Workflow options**
254 --bam=FILE, --normalBam=FILE
255 Normal sample BAM or CRAM file. May be specified more
256 than once, multiple inputs will be treated as each BAM
257 file representing a different sample. [optional] (no
258 default)
259 --tumorBam=FILE, --tumourBam=FILE
260 Tumor sample BAM or CRAM file. Only up to one tumor
261 bam file accepted. [optional] (no default)
262 --exome Set options for WES input: turn off depth filters
263 --rna Set options for RNA-Seq input. Must specify exactly
264 one bam input file
265 --unstrandedRNA Set if RNA-Seq input is unstranded: Allows splice-
266 junctions on either strand
267 --referenceFasta=FILE
268 samtools-indexed reference fasta file [required]
269 --runDir=DIR Name of directory to be created where all workflow
270 scripts and output will be written. Each analysis
271 requires a separate directory. (default:
272 MantaWorkflow)
273 --callRegions=FILE Optionally provide a bgzip-compressed/tabix-indexed
274 BED file containing the set of regions to call. No VCF
275 output will be provided outside of these regions. The
276 full genome will still be used to estimate statistics
277 from the input (such as expected fragment size
278 distribution). Only one BED file may be specified.
279 (default: call the entire genome)
280 **Extended options**
281 These options are either unlikely to be reset after initial site
282 configuration or only of interest for workflow development/debugging.
283 They will not be printed here if a default exists unless --allHelp is
284 specified
285 --existingAlignStatsFile=FILE
286 Pre-calculated alignment statistics file. Skips
287 alignment stats calculation.
288 --useExistingChromDepths
289 Use pre-calculated chromosome depths.
290 --candidateBins=candidateBins
291 Provide the total number of tasks which candidate
292 generation will be sub-divided into. (default: 256)
293 --retainTempFiles Keep all temporary files (for workflow debugging)
294 --generateEvidenceBam
295 Generate a bam of supporting reads for all SVs
296 --outputContig Output assembled contig sequences in VCF file
297 --scanSizeMb=INT Maximum sequence region size (in megabases) scanned by
298 each task during SV Locus graph generation. (default:
299 12)
300 --region=REGION Limit the analysis to a region of the genome for
301 debugging purposes. If this argument is provided
302 multiple times all specified regions will be analyzed
303 together. All regions must be non-overlapping to get a
304 meaningful result. Examples: '--region chr20' (whole
305 chromosome), '--region chr2:100-2000 --region
306 chr3:2500-3000' (two regions). If this option is
307 specified (one or more times) together with the
308 --callRegions BED file, then all region arguments will
309 be intersected with the callRegions BED track.
310 --callMemMb=INT Set default task memory requirement (in megabytes) for
311 common tasks. This may benefit an analysis of unusual
312 depth, chimera rate, etc.. 'Common' tasks refers to
313 most compute intensive scatter-phase tasks of graph
314 creation and candidate generation.
315
316 For further info see: https://github.com/Illumina/manta
317
318 ]]></help>
319
320 <citations>
321 <citation type="doi">10.1093/bioinformatics/btv710</citation>
322 </citations>
323
324 </tool>