comparison bcftools_mpileup.xml @ 0:12f7c5315f7f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 9d03fe38504a35d11660dadb44cb1beee32fcf4e
author iuc
date Thu, 13 Apr 2017 17:41:11 -0400
parents
children cf06b44624c7
comparison
equal deleted inserted replaced
-1:000000000000 0:12f7c5315f7f
1 <?xml version='1.0' encoding='utf-8'?>
2 <tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@.0">
3 <description>Generate VCF or BCF containing genotype likelihoods for one or multiple alignment (BAM or CRAM) files</description>
4 <macros>
5 <token name="@EXECUTABLE@">mpileup</token>
6 <import>macros.xml</import>
7 <xml name="bam_flag_options">
8 <option value="1">Read is paired</option>
9 <option value="2">Read is mapped in a proper pair</option>
10 <option value="4">The read is unmapped</option>
11 <option value="8">The mate is unmapped</option>
12 <option value="16">Read strand</option>
13 <option value="32">Mate strand</option>
14 <option value="64">Read is the first in a pair</option>
15 <option value="128">Read is the second in a pair</option>
16 <option value="256">The alignment or this read is not primary</option>
17 <option value="512">The read fails platform/vendor quality checks</option>
18 <option value="1024">The read is a PCR or optical duplicate</option>
19 </xml>
20 </macros>
21 <expand macro="requirements">
22 <expand macro="samtools_requirement"/>
23 </expand>
24 <expand macro="version_command" />
25 <command detect_errors="aggressive"><![CDATA[
26 #import re
27 #set bam_list = []
28 #if $input.input_number == 'single':
29 #set $input_base = $re.sub('\W','_',$input.input_bam.display_name.replace('.bam','').replace('.cram',''))
30 #set $ext = 'bam'
31 #set $idx_ext = 'bai'
32 #if $input.input_bam.ext == 'cram':
33 #set $ext = 'cram'
34 #set $idx_ext = 'crai'
35 #end if
36 #set $input_name = $input_base + '.' + $ext
37 #silent $bam_list.append($input_name)
38 ln -s '${input.input_bam}' ${input_name} &&
39 #if $input.input_bam.ext == 'bam':
40 ln -s '${input.input_bam.metadata.bam_index}' ${input_name}.${idx_ext} &&
41 #else:
42 ln -s '${input.input_bam.metadata.cram_index}' ${input_name}.${idx_ext} &&
43 #end if
44 #else:
45 #for $bam_count, $input_bam in enumerate( $input.input_bams ):
46 #set $input_base = $re.sub('\W','_',$input_bam.display_name.replace('.bam','').replace('.cram',''))
47 ## Choose the link extension, index extension and index metadata per input so that
48 ## CRAM inputs link their cram_index (as the single-input branch does), not bam_index.
49 #set $is_cram = $input_bam.ext == 'cram'
50 #set $ext = 'cram' if $is_cram else 'bam'
51 #set $idx_ext = 'crai' if $is_cram else 'bai'
52 #set $input_idx = $input_bam.metadata.cram_index if $is_cram else $input_bam.metadata.bam_index
53 #set $input_name = $input_base + '.' + $ext
54 #silent $bam_list.append($input_name)
55 ln -s '${input_bam}' ${input_name} &&
56 ln -s '${input_idx}' ${input_name}.${idx_ext} &&
57 #end for
58 #end if
59
60 #set $input_fa_ref = None
61 #if $reference_source.reference_source_selector == "history":
62 #set $input_fa_ref = 'ref.fa'
63 ln -s '${reference_source.ref_file}' $input_fa_ref &&
64 samtools faidx $input_fa_ref &&
65 #elif $reference_source.reference_source_selector == "cached":
66 #set $input_fa_ref = $reference_source.ref_file.fields.path
67 #end if
68
69 #set $section = $sec_restrict
70 @PREPARE_REGIONS_FILE@
71 @PREPARE_TARGETS_FILE@
72
73 bcftools @EXECUTABLE@
74
75 #if $input_fa_ref is not None:
76 --fasta-ref '$input_fa_ref'
77 #else:
78 --no-reference
79 #end if
80
81 ## Indel Calling section. NOTE(review): -o and --open-prob below are fed by two different params whose help text both cite --open-prob; they look like the same bcftools option passed twice - confirm which value takes effect and drop one.
82 #set $section = $sec_indel
83 #if $section.perform_indel_calling.perform_indel_calling_selector == 'do_not_perform_indel_calling':
84 --skip-indels
85 #elif $section.perform_indel_calling.perform_indel_calling_selector == 'perform_indel_calling':
86 -o "${section.perform_indel_calling.gap_open_sequencing_error_probability}"
87 -e "${section.perform_indel_calling.gap_extension_sequencing_error_probability}"
88 -h "${section.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}"
89 -L "${section.perform_indel_calling.skip_indel_calling_above_sample_depth}"
90 -m "${section.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}"
91 --open-prob "${section.perform_indel_calling.open_seq_error_probability}"
92 -F "${section.perform_indel_calling.minimum_gapped_read_fraction}"
93 ${section.perform_indel_calling.gapped_read_per_sample}
94 #if len( $section.perform_indel_calling.platform_list_repeat ):
95 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $section.perform_indel_calling.platform_list_repeat ] ) }"
96 #end if
97 #end if
98
99 ## Filter section: flag filters, depth cap, orphan/overlap switches, quality thresholds, read groups
100 #set $section = $sec_filtering
101 #if str( $section.filter_by_flags.filter_flags ) == "filter":
102 #if $section.filter_by_flags.require_flags:
103 --rf ${sum([int(flag) for flag in str($section.filter_by_flags.require_flags).split(',')])}
104 #end if
105 #if $section.filter_by_flags.exclude_flags:
106 --ff ${sum([int(flag) for flag in str($section.filter_by_flags.exclude_flags).split(',')])}
107 #end if
108 #end if
109 -d "${section.max_reads_per_bam}"
110 ${section.skip_anomalous_read_pairs} ${section.ignore_overlaps}
111 #if str( $section.quality.quality_settings ) == "adjust":
112 $section.quality.baq
113 -q "${section.quality.minimum_mapping_quality}"
114 -Q "${section.quality.minimum_base_quality}"
115 -C "${section.quality.coefficient_for_downgrading}"
116 #end if
117 #if str( $section.read_groups.read_groups_selector ) == "ignore_rg":
118 --ignore-RG
119 #elif str( $section.read_groups.read_groups_selector ) == "paste":
120 -G "${section.read_groups.rg_action}${read_groups_file}"
121 #elif str( $section.read_groups.read_groups_selector ) == "history":
122 -G "${section.read_groups.rg_action}${section.read_groups.read_groups}"
123 #end if
124
125 #set $section = $sec_output_options
126 #if $section.output_tags:
127 --annotate "$section.output_tags"
128 #end if
129 #if $section.gvcf:
130 --gvcf "$section.gvcf"
131 #end if
132
133 ## Subset section
134 #set $section = $sec_subset
135 @SAMPLES@
136
137 ## Restrict section
138 #set $section = $sec_restrict
139 @REGIONS@
140 @TARGETS@
141
142 @THREADS@
143
144 @OUTPUT_TYPE@
145
146 ## Primary Input/Outputs
147 #echo ' '.join($bam_list)#
148 > '$output_file'
149 #if str( $sec_filtering.read_groups.read_groups_selector ) == "paste":
150 && echo 'read-groups:'
151 && cat ${read_groups_file}
152 #end if
153 ]]>
154 </command>
155 <configfiles>
156 <configfile name="read_groups_file">
157 <![CDATA[#slurp
158 #set pasted_data = ''
159 #set $section = $sec_filtering
160 #if str( $section.read_groups.read_groups_selector ) == "paste":
161 #set pasted_data = '\t'.join( str( $section.read_groups.group_paste).split() )
162 #end if
163 #slurp
164 ${pasted_data}
165 ]]>
166 </configfile>
167 </configfiles>
168 <inputs>
169
170 <conditional name="input">
171 <param name="input_number" type="select" label="Alignment Inputs">
172 <option value="single">Single BAM/CRAM</option>
173 <option value="multiple">Multiple BAM/CRAMs</option>
174 </param>
175 <when value="single">
176 <param name="input_bam" type="data" format="bam,cram" label="Input BAM/CRAM" />
177 </when>
178 <when value="multiple">
179 <param name="input_bams" type="data" format="bam,cram" multiple="true" label="Input BAM/CRAMs" />
180 </when>
181 </conditional>
182
183 <conditional name="reference_source">
184 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
185 <option value="cached">Locally cached</option>
186 <option value="history">History</option>
187 <option value="none">No Reference</option>
188 </param>
189 <when value="cached">
190 <param name="ref_file" type="select" label="Select reference genome">
191 <options from_data_table="fasta_indexes"/>
192 </param>
193 </when>
194 <when value="history">
195 <param name="ref_file" type="data" format="fasta" label="Genome Reference" />
196 </when>
197 <when value="none"/>
198 </conditional>
199
200 <section name="sec_indel" expanded="false" title="Indel Calling">
201 <conditional name="perform_indel_calling">
202 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
203 <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option>
204 <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option>
205 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
206 </param>
207 <when value="perform_indel_calling_def" />
208 <when value="perform_indel_calling">
209 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
210 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" help="--ext-prob; Reducing this value leads to longer indels. default=20"/>
211 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." help="--tandem-qual; default=100"/>
212 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" help="--max-idepth; default=250"/>
213 <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" help="--min-ireads; default=1"/>
214 <param name="open_seq_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
215 <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads" help="--gap-frac; default=0.002"/>
216 <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply --min-ireads and --gap-frac values on a per-sample basis" help="--per-sample-mF; by default both options are applied to reads pooled from all samples"/>
217 <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
218 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/>
219 </repeat>
220 </when>
221 <when value="do_not_perform_indel_calling" />
222 </conditional>
223 </section>
224
225 <section name="sec_filtering" expanded="false" title="Input Filtering Options">
226 <param name="max_reads_per_bam" type="integer" value="250" max="1024" min="1" label="Max reads per BAM" help="--max-depth; default=250"/>
227 <param name="ignore_overlaps" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Disable read-pair overlap detection" help="--ignore-overlaps"/>
228 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" help="--count-orphans"/>
229 <conditional name="filter_by_flags">
230 <param name="filter_flags" type="select" label="Set filter by flags">
231 <option selected="True" value="nofilter">Do not filter</option>
232 <option value="filter">Filter by flags to exclude or require</option>
233 </param>
234 <when value="filter">
235 <param name="require_flags" type="select" display="checkboxes" label="Require" multiple="True" help="--incl-flags">
236 <expand macro="bam_flag_options" />
237 </param>
238 <param name="exclude_flags" type="select" display="checkboxes" label="Exclude" multiple="True" help="--excl-flags">
239 <expand macro="bam_flag_options" />
240 </param>
241 </when>
242 <when value="nofilter" />
243 </conditional>
244 <conditional name="quality">
245 <param label="Quality Options" name="quality_settings" type="select">
246 <option value="none" selected="True">defaults</option>
247 <option value="adjust">Set base and mapping quality options</option>
248 </param>
249 <when value="adjust">
250 <param name="baq" type="select" optional="true" label="per-Base Alignment Quality">
251 <help>
252 --no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
253 --redo-BAQ; ignore existing BQ tags
254 </help>
255 <option value="--no-BAQ">disable BAQ (per-Base Alignment Quality) (no-BAQ)</option>
256 <option value="--redo-BAQ">recalculate BAQ on the fly, ignore existing BQs (redo-BAQ)</option>
257 </param>
258 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/>
259 <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="--min-MQ; default=0"/>
260 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" help="--min-BQ; default=13"/>
261 </when>
262 <when value="none"/>
263 </conditional>
264
265 <conditional name="read_groups">
266 <param name="read_groups_selector" type="select" label="Select read groups to include or exclude" help="--read-groups">
267 <option value="no_limit" selected="True">use defaults</option>
268 <option value="history">From an uploaded text file</option>
269 <option value="paste">Paste a list of read groups</option>
270 <option value="ignore_rg">Ignore RG tags. Treat all reads in one alignment file as one sample. </option>
271 </param>
272 <when value="history">
273 <param name="read_groups" format="txt" label="Text file" type="data">
274 <validator type="dataset_ok_validator" />
275 </param>
276 <param name="rg_action" type="select" label="Include or Exclude these Read Groups">
277 <option value="" selected="true">Include</option>
278 <option value="^">Exclude</option>
279 </param>
280 </when>
281 <when value="paste">
282 <param name="group_paste" type="text" size="10x35" area="true" label="Read groups" help="Paste a list of read groups" />
283 <param name="rg_action" type="select" label="Include or Exclude these Read Groups">
284 <option value="" selected="true">Include</option>
285 <option value="^">Exclude</option>
286 </param>
287 </when>
288 <when value="ignore_rg" />
289 <when value="no_limit" />
290 </conditional>
291 </section>
292
293 <section name="sec_restrict" expanded="false" title="Restrict to">
294 <expand macro="macro_regions" />
295 <expand macro="macro_targets" />
296 </section>
297 <section name="sec_subset" expanded="false" title="Subset Options">
298 <expand macro="macro_samples" />
299 </section>
300
301 <section name="sec_output_options" expanded="false" title="Output options">
302 <param name="output_tags" optional="True" type="select" multiple="True" display="checkboxes" label="Optional tags to output" help="--output-tags">
303 <option value="DP">DP (Number of high-quality bases)</option>
304 <option value="AD">AD (Allelic depth)</option>
305 <option value="ADF">ADF (Allelic depth on the forward strand)</option>
306 <option value="ADR">ADR (Allelic depth on the reverse strand)</option>
307 <option value="INFO/AD">INFO/AD (Allelic depth)</option>
308 <option value="INFO/ADF">INFO/ADF (Allelic depth on the forward strand)</option>
309 <option value="INFO/ADR">INFO/ADR (Allelic depth on the reverse strand)</option>
310 <option value="SP">SP (Phred-scaled strand bias P-value)</option>
311 <option value="DV">DV (Number of high-quality non-reference bases)</option>
312 <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option>
313 <option value="DPR">DPR (Number of high-quality bases for each observed allele)</option>
314 <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option>
315 </param>
316 <param name="gvcf" type="text" value="" label="gVCF blocks of homozygous REF calls">
317 <help>
318 output gVCF blocks of homozygous REF calls, with depth (DP) ranges specified by the list of integers. For example, passing 5,15 will group sites into two types of gVCF blocks, the first with minimum per-sample DP from the interval [5,15) and the latter with minimum depth 15 or more. In this example, sites with minimum per-sample depth less than 5 will be printed as separate records, outside of gVCF blocks.
319 </help>
320 <validator type="regex" message="integers separated by commas">^(\d+(,\d+)*)?$</validator>
321 </param>
322 </section>
323
324 <expand macro="macro_select_output_type" />
325
326 </inputs>
327 <outputs>
328 <expand macro="macro_vcf_output" />
329 </outputs>
330 <tests>
331 <test>
332 <param name="input_number" value="single" />
333 <param name="input_bam" ftype="bam" value="mpileup.1.bam" />
334 <param name="reference_source_selector" value="history" />
335 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
336 <param name="output_type" value="v" />
337 <output name="output_file">
338 <assert_contents>
339 <has_text text="mpileup" />
340 <has_text text="HG00100" />
341 <has_text_matching expression="17\t1\t.\tA\t...\t0\t.\tDP=5;" />
342 <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=9;" />
343 </assert_contents>
344 </output>
345 </test>
346 <test>
347 <param name="input_number" value="single" />
348 <param name="input_bam" ftype="cram" value="mpileup.3.cram" />
349 <param name="reference_source_selector" value="history" />
350 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
351 <param name="output_type" value="v" />
352 <output name="output_file">
353 <assert_contents>
354 <has_text text="mpileup" />
355 <has_text text="HG00102" />
356 <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=5;" />
357 </assert_contents>
358 </output>
359 </test>
360 <test>
361 <param name="input_number" value="multiple" />
362 <param name="input_bams" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
363 <param name="reference_source_selector" value="history" />
364 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
365 <param name="regions_src" value="regions" />
366 <param name="regions" value="17:100-110" />
367 <param name="output_tags" value="DP,INFO/AD,DV" />
368 <param name="output_type" value="v" />
369 <output name="output_file">
370 <assert_contents>
371 <has_text text="mpileup" />
372 <has_text text="HG00100" />
373 <has_text text="HG00101" />
374 <has_text text="HG00102" />
375 <has_text text="ID=DP," />
376 <not_has_text text="17\t111" />
377 <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=18;AD=17,0;" />
378 </assert_contents>
379 </output>
380 </test>
381 <test>
382 <param name="input_number" value="multiple" />
383 <param name="input_bams" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
384 <param name="reference_source_selector" value="history" />
385 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
386 <param name="regions_src" value="regions_file" />
387 <param name="regions_file" ftype="bed" value="mpileup.regions.bed" />
388 <param name="targets_src" value="targets" />
389 <param name="targets" value="17:100-104" />
390 <param name="output_tags" value="DP,INFO/AD,DV" />
391 <param name="output_type" value="v" />
392 <output name="output_file">
393 <assert_contents>
394 <has_text text="mpileup" />
395 <has_text text="HG00100" />
396 <has_text text="HG00101" />
397 <has_text text="HG00102" />
398 <has_text text="ID=DP," />
399 <not_has_text text="17\t105" />
400 <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=18;AD=17,0;" />
401 </assert_contents>
402 </output>
403 </test>
404 <test>
405 <param name="input_number" value="multiple" />
406 <param name="input_bams" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
407 <param name="reference_source_selector" value="history" />
408 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
409 <param name="regions_src" value="regions" />
410 <param name="regions" value="17:1050-1060" />
411 <param name="filter_flags" value="filter" />
412 <param name="exclude_flags" value="4,16" />
413 <param name="output_type" value="v" />
414 <output name="output_file">
415 <assert_contents>
416 <has_text text="mpileup" />
417 <has_text text="HG00100" />
418 <has_text_matching expression="17\t1050\t.\tA\t...\t0\t.\tDP=12;" />
419 </assert_contents>
420 </output>
421 </test>
422 </tests>
423 <help><![CDATA[
424 =====================================
425 bcftools @EXECUTABLE@
426 =====================================
427
428 Generate VCF or BCF containing genotype likelihoods for one or multiple alignment (BAM or CRAM) files.
429
430 This is based on the original samtools mpileup command (with the -v or -g options) producing genotype likelihoods in VCF or BCF format, but not the textual pileup output.
431
432 Individuals are identified from the SM tags of the read group (RG) header lines. Multiple individuals can be pooled in one alignment file, and one individual can be separated into multiple files. If sample identifiers are absent, each input file is regarded as one sample.
433
434 The reference genome (--fasta-ref) can be chosen from the locally cached genomes or from a FASTA dataset in the history.
435
436 INDEL calling can be performed with default options, tuned with the advanced options, or skipped entirely (--skip-indels).
437
438 Input reads can be filtered by SAM flags, by base and mapping quality, and by read group; the output can be restricted to selected regions, targets and samples.
439
440
441 @REGIONS_HELP@
442 @TARGETS_HELP@
443
444 @BCFTOOLS_MANPAGE@#@EXECUTABLE@
445
446 @BCFTOOLS_WIKI@
447 ]]>
448 </help>
449 <expand macro="citations" />
450 </tool>