comparison samtools_mpileup.xml @ 9:fa7ad9b89f4a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_mpileup commit 831f76c1ac20172b902d9edf79aced718feb96e2
author iuc
date Mon, 03 Sep 2018 13:10:02 -0400
parents 583abf29fc8e
children 8da515fbc1bf
comparison
equal deleted inserted replaced
8:583abf29fc8e 9:fa7ad9b89f4a
1 <tool id="samtools_mpileup" name="MPileup" version="2.1.3"> 1 <tool id="samtools_mpileup" name="samtools mpileup" version="2.1.4">
2 <description>multi-way pileup of variants</description> 2 <description>multi-way pileup of variants</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" /> 7 <expand macro="stdio" />
8 <expand macro="version_command" /> 8 <expand macro="version_command" />
9 <command><![CDATA[ 9 <command><![CDATA[
10 #for $bam_count, $input_bam in enumerate( $reference_source.input_bam ): 10
11 ln -s '${input_bam}' 'localbam_${bam_count}.bam' && 11 #set $input_bams = $reference_source.input_bam
12 ln -s '${input_bam.metadata.bam_index}' 'localbam_${bam_count}.bam.bai' && 12 @PREPARE_IDX_MULTIPLE@
13 #end for
14 13
15 #if $reference_source.reference_source_selector == "history": 14 #if $reference_source.reference_source_selector == "history":
16 ln -s '${reference_source.ref_file}' && 15 ln -s '${reference_source.ref_file}' &&
17 samtools faidx `basename '${reference_source.ref_file}'` && 16 samtools faidx `basename '${reference_source.ref_file}'` &&
18 #end if 17 #end if
21 #if $reference_source.reference_source_selector != "history": 20 #if $reference_source.reference_source_selector != "history":
22 -f '${reference_source.ref_file.fields.path}' 21 -f '${reference_source.ref_file.fields.path}'
23 #else: 22 #else:
24 -f '${reference_source.ref_file}' 23 -f '${reference_source.ref_file}'
25 #end if 24 #end if
26 #for $bam_count, $input_bam in enumerate( $reference_source.input_bam ): 25 #for $i in range(len( $input_bams )):
27 localbam_${bam_count}.bam 26 '${i}'
28 #end for 27 #end for
28
29 #if str( $advanced_options.advanced_options_selector ) == "advanced": 29 #if str( $advanced_options.advanced_options_selector ) == "advanced":
30 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": 30 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
31 #if $advanced_options.filter_by_flags.require_flags: 31 #if $advanced_options.filter_by_flags.require_flags:
32 --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])} 32 #set $filter = $advanced_options.filter_by_flags.require_flags
33 @FLAGS@
34 --rf $flags
33 #end if 35 #end if
34 #if $advanced_options.filter_by_flags.exclude_flags: 36 #if $advanced_options.filter_by_flags.exclude_flags:
35 --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])} 37 #set $filter = $advanced_options.filter_by_flags.exclude_flags
38 @FLAGS@
39 --ff $flags
36 #end if 40 #end if
37 #end if 41 #end if
38 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": 42 #if str($advanced_options.limit_by_region.limit_by_regions) == "limit":
39 -l '$pasted_regions' 43 #if str( $advanced_options.limit_by_region.region_paste ) != "None":
40 #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history" 44 -r '$advanced_options.limit_by_region.region_paste'
41 -l '$advanced_options.limit_by_region.bed_regions' 45 #end if
46 #if str( $advanced_options.limit_by_region.bed_regions ) != "None"
47 -l '$advanced_options.limit_by_region.bed_regions'
48 #end if
42 #end if 49 #end if
50
43 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": 51 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
44 -G '$excluded_read_groups' 52 -G '$excluded_read_groups'
45 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" 53 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history"
46 -G '$advanced_options.exclude_read_group.read_groups' 54 -G '$advanced_options.exclude_read_group.read_groups'
47 #end if 55 #end if
56 ${advanced_options.ignore_overlaps}
48 ${advanced_options.skip_anomalous_read_pairs} 57 ${advanced_options.skip_anomalous_read_pairs}
49 ${advanced_options.disable_probabilistic_realignment} 58 ${advanced_options.disable_probabilistic_realignment}
50 -C ${advanced_options.coefficient_for_downgrading} 59 -C ${advanced_options.coefficient_for_downgrading}
51 -d ${advanced_options.max_reads_per_bam} 60 -d ${advanced_options.max_reads_per_bam}
52 ${advanced_options.extended_BAQ_computation} 61 ${advanced_options.extended_BAQ_computation}
53 -q ${advanced_options.minimum_mapping_quality} 62 -q ${advanced_options.minimum_mapping_quality}
54 -Q ${advanced_options.minimum_base_quality} 63 -Q ${advanced_options.minimum_base_quality}
55 #if str( $advanced_options.region_string ): 64 $advanced_options.qualities_illumina_onethree
56 -r '${advanced_options.region_string}' 65 #end if
57 #end if 66 #if str( $output_options_cond.output_options_selector ) == 'advanced':
58 #end if 67 ${output_options_cond.base_position_on_reads}
59 #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': 68 ${output_options_cond.output_mapping_quality}
60 ${genotype_likelihood_computation_type.output_format} 69 ${output_options_cond.output_read_names}
61 ${genotype_likelihood_computation_type.compressed} 70 ${output_options_cond.output_all_pos}
62 71 #end if
63 #if str( $genotype_likelihood_computation_type.output_tags ) != "None": 72 --output '$output_file_pu'
64 --output-tags '${genotype_likelihood_computation_type.output_tags}'
65 #end if
66
67 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
68 --open-prob ${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}
69 -e ${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}
70 -h ${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}
71 -L ${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}
72 -m ${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}
73 -F ${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}
74 ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample}
75 #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ):
76 -P '${ ",".join( str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ) }'
77 #end if
78 #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling':
79 -I
80 #end if
81 #else:
82 ${genotype_likelihood_computation_type.base_position_on_reads}
83 ${genotype_likelihood_computation_type.output_mapping_quality}
84 #end if
85 --output '$output_mpileup'
86 ]]></command> 73 ]]></command>
87
88 <configfiles>
89 <configfile name="excluded_read_groups"><![CDATA[
90 #set pasted_data = ''
91 #if str( $advanced_options.advanced_options_selector ) == "advanced":
92 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
93 #set pasted_data = '\t'.join( str( $advanced_options.exclude_read_group['read_groups'] ).split() )
94 #end if
95 #end if
96 ${pasted_data}
97 ]]></configfile>
98 <configfile name="pasted_regions"><![CDATA[
99 #set pasted_data = ''
100 #if str( $advanced_options.advanced_options_selector ) == "advanced":
101 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
102 #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() )
103 #end if
104 #end if
105 ${pasted_data}
106 ]]></configfile>
107 </configfiles>
108
109 <inputs> 74 <inputs>
110 <conditional name="reference_source"> 75 <conditional name="reference_source">
111 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> 76 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
112 <option value="cached">Use a built-in genome</option> 77 <option value="cached">Use a built-in genome</option>
113 <option value="history">Use a genome from the history</option> 78 <option value="history">Use a genome from the history</option>
126 <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" /> 91 <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
127 </param> 92 </param>
128 <param name="ref_file" type="data" format="fasta" label="Using reference genome" /> 93 <param name="ref_file" type="data" format="fasta" label="Using reference genome" />
129 </when> 94 </when>
130 </conditional> 95 </conditional>
131 <conditional name="genotype_likelihood_computation_type">
132 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
133 <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option>
134 <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option>
135 </param>
136 <when value="perform_genotype_likelihood_computation">
137 <param name="output_format" type="select" label="Choose the output format">
138 <option value="--VCF">VCF</option>
139 <option value="--BCF">BCF</option>
140 </param>
141 <param name="compressed" argument="--uncompressed" type="boolean" truevalue="" falsevalue="--uncompressed" checked="False" label="Compress output" />
142 <param name="output_tags" argument="--output-tags" type="select" optional="True" multiple="True" display="checkboxes" label="Optional tags to output">
143 <option value="DP">DP (Number of high-quality bases)</option>
144 <option value="DPR">DPR (Number of high-quality bases for each observed allele)</option>
145 <option value="DV">DV (Number of high-quality non-reference bases)</option>
146 <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option>
147 <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option>
148 <option value="SP">SP (Phred-scaled strand bias P-value)</option>
149 </param>
150 <conditional name="perform_indel_calling">
151 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
152 <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option>
153 <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option>
154 <option value="do_not_perform_indel_calling">Do not perform INDEL calling (-I)</option>
155 </param>
156 <when value="perform_indel_calling_def" />
157 <when value="perform_indel_calling">
158 <param name="gap_open_sequencing_error_probability" argument="--open-prob" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" help="Reducing this value leads to more indel calls" />
159 <param name="gap_extension_sequencing_error_probability" argument="--ext-prob" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" help="Reducing this value leads to longer indels" />
160 <param name="coefficient_for_modeling_homopolymer_errors" argument="--tandem-qual" type="integer" value="100" label="Coefficient for modeling homopolymer errors" />
161 <param name="skip_indel_calling_above_sample_depth" argument="--max-idepth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" />
162 <param name="minimum_gapped_reads_for_indel_candidates" argument="--min-ireads" type="integer" value="1" label="Minimum gapped reads for indel candidates" />
163 <param name="minimum_gapped_read_fraction" argument="--gap-frac" type="float" value="0.002" label="Minimum fraction of gapped reads" />
164 <param name="gapped_read_per_sample" argument="--per-sample-mF" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply --min-ireads and --gap-frac values on a per-sample basis" help="By default both options are applied to reads pooled from all samples"/>
165 <repeat name="platform_list_repeat" title="Platform for INDEL candidates" help="--platforms">
166 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/>
167 </repeat>
168 </when>
169 <when value="do_not_perform_indel_calling" />
170 </conditional>
171
172 </when>
173 <when value="do_not_perform_genotype_likelihood_computation">
174 <param name="base_position_on_reads" argument="--output-BP" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" />
175 <param name="output_mapping_quality" argument="--output-MQ" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" />
176 </when>
177 </conditional>
178 <conditional name="advanced_options"> 96 <conditional name="advanced_options">
179 <param name="advanced_options_selector" type="select" label="Set advanced options"> 97 <param name="advanced_options_selector" type="select" label="Set advanced options">
180 <option selected="True" value="basic">Basic</option> 98 <option selected="True" value="default">Basic</option>
181 <option value="advanced">Advanced</option> 99 <option value="advanced">Advanced</option>
182 </param> 100 </param>
183 <when value="basic" /> 101 <when value="default" />
184 <when value="advanced"> 102 <when value="advanced">
185 <conditional name="filter_by_flags"> 103 <conditional name="filter_by_flags">
186 <param name="filter_flags" type="select" label="Set filter by flags"> 104 <param name="filter_flags" type="select" label="Set filter by flags">
187 <option selected="True" value="nofilter">Do not filter</option> 105 <option selected="True" value="nofilter">Do not filter</option>
188 <option value="filter">Filter by flags to exclude or require</option> 106 <option value="filter">Filter by flags to exclude or require</option>
189 </param> 107 </param>
190 <when value="filter"> 108 <when value="filter">
191 <param name="require_flags" argument="--incl-flags" type="select" multiple="True" display="checkboxes" label="Require"> 109 <param name="require_flags" argument="--rf/--incl-flags" type="select" multiple="True" display="checkboxes" label="Require">
192 <option value="1">Read is paired</option> 110 <expand macro="flag_options" />
193 <option value="2">Read is mapped in a proper pair</option> 111 </param>
194 <option value="4">The read is unmapped</option> 112 <param name="exclude_flags" argument="--ff/--excl-flags" type="select" multiple="True" display="checkboxes" label="Exclude">
195 <option value="8">The mate is unmapped</option> 113 <expand macro="flag_options" />
196 <option value="16">Read strand</option>
197 <option value="32">Mate strand</option>
198 <option value="64">Read is the first in a pair</option>
199 <option value="128">Read is the second in a pair</option>
200 <option value="256">The alignment or this read is not primary</option>
201 <option value="512">The read fails platform/vendor quality checks</option>
202 <option value="1024">The read is a PCR or optical duplicate</option>
203 </param>
204 <param name="exclude_flags" argument="--excl-flags" type="select" multiple="True" display="checkboxes" label="Exclude">
205 <option value="1">Read is paired</option>
206 <option value="2">Read is mapped in a proper pair</option>
207 <option value="4">The read is unmapped</option>
208 <option value="8">The mate is unmapped</option>
209 <option value="16">Read strand</option>
210 <option value="32">Mate strand</option>
211 <option value="64">Read is the first in a pair</option>
212 <option value="128">Read is the second in a pair</option>
213 <option value="256">The alignment or this read is not primary</option>
214 <option value="512">The read fails platform/vendor quality checks</option>
215 <option value="1024">The read is a PCR or optical duplicate</option>
216 </param> 114 </param>
217 </when> 115 </when>
218 <when value="nofilter" /> 116 <when value="nofilter" />
219 </conditional> 117 </conditional>
220 <conditional name="limit_by_region"> 118 <conditional name="limit_by_region">
221 <param name="limit_by_regions" argument="--positions" type="select" label="Select regions to call"> 119 <param name="limit_by_regions" type="select" label="Select regions to call">
222 <option selected="True" value="no_limit">Do not limit</option> 120 <option selected="True" value="no_limit">Do not limit</option>
223 <option value="history">From a BED file</option> 121 <option value="limit">Specify regions</option>
224 <option value="paste">Paste a list of regions or BED</option>
225 </param> 122 </param>
226 <when value="history"> 123 <when value="limit">
227 <param name="bed_regions" type="data" format="bed" label="BED file"> 124 <param name="bed_regions" argument="-l/--positions" type="data" format="bed" optional="true" label="skip unlisted positions (chr pos) or regions">
228 <validator type="dataset_ok_validator" /> 125 <validator type="dataset_ok_validator" />
229 </param> 126 </param>
230 </when> 127 <param name="region_paste" argument="-r/--region" type="text" optional="true" label="region in which pileup is generated" help="Format CHR:FROM-TO, e.g. 17:100-150. If used in conjunction with -l then considers the intersection of the two requests." />
231 <when value="paste">
232 <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions" />
233 </when> 128 </when>
234 <when value="no_limit" /> 129 <when value="no_limit" />
235 </conditional> 130 </conditional>
131
132
236 <conditional name="exclude_read_group"> 133 <conditional name="exclude_read_group">
237 <param name="exclude_read_groups" argument="--exclude-RG" type="select" label="Select read groups to exclude"> 134 <param name="exclude_read_groups" argument="-R/--exclude-RG" type="select" label="Select read groups to exclude">
238 <option selected="True" value="no_limit">Do not exclude</option> 135 <option selected="True" value="no_limit">Do not exclude</option>
239 <option value="history">From a text file</option> 136 <option value="history">From a text file</option>
240 <option value="paste">Paste a list of read groups</option> 137 <option value="paste">Paste a list of read groups</option>
241 </param> 138 </param>
242 <when value="history"> 139 <when value="history">
247 <when value="paste"> 144 <when value="paste">
248 <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups" /> 145 <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups" />
249 </when> 146 </when>
250 <when value="no_limit" /> 147 <when value="no_limit" />
251 </conditional> 148 </conditional>
252 <param name="ignore_overlaps" argument="--ignore-overlaps" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Disable read-pair overlap detection" /> 149 <param name="ignore_overlaps" argument="-x/--ignore-overlaps" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Disable read-pair overlap detection" />
253 <param name="skip_anomalous_read_pairs" argument="--count-orphans" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> 150 <param name="skip_anomalous_read_pairs" argument="-A/--count-orphans" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not discard anomalous read pairs" />
254 <param name="disable_probabilistic_realignment" argument="--no-BAQ" type="boolean" truevalue="-B" falsevalue="" checked="False" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" help="BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments" /> 151 <param name="disable_probabilistic_realignment" argument="-B/--no-BAQ" type="boolean" truevalue="-B" falsevalue="" checked="False" label="Disable BAQ (per-Base Alignment Quality), see below" />
255 <param name="coefficient_for_downgrading" argument="--adjust-MQ" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" help="Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50" /> 152 <param name="coefficient_for_downgrading" argument="-C/--adjust-MQ" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" help="Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50" />
256 <param name="max_reads_per_bam" argument="--max-depth" type="integer" max="1024" min="1" value="250" label="Max reads per BAM" /> 153 <param name="max_reads_per_bam" argument="-d/--max-depth" type="integer" min="0" value="8000" label="max per-file depth; avoids excessive memory usage" />
257 <param name="extended_BAQ_computation" argument="--redo-BAQ" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Redo BAQ computation" help="Ignore existing BQ tags" /> 154 <param name="extended_BAQ_computation" argument="-E/--redo-BAQ" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Recalculate BAQ on the fly" help="Ignore existing BQ tags" />
258 <param name="minimum_mapping_quality" argument="--min-MQ" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> 155 <param name="minimum_mapping_quality" argument="-q/--min-MQ" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
259 <param name="minimum_base_quality" argument="--min-BQ" type="integer" value="13" label="Minimum base quality for a base to be considered" /> 156 <param name="minimum_base_quality" argument="-Q/--min-BQ" type="integer" value="13" label="Minimum base quality for a base to be considered" />
260 <param name="region_string" argument="--region" type="text" value="" label="Only generate pileup in region" help="If used in conjunction with --positions, then considers the intersection of the two requests. Defaults to all sites" /> 157 <param name="qualities_illumina_onethree" argument="-6/--illumina1.3+" type="boolean" truevalue="-6" falsevalue="" checked="False" label="quality is in the Illumina-1.3+ encoding"/>
158 </when>
159 </conditional>
160 <conditional name="output_options_cond">
161 <param name="output_options_selector" type="select" label="Output options">
162 <option selected="True" value="default">Default</option>
163 <option value="advanced">Advanced</option>
164 </param>
165 <when value="default"/>
166 <when value="advanced">
167 <param name="base_position_on_reads" argument="-O/--output-BP" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" />
168 <param name="output_mapping_quality" argument="-s/--output-MQ" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" />
169 <param name="output_read_names" argument="--output-QNAME" type="boolean" truevalue="--output-QNAME" falsevalue="" checked="False" label="Output an extra column containing comma-separated read names. (--output-QNAME)" />
170 <param name="output_all_pos" argument="-a" type="select" label="Output absolutely all positions" help="Output all positions, including those with zero depth. (-a) Output absolutely all positions, including unused reference sequences (-aa). Note that when used in conjunction with a BED file the -a option may sometimes operate as if -aa was specified if the reference sequence has coverage outside of the region specified in the BED file.">
171 <option selected="True" value="">No</option>
172 <option value="-a">all (including those with zero depth)</option>
173 <option value="-aa">absolutely all (including unused reference sequences)</option>
174 </param>
261 </when> 175 </when>
262 </conditional> 176 </conditional>
263 </inputs> 177 </inputs>
264 <outputs> 178 <outputs>
265 <data name="output_mpileup" format="pileup" label="${tool.name} on ${on_string}"> 179 <data name="output_file_pu" format="pileup" label="${tool.name} on ${on_string} pileup"/>
266 <change_format>
267 <when format="bcf" input="genotype_likelihood_computation_type.output_format" value="--BCF" />
268 <when format="vcf" input="genotype_likelihood_computation_type.output_format" value="--VCF" />
269 </change_format>
270 </data>
271 </outputs> 180 </outputs>
272 <tests> 181 <tests>
182 <!-- samtools test https://github.com/samtools/samtools/blob/4651d25f2b14cd68ffb0915a74b0c1b529b8cfa1/test/test.pl#L757 -->
183 <test>
184 <conditional name="reference_source">
185 <param name="reference_source_selector" value="history" />
186 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
187 <param name="input_bam" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
188 </conditional>
189 <conditional name="advanced_options">
190 <param name="advanced_options_selector" value="advanced" />
191 <conditional name="limit_by_region">
192 <param name="limit_by_regions" value="limit"/>
193 <param name="region_paste" value="17:100-150" />
194 </conditional>
195 </conditional>
196 <output name="output_file_pu" file="mpileup.out.1" ftype="pileup" />
197 </test>
198 <!-- test_cmd($opts,out=>'dat/mpileup.out.1',err=>'dat/mpileup.err.1',cmd=>"$$opts{bin}/samtools mpileup -b $$opts{tmp}/mpileup.bam.list -f $$opts{tmp}/mpileup.ref.fa.gz -r17:100-150");-->
199 <test>
200 <conditional name="reference_source">
201 <param name="reference_source_selector" value="history" />
202 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
203 <param name="input_bam" ftype="bam" value="mpileup.1.bam" />
204 </conditional>
205 <conditional name="advanced_options">
206 <param name="advanced_options_selector" value="advanced" />
207 <conditional name="filter_by_flags">
208 <param name="filter_flags" value="filter"/>
209 <param name="exclude_flags" value="4,16"/>
210 </conditional>
211 <conditional name="limit_by_region">
212 <param name="limit_by_regions" value="limit"/>
213 <param name="region_paste" value="17:1050-1060" />
214 </conditional>
215 <param name="disable_probabilistic_realignment" value="-B" />
216 </conditional>
217 <output name="output_file_pu" file="mpileup.out.3" ftype="pileup" />
218 </test>
219 <!-- test_cmd($opts,out=>'dat/mpileup.out.3',cmd=>"$$opts{bin}/samtools mpileup -B \-\-ff 0x14 -f $$opts{tmp}/mpileup.ref.fa.gz -r17:1050-1060 $$opts{tmp}/mpileup.1.bam | grep -v mpileup");
220 -->
221 <!-- original test from galaxy tool-->
222 <test>
223 <conditional name="reference_source">
224 <param name="reference_source_selector" value="history" />
225 <param name="ref_file" ftype="fasta" value="phiX.fasta" />
226 <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" />
227 </conditional>
228 <conditional name="output_options_cond">
229 <param name="output_options_selector" value="advanced" />
230 <param name="base_position_on_reads" value="true" />
231 <param name="output_mapping_quality" value="true" />
232 </conditional>
233 <conditional name="advanced_options">
234 <param name="advanced_options_selector" value="default" />
235 </conditional>
236 <output name="output_file_pu" file="samtools_mpileup_out_1.pileup" ftype="pileup" />
237 </test>
238 <test>
239 <param name="reference_source_selector" value="history" />
240 <param name="ref_file" ftype="fasta" value="phiX.fasta" />
241 <param name="input_bam" ftype="bam" value="phiX.bam" />
242 <conditional name="output_options_cond">
243 <param name="output_options_selector" value="default" />
244 </conditional>
245 <conditional name="advanced_options">
246 <param name="advanced_options_selector" value="advanced" />
247 <param name="skip_anomalous_read_pairs" value="-A" />
248 </conditional>
249 <output name="output_file_pu" file="samtools_mpileup_out_2.pileup" ftype="pileup" />
250 </test>
273 <test> 251 <test>
274 <param name="reference_source_selector" value="history" /> 252 <param name="reference_source_selector" value="history" />
275 <param name="ref_file" ftype="fasta" value="phiX.fasta" /> 253 <param name="ref_file" ftype="fasta" value="phiX.fasta" />
276 <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" /> 254 <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" />
277 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> 255 <conditional name="output_options_cond">
278 <param name="advanced_options_selector" value="basic" /> 256 <param name="output_options_cond" value="advanced" />
279 <param name="base_position_on_reads" value="true" /> 257 <param name="base_position_on_reads" value="true" />
280 <param name="output_mapping_quality" value="true" /> 258 <param name="output_mapping_quality" value="true" />
281 <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" /> 259 </conditional>
282 </test> 260 <conditional name="advanced_options">
283 <test> 261 <param name="advanced_options_selector" value="advanced" />
284 <param name="reference_source_selector" value="history" /> 262 <param name="minimum_base_quality" value="0" /><!-- most reads have ultra low quality resulting in empty columns -->
285 <param name="ref_file" ftype="fasta" value="phiX.fasta" /> 263 </conditional>
286 <param name="input_bam" ftype="bam" value="phiX.bam" /> 264 <output name="output_file_pu" file="samtools_mpileup_out_3.pileup" ftype="pileup" />
287 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
288 <param name="gap_extension_sequencing_error_probability" value="20" />
289 <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
290 <param name="perform_indel_calling_selector" value="perform_indel_calling" />
291 <param name="skip_indel_calling_above_sample_depth" value="250" />
292 <param name="gap_open_sequencing_error_probability" value="40" />
293 <param name="platform_list_repeat" value="0" />
294 <param name="advanced_options_selector" value="basic" />
295 <param name="genotype_likelihood_computation_type|output_format" value="VCF" />
296 <output name="output_mpileup" file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" />
297 </test>
298 <test>
299 <param name="reference_source_selector" value="history" />
300 <param name="ref_file" ftype="fasta" value="phiX.fasta" />
301 <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" />
302 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
303 <param name="advanced_options_selector" value="advanced" />
304 <param name="minimum_base_quality" value="0" /><!-- most reads have ultra low quality resulting in empty columns -->
305 <param name="base_position_on_reads" value="true" />
306 <param name="output_mapping_quality" value="true" />
307 <output name="output_mpileup" file="samtools_mpileup_out_3.pileup" />
308 </test> 265 </test>
309 </tests> 266 </tests>
310 <help><![CDATA[ 267 <help><![CDATA[
311 **What it does** 268 **What it does**
312 269
313 Report variants for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. 270 Generate pileup for one or multiple BAM files. Alignment records are grouped by sample (SM) identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.
314 If sample identifiers are absent, each input file is regarded as one sample. 271
315 272 Generation of VCF and BCF output is deprecated and not available in the Galaxy tool. Please use bcftools mpileup for this instead.
316 **Notes**: Assuming diploid individuals. 273
274 In the pileup format (without -u or -g), each line represents a genomic position, consisting of chromosome name, 1-based coordinate, reference base, the number of reads covering the site, read bases, base qualities and alignment mapping qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand, a comma for a match on the reverse strand, a '>' or '<' for a reference skip, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the reverse strand. A pattern '\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence. Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference. The deleted bases will be presented as '*' in the following lines. Also at the read base column, a symbol '^' marks the start of a read. The ASCII of the character following '^' minus 33 gives the mapping quality. A symbol '$' marks the end of a read segment.
275
276 Note that there are two orthogonal ways to specify locations in the input file: via -r region and -l file. The former uses (and requires) an index to do random access while the latter streams through the file contents, keeping only the positions in the specified regions, and requires no index. The two may be used in conjunction. For example a BED file containing locations of genes in chromosome 20 could be specified using -r 20 -l chr20.bed, meaning that the index is used to find chromosome 20 and then it is filtered for the regions listed in the bed file.
277
278 **BAQ (Base Alignment Quality)**
279
280 BAQ is the Phred-scaled probability of a read base being misaligned. It greatly helps to reduce false SNPs caused by misalignments. BAQ is calculated using the probabilistic realignment method described in the paper “Improving SNP discovery by base alignment quality”, Heng Li, Bioinformatics, Volume 27, Issue 8 <https://doi.org/10.1093/bioinformatics/btr076>
281
282 BAQ is turned on when a reference file is supplied using the -f option. To disable it, use the -B option.
283
284 It is possible to store pre-calculated BAQ values in a SAM BQ:Z tag. Samtools mpileup will use the precalculated values if it finds them. The -E option can be used to make it ignore the contents of the BQ:Z tag and force it to recalculate the BAQ scores by making a new alignment.
317 ]]></help> 285 ]]></help>
318 <expand macro="citations" /> 286 <expand macro="citations" />
319 </tool> 287 </tool>