comparison samtools_mpileup.xml @ 4:c6fdfe3331d6 draft

Uploaded
author devteam
date Tue, 21 Apr 2015 16:29:10 -0400
parents 973fea5b4bdf
children aa0ef6f0ee89
comparison
equal deleted inserted replaced
3:973fea5b4bdf 4:c6fdfe3331d6
1 <tool id="samtools_mpileup" name="MPileup" version="0.0.3"> 1 <tool id="samtools_mpileup" name="MPileup" version="2.0">
2 <description>SNP and indel caller</description> 2 <description>call variants</description>
3 <requirements> 3 <macros>
4 <requirement type="package" version="0.1.19">samtools</requirement> 4 <import>macros.xml</import>
5 </requirements> 5 </macros>
6 <command interpreter="python">samtools_wrapper.py 6 <expand macro="requirements" />
7 -p 'samtools mpileup' 7 <expand macro="stdio" />
8 --stdout "${output_log}" 8 <expand macro="version_command" />
9 <command>
10 <![CDATA[
11 #if $reference_source.reference_source_selector == "history":
12 ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup
13 #else:
14 samtools mpileup
15 #end if
9 #if $reference_source.reference_source_selector != "history": 16 #if $reference_source.reference_source_selector != "history":
10 -p '-f "${reference_source.ref_file.fields.path}"' 17 -f "${reference_source.ref_file.fields.path}"
11 #else: 18 #else:
12 -d "-f" "${reference_source.ref_file}" "fa" "reference_input" 19 -f "${reference_source.ref_file}"
13 #end if 20 #end if
14 #for $i, $input_bam in enumerate( $reference_source.input_bams ): 21 #for $i, $input_bam in enumerate( $reference_source.input_bams ):
15 -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" 22 "${input_bam.input_bam}"
16 -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index
17 #end for 23 #end for
18 -p '
19 #if str( $advanced_options.advanced_options_selector ) == "advanced": 24 #if str( $advanced_options.advanced_options_selector ) == "advanced":
20 ${advanced_options.skip_anomalous_read_pairs} 25 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
26 #if $advanced_options.filter_by_flags.require_flags:
27 --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])}
28 #end if
29 #if $advanced_options.filter_by_flags.exclude_flags:
30 --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])}
31 #end if
32 #end if
33 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
34 -l "$pasted_regions" ## path of the config file rendered from the pasted text
35 #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history":
36 -l "${advanced_options.limit_by_region.bed_regions}" ## bed_regions lives inside two conditionals, so it is addressed by its full path
37 #end if
38 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
39 -G "$excluded_read_groups" ## path of the config file rendered from the pasted text
40 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history":
41 -G "${advanced_options.exclude_read_group.read_groups}" ## read_groups lives inside two conditionals, so it is addressed by its full path
42 #end if
43 ${advanced_options.ignore_overlaps} ${advanced_options.skip_anomalous_read_pairs} ## ignore_overlaps (-x) is declared in the advanced inputs and has to be emitted here to have any effect
21 ${advanced_options.disable_probabilistic_realignment} 44 ${advanced_options.disable_probabilistic_realignment}
22 -C "${advanced_options.coefficient_for_downgrading}" 45 -C "${advanced_options.coefficient_for_downgrading}"
23 -d "${advanced_options.max_reads_per_bam}" 46 -d "${advanced_options.max_reads_per_bam}"
24 ${advanced_options.extended_BAQ_computation} 47 ${advanced_options.extended_BAQ_computation}
25 #if str( $advanced_options.position_list ) != 'None':
26 -l "${advanced_options.position_list}"
27 #end if
28 -q "${advanced_options.minimum_mapping_quality}" 48 -q "${advanced_options.minimum_mapping_quality}"
29 -Q "${advanced_options.minimum_base_quality}" 49 -Q "${advanced_options.minimum_base_quality}"
30 #if str( $advanced_options.region_string ): 50 #if str( $advanced_options.region_string ):
31 -r "${advanced_options.region_string}" 51 -r "${advanced_options.region_string}"
32 #end if 52 #end if
33 ${advanced_options.output_per_sample_read_depth} 53
34 ${advanced_options.output_per_sample_strand_bias_p_value}
35 #end if 54 #end if
36 #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': 55 #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
37 ##-g or -u 56 ##
38 -g 57
39 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" 58 ${genotype_likelihood_computation_type.output_format}
40 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" 59 ${genotype_likelihood_computation_type.compressed}
60
61 #if str( $genotype_likelihood_computation_type.output_tags ) != "None":
62 --output-tags "${genotype_likelihood_computation_type.output_tags}" ## long option needs two dashes (short form is -t)
63 #end if
64
41 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': 65 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
66 -o "${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}"
67 -e "${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}"
68 -h "${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}"
42 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" 69 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
43 #else: 70 -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}"
71 --open-prob "${genotype_likelihood_computation_type.perform_indel_calling.open_seq_error_probability}"
72 -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}"
73 ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample}
74 #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ):
75 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ] ) }"
76 #end if
77 #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling':
44 -I 78 -I
45 #end if 79 #end if
46 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" 80
47 #if len( $genotype_likelihood_computation_type.platform_list_repeat ): 81
48 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" 82 #else:
49 #end if 83 ${genotype_likelihood_computation_type.base_position_on_reads}
50 #end if 84 ${genotype_likelihood_computation_type.output_mapping_quality}
51 &gt; "${output_mpileup}" 85 #end if
52 ' 86 --output "$output_mpileup" 2> "$output_log"
53 </command> 87 ]]>
54 <inputs> 88 </command>
55 <conditional name="reference_source"> 89 <inputs>
56 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> 90 <conditional name="reference_source">
57 <option value="cached">Locally cached</option> 91 <param label="Choose the source for the reference genome" name="reference_source_selector" type="select">
58 <option value="history">History</option> 92 <option value="cached">Use a built-in genome</option>
59 </param> 93 <option value="history">Use a genome from the history</option>
60 <when value="cached">
61 <repeat name="input_bams" title="BAM file" min="1">
62 <param name="input_bam" type="data" format="bam" label="BAM file">
63 <validator type="unspecified_build" />
64 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
65 </param>
66 </repeat>
67 <param name="ref_file" type="select" label="Using reference genome">
68 <options from_data_table="fasta_indexes">
69 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...-->
70 </options>
71 </param>
72 </when>
73 <when value="history"> <!-- FIX ME!!!! -->
74 <repeat name="input_bams" title="BAM file" min="1">
75 <param name="input_bam" type="data" format="bam" label="BAM file">
76 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." />
77 </param>
78 </repeat>
79 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
80 </when>
81 </conditional>
82
83
84 <conditional name="genotype_likelihood_computation_type">
85 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
86 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option>
87 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option>
88 </param>
89 <when value="perform_genotype_likelihood_computation">
90 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" />
91 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." />
92 <conditional name="perform_indel_calling">
93 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
94 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option>
95 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
96 </param> 94 </param>
97 <when value="perform_indel_calling"> 95 <when value="cached">
98 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> 96 <repeat min="1" name="input_bams" title="BAM file">
99 </when> 97 <param format="bam" label="BAM file" name="input_bam" type="data">
100 <when value="do_not_perform_indel_calling" /> 98 <validator type="unspecified_build" />
101 </conditional> 99 <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" />
102 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> 100 </param>
103 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> 101 </repeat>
104 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> 102 <param label="Using reference genome" name="ref_file" type="select">
105 </repeat> 103 <options from_data_table="fasta_indexes" />
106 </when> 104 </param>
107 <when value="do_not_perform_genotype_likelihood_computation"> 105 </when>
108 <!-- Do nothing here --> 106 <when value="history">
109 </when> 107 <repeat min="1" name="input_bams" title="BAM file">
110 </conditional> 108 <param format="bam" label="BAM file" name="input_bam" type="data">
111 <conditional name="advanced_options"> 109 <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
112 <param name="advanced_options_selector" type="select" label="Set advanced options"> 110 </param>
113 <option value="basic" selected="True">Basic</option> 111 </repeat>
114 <option value="advanced">Advanced</option> 112 <param format="fasta" label="Using reference genome" name="ref_file" type="data" />
115 </param> 113 </when>
116 <when value="advanced"> 114 </conditional>
117 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> 115 <conditional name="genotype_likelihood_computation_type">
118 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> 116 <param label="Genotype Likelihood Computation" name="genotype_likelihood_computation_type_selector" type="select">
119 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> 117 <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option>
120 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> 118 <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option>
121 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> 119 </param>
122 <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> 120 <when value="perform_genotype_likelihood_computation">
123 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> 121 <param label="Choose the output format" name="output_format" type="select">
124 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> 122 <option value="--VCF">VCF</option>
125 <param name="region_string" type="text" value="" label="Only generate pileup in region" /> 123 <option value="--BCF">BCF</option>
126 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> 124 </param>
127 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> 125 <param checked="False" falsevalue="--uncompressed" label="Compress output" name="compressed" truevalue="" type="boolean" help="--uncompressed; default=False"/>
128 </when> 126 <param name="output_tags" optional="True" type="select" multiple="True" display="checkboxes" label="Optional tags to output" help="--output-tags">
129 <when value="basic" /> 127 <option value="DP">DP (Number of high-quality bases)</option>
130 </conditional> 128 <option value="DPR">DPR (Number of high-quality bases for each observed allele)</option>
131 </inputs> 129 <option value="DV">DV (Number of high-quality non-reference bases)</option>
132 <outputs> 130 <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option>
133 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> 131 <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option>
134 <change_format> 132 <option value="SP">SP (Phred-scaled strand bias P-value)</option>
135 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> 133 </param>
136 </change_format> 134 <conditional name="perform_indel_calling">
137 </data> 135 <param label="Perform INDEL calling" name="perform_indel_calling_selector" type="select">
138 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> 136 <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option>
139 </outputs> 137 <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option>
140 <tests> 138 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
141 <test> 139 </param>
142 <param name="reference_source_selector" value="history" /> 140 <when value="perform_indel_calling_def" />
143 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> 141 <when value="perform_indel_calling">
144 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> 142 <param label="Phred-scaled gap open sequencing error probability" name="gap_open_sequencing_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
145 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> 143 <param label="Phred-scaled gap extension sequencing error probability" name="gap_extension_sequencing_error_probability" type="integer" value="20" help="--ext-prob; Reducing this value leads to longer indels. default=20"/>
146 <param name="advanced_options_selector" value="basic" /> 144 <param label="Coefficient for modeling homopolymer errors." name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" help="--tandem-qual; default=100"/>
147 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> 145 <param label="Skip INDEL calling if the average per-sample depth is above" name="skip_indel_calling_above_sample_depth" type="integer" value="250" help="--max-idepth; default=250"/>
148 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> 146 <param label="Minimum gapped reads for indel candidates" name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" help="--min-ireads; default=1"/>
149 </test> 147 <param label="Phred-scaled gap open sequencing error probability" name="open_seq_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
150 <test> 148 <param label="Minimum fraction of gapped reads" name="minimum_gapped_read_fraction" type="float" value="0.002" help="--gap-frac; default=0.002"/>
151 <param name="reference_source_selector" value="history" /> 149 <param checked="False" falsevalue="" label="Apply --min-ireads and --gap-frac values on a per-sample basis" name="gapped_read_per_sample" truevalue="-p" type="boolean" help="--per-sample-mF; by default both options are applied to reads pooled from all samples"/>
152 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> 150 <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
153 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> 151 <param label="Platform to use for INDEL candidates" name="platform_entry" type="text" value="" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/>
154 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> 152 </repeat>
155 <param name="gap_extension_sequencing_error_probability" value="20" /> 153 </when>
156 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> 154 <when value="do_not_perform_indel_calling" />
157 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> 155 </conditional>
158 <param name="skip_indel_calling_above_sample_depth" value="250" /> 156
159 <param name="gap_open_sequencing_error_probability" value="40" /> 157 </when>
160 <param name="platform_list_repeat" value="0" /> 158 <when value="do_not_perform_genotype_likelihood_computation">
161 <param name="advanced_options_selector" value="basic" /> 159 <param checked="False" falsevalue="" label="Output base positions on reads" name="base_position_on_reads" truevalue="-O" type="boolean" help="--output-BP"/>
162 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> 160 <param checked="False" falsevalue="" label="Output mapping quality" name="output_mapping_quality" truevalue="-s" type="boolean" help="--output-MQ"/>
163 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> 161 </when>
164 </test> 162 </conditional>
165 </tests> 163 <conditional name="advanced_options">
166 <help> 164 <param label="Set advanced options" name="advanced_options_selector" type="select">
165 <option selected="True" value="basic">Basic</option>
166 <option value="advanced">Advanced</option>
167 </param>
168 <when value="advanced">
169 <conditional name="filter_by_flags"> <!-- Each option value is a single flag bit; the command template adds the checked values together and passes the totals to samtools as the rf (require) and ff (exclude) masks. -->
170 <param label="Set filter by flags" name="filter_flags" type="select">
171 <option selected="True" value="nofilter">Do not filter</option>
172 <option value="filter">Filter by flags to exclude or require</option>
173 </param>
174 <when value="filter">
175 <param display="checkboxes" label="Require" multiple="True" name="require_flags" type="select" help="--incl-flags">
176 <option value="1">Read is paired</option>
177 <option value="2">Read is mapped in a proper pair</option>
178 <option value="4">The read is unmapped</option>
179 <option value="8">The mate is unmapped</option>
180 <option value="16">Read strand</option>
181 <option value="32">Mate strand</option>
182 <option value="64">Read is the first in a pair</option>
183 <option value="128">Read is the second in a pair</option>
184 <option value="256">The alignment of this read is not primary</option>
185 <option value="512">The read fails platform/vendor quality checks</option>
186 <option value="1024">The read is a PCR or optical duplicate</option>
187 </param>
188 <param display="checkboxes" label="Exclude" multiple="True" name="exclude_flags" type="select" help="--excl-flags">
189 <option value="1">Read is paired</option>
190 <option value="2">Read is mapped in a proper pair</option>
191 <option value="4">The read is unmapped</option>
192 <option value="8">The mate is unmapped</option>
193 <option value="16">Read strand</option>
194 <option value="32">Mate strand</option>
195 <option value="64">Read is the first in a pair</option>
196 <option value="128">Read is the second in a pair</option>
197 <option value="256">The alignment of this read is not primary</option>
198 <option value="512">The read fails platform/vendor quality checks</option>
199 <option value="1024">The read is a PCR or optical duplicate</option>
200 </param>
201 </when>
202 <when value="nofilter" />
203 </conditional>
204 <conditional name="limit_by_region"> <!-- Restricts calling to chosen sites. The command template hands both the BED dataset and the pasted list (through the pasted_regions config file) to samtools with the -l switch. NOTE(review): the "paste" option label names the region switch instead; confirm which switch is intended. -->
205 <param label="Select regions to call" name="limit_by_regions" type="select">
206 <option selected="True" value="no_limit">Do not limit</option>
207 <option value="history">From an uploaded BED file (--positions)</option>
208 <option value="paste">Paste a list of regions or BED (--region)</option>
209 </param>
210 <when value="history">
211 <param format="bed" label="BED file" name="bed_regions" type="data" help="--positions">
212 <validator type="dataset_ok_validator" />
213 </param>
214 </when>
215 <when value="paste">
216 <param area="true" help="Paste a list of regions in BED format or as a list of chromosomes and positions" label="Regions" name="region_paste" size="10x35" type="text"/>
217 </when>
218 <when value="no_limit" />
219 </conditional>
220 <conditional name="exclude_read_group"> <!-- Read groups to leave out, supplied either as a text dataset from the history ("history") or typed into a text area ("paste"); the command template passes the resulting file to samtools with -G. -->
221 <param label="Select read groups to exclude" name="exclude_read_groups" type="select" help="--exclude-RG">
222 <option selected="True" value="no_limit">Do not exclude</option>
223 <option value="history">From an uploaded text file</option>
224 <option value="paste">Paste a list of read groups</option>
225 </param>
226 <when value="history">
227 <param format="txt" label="Text file" name="read_groups" type="data">
228 <validator type="dataset_ok_validator" />
229 </param>
230 </when>
231 <when value="paste">
232 <param area="true" help="Paste a list of read groups" label="Read groups" name="group_paste" size="10x35" type="text" />
233 </when>
234 <when value="no_limit" />
235 </conditional>
236 <param checked="False" falsevalue="" label="Disable read-pair overlap detection" name="ignore_overlaps" truevalue="-x" type="boolean" help="--ignore-overlaps"/>
237 <param checked="False" falsevalue="" label="Do not skip anomalous read pairs in variant calling" name="skip_anomalous_read_pairs" truevalue="-A" type="boolean" help="--count-orphans"/>
238 <param checked="False" falsevalue="" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" name="disable_probabilistic_realignment" truevalue="-B" type="boolean" help="--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments"/>
239 <param label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" name="coefficient_for_downgrading" type="integer" value="0" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/>
240 <param label="Max reads per BAM" max="1024" min="1" name="max_reads_per_bam" type="integer" value="250" help="--max-depth; default=250"/>
241 <param checked="False" falsevalue="" label="Redo BAQ computation" name="extended_BAQ_computation" truevalue="-E" type="boolean" help="--redo-BAQ; ignore existing BQ tags"/>
242 <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="--min-MQ; default=0"/>
243 <param label="Minimum base quality for a base to be considered" name="minimum_base_quality" type="integer" value="13" help="--min-BQ; default=13"/> <!-- region_string is read by the command template (-r) whenever the advanced branch is rendered, so it has to be declared here; an empty value leaves the switch off. --> <param label="Only generate pileup in region" name="region_string" type="text" value="" help="--region; default=all sites"/>
244 </when>
245 <when value="basic" />
246 </conditional>
247 </inputs>
248 <outputs>
249 <data format="pileup" label="${tool.name} on ${on_string}" name="output_mpileup"> <!-- Declared as pileup; change_format below retypes it to bcf or vcf according to the selected output_format value. -->
250 <change_format>
251 <when format="bcf" input="genotype_likelihood_computation_type.output_format" value="--BCF" />
252 <when format="vcf" input="genotype_likelihood_computation_type.output_format" value="--VCF" />
253 </change_format>
254 </data>
255 <data format="txt" label="${tool.name} on ${on_string} (log)" name="output_log" /> <!-- Receives the stderr stream of the command (redirected there with 2> in the command template). -->
256 </outputs>
257 <tests>
258 <test> <!-- Plain pileup output from a history reference, with base positions and mapping quality columns switched on. -->
259 <param name="reference_source_selector" value="history" />
260 <param ftype="fasta" name="ref_file" value="phiX.fasta" />
261 <param ftype="bam" name="input_bam" value="samtools_mpileup_in_1.bam" />
262 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
263 <param name="advanced_options_selector" value="basic" />
264 <param name="base_position_on_reads" value="true" />
265 <param name="output_mapping_quality" value="true" />
266 <output file="samtools_mpileup_out_1.pileup" name="output_mpileup" />
267 <output file="samtools_mpileup_out_1.log" name="output_log" />
268 </test>
269 <test> <!-- Genotype likelihood run with explicit indel-calling settings; the VCF comparison tolerates up to 8 differing lines. NOTE(review): output_format is set to "VCF" while the select declares its option values with a leading double dash; confirm the test framework resolves this to the intended option. -->
270 <param name="reference_source_selector" value="history" />
271 <param ftype="fasta" name="ref_file" value="phiX.fasta" />
272 <param ftype="bam" name="input_bam" value="phiX.bam" />
273 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
274 <param name="gap_extension_sequencing_error_probability" value="20" />
275 <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
276 <param name="perform_indel_calling_selector" value="perform_indel_calling" />
277 <param name="skip_indel_calling_above_sample_depth" value="250" />
278 <param name="gap_open_sequencing_error_probability" value="40" />
279 <param name="platform_list_repeat" value="0" />
280 <param name="advanced_options_selector" value="basic" />
281 <param name="genotype_likelihood_computation_type|output_format" value="VCF" />
282 <output file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" name="output_mpileup" />
283 <output file="samtools_mpileup_out_2.log" name="output_log" />
284 </test>
285 </tests>
286 <help>
287 <![CDATA[
167 **What it does** 288 **What it does**
168 289
169 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. 290 Report variants for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.
170 291
171 ------ 292 ------
172 293
173 **Settings**:: 294 **Input options**::
174 295
175 Input Options: 296 -6, --illumina1.3+ quality is in the Illumina-1.3+ encoding
176 -6 Assume the quality is in the Illumina 1.3+ encoding. 297 -A, --count-orphans do not discard anomalous read pairs
177 -A Do not skip anomalous read pairs in variant calling. 298 -b, --bam-list FILE list of input BAM filenames, one per line
178 -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. 299 -B, --no-BAQ disable BAQ (per-Base Alignment Quality)
179 -b FILE List of input BAM files, one file per line [null] 300 -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0]
180 -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] 301 -d, --max-depth INT max per-BAM depth; avoids excessive memory usage [250]
181 -d INT At a position, read maximally INT reads per input BAM. [250] 302 -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs
182 -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. 303 -f, --fasta-ref FILE faidx indexed reference sequence file
183 -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] 304 -G, --exclude-RG FILE exclude read groups listed in FILE
184 -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] 305 -l, --positions FILE skip unlisted positions (chr pos) or regions (BED)
185 -q INT Minimum mapping quality for an alignment to be used [0] 306 -q, --min-MQ INT skip alignments with mapQ smaller than INT [0]
186 -Q INT Minimum base quality for a base to be considered [13] 307 -Q, --min-BQ INT skip bases with baseQ/BAQ smaller than INT [13]
187 -r STR Only generate pileup in region STR [all sites] 308 -r, --region REG region in which pileup is generated
188 Output Options: 309 -R, --ignore-RG ignore RG tags (one BAM = one sample)
189 310 --rf, --incl-flags STR|INT required flags: skip reads with mask bits unset []
190 -D Output per-sample read depth 311 --ff, --excl-flags STR|INT filter flags: skip reads with mask bits set
191 -g Compute genotype likelihoods and output them in the binary call format (BCF). 312 [UNMAP,SECONDARY,QCFAIL,DUP]
192 -S Output per-sample Phred-scaled strand bias P-value 313 -x, --ignore-overlaps disable read-pair overlap detection
193 -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. 314
194 315 **Output options**::
195 Options for Genotype Likelihood Computation (for -g or -u): 316
196 317 -o, --output FILE write output to FILE [standard output]
197 -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] 318 -g, --BCF generate genotype likelihoods in BCF format
198 -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] 319 -v, --VCF generate genotype likelihoods in VCF format
199 -I Do not perform INDEL calling 320
200 -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] 321 **Output options for mpileup format** (without -g/-v)::
201 -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] 322
202 -P STR Comma dilimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all] 323 -O, --output-BP output base positions on reads
203 324 -s, --output-MQ output mapping quality
204 ------ 325
205 326 **Output options for genotype likelihoods** (when -g/-v is used)::
206 **Citation** 327
207 328 -t, --output-tags LIST optional tags to output: DP,DPR,DV,DP4,INFO/DPR,SP []
208 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_ 329 -u, --uncompressed generate uncompressed VCF/BCF output
209 330
210 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* 331 **SNP/INDEL genotype likelihoods options** (effective with -g/-v)::
211 332
212 </help> 333 -e, --ext-prob INT Phred-scaled gap extension seq error probability [20]
334 -F, --gap-frac FLOAT minimum fraction of gapped reads [0.002]
335 -h, --tandem-qual INT coefficient for homopolymer errors [100]
336 -I, --skip-indels do not perform indel calling
337 -L, --max-idepth INT maximum per-sample depth for INDEL calling [250]
338 -m, --min-ireads INT minimum number gapped reads for indel candidates [1]
339 -o, --open-prob INT Phred-scaled gap open seq error probability [40]
340 -p, --per-sample-mF apply -m and -F per-sample for increased sensitivity
341 -P, --platforms STR comma separated list of platforms for indels [all]
342
343 **Notes**: Assuming diploid individuals.
344 ]]>
345 </help>
346 <configfiles>
347 <!-- Config file handed to samtools with -G when read groups are pasted: the text-area input (group_paste) is split on whitespace and re-joined with tabs. It stays empty unless the advanced "paste" choice is active. --> <configfile name="excluded_read_groups">
348 <![CDATA[
349 #set pasted_data = ''
350 #if str( $advanced_options.advanced_options_selector ) == "advanced":
351 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
352 #set pasted_data = '\t'.join( str( $advanced_options.exclude_read_group['group_paste'] ).split() )
353 #end if
354 #end if
355 ${pasted_data}
356 ]]>
357 </configfile>
358 <!-- Config file handed to samtools with -l when regions are pasted: the text-area input (region_paste) is split on whitespace and re-joined with tabs. It stays empty unless the advanced "paste" choice is active. --> <configfile name="pasted_regions">
359 <![CDATA[
360 #set pasted_data = ''
361 #if str( $advanced_options.advanced_options_selector ) == "advanced":
362 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
363 #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() )
364 #end if
365 #end if
366 ${pasted_data}
367 ]]>
368 </configfile>
369 </configfiles>
370 <expand macro="citations" />
213 </tool> 371 </tool>