Mercurial > repos > devteam > samtools_mpileup
comparison samtools_mpileup.xml @ 4:c6fdfe3331d6 draft
Uploaded
author | devteam |
---|---|
date | Tue, 21 Apr 2015 16:29:10 -0400 |
parents | 973fea5b4bdf |
children | aa0ef6f0ee89 |
comparison
equal
deleted
inserted
replaced
3:973fea5b4bdf | 4:c6fdfe3331d6 |
---|---|
1 <tool id="samtools_mpileup" name="MPileup" version="0.0.3"> | 1 <tool id="samtools_mpileup" name="MPileup" version="2.0"> |
2 <description>SNP and indel caller</description> | 2 <description>call variants</description> |
3 <requirements> | 3 <macros> |
4 <requirement type="package" version="0.1.19">samtools</requirement> | 4 <import>macros.xml</import> |
5 </requirements> | 5 </macros> |
6 <command interpreter="python">samtools_wrapper.py | 6 <expand macro="requirements" /> |
7 -p 'samtools mpileup' | 7 <expand macro="stdio" /> |
8 --stdout "${output_log}" | 8 <expand macro="version_command" /> |
9 <command> | |
10 <![CDATA[ | |
11 #if $reference_source.reference_source_selector == "history": | |
12 ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup | |
13 #else: | |
14 samtools mpileup | |
15 #end if | |
9 #if $reference_source.reference_source_selector != "history": | 16 #if $reference_source.reference_source_selector != "history": |
10 -p '-f "${reference_source.ref_file.fields.path}"' | 17 -f "${reference_source.ref_file.fields.path}" |
11 #else: | 18 #else: |
12 -d "-f" "${reference_source.ref_file}" "fa" "reference_input" | 19 -f "${reference_source.ref_file}" |
13 #end if | 20 #end if |
14 #for $i, $input_bam in enumerate( $reference_source.input_bams ): | 21 #for $i, $input_bam in enumerate( $reference_source.input_bams ): |
15 -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" | 22 "${input_bam.input_bam}" |
16 -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index | |
17 #end for | 23 #end for |
18 -p ' | |
19 #if str( $advanced_options.advanced_options_selector ) == "advanced": | 24 #if str( $advanced_options.advanced_options_selector ) == "advanced": |
20 ${advanced_options.skip_anomalous_read_pairs} | 25 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": |
26 #if $advanced_options.filter_by_flags.require_flags: | |
27 --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])} | |
28 #end if | |
29 #if $advanced_options.filter_by_flags.exclude_flags: | |
30 --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])} | |
31 #end if | |
32 #end if | |
33 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": | |
34 -l "$pasted_regions" | |
35 #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history" | |
36 -l "$bed_regions" | |
37 #end if | |
38 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": | |
39 -G "$excluded_read_groups" | |
40 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" | |
41 -G "$read_groups" | |
42 #end if | |
43 ${advanced_options.skip_anomalous_read_pairs} | |
21 ${advanced_options.disable_probabilistic_realignment} | 44 ${advanced_options.disable_probabilistic_realignment} |
22 -C "${advanced_options.coefficient_for_downgrading}" | 45 -C "${advanced_options.coefficient_for_downgrading}" |
23 -d "${advanced_options.max_reads_per_bam}" | 46 -d "${advanced_options.max_reads_per_bam}" |
24 ${advanced_options.extended_BAQ_computation} | 47 ${advanced_options.extended_BAQ_computation} |
25 #if str( $advanced_options.position_list ) != 'None': | |
26 -l "${advanced_options.position_list}" | |
27 #end if | |
28 -q "${advanced_options.minimum_mapping_quality}" | 48 -q "${advanced_options.minimum_mapping_quality}" |
29 -Q "${advanced_options.minimum_base_quality}" | 49 -Q "${advanced_options.minimum_base_quality}" |
30 #if str( $advanced_options.region_string ): | 50 #if str( $advanced_options.region_string ): |
31 -r "${advanced_options.region_string}" | 51 -r "${advanced_options.region_string}" |
32 #end if | 52 #end if |
33 ${advanced_options.output_per_sample_read_depth} | 53 |
34 ${advanced_options.output_per_sample_strand_bias_p_value} | |
35 #end if | 54 #end if |
36 #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': | 55 #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': |
37 ##-g or -u | 56 ## |
38 -g | 57 |
39 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" | 58 ${genotype_likelihood_computation_type.output_format} |
40 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" | 59 ${genotype_likelihood_computation_type.compressed} |
60 | |
61 #if str( $genotype_likelihood_computation_type.output_tags ) != "None": | |
62 --output-tags "${genotype_likelihood_computation_type.output_tags}" | |
63 #end if | |
64 | |
41 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': | 65 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': |
66 -o "${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}" | |
67 -e "${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}" | |
68 -h "${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}" | |
42 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" | 69 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" |
43 #else: | 70 -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}" |
71 --open-prob "${genotype_likelihood_computation_type.perform_indel_calling.open_seq_error_probability}" | |
72 -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}" | |
73 ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample} | |
74 #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ): | |
75 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ] ) }" | |
76 #end if | |
77 #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling': | |
44 -I | 78 -I |
45 #end if | 79 #end if |
46 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" | 80 |
47 #if len( $genotype_likelihood_computation_type.platform_list_repeat ): | 81 |
48 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" | 82 #else: |
49 #end if | 83 ${genotype_likelihood_computation_type.base_position_on_reads} |
50 #end if | 84 ${genotype_likelihood_computation_type.output_mapping_quality} |
51 > "${output_mpileup}" | 85 #end if |
52 ' | 86 --output "$output_mpileup" 2> "$output_log" |
53 </command> | 87 ]]> |
54 <inputs> | 88 </command> |
55 <conditional name="reference_source"> | 89 <inputs> |
56 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | 90 <conditional name="reference_source"> |
57 <option value="cached">Locally cached</option> | 91 <param label="Choose the source for the reference genome" name="reference_source_selector" type="select"> |
58 <option value="history">History</option> | 92 <option value="cached">Use a built-in genome</option> |
59 </param> | 93 <option value="history">Use a genome from the history</option> |
60 <when value="cached"> | |
61 <repeat name="input_bams" title="BAM file" min="1"> | |
62 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
63 <validator type="unspecified_build" /> | |
64 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> | |
65 </param> | |
66 </repeat> | |
67 <param name="ref_file" type="select" label="Using reference genome"> | |
68 <options from_data_table="fasta_indexes"> | |
69 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> | |
70 </options> | |
71 </param> | |
72 </when> | |
73 <when value="history"> <!-- FIX ME!!!! --> | |
74 <repeat name="input_bams" title="BAM file" min="1"> | |
75 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
76 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> | |
77 </param> | |
78 </repeat> | |
79 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
80 </when> | |
81 </conditional> | |
82 | |
83 | |
84 <conditional name="genotype_likelihood_computation_type"> | |
85 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> | |
86 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> | |
87 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> | |
88 </param> | |
89 <when value="perform_genotype_likelihood_computation"> | |
90 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> | |
91 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> | |
92 <conditional name="perform_indel_calling"> | |
93 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> | |
94 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> | |
95 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> | |
96 </param> | 94 </param> |
97 <when value="perform_indel_calling"> | 95 <when value="cached"> |
98 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> | 96 <repeat min="1" name="input_bams" title="BAM file"> |
99 </when> | 97 <param format="bam" label="BAM file" name="input_bam" type="data"> |
100 <when value="do_not_perform_indel_calling" /> | 98 <validator type="unspecified_build" /> |
101 </conditional> | 99 <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" /> |
102 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> | 100 </param> |
103 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> | 101 </repeat> |
104 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> | 102 <param label="Using reference genome" name="ref_file" type="select"> |
105 </repeat> | 103 <options from_data_table="fasta_indexes" /> |
106 </when> | 104 </param> |
107 <when value="do_not_perform_genotype_likelihood_computation"> | 105 </when> |
108 <!-- Do nothing here --> | 106 <when value="history"> |
109 </when> | 107 <repeat min="1" name="input_bams" title="BAM file"> |
110 </conditional> | 108 <param format="bam" label="BAM file" name="input_bam" type="data"> |
111 <conditional name="advanced_options"> | 109 <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" /> |
112 <param name="advanced_options_selector" type="select" label="Set advanced options"> | 110 </param> |
113 <option value="basic" selected="True">Basic</option> | 111 </repeat> |
114 <option value="advanced">Advanced</option> | 112 <param format="fasta" label="Using reference genome" name="ref_file" type="data" /> |
115 </param> | 113 </when> |
116 <when value="advanced"> | 114 </conditional> |
117 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> | 115 <conditional name="genotype_likelihood_computation_type"> |
118 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> | 116 <param label="Genotype Likelihood Computation" name="genotype_likelihood_computation_type_selector" type="select"> |
119 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> | 117 <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option> |
120 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> | 118 <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option> |
121 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> | 119 </param> |
122 <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> | 120 <when value="perform_genotype_likelihood_computation"> |
123 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> | 121 <param label="Choose the output format" name="output_format" type="select"> |
124 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> | 122 <option value="--VCF">VCF</option> |
125 <param name="region_string" type="text" value="" label="Only generate pileup in region" /> | 123 <option value="--BCF">BCF</option> |
126 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> | 124 </param> |
127 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> | 125 <param checked="False" falsevalue="--uncompressed" label="Compress output" name="compressed" truevalue="" type="boolean" help="--uncompressed; default=False"/> |
128 </when> | 126 <param name="output_tags" optional="True" type="select" multiple="True" display="checkboxes" label="Optional tags to output" help="--output-tags"> |
129 <when value="basic" /> | 127 <option value="DP">DP (Number of high-quality bases)</option> |
130 </conditional> | 128 <option value="DPR">DPR (Number of high-quality bases for each observed allele)</option> |
131 </inputs> | 129 <option value="DV">DV (Number of high-quality non-reference bases)</option> |
132 <outputs> | 130 <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option> |
133 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> | 131 <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option> |
134 <change_format> | 132 <option value="SP">SP (Phred-scaled strand bias P-value)</option> |
135 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> | 133 </param> |
136 </change_format> | 134 <conditional name="perform_indel_calling"> |
137 </data> | 135 <param label="Perform INDEL calling" name="perform_indel_calling_selector" type="select"> |
138 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> | 136 <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option> |
139 </outputs> | 137 <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option> |
140 <tests> | 138 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> |
141 <test> | 139 </param> |
142 <param name="reference_source_selector" value="history" /> | 140 <when value="perform_indel_calling_def" /> |
143 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | 141 <when value="perform_indel_calling"> |
144 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> | 142 <param label="Phred-scaled gap open sequencing error probability" name="gap_open_sequencing_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/> |
145 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> | 143 <param label="Phred-scaled gap extension sequencing error probability" name="gap_extension_sequencing_error_probability" type="integer" value="20" help="--ext-prob; Reducing this value leads to longer indels. default=20"/> |
146 <param name="advanced_options_selector" value="basic" /> | 144 <param label="Coefficient for modeling homopolymer errors." name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" help="--tandem-qual; default=100"/> |
147 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> | 145 <param label="Skip INDEL calling if the average per-sample depth is above" name="skip_indel_calling_above_sample_depth" type="integer" value="250" help="--max-idepth; default=250"/> |
148 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> | 146 <param label="Minimum gapped reads for indel candidates" name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" help="--min-ireads; default=1"/> |
149 </test> | 147 <param label="Phred-scaled gap open sequencing error probability" name="open_seq_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/> |
150 <test> | 148 <param label="Minimum fraction of gapped reads" name="minimum_gapped_read_fraction" type="float" value="0.002" help="--gap-frac; default=0.002"/> |
151 <param name="reference_source_selector" value="history" /> | 149 <param checked="False" falsevalue="" label="Apply --min-ireads and --gap-frac values on a per-sample basis" name="gapped_read_per_sample" truevalue="-p" type="boolean" help="--per-sample-mF; by default both options are applied to reads pooled from all samples"/> |
152 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | 150 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> |
153 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> | 151 <param label="Platform to use for INDEL candidates" name="platform_entry" type="text" value="" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/> |
154 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | 152 </repeat> |
155 <param name="gap_extension_sequencing_error_probability" value="20" /> | 153 </when> |
156 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | 154 <when value="do_not_perform_indel_calling" /> |
157 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | 155 </conditional> |
158 <param name="skip_indel_calling_above_sample_depth" value="250" /> | 156 |
159 <param name="gap_open_sequencing_error_probability" value="40" /> | 157 </when> |
160 <param name="platform_list_repeat" value="0" /> | 158 <when value="do_not_perform_genotype_likelihood_computation"> |
161 <param name="advanced_options_selector" value="basic" /> | 159 <param checked="False" falsevalue="" label="Output base positions on reads" name="base_position_on_reads" truevalue="-O" type="boolean" help="--output-BP"/> |
162 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> | 160 <param checked="False" falsevalue="" label="Output mapping quality" name="output_mapping_quality" truevalue="-s" type="boolean" help="--output-MQ"/> |
163 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> | 161 </when> |
164 </test> | 162 </conditional> |
165 </tests> | 163 <conditional name="advanced_options"> |
166 <help> | 164 <param label="Set advanced options" name="advanced_options_selector" type="select"> |
165 <option selected="True" value="basic">Basic</option> | |
166 <option value="advanced">Advanced</option> | |
167 </param> | |
168 <when value="advanced"> | |
169 <conditional name="filter_by_flags"> | |
170 <param label="Set filter by flags" name="filter_flags" type="select"> | |
171 <option selected="True" value="nofilter">Do not filter</option> | |
172 <option value="filter">Filter by flags to exclude or require</option> | |
173 </param> | |
174 <when value="filter"> | |
175 <param display="checkboxes" label="Require" multiple="True" name="require_flags" type="select" help="--incl-flags"> | |
176 <option value="1">Read is paired</option> | |
177 <option value="2">Read is mapped in a proper pair</option> | |
178 <option value="4">The read is unmapped</option> | |
179 <option value="8">The mate is unmapped</option> | |
180 <option value="16">Read strand</option> | |
181 <option value="32">Mate strand</option> | |
182 <option value="64">Read is the first in a pair</option> | |
183 <option value="128">Read is the second in a pair</option> | |
184 <option value="256">The alignment or this read is not primary</option> | |
185 <option value="512">The read fails platform/vendor quality checks</option> | |
186 <option value="1024">The read is a PCR or optical duplicate</option> | |
187 </param> | |
188 <param display="checkboxes" label="Exclude" multiple="True" name="exclude_flags" type="select" help="--excl-flags"> | |
189 <option value="1">Read is paired</option> | |
190 <option value="2">Read is mapped in a proper pair</option> | |
191 <option value="4">The read is unmapped</option> | |
192 <option value="8">The mate is unmapped</option> | |
193 <option value="16">Read strand</option> | |
194 <option value="32">Mate strand</option> | |
195 <option value="64">Read is the first in a pair</option> | |
196 <option value="128">Read is the second in a pair</option> | |
197 <option value="256">The alignment or this read is not primary</option> | |
198 <option value="512">The read fails platform/vendor quality checks</option> | |
199 <option value="1024">The read is a PCR or optical duplicate</option> | |
200 </param> | |
201 </when> | |
202 <when value="nofilter" /> | |
203 </conditional> | |
204 <conditional name="limit_by_region"> | |
205 <param label="Select regions to call" name="limit_by_regions" type="select"> | |
206 <option selected="True" value="no_limit">Do not limit</option> | |
207 <option value="history">From an uploaded BED file (--positions)</option> | |
208 <option value="paste">Paste a list of regions or BED (--region)</option> | |
209 </param> | |
210 <when value="history"> | |
211 <param format="bed" label="BED file" name="bed_regions" type="data" help="--positions"> | |
212 <validator type="dataset_ok_validator" /> | |
213 </param> | |
214 </when> | |
215 <when value="paste"> | |
216 <param area="true" help="Paste a list of regions in BED format or as a list of chromosomes and positions" label="Regions" name="region_paste" size="10x35" type="text"/> | |
217 </when> | |
218 <when value="no_limit" /> | |
219 </conditional> | |
220 <conditional name="exclude_read_group"> | |
221 <param label="Select read groups to exclude" name="exclude_read_groups" type="select" help="--exclude-RG"> | |
222 <option selected="True" value="no_limit">Do not exclude</option> | |
223 <option value="history">From an uploaded text file</option> | |
224 <option value="paste">Paste a list of read groups</option> | |
225 </param> | |
226 <when value="history"> | |
227 <param format="txt" label="Text file" name="read_groups" type="data"> | |
228 <validator type="dataset_ok_validator" /> | |
229 </param> | |
230 </when> | |
231 <when value="paste"> | |
232 <param area="true" help="Paste a list of read groups" label="Read groups" name="group_paste" size="10x35" type="text" /> | |
233 </when> | |
234 <when value="no_limit" /> | |
235 </conditional> | |
236 <param checked="False" falsevalue="" label="Disable read-pair overlap detection" name="ignore_overlaps" truevalue="-x" type="boolean" help="--ignore-overlaps"/> | |
237 <param checked="False" falsevalue="" label="Do not skip anomalous read pairs in variant calling" name="skip_anomalous_read_pairs" truevalue="-A" type="boolean" help="--count-orphans"/> | |
238 <param checked="False" falsevalue="" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" name="disable_probabilistic_realignment" truevalue="-B" type="boolean" help="--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments"/> | |
239 <param label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" name="coefficient_for_downgrading" type="integer" value="0" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/> | |
240 <param label="Max reads per BAM" max="1024" min="1" name="max_reads_per_bam" type="integer" value="250" help="--max-depth; default=250"/> | |
241 <param checked="False" falsevalue="" label="Redo BAQ computation" name="extended_BAQ_computation" truevalue="-E" type="boolean" help="--redo-BAQ; ignore existing BQ tags"/> | |
242 <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="--min-MQ; default=0"/> | |
243 <param label="Minimum base quality for a base to be considered" name="minimum_base_quality" type="integer" value="13" help="--min-BQ; default=13"/> | |
244 </when> | |
245 <when value="basic" /> | |
246 </conditional> | |
247 </inputs> | |
248 <outputs> | |
249 <data format="pileup" label="${tool.name} on ${on_string}" name="output_mpileup"> | |
250 <change_format> | |
251 <when format="bcf" input="genotype_likelihood_computation_type.output_format" value="--BCF" /> | |
252 <when format="vcf" input="genotype_likelihood_computation_type.output_format" value="--VCF" /> | |
253 </change_format> | |
254 </data> | |
255 <data format="txt" label="${tool.name} on ${on_string} (log)" name="output_log" /> | |
256 </outputs> | |
257 <tests> | |
258 <test> | |
259 <param name="reference_source_selector" value="history" /> | |
260 <param ftype="fasta" name="ref_file" value="phiX.fasta" /> | |
261 <param ftype="bam" name="input_bam" value="samtools_mpileup_in_1.bam" /> | |
262 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> | |
263 <param name="advanced_options_selector" value="basic" /> | |
264 <param name="base_position_on_reads" value="true" /> | |
265 <param name="output_mapping_quality" value="true" /> | |
266 <output file="samtools_mpileup_out_1.pileup" name="output_mpileup" /> | |
267 <output file="samtools_mpileup_out_1.log" name="output_log" /> | |
268 </test> | |
269 <test> | |
270 <param name="reference_source_selector" value="history" /> | |
271 <param ftype="fasta" name="ref_file" value="phiX.fasta" /> | |
272 <param ftype="bam" name="input_bam" value="phiX.bam" /> | |
273 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | |
274 <param name="gap_extension_sequencing_error_probability" value="20" /> | |
275 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | |
276 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | |
277 <param name="skip_indel_calling_above_sample_depth" value="250" /> | |
278 <param name="gap_open_sequencing_error_probability" value="40" /> | |
279 <param name="platform_list_repeat" value="0" /> | |
280 <param name="advanced_options_selector" value="basic" /> | |
281 <param name="genotype_likelihood_computation_type|output_format" value="VCF" /> | |
282 <output file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" name="output_mpileup" /> | |
283 <output file="samtools_mpileup_out_2.log" name="output_log" /> | |
284 </test> | |
285 </tests> | |
286 <help> | |
287 <![CDATA[ | |
167 **What it does** | 288 **What it does** |
168 | 289 |
169 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. | 290 Report variants for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. |
170 | 291 |
171 ------ | 292 ------ |
172 | 293 |
173 **Settings**:: | 294 **Input options**:: |
174 | 295 |
175 Input Options: | 296 -6, --illumina1.3+ quality is in the Illumina-1.3+ encoding |
176 -6 Assume the quality is in the Illumina 1.3+ encoding. | 297 -A, --count-orphans do not discard anomalous read pairs |
177 -A Do not skip anomalous read pairs in variant calling. | 298 -b, --bam-list FILE list of input BAM filenames, one per line |
178 -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. | 299 -B, --no-BAQ disable BAQ (per-Base Alignment Quality) |
179 -b FILE List of input BAM files, one file per line [null] | 300 -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0] |
180 -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] | 301 -d, --max-depth INT max per-BAM depth; avoids excessive memory usage [250] |
181 -d INT At a position, read maximally INT reads per input BAM. [250] | 302 -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs |
182 -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. | 303 -f, --fasta-ref FILE faidx indexed reference sequence file |
183 -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] | 304 -G, --exclude-RG FILE exclude read groups listed in FILE |
184 -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] | 305 -l, --positions FILE skip unlisted positions (chr pos) or regions (BED) |
185 -q INT Minimum mapping quality for an alignment to be used [0] | 306 -q, --min-MQ INT skip alignments with mapQ smaller than INT [0] |
186 -Q INT Minimum base quality for a base to be considered [13] | 307 -Q, --min-BQ INT skip bases with baseQ/BAQ smaller than INT [13] |
187 -r STR Only generate pileup in region STR [all sites] | 308 -r, --region REG region in which pileup is generated |
188 Output Options: | 309 -R, --ignore-RG ignore RG tags (one BAM = one sample) |
189 | 310 --rf, --incl-flags STR|INT required flags: skip reads with mask bits unset [] |
190 -D Output per-sample read depth | 311 --ff, --excl-flags STR|INT filter flags: skip reads with mask bits set |
191 -g Compute genotype likelihoods and output them in the binary call format (BCF). | 312 [UNMAP,SECONDARY,QCFAIL,DUP] |
192 -S Output per-sample Phred-scaled strand bias P-value | 313 -x, --ignore-overlaps disable read-pair overlap detection |
193 -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. | 314 |
194 | 315 **Output options**:: |
195 Options for Genotype Likelihood Computation (for -g or -u): | 316 |
196 | 317 -o, --output FILE write output to FILE [standard output] |
197 -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] | 318 -g, --BCF generate genotype likelihoods in BCF format |
198 -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] | 319 -v, --VCF generate genotype likelihoods in VCF format |
199 -I Do not perform INDEL calling | 320 |
200 -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] | 321 **Output options for mpileup format** (without -g/-v):: |
201 -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] | 322 |
202 -P STR Comma-delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have a low indel error rate, such as ILLUMINA. [all] | 323 -O, --output-BP output base positions on reads |
203 | 324 -s, --output-MQ output mapping quality |
204 ------ | 325 |
205 | 326 **Output options for genotype likelihoods** (when -g/-v is used):: |
206 **Citation** | 327 |
207 | 328 -t, --output-tags LIST optional tags to output: DP,DPR,DV,DP4,INFO/DPR,SP [] |
208 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ | 329 -u, --uncompressed generate uncompressed VCF/BCF output |
209 | 330 |
210 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | 331 **SNP/INDEL genotype likelihoods options** (effective with -g/-v):: |
211 | 332 |
212 </help> | 333 -e, --ext-prob INT Phred-scaled gap extension seq error probability [20] |
334 -F, --gap-frac FLOAT minimum fraction of gapped reads [0.002] | |
335 -h, --tandem-qual INT coefficient for homopolymer errors [100] | |
336 -I, --skip-indels do not perform indel calling | |
337 -L, --max-idepth INT maximum per-sample depth for INDEL calling [250] | |
338 -m, --min-ireads INT minimum number gapped reads for indel candidates [1] | |
339 -o, --open-prob INT Phred-scaled gap open seq error probability [40] | |
340 -p, --per-sample-mF apply -m and -F per-sample for increased sensitivity | |
341 -P, --platforms STR comma separated list of platforms for indels [all] | |
342 | |
343 **Notes**: Assuming diploid individuals. | |
344 ]]> | |
345 </help> | |
346 <configfiles> | |
347 <configfile name="excluded_read_groups"> | |
348 <![CDATA[ | |
349 #set pasted_data = '' | |
350 #if str( $advanced_options.advanced_options_selector ) == "advanced": | |
351 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": | |
352 #set pasted_data = '\t'.join( str( $advanced_options.exclude_read_group['read_groups'] ).split() ) | |
353 #end if | |
354 #end if | |
355 ${pasted_data} | |
356 ]]> | |
357 </configfile> | |
358 <configfile name="pasted_regions"> | |
359 <![CDATA[ | |
360 #set pasted_data = '' | |
361 #if str( $advanced_options.advanced_options_selector ) == "advanced": | |
362 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": | |
363 #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() ) | |
364 #end if | |
365 #end if | |
366 ${pasted_data} | |
367 ]]> | |
368 </configfile> | |
369 </configfiles> | |
370 <expand macro="citations" /> | |
213 </tool> | 371 </tool> |