comparison samtools_mpileup.xml @ 11:111f83918fe6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_mpileup commit 0f75269223c0821c6c82acf98fde947d0f816f2b"
author iuc
date Tue, 28 Sep 2021 16:15:20 +0000
parents 8da515fbc1bf
children 329deb17a9f7
comparison
equal deleted inserted replaced
10:8da515fbc1bf 11:111f83918fe6
1 <tool id="samtools_mpileup" name="samtools mpileup" version="2.1.4"> 1 <tool id="samtools_mpileup" name="Samtools mpileup" version="2.1.5" profile="@PROFILE@">
2 <description>multi-way pileup of variants</description> 2 <description>multi-way pileup of variants</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" /> 7 <expand macro="stdio" />
8 <expand macro="version_command" /> 8 <expand macro="version_command" />
9 <command><![CDATA[ 9 <command><![CDATA[
10 10
11 #set $input_bams = $reference_source.input_bam 11 #set $input_bams = $input
12 @PREPARE_IDX_MULTIPLE@ 12 @PREPARE_IDX_MULTIPLE@
13 13 @PREPARE_FASTA_IDX@
14 #if $reference_source.reference_source_selector == "history":
15 #set ref_fa = 'ref.fa'
16 ln -s '${reference_source.ref_file}' 'ref.fa' &&
17 samtools faidx ref.fa &&
18 #else:
19 #set ref_fa = str( $reference_source.ref_file.fields.path )
20 #end if
21 14
22 samtools mpileup 15 samtools mpileup
23 -f '$ref_fa' 16 -f '$reffa'
24 #for $i in range(len( $input_bams )): 17 #for $i in range(len( $input_bams )):
25 '${i}' 18 '${i}'
26 #end for 19 #end for
27 20
28 #if str( $advanced_options.advanced_options_selector ) == "advanced": 21 #if str( $advanced_options.advanced_options_selector ) == "advanced":
29 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": 22 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
30 #if $advanced_options.filter_by_flags.require_flags: 23 #set $filter = $advanced_options.filter_by_flags.require_flags
31 #set $filter = $advanced_options.filter_by_flags.require_flags 24 @FLAGS@
32 @FLAGS@ 25 --rf $flags
33 --rf $flags 26 #set $filter = $advanced_options.filter_by_flags.exclude_flags
34 #end if 27 @FLAGS@
35 #if $advanced_options.filter_by_flags.exclude_flags: 28 --ff $flags
36 #set $filter = $advanced_options.filter_by_flags.exclude_flags
37 @FLAGS@
38 --ff $flags
39 #end if
40 #end if 29 #end if
41 #if str($advanced_options.limit_by_region.limit_by_regions) == "limit": 30 #if str($advanced_options.limit_by_region.limit_by_regions) == "limit":
42 #if str( $advanced_options.limit_by_region.region_paste ) != "None": 31 #if str( $advanced_options.limit_by_region.region_paste ) != "None":
43 -r '$advanced_options.limit_by_region.region_paste' 32 -r '$advanced_options.limit_by_region.region_paste'
44 #end if 33 #end if
45 #if str( $advanced_options.limit_by_region.bed_regions ) != "None" 34 #if str( $advanced_options.limit_by_region.bed_regions ) != "None"
46 -l '$advanced_options.limit_by_region.bed_regions' 35 -l '$advanced_options.limit_by_region.bed_regions'
47 #end if 36 #end if
48 #end if 37 #end if
49 38
50 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": 39 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
51 -G '$excluded_read_groups' 40 -G '$excluded_read_groups'
52 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" 41 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history"
53 -G '$advanced_options.exclude_read_group.read_groups' 42 -G '$advanced_options.exclude_read_group.read_groups'
54 #end if 43 #end if
65 #if str( $output_options_cond.output_options_selector ) == 'advanced': 54 #if str( $output_options_cond.output_options_selector ) == 'advanced':
66 ${output_options_cond.base_position_on_reads} 55 ${output_options_cond.base_position_on_reads}
67 ${output_options_cond.output_mapping_quality} 56 ${output_options_cond.output_mapping_quality}
68 ${output_options_cond.output_read_names} 57 ${output_options_cond.output_read_names}
69 ${output_options_cond.output_all_pos} 58 ${output_options_cond.output_all_pos}
59 #if $output_options_cond.output_tags:
60 --output-extra '$output_options_cond.output_tags'
61 #end if
70 #end if 62 #end if
71 --output '$output_file_pu' 63 --output '$output_file_pu'
72 ]]></command> 64 ]]></command>
73 <inputs> 65 <inputs>
74 <conditional name="reference_source"> 66 <param name="input" type="data" format="bam" multiple="true" min="1" label="BAM file(s)">
75 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> 67 <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
76 <option value="cached">Use a built-in genome</option> 68 </param>
77 <option value="history">Use a genome from the history</option> 69 <expand macro="mandatory_reference" argument="-f"/>
78 </param>
79 <when value="cached">
80 <param name="input_bam" type="data" format="bam" multiple="True" min="1" label="BAM file(s)">
81 <validator type="unspecified_build" />
82 <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" />
83 </param>
84 <param name="ref_file" type="select" label="Using reference genome">
85 <options from_data_table="fasta_indexes" />
86 </param>
87 </when>
88 <when value="history">
89 <param name="input_bam" type="data" format="bam" multiple="True" min="1" label="BAM file(s)">
90 <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
91 </param>
92 <param name="ref_file" type="data" format="fasta" label="Using reference genome" />
93 </when>
94 </conditional>
95 <conditional name="advanced_options"> 70 <conditional name="advanced_options">
96 <param name="advanced_options_selector" type="select" label="Set advanced options"> 71 <param name="advanced_options_selector" type="select" label="Set advanced options">
97 <option selected="True" value="default">Basic</option> 72 <option selected="True" value="default">Basic</option>
98 <option value="advanced">Advanced</option> 73 <option value="advanced">Advanced</option>
99 </param> 74 </param>
107 <when value="filter"> 82 <when value="filter">
108 <param name="require_flags" argument="--rf/--incl-flags" type="select" multiple="True" display="checkboxes" label="Require"> 83 <param name="require_flags" argument="--rf/--incl-flags" type="select" multiple="True" display="checkboxes" label="Require">
109 <expand macro="flag_options" /> 84 <expand macro="flag_options" />
110 </param> 85 </param>
111 <param name="exclude_flags" argument="--ff/--excl-flags" type="select" multiple="True" display="checkboxes" label="Exclude"> 86 <param name="exclude_flags" argument="--ff/--excl-flags" type="select" multiple="True" display="checkboxes" label="Exclude">
112 <expand macro="flag_options" /> 87 <expand macro="flag_options" s4="true" s256="true" s512="true" s1024="true"/>
113 </param> 88 </param>
114 </when> 89 </when>
115 <when value="nofilter" /> 90 <when value="nofilter" />
116 </conditional> 91 </conditional>
117 <conditional name="limit_by_region"> 92 <conditional name="limit_by_region">
169 <param name="output_all_pos" argument="-a" type="select" label="Output absolutely all positions" help="Output all positions, including those with zero depth. (-a) Output absolutely all positions, including unused reference sequences (-aa). Note that when used in conjunction with a BED file the -a option may sometimes operate as if -aa was specified if the reference sequence has coverage outside of the region specified in the BED file."> 144 <param name="output_all_pos" argument="-a" type="select" label="Output absolutely all positions" help="Output all positions, including those with zero depth. (-a) Output absolutely all positions, including unused reference sequences (-aa). Note that when used in conjunction with a BED file the -a option may sometimes operate as if -aa was specified if the reference sequence has coverage outside of the region specified in the BED file.">
170 <option selected="True" value="">No</option> 145 <option selected="True" value="">No</option>
171 <option value="-a">all (including those with zero depth)</option> 146 <option value="-a">all (including those with zero depth)</option>
172 <option value="-aa">absolutely all (including unused reference sequences)</option> 147 <option value="-aa">absolutely all (including unused reference sequences)</option>
173 </param> 148 </param>
149 <param name="output_tags" type="text" argument="--output-extra" label="Add tags to output" help="Output extra read fields and read tag values, e.g., NM and XS (must be comma separated, e.g., NM,XS)" />
174 </when> 150 </when>
175 </conditional> 151 </conditional>
176 </inputs> 152 </inputs>
177 <outputs> 153 <outputs>
178 <data name="output_file_pu" format="pileup" label="${tool.name} on ${on_string} pileup"/> 154 <data name="output_file_pu" format="pileup" label="${tool.name} on ${on_string}"/>
179 </outputs> 155 </outputs>
180 <tests> 156 <tests>
181 <!-- samtools test https://github.com/samtools/samtools/blob/4651d25f2b14cd68ffb0915a74b0c1b529b8cfa1/test/test.pl#L757 --> 157 <!-- samtools test https://github.com/samtools/samtools/blob/4651d25f2b14cd68ffb0915a74b0c1b529b8cfa1/test/test.pl#L757 -->
182 <test> 158 <test>
183 <conditional name="reference_source"> 159 <param name="input" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
184 <param name="reference_source_selector" value="history" /> 160 <conditional name="addref_cond">
185 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" /> 161 <param name="addref_select" value="history" />
186 <param name="input_bam" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" /> 162 <param name="ref" ftype="fasta" value="mpileup.ref.fa" />
187 </conditional> 163 </conditional>
188 <conditional name="advanced_options"> 164 <conditional name="advanced_options">
189 <param name="advanced_options_selector" value="advanced" /> 165 <param name="advanced_options_selector" value="advanced" />
190 <conditional name="limit_by_region"> 166 <conditional name="limit_by_region">
191 <param name="limit_by_regions" value="limit"/> 167 <param name="limit_by_regions" value="limit"/>
194 </conditional> 170 </conditional>
195 <output name="output_file_pu" file="mpileup.out.1" ftype="pileup" /> 171 <output name="output_file_pu" file="mpileup.out.1" ftype="pileup" />
196 </test> 172 </test>
197 <!-- test_cmd($opts,out=>'dat/mpileup.out.1',err=>'dat/mpileup.err.1',cmd=>"$$opts{bin}/samtools mpileup -b $$opts{tmp}/mpileup.bam.list -f $$opts{tmp}/mpileup.ref.fa.gz -r17:100-150");--> 173 <!-- test_cmd($opts,out=>'dat/mpileup.out.1',err=>'dat/mpileup.err.1',cmd=>"$$opts{bin}/samtools mpileup -b $$opts{tmp}/mpileup.bam.list -f $$opts{tmp}/mpileup.ref.fa.gz -r17:100-150");-->
198 <test> 174 <test>
199 <conditional name="reference_source"> 175 <param name="input" ftype="bam" value="mpileup.1.bam" />
200 <param name="reference_source_selector" value="history" /> 176 <conditional name="addref_cond">
201 <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" /> 177 <param name="addref_select" value="history" />
202 <param name="input_bam" ftype="bam" value="mpileup.1.bam" /> 178 <param name="ref" ftype="fasta" value="mpileup.ref.fa" />
203 </conditional> 179 </conditional>
204 <conditional name="advanced_options"> 180 <conditional name="advanced_options">
205 <param name="advanced_options_selector" value="advanced" /> 181 <param name="advanced_options_selector" value="advanced" />
206 <conditional name="filter_by_flags"> 182 <conditional name="filter_by_flags">
207 <param name="filter_flags" value="filter"/> 183 <param name="filter_flags" value="filter"/>
217 </test> 193 </test>
218 <!-- test_cmd($opts,out=>'dat/mpileup.out.3',cmd=>"$$opts{bin}/samtools mpileup -B \-\-ff 0x14 -f $$opts{tmp}/mpileup.ref.fa.gz -r17:1050-1060 $$opts{tmp}/mpileup.1.bam | grep -v mpileup"); 194 <!-- test_cmd($opts,out=>'dat/mpileup.out.3',cmd=>"$$opts{bin}/samtools mpileup -B \-\-ff 0x14 -f $$opts{tmp}/mpileup.ref.fa.gz -r17:1050-1060 $$opts{tmp}/mpileup.1.bam | grep -v mpileup");
219 --> 195 -->
220 <!-- original test from galaxy tool--> 196 <!-- original test from galaxy tool-->
221 <test> 197 <test>
222 <conditional name="reference_source"> 198 <param name="input" ftype="bam" value="samtools_mpileup_in_1.bam" />
223 <param name="reference_source_selector" value="history" /> 199 <conditional name="addref_cond">
224 <param name="ref_file" ftype="fasta" value="phiX.fasta" /> 200 <param name="addref_select" value="history" />
225 <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" /> 201 <param name="ref" ftype="fasta" value="phiX.fasta" />
226 </conditional> 202 </conditional>
227 <conditional name="output_options_cond"> 203 <conditional name="output_options_cond">
228 <param name="output_options_selector" value="advanced" /> 204 <param name="output_options_selector" value="advanced" />
229 <param name="base_position_on_reads" value="true" /> 205 <param name="base_position_on_reads" value="true" />
230 <param name="output_mapping_quality" value="true" /> 206 <param name="output_mapping_quality" value="true" />
233 <param name="advanced_options_selector" value="default" /> 209 <param name="advanced_options_selector" value="default" />
234 </conditional> 210 </conditional>
235 <output name="output_file_pu" file="samtools_mpileup_out_1.pileup" ftype="pileup" /> 211 <output name="output_file_pu" file="samtools_mpileup_out_1.pileup" ftype="pileup" />
236 </test> 212 </test>
237 <test> 213 <test>
238 <param name="reference_source_selector" value="history" /> 214 <param name="input" ftype="bam" value="phiX.bam" />
239 <param name="ref_file" ftype="fasta" value="phiX.fasta" /> 215 <conditional name="addref_cond">
240 <param name="input_bam" ftype="bam" value="phiX.bam" /> 216 <param name="addref_select" value="history" />
217 <param name="ref" ftype="fasta" value="phiX.fasta" />
218 </conditional>
241 <conditional name="output_options_cond"> 219 <conditional name="output_options_cond">
242 <param name="output_options_selector" value="default" /> 220 <param name="output_options_selector" value="default" />
243 </conditional> 221 </conditional>
244 <conditional name="advanced_options"> 222 <conditional name="advanced_options">
245 <param name="advanced_options_selector" value="advanced" /> 223 <param name="advanced_options_selector" value="advanced" />
246 <param name="skip_anomalous_read_pairs" value="-A" /> 224 <param name="skip_anomalous_read_pairs" value="-A" />
247 </conditional> 225 </conditional>
248 <output name="output_file_pu" file="samtools_mpileup_out_2.pileup" ftype="pileup" /> 226 <output name="output_file_pu" file="samtools_mpileup_out_2.pileup" ftype="pileup" />
249 </test> 227 </test>
250 <test> 228 <test>
251 <param name="reference_source_selector" value="history" /> 229 <param name="input" ftype="bam" value="samtools_mpileup_in_1.bam" />
252 <param name="ref_file" ftype="fasta" value="phiX.fasta" /> 230 <conditional name="addref_cond">
253 <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" /> 231 <param name="addref_select" value="history" />
254 <conditional name="output_options_cond"> 232 <param name="ref" ftype="fasta" value="phiX.fasta" />
255 <param name="output_options_cond" value="advanced" /> 233 </conditional>
234 <conditional name="output_options_cond">
235 <param name="output_options_selector" value="advanced" />
256 <param name="base_position_on_reads" value="true" /> 236 <param name="base_position_on_reads" value="true" />
257 <param name="output_mapping_quality" value="true" /> 237 <param name="output_mapping_quality" value="true" />
258 </conditional> 238 </conditional>
259 <conditional name="advanced_options"> 239 <conditional name="advanced_options">
260 <param name="advanced_options_selector" value="advanced" /> 240 <param name="advanced_options_selector" value="advanced" />
261 <param name="minimum_base_quality" value="0" /><!-- most reads have ultra low quality resulting in empty columns --> 241 <param name="minimum_base_quality" value="0" /><!-- most reads have ultra low quality resulting in empty columns -->
262 </conditional> 242 </conditional>
263 <output name="output_file_pu" file="samtools_mpileup_out_3.pileup" ftype="pileup" /> 243 <output name="output_file_pu" file="samtools_mpileup_out_3.pileup" ftype="pileup" />
244 </test>
245 <test>
246 <param name="input" ftype="bam" value="mpileup.1.bam" />
247 <conditional name="addref_cond">
248 <param name="addref_select" value="history" />
249 <param name="ref" ftype="fasta" value="mpileup.ref.fa" />
250 </conditional>
251 <conditional name="output_options_cond">
252 <param name="output_options_selector" value="advanced" />
253 <param name="output_tags" value="NM,AM" />
254 </conditional>
255 <output name="output_file_pu" file="mpileup.out.4" ftype="pileup" />
264 </test> 256 </test>
265 </tests> 257 </tests>
266 <help><![CDATA[ 258 <help><![CDATA[
267 **What it does** 259 **What it does**
268 260
270 262
271 Generation of VCF and BCF output is deprecated and not available in the Galaxy tool. Please use bcftools mpileup for this instead. 263 Generation of VCF and BCF output is deprecated and not available in the Galaxy tool. Please use bcftools mpileup for this instead.
272 264
273 In the pileup format (without -u or -g), each line represents a genomic position, consisting of chromosome name, 1-based coordinate, reference base, the number of reads covering the site, read bases, base qualities and alignment mapping qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand, a comma for a match on the reverse strand, a '>' or '<' for a reference skip, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the reverse strand. A pattern '\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence. Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference. The deleted bases will be presented as '*' in the following lines. Also at the read base column, a symbol '^' marks the start of a read. The ASCII of the character following '^' minus 33 gives the mapping quality. A symbol '$' marks the end of a read segment. 265 In the pileup format (without -u or -g), each line represents a genomic position, consisting of chromosome name, 1-based coordinate, reference base, the number of reads covering the site, read bases, base qualities and alignment mapping qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand, a comma for a match on the reverse strand, a '>' or '<' for a reference skip, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the reverse strand. A pattern '\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next reference position. 
The length of the insertion is given by the integer in the pattern, followed by the inserted sequence. Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference. The deleted bases will be presented as '*' in the following lines. Also at the read base column, a symbol '^' marks the start of a read. The ASCII of the character following '^' minus 33 gives the mapping quality. A symbol '$' marks the end of a read segment.
274 266
275 Note that there are two orthogonal ways to specify locations in the input file; via -r region and -l file. The former uses (and requires) an index to do random access while the latter streams through the file contents filtering out the specified regions, requiring no index. The two may be used in conjunction. For example a BED file containing locations of genes in chromosome 20 could be specified using -r 20 -l chr20.bed, meaning that the index is used to find chromosome 20 and then it is filtered for the regions listed in the bed file. 267 Note that there are two orthogonal ways to specify locations in the input file; via -r region and -l file. The former uses (and requires) an index to do random access while the latter streams through the file contents filtering out the specified regions, requiring no index. The two may be used in conjunction. For example a BED file containing locations of genes in chromosome 20 could be specified using -r 20 -l chr20.bed, meaning that the index is used to find chromosome 20 and then it is filtered for the regions listed in the bed file.
276 268
277 **BAQ (Base Alignment Quality)** 269 **BAQ (Base Alignment Quality)**
278 270
279 BAQ is the Phred-scaled probability of a read base being misaligned. It greatly helps to reduce false SNPs caused by misalignments. BAQ is calculated using the probabilistic realignment method described in the paper “Improving SNP discovery by base alignment quality”, Heng Li, Bioinformatics, Volume 27, Issue 8 <https://doi.org/10.1093/bioinformatics/btr076> 271 BAQ is the Phred-scaled probability of a read base being misaligned. It greatly helps to reduce false SNPs caused by misalignments. BAQ is calculated using the probabilistic realignment method described in the paper “Improving SNP discovery by base alignment quality”, Heng Li, Bioinformatics, Volume 27, Issue 8 <https://doi.org/10.1093/bioinformatics/btr076>
280 272
281 BAQ is turned on when a reference file is supplied using the -f option. To disable it, use the -B option. 273 BAQ is turned on when a reference file is supplied using the -f option. To disable it, use the -B option.
282 274
283 It is possible to store pre-calculated BAQ values in a SAM BQ:Z tag. Samtools mpileup will use the precalculated values if it finds them. The -E option can be used to make it ignore the contents of the BQ:Z tag and force it to recalculate the BAQ scores by making a new alignment. 275 It is possible to store pre-calculated BAQ values in a SAM BQ:Z tag. Samtools mpileup will use the precalculated values if it finds them. The -E option can be used to make it ignore the contents of the BQ:Z tag and force it to recalculate the BAQ scores by making a new alignment.
284 ]]></help> 276 ]]></help>
285 <expand macro="citations" /> 277 <expand macro="citations" />
286 </tool> 278 </tool>