comparison sambamba_filter.xml @ 0:e3cbb848d8f7 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 1ff1d6786536e134d019c6d6d12ee9885f44b601"
author artbio
date Thu, 21 May 2020 09:51:19 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e3cbb848d8f7
1 <tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.4">
2 <description>
3 on flags, fields, and tags using Sambamba
4 </description>
5 <requirements>
6 <requirement type="package" version="0.7.1">sambamba</requirement>
7 </requirements>
8 <stdio> <!-- any exit code of 1 or above is treated as a fatal job error -->
9 <exit_code range="1:" level="fatal" description="Error occurred" />
10 </stdio>
11 <!-- <version_command>sambamba 2>&amp;1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> -->
12 <command detect_errors="exit_code"><![CDATA[
13 ln -s '$input' input.bam &&
14 ln -s '$input.metadata.bam_index' input.bai &&
15 #if $sambamba_options.selector == 'filter'
16 sambamba view -h -t \${GALAXY_SLOTS:-4}
17 #if str($sambamba_options.query).strip() != ''
18 --filter='${str($sambamba_options.query).replace("'", "'\"'\"'")}'
19 #end if
20 -f '$format' -o '$outfile' input.bam $sambamba_options.region
21 #else
22 sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format'
23 --subsampling-seed='$sambamba_options.seed'
24 -s '$sambamba_options.fraction' -o '$outfile' input.bam
25 #end if
26 ]]></command> <!-- Symlinks the BAM and its index, then runs "sambamba view". Filter mode: --filter is added only for a non-empty query (single quotes shell-escaped); format, output, input and optional region are always passed. Sample mode: seeded subsampling of the given fraction. -->
27 <inputs>
28 <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/>
29 <param name="format" type="select" label="format of the tool output">
30 <option value="bam">BAM</option>
31 <option value="sam">SAM</option>
32 </param>
33 <conditional name="sambamba_options"> <!-- switches between the filter and down-sample parameter sets -->
34 <param name="selector" type="select" label="Filter or Down-sample alignments">
35 <option value="sample">Down-sample bam or sam alignments</option>
36 <option value="filter" selected="true">Filter bam or sam alignments</option>
37 </param>
38 <when value="filter">
39 <param name="query" type="text" size="80">
40 <sanitizer invalid_char="X"> <!-- letters, digits, punctuation and spaces pass through; any other character becomes X -->
41 <valid initial="string.ascii_letters,string.digits,string.punctuation">
42 <add value=" " />
43 </valid>
44 </sanitizer>
45 <label>Filter expression</label>
46 <help>See below for query syntax.</help>
47 </param>
48 <param name="region" type="text" size="40" label="Region in format chr:beg-end">
49 <help>
50 Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000'
51 (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000'
52 (region between 1,000,000 and 2,000,000bp including the end points).
53 The coordinates are 1-based.
54 </help>
55 </param>
56 </when>
57 <when value="sample">
58 <param name="seed" type="integer" value="123" size="10">
59 <label>Seed value for randomisation</label>
60 <help>
61 Be sure to select a different seed value if you
62 re-subsample an output of this tool
63 </help>
64 </param>
65 <param name="fraction" type="float" value="0.1" min="0" max="1" size="10" label="fraction to retrieve after subsampling"> <!-- bounded to [0, 1] to match the help text -->
66 <help>
67 Use a real number between 0 and 1 to indicate the relative size of
68 the fraction you wish to retrieve
69 </help>
70 </param>
71 </when>
72 </conditional>
73 </inputs>
74 <outputs>
75 <data format="bam" name="outfile"> <!-- BAM unless the "format" select is set to sam -->
76 <change_format>
77 <when format="sam" input="format" value="sam"/>
78 </change_format>
79 </data>
80 </outputs>
81 <tests>
82 <test> <!-- filter mode, tag and read-name query, BAM output -->
83 <param ftype="sam" name="input" value="ex1_header.sam"/>
84 <param name="format" value="bam"/>
85 <param name="selector" value="filter"/>
86 <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/"/>
87 <param name="region" value=""/>
88 <output name="outfile" ftype="bam" file="ex1_header_filtered.bam"/>
89 </test>
90 <test> <!-- filter mode, tag regex and flag query restricted to a region, SAM output -->
91 <param ftype="bam" name="input" value="c1215_fixmate.bam"/>
92 <param name="format" value="sam"/>
93 <param name="selector" value="filter"/>
94 <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair"/>
95 <param name="region" value="AL096846:1000-5000"/>
96 <output name="outfile" ftype="sam" file="c1215_fixmate_filtered.sam" lines_diff="2"/>
97 </test>
98 <test> <!-- sample mode, BAM output -->
99 <param ftype="sam" name="input" value="ex1_header.sam"/>
100 <param name="format" value="bam"/>
101 <param name="selector" value="sample"/>
102 <param name="seed" value="123"/>
103 <param name="fraction" value="0.1"/>
104 <output name="outfile" ftype="bam" file="ex1_header_sampled.bam"/>
105 </test>
106 <test> <!-- sample mode, SAM output -->
107 <param ftype="bam" name="input" value="c1215_fixmate.bam"/>
108 <param name="format" value="sam"/>
109 <param name="selector" value="sample"/>
110 <param name="seed" value="123"/>
111 <param name="fraction" value="0.1"/>
112 <output name="outfile" ftype="sam" file="c1215_fixmate_sampled.sam" lines_diff="2"/>
113 </test>
114 </tests>
115 <help>
116 Sambamba Filter Overview
117 ========================
118
119 This tool uses the sambamba_ ``view`` command to filter BAM/SAM alignments on flags, fields, tags, and region, or to down-sample them. The input is a SAM or BAM file.
120
121
122 Filter Syntax
123 =============
124
125 A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed.
126
127 A *basic condition* is a condition on a single record field, tag, or flag.
128
129 You can use the ``==``, ``!=``, ``&gt;``, ``&lt;``, ``&gt;=``, ``&lt;=`` comparison operators for both integers and strings.
130
131 Strings are delimited by single quotes, if you need a single quote inside a string, escape it with ``\\``.
132
133 Examples of filter expressions
134 ------------------------------
135
136 ::
137
138 mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7)
139 read_name == 'abc\'def'
140
141 Basic conditions for flags
142 --------------------------
143
144 The following flag names are recognized:
145 * paired
146 * proper_pair
147 * unmapped
148 * mate_is_unmapped
149 * reverse_strand
150 * mate_is_reverse_strand
151 * first_of_pair
152 * second_of_pair
153 * secondary_alignment
154 * failed_quality_control
155 * duplicate
156
157 Example
158 ~~~~~~~
159
160 ::
161
162 not (unmapped or mate_is_unmapped) and first_of_pair
163
164 Basic conditions for fields
165 ---------------------------
166
167 Conditions for integer and string fields are supported.
168
169 List of integer fields:
170 * ref_id
171 * position
172 * mapping_quality
173 * sequence_length
174 * mate_ref_id
175 * mate_position
176 * template_length
177
178
179 List of string fields:
180 * read_name
181 * sequence
182 * cigar
183
184
185 Example
186 ~~~~~~~
187
188 ::
189
190 ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80
191
192 Basic conditions for tags
193 -------------------------
194
195 Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type.
196
197 In order to do filtering based on the presence of a particular tag, you can use special ``null`` value.
198
199 Example
200 ~~~~~~~
201
202 ::
203
204 [RG] != null and [AM] == 37
205
206 Down-sampling
207 =============
208
209 The tool is using the following sambamba command line for sampling:
210
211 ::
212
213 sambamba view -h -t &lt;number of Galaxy threads configured in job_conf.xml&gt; -f &lt;bam or sam&gt;
214 --subsampling-seed=&lt;an integer&gt;
215 -s &lt;a real number between 0 and 1&gt; -o &lt;bam or sam output&gt; input_file
216
217 Warnings
218 --------
219
220 The tool does not down-sample to a user-specified **number of lines**, because sambamba does not
221 expose this functionality. For performance reasons, we decided not to add it to this wrapper.
222
223 If you down-sample a dataset that has *already been down-sampled* with this tool, it is
224 important that you choose **another seed** for randomisation. Otherwise, the new subsample
225 has been reported not to match the requested fraction.
226
227
228 .. _sambamba: https://github.com/biod/sambamba
229
230 </help>
231 <citations>
232 <citation type="doi">10.1093/bioinformatics/btv098</citation> <!-- DOI reference; presumably the Sambamba publication (confirm) -->
233 </citations>
234 </tool>