comparison sambamba.xml @ 1:6195f15d4541 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 516e8d55d6d45e6f2266805b78eb25a711621321"
author artbio
date Mon, 25 May 2020 17:10:17 -0400
parents
children 7ad3484aa5db
comparison
equal deleted inserted replaced
0:e3cbb848d8f7 1:6195f15d4541
1 <tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.6">
2 <description>
3 on flags, fields, and tags using Sambamba
4 </description> <!-- short summary text for the tool; it continues the tool name "Sample or Filter BAM" -->
5 <requirements> <!-- the wrapper needs one package: sambamba, pinned to version 0.7.1 -->
6 <requirement type="package" version="0.7.1">sambamba</requirement>
7 </requirements>
8 <stdio> <!-- any exit status of 1 or above is reported as a fatal error -->
9 <exit_code range="1:" level="fatal" description="Error occurred" />
10 </stdio>
11 <!-- <version_command>sambamba 2>&amp;1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> --> <!-- Command template: symlinks the input BAM and its index as input.bam / input.bai, then runs "sambamba view" either in filter mode (optional expression, optional region) or in down-sampling mode (seed and fraction). Dataset paths are single-quoted. Single quotes inside the user's filter expression are shell-escaped so that string literals such as 'abc' reach sambamba intact. The region is left unquoted on purpose: an empty value then adds no argument. -->
12 <command detect_errors="exit_code"><![CDATA[
13 ln -s '$input' input.bam &&
14 ln -s '$input.metadata.bam_index' input.bai &&
15 #if $sambamba_options.selector == 'filter'
16 sambamba view -h -t \${GALAXY_SLOTS:-4}
17 #if $sambamba_options.query != '':
18 --filter='${str($sambamba_options.query).replace("'", "'\\''")}'
19 #end if
20 -f '$format' -o '$outfile' input.bam $sambamba_options.region
21 #else
22 sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format'
23 --subsampling-seed='$sambamba_options.seed'
24 -s '$sambamba_options.fraction' -o '$outfile' input.bam
25 #end if
26 ]]></command>
27 <inputs> <!-- parameters: the input alignments, the output format, and a filter / down-sample mode switch -->
28 <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/>
29 <param name="format" type="select" label="format of the tool output">
30 <option value="bam">BAM</option>
31 <option value="sam">SAM</option>
32 </param>
33 <conditional name="sambamba_options"> <!-- the "selector" value decides which "when" branch supplies the remaining parameters -->
34 <param name="selector" type="select" label="Filter or Down-sample alignments">
35 <option value="sample">Down-sample bam or sam alignments</option>
36 <option value="filter" selected="true">Filter bam or sam alignments</option>
37 </param>
38 <when value="filter"> <!-- filter mode: a filter expression and a region, both of which may be left empty -->
39 <param name="query" type="text" size="80">
40 <sanitizer invalid_char="X"> <!-- letters, digits, punctuation and the space character are accepted; filter expressions need brackets, slashes, quotes and comparison operators -->
41 <valid initial="string.ascii_letters,string.digits,string.punctuation">
42 <add value=" " />
43 </valid>
44 </sanitizer>
45 <label>Filter expression</label>
46 <help>See below for query syntax.</help>
47 </param>
48 <param name="region" type="text" size="40" label="Region in format chr:beg-end">
49 <help>
50 Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000'
51 (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000'
52 (region between 1,000,000 and 2,000,000bp including the end points).
53 The coordinates are 1-based.
54 </help>
55 </param>
56 </when>
57 <when value="sample"> <!-- down-sampling mode: random seed and fraction of alignments to keep -->
58 <param name="seed" type="integer" value="123" size="10">
59 <label>Seed value for randomisation</label>
60 <help>
61 Be careful at selecting different seed values if you
62 re-subsample a subsample output of this tool
63 </help>
64 </param>
65 <param name="fraction" type="float" value="0.1" min="0" max="1" size="10" label="fraction to retrieve after subsampling"> <!-- bounded to the 0..1 range stated in the help text -->
66 <help>
67 Use a real number between 0 and 1 to indicate the relative size of
68 the fraction you wish to retrieve
69 </help>
70 </param>
71 </when>
72 </conditional>
73 </inputs>
74 <outputs> <!-- a single result dataset named "outfile", BAM unless changed below -->
75 <data format="bam" name="outfile">
76 <change_format> <!-- the dataset becomes SAM when the "format" parameter is set to sam -->
77 <when format="sam" input="format" value="sam"/>
78 </change_format>
79 </data>
80 </outputs>
81 <tests> <!-- five cases: three in filter mode, two in down-sampling mode -->
82 <test> <!-- filter on a tag and a read-name pattern, no region, BAM output -->
83 <param name="input" value="ex1_header.sam" ftype="sam" />
84 <param name="format" value="bam" />
85 <param name="selector" value="filter" />
86 <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" />
87 <param name="region" value="" />
88 <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" />
89 </test>
90 <test> <!-- filter on an MD tag pattern and a flag, restricted to a region, SAM output -->
91 <param name="input" value="c1215_fixmate.bam" ftype="bam" />
92 <param name="format" value="sam" />
93 <param name="selector" value="filter" />
94 <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" />
95 <param name="region" value="AL096846:1000-5000" />
96 <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" lines_diff="2" />
97 </test>
98 <test> <!-- empty filter expression: selection by region only, SAM output -->
99 <param name="input" value="c1215_fixmate.bam" ftype="bam" />
100 <param name="format" value="sam" />
101 <param name="selector" value="filter" />
102 <param name="query" value="" />
103 <param name="region" value="AL096846:1000-5000" />
104 <output name="outfile" file="c1215_fixmate_region-filtered.sam" ftype="sam" lines_diff="2" />
105 </test>
106 <test> <!-- down-sample to a fraction of 0.1 with seed 123, BAM output -->
107 <param name="input" value="ex1_header.sam" ftype="sam" />
108 <param name="format" value="bam" />
109 <param name="selector" value="sample" />
110 <param name="seed" value="123" />
111 <param name="fraction" value="0.1" />
112 <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" />
113 </test>
114 <test> <!-- down-sample to a fraction of 0.1 with seed 123, SAM output -->
115 <param name="input" value="c1215_fixmate.bam" ftype="bam" />
116 <param name="format" value="sam" />
117 <param name="selector" value="sample" />
118 <param name="seed" value="123" />
119 <param name="fraction" value="0.1" />
120 <output name="outfile" file="c1215_fixmate_sampled.sam" ftype="sam" lines_diff="2" />
121 </test>
122 </tests>
123 <help>
124 Sambamba Filter Overview
125 ========================
126
127 This tool uses the sambamba_ ``view`` command to filter BAM/SAM on flags, fields, tags, and region. The input is a SAM or BAM file.
128
129
130 Filter Syntax
131 =============
132
133 A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed.
134
135 A *basic condition* is a condition on a single record field, tag, or flag.
136
137 You can use the ``==``, ``!=``, ``&gt;``, ``&lt;``, ``&gt;=``, and ``&lt;=`` comparison operators for both integers and strings.
138
139 Strings are delimited by single quotes; if you need a single quote inside a string, escape it with a backslash (``\'``).
140
141 Examples of filter expressions
142 ------------------------------
143
144 ::
145
146 mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7)
147 read_name == 'abc\'def'
148
149 Basic conditions for flags
150 --------------------------
151
152 The following flag names are recognized:
153 * paired
154 * proper_pair
155 * unmapped
156 * mate_is_unmapped
157 * reverse_strand
158 * mate_is_reverse_strand
159 * first_of_pair
160 * second_of_pair
161 * secondary_alignment
162 * failed_quality_control
163 * duplicate
164
165 Example
166 ~~~~~~~
167
168 ::
169
170 not (unmapped or mate_is_unmapped) and first_of_pair
171
172 Basic conditions for fields
173 ---------------------------
174
175 Conditions for integer and string fields are supported.
176
177 List of integer fields:
178 * ref_id
179 * position
180 * mapping_quality
181 * sequence_length
182 * mate_ref_id
183 * mate_position
184 * template_length
185
186
187 List of string fields:
188 * read_name
189 * sequence
190 * cigar
191
192
193 Example
194 ~~~~~~~
195
196 ::
197
198 ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80
199
200 Basic conditions for tags
201 -------------------------
202
203 Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type.
204
205 To filter on the presence of a particular tag, you can use the special ``null`` value.
206
207 Example
208 ~~~~~~~
209
210 ::
211
212 [RG] != null and [AM] == 37
213
214 Down-sampling
215 =============
216
217 The tool is using the following sambamba command line for sampling:
218
219 ::
220
221 sambamba view -h -t &lt;number of Galaxy threads configured in job_conf.xml&gt; -f &lt;bam or sam&gt;
222 --subsampling-seed=&lt;an integer&gt;
223 -s &lt;a real number between 0 and 1&gt; -o &lt;bam or sam output&gt; input_file
224
225 Warnings
226 --------
227
228 The tool cannot down-sample to a user-specified **number of lines**, because sambamba does not
229 expose this functionality. For performance reasons, we decided not to add it to this wrapper.
230
231 If you down-sample a dataset that has *already been down-sampled* with this tool, it is
232 important that you choose **another seed** for randomisation. Otherwise, the new subsample
233 has been reported not to match the requested fraction.
234
235
236 .. _sambamba: http://github.com/lomereiter/sambamba
237
238 </help>
239 <citations> <!-- one literature reference for the tool, given as a DOI -->
240 <citation type="doi">10.1093/bioinformatics/btv098</citation>
241 </citations>
242 </tool>