comparison samtools_view.xml @ 0:00d1f08bdcdc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit aa97817f89e4ac6e7bb2326a51ecbc91830de5df
author iuc
date Wed, 19 Sep 2018 08:21:51 -0400
parents
children 6ade12d94f9a
comparison
equal deleted inserted replaced
-1:000000000000 0:00d1f08bdcdc
1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@">
2 <description>reformat, filter, or subsample</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <expand macro="version_command"/>
9 <command><![CDATA[
10 @ADDTHREADS@
11 ## prepare reference data
12 @PREPARE_FASTA_IDX@
13 @PREPARE_IDX@
14
15 ##call samtools view
16 samtools view
17 -@ \$addthreads
18
19 #if $outtype == 'count':
20 -c
21 #elif $outtype == 'bam':
22 -b
23 #elif $outtype == 'cram':
24 -C
25 #end if
26
27 ## filter options: BED regions (-L), read groups (-r/-R), MAPQ (-q), library (-l), CIGAR bases (-m), flags (-f/-F/-G) and read tags (-x); the textual region filter is appended after the input file
28 #if $cond_filter.select_filter == 'yes':
29 #if $cond_filter.cond_region.select_region == 'bed' and str( $cond_filter.cond_region.bedfile ) != 'None'
30 -L '$cond_filter.cond_region.bedfile'
31 #end if
32 #if $cond_filter.cond_rg.select_rg == 'text':
33 -r '$cond_filter.cond_rg.readgr'
34 #else if $cond_filter.cond_rg.select_rg == 'file':
35 -R '$cond_filter.cond_rg.rgfile'
36 #end if
37 #if str($cond_filter.quality) != '' and int( $cond_filter.quality ) > 0
38 -q $cond_filter.quality
39 #end if
40 #if str( $cond_filter.library ) != ''
41 -l '$cond_filter.library'
42 #end if
43 #if str( $cond_filter.cigarcons ) != ''
44 -m $cond_filter.cigarcons
45 #end if
46 #if str($cond_filter.inclusive_filter) != 'None':
47 #set $filter = $cond_filter.inclusive_filter
48 @FLAGS@
49 -f $flags
50 #end if
51 #if str($cond_filter.exclusive_filter) != 'None':
52 #set $filter = $cond_filter.exclusive_filter
53 @FLAGS@
54 -F $flags
55 #end if
56 #if str($cond_filter.exclusive_filter_all) != 'None':
57 #set $filter = $cond_filter.exclusive_filter_all
58 @FLAGS@
59 -G $flags
60 #end if
61 #for $i, $s in enumerate($cond_filter.readtags)
62 -x '${s.readtag}'
63 #end for
64 #end if
65 #if $cond_subsample.select_subsample == 'yes':
## samtools -s takes SEED.FRACTION; the fraction digits are rendered in fixed-point notation because str() of a float below 1e-4 yields e.g. '1e-05', which has no '.' to split on
66 #set fraction=(('%.10f' % float(str($cond_subsample.subsample))).split('.')[1].rstrip('0') or '0')
67 #if str($cond_subsample.seed) == '':
68 -s "\${RANDOM}".$fraction
69 #else
70 -s $cond_subsample.seed.$fraction
71 #end if
72 #end if
73
74 ## output options
75 $adv_output.header
76 $adv_output.collapsecigar
77 #if $adv_output.outputpassing == 'yes'
78 -U inv_outfile
79 #end if
80 -o outfile
81
82 ## additional reference data
83 #if $reffa!=None:
84 -T '$reffa'
85 -t '$reffai'
86 #else:
87 --output-fmt-option no_ref
88 #end if
89
90 infile
91
92 ## region filter need to be at the end
93 #if $cond_filter.select_filter == 'yes' and $cond_filter.cond_region.select_region == 'text':
94 '$cond_filter.cond_region.regions'
95 #end if
96
97 ## if data is converted from an unsorted file (SAM, CRAM, or unsorted BAM) to BAM
98 ## then sort the output by coordinate,
99 #if not $input.is_of_type('bam') and $outtype == 'bam':
100 && samtools sort
101 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T sorttemp
102 -O bam
103 -o 'tmpsam'
104 outfile
105 && mv tmpsam outfile
106 #if $adv_output.outputpassing == 'yes':
107 && samtools sort
108 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T sorttemp
109 -O bam
110 -o 'tmpsam'
111 inv_outfile
112 && mv tmpsam inv_outfile
113 #end if
114 #end if
115 ]]></command>
116 <inputs>
117 <!-- note unsorted bam includes all derived bam types (including bam which is sorted) -->
118 <param name="input" format="sam,unsorted.bam,cram" type="data" label="SAM/BAM/CRAM data set" />
119 <param name="outtype" type="select" argument="-b/-C/-c" label="Output type">
120 <help>Select output type. In case of counts only the total number of alignments is returned. All filters are taken into account</help>
121 <option value="sam">SAM</option>
122 <option value="bam">BAM (-b)</option>
123 <option value="cram">CRAM (-C)</option>
124 <option value="count">Count of alignments (-c)</option>
125 </param>
126 <conditional name="cond_filter">
127 <param name="select_filter" type="select" label="Filter alignment">
128 <option value="yes">Yes</option>
129 <option value="no" selected="True">No</option>
130 </param>
131 <when value="yes">
132 <conditional name="cond_region"> <!-- region restriction: none, a free-text region list, or intervals from a BED file (-L) -->
133 <param name="select_region" type="select" label="Filter by regions">
134 <option value="no" selected="True">No</option>
135 <option value="text">Manually specify regions</option>
136 <option value="bed">Regions from BED file</option>
137 </param>
138 <when value="no"/>
139 <when value="text">
140 <param name="regions" type="text" optional="false" label="Filter by regions" help="One or more space-separated region specifications to restrict output to only those alignments which overlap the specified region(s)."/>
141 </when>
142 <when value="bed">
143 <param name="bedfile" format="bed" argument="-L" optional="false" type="data" label="Filter by intervals in a bed file" help="Only output alignments overlapping the intervals in the input bed file." />
144 </when>
145 </conditional>
146 <conditional name="cond_rg">
147 <param name="select_rg" type="select" label="Filter by readgroup">
148 <option value="no" selected="True">No</option>
149 <option value="text">Single read group </option>
150 <option value="file">Read groups from file</option>
151 </param>
152 <when value="no"/>
153 <when value="text">
154 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." />
155 </when>
156 <when value="file">
157 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." />
158 </when>
159 </conditional>
160 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." />
161 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" />
162 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." />
163 <param name="inclusive_filter" argument="-f" type="select" multiple="True" label="Require that these flags are set">
164 <expand macro="flag_options" />
165 </param>
166 <param name="exclusive_filter" argument="-F" type="select" multiple="True" label="Exclude reads with any of the following flags set">
167 <expand macro="flag_options" />
168 </param>
169 <param name="exclusive_filter_all" argument="-G" type="select" multiple="True" label="Exclude reads with all of the following flags set">
170 <expand macro="flag_options" />
171 </param>
172 <!-- TODO could also make this a text field (comma/space separated) -->
173 <repeat name="readtags" title="Filter by read tags">
174 <param name="readtag" type="text" argument="-x" label="Filter by read tag" help="Read tag to exclude from output."/>
175 </repeat>
176 </when>
177 <when value="no"/>
178 </conditional>
179 <conditional name="cond_subsample">
180 <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
181 <option value="yes">Yes</option>
182 <option value="no" selected="True">No</option>
183 </param>
184 <when value="yes">
185 <param name="subsample" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
186 <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
187 </when>
188 <when value="no"/>
189 </conditional>
190 <conditional name="addref_cond"> <!-- optional reference: none, a FASTA from the history, or a built-in genome from the fasta_indexes data table -->
191 <param name="addref_select" type="select" label="Reference data">
192 <help>Reference data as fasta(.gz). Required for SAM input without @SQ headers and useful/required for writing CRAM output (see help).</help>
193 <option value="no" selected="True">No, see help (--output-fmt-option no_ref)</option>
194 <option value="history">Use a genome/index from the history (-t/-T)</option>
195 <option value="cached">Use a built-in genome (-t/-T)</option>
196 </param>
197 <when value="no"/>
198 <when value="history">
199 <param name="ref" format="fasta,fasta.gz" optional="true" type="data" label="Reference file"/>
200 </when>
201 <when value="cached">
202 <param name="ref" type="select" label="Using reference genome">
203 <options from_data_table="fasta_indexes">
204 <filter column="1" key="dbkey" ref="input" type="data_meta" />
205 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
206 </options>
207 </param>
208 </when>
209 </conditional>
210 <section name="adv_output" title="Output Options" expanded="false">
211 <param name="header" type="select" argument="-h/-H" label="Header options">
212 <option value="-h" selected="True">Include header in SAM output (-h)</option>
213 <option value="-H">Return header only (-H)</option>
214 <option value="">Exclude header</option>
215 </param>
216 <param name="outputpassing" type="boolean" argument="-U" truevalue="yes" falsevalue="no" checked="false" label="Output alignments not passing the filter" help="Write alignments that are not selected by the various filter options to an extra data set. When this option is used, all alignments (or all alignments intersecting the regions specified) are written to either the output data set or this extra output data set, but never both." />
217 <param name="collapsecigar" type="boolean" argument="-B" truevalue="-B" falsevalue="" checked="false" label="Collapse backward CIGAR operation" help="Collapse the backward CIGAR operation." />
218 </section>
219 </inputs>
220 <outputs>
221 <!-- TODO do I need an action for dbkey? -->
222 <data name="outputsam" format_source="input" from_work_dir="outfile" label="${tool.name} on ${on_string}: filtered alignments">
223 <filter>outtype != 'count'</filter>
224 <!-- TODO is change_format deprecated? how can I modify the type for the different bam types? -->
225 <change_format>
226 <when input="outtype" value="sam" format="sam" />
227 <when input="outtype" value="bam" format="bam" />
228 <when input="outtype" value="cram" format="cram" />
229 </change_format>
230 </data>
231 <data name="invoutputsam" format_source="input" from_work_dir="inv_outfile" label="${tool.name} on ${on_string}: unfiltered alignments">
232 <filter>adv_output['outputpassing'] == 'yes' and outtype != 'count'</filter>
233 <change_format>
234 <when input="outtype" value="sam" format="sam" />
235 <when input="outtype" value="bam" format="bam" />
236 <when input="outtype" value="cram" format="cram" />
237 </change_format>
238 </data>
239 <data name="outputcnt" format="txt" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts"> <!-- the command always passes "-o outfile", also in count mode (-c), so the count is collected from that file -->
240 <filter>outtype == 'count'</filter>
241 </data>
242 </outputs>
243 <tests>
244 <!-- sam to bam (copied from the sam_to_bam tool) -->
245 <test>
246 <param name="input" ftype="sam" value="sam_to_bam_in1.sam" />
247 <param name="outtype" value="bam" />
248 <conditional name="addref_cond">
249 <param name="addref_select" value="history" />
250 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" />
251 </conditional>
252 <output name="outputsam" ftype="bam" file="sam_to_bam_out1.bam" />
253 </test>
254 <test>
255 <param name="input" ftype="sam" dbkey="equCab2" value="sam_to_bam_in1.sam" />
256 <param name="outtype" value="bam" />
257 <conditional name="addref_cond">
258 <param name="addref_select" value="cached" />
259 <param name="ref" value="equCab2chrM" />
260 </conditional>
261 <output name="outputsam" ftype="bam" file="sam_to_bam_out2.bam" />
262 </test>
263 <test>
264 <param name="input" ftype="sam" value="sam_to_bam_noheader_in2.sam" />
265 <param name="outtype" value="bam" />
266 <conditional name="addref_cond">
267 <param name="addref_select" value="history" />
268 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" />
269 </conditional>
270 <output name="outputsam" ftype="bam" file="sam_to_bam_out3.bam" />
271 </test>
272 <!-- bam to cram + region filter (adapted from bam_to_cram tool)-->
273 <test>
274 <param name="input" value="test.bam" ftype="bam" />
275 <param name="outtype" value="cram" />
276 <conditional name="addref_cond">
277 <param name="addref_select" value="history" />
278 <param name="ref" value="test.fa" />
279 </conditional>
280 <conditional name="cond_filter">
281 <param name="select_filter" value="yes" />
282 <conditional name="cond_region">
283 <param name="select_region" value="no"/>
284 </conditional>
285 </conditional>
286 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
287 </test>
288 <test>
289 <param name="input" value="test.sam" ftype="sam" />
290 <param name="outtype" value="cram" />
291 <conditional name="addref_cond">
292 <param name="addref_select" value="history" />
293 <param name="ref" value="test.fa" />
294 </conditional>
295 <conditional name="cond_filter">
296 <param name="select_filter" value="yes" />
297 <conditional name="cond_region">
298 <param name="select_region" value="no"/>
299 </conditional>
300 </conditional>
301 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
302 </test>
303 <test>
304 <param name="input" value="test.bam" ftype="bam" />
305 <param name="outtype" value="cram" />
306 <conditional name="addref_cond">
307 <param name="addref_select" value="history" />
308 <param name="ref" value="test.fa" />
309 </conditional>
310 <conditional name="cond_filter">
311 <param name="select_filter" value="yes" />
312 <conditional name="cond_region">
313 <param name="select_region" value="text"/>
314 <param name="regions" value="CHROMOSOME_I" />
315 </conditional>
316 </conditional>
317 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
318 </test>
319 <!-- bam to sam + header options (adapted from bam_to_sam tool)-->
320 <test>
321 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
322 <param name="outtype" value="sam" />
323 <param name="header" value="-h" />
324 <output file="bam_to_sam_out1.sam" ftype="sam" name="outputsam" />
325 </test>
326 <test>
327 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
328 <param name="outtype" value="sam" />
329 <param name="header" value="-H" />
330 <output file="bam_to_sam_out2.sam" ftype="sam" name="outputsam" />
331 </test>
332 <test>
333 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
334 <param name="outtype" value="sam" />
335 <param name="header" value="" />
336 <output file="bam_to_sam_out3.sam" ftype="sam" name="outputsam" />
337 </test>
338 <!-- cram to bam + region (adapted from cram_to_bam tool)-->
339 <test>
340 <param name="input" value="test.cram" ftype="cram" />
341 <param name="outtype" value="bam" />
342 <conditional name="addref_cond">
343 <param name="addref_select" value="history" />
344 <param name="ref" value="test.fa" />
345 </conditional>
346 <output name="outputsam" file="test.bam" ftype="bam" />
347 </test>
348 <test> <!-- cram to bam with a reference from the history; meant to add a region restriction (see notes below) -->
349 <param name="input" value="test.cram" ftype="cram" />
350 <param name="outtype" value="bam" />
351 <conditional name="addref_cond">
352 <param name="addref_select" value="history" />
353 <param name="ref" value="test.fa" />
354 </conditional>
355 <param name="target_region" value="region" /> <!-- NOTE(review): no input named target_region is declared in the inputs section; presumably a leftover from the cram_to_bam tool, so this test may not exercise region filtering (confirm) -->
356 <param name="region_string" value="CHROMOSOME_I" /> <!-- NOTE(review): region_string is likewise undeclared; the declared equivalent appears to be cond_filter / cond_region / regions (verify) -->
357 <output name="outputsam" file="test.bam" ftype="bam" />
358 </test>
359 <test> <!-- cram to bam with a reference from the history; meant to add a BED restriction (see notes below) -->
360 <param name="input" value="test.cram" ftype="cram" />
361 <param name="outtype" value="bam" />
362 <conditional name="addref_cond">
363 <param name="addref_select" value="history" />
364 <param name="ref" value="test.fa" />
365 </conditional>
366 <param name="target_region" value="regions_bed_file" /> <!-- NOTE(review): no input named target_region is declared in the inputs section; presumably a leftover from the cram_to_bam tool (confirm) -->
367 <param name="regions_bed_file" value="test.bed" ftype="bed" /> <!-- NOTE(review): regions_bed_file is likewise undeclared; the declared equivalent appears to be cond_filter / cond_region / bedfile, so the -L path may be untested here (verify) -->
368
369 <output name="outputsam" file="test.bam" ftype="bam" />
370 </test>
371 <test> <!-- cram to bam using a built-in (cached) reference selected via the dbkey of the input -->
372 <param name="input" value="test2.cram" dbkey="equCab2" ftype="cram" />
373 <param name="outtype" value="bam" />
374 <conditional name="addref_cond">
375 <param name="addref_select" value="cached" />
376 <param name="ref" value="equCab2chrM" />
377 </conditional>
378 <conditional name="cond_region"> <!-- NOTE(review): cond_region is declared inside cond_filter in the inputs section but is used here at top level without select_filter being set; verify that this is intended -->
379 <param name="select_region" value="no"/>
380 </conditional>
381 <output name="outputsam" file="sam_to_bam_out2.bam" ftype="bam" />
382 </test>
383 </tests>
384 <help>
385 **What it does**
386
387 Samtools view can:
388
389 1. filter alignments according to various criteria
390 2. convert between alignment formats (SAM, BAM, CRAM)
391
392 With no options or regions specified, prints all alignments in the specified input alignment file (in SAM, BAM, or CRAM format) to standard output in SAM format (with no header).
393
394 **Alignment format conversion**
395
396 Inputs of type SAM, BAM, and CRAM are accepted and can be converted to each of these formats (alternatively alignment counts can be computed) by selecting the appropriate "Output type".
397
398 .. class:: infomark
399
400 samtools view allows specifying a reference sequence. This is required for SAM input with missing @SQ headers (which include sequence names, length, md5, etc.) and useful (and sometimes necessary) for CRAM input and output. The use of reference sequences in the CRAM format is detailed below.
401 CRAM is (primarily) a reference-based compressed format, i.e. only differences between the stored sequences and the reference are stored. As a consequence the reference that was used to generate the alignments is always needed in order to interpret the alignments (a checksum stored in the CRAM file is used to verify that only the correct sequence can be used), i.e. the CRAM file on its own is not useful by default. This allows for more space-efficient storage compared to BAM. But it is also possible to use CRAM without a reference, with the disadvantage that the reference is stored explicitly (as in SAM and BAM).
402
403 The Galaxy tool allows both possibilities using the "reference data" option:
404
405 - the default ("no reference")
406 - reference data can be chosen from history or built in genomes can be used
407
408 The reference data is required for reading/writing reference-based CRAM.
409
410 **Filtering alignments**
411
412 samtools view allows filtering alignments based on various criteria, i.e. the output will contain only alignments matching all criteria (an additional output containing the remaining alignments can also be created, see "Output alignments not passing the filter" in "output options"): e.g. by regions (see below), alignment quality (see below), and tags or flags set in the alignments.
413
414
415
416 **Filtering by regions**
417
418 You may specify one or more space-separated region specifications after the input filename to restrict output to only those alignments which overlap the specified region(s). Use of region specifications requires a coordinate-sorted and indexed input file (in BAM or CRAM format).
419
420 Regions can be specified as: RNAME[:STARTPOS[-ENDPOS]] and all position coordinates are 1-based.
421
422 Important note: when multiple regions are given, some alignments may be output multiple times if they overlap more than one of the specified regions.
423
424 Examples of region specifications:
425
426 - chr1 Output all alignments mapped to the reference sequence named 'chr1' (i.e. @SQ SN:chr1).
427 - chr2:1000000 The region on chr2 beginning at base position 1,000,000 and ending at the end of the chromosome.
428 - chr3:1000-2000 The 1001bp region on chr3 beginning at base position 1,000 and ending at base position 2,000 (including both end positions).
429 - '*' Output the unmapped reads at the end of the file. (This does not include any unmapped reads placed on a reference sequence alongside their mapped mates.)
430 - . Output all alignments. (Mostly unnecessary as not specifying a region at all has the same effect.)
431
432 **Filtering by quality**
433
434 This filters based on the MAPQ column of the SAM format, which gives an estimate of the correct placement of the alignment. Note that aligners do not follow a consistent definition.
435
436 The -x, -B, and -s options modify the data which is contained in each alignment.
437 </help>
438 <expand macro="citations"/>
439 </tool>