comparison samtools_view.xml @ 7:b01db2684fa5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit 6692949aa694102abb64c67d46196a822fcb61bf"
author iuc
date Tue, 21 Jan 2020 07:40:18 -0500
parents ff313de5f7f4
children bf328cec6a42
comparison
equal deleted inserted replaced
6:ff313de5f7f4 7:b01db2684fa5
1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@"> 1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy1">
2 <description>reformat, filter, or subsample</description> 2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
<!-- @REF_DATA@: command-line fragment expanded in both "samtools view" calls of the command template. It adds the reference options -T and -t only when $reffa is not None. NOTE(review): $reffa and $reffai are not assigned anywhere in this file; presumably the @PREPARE_FASTA_IDX@ macro sets them, to be confirmed in macros.xml. -->
5 <token name="@REF_DATA@">
6 ## additional reference data
7 #if $reffa!=None:
8 -T '$reffa'
9 -t '$reffai'
10 #end if
11 </token>
<!-- read_output_formatting: inputs shared by every output_options branch that reports reads. The yield slot lets the expanding branch place its own parameters (for example complementary_output) ahead of the adv_output section. The section itself offers a repeatable read tag to strip (-x) and the -B switch; the command template reads both from adv_output. -->
12 <xml name="read_output_formatting">
13 <yield />
14 <section name="adv_output" title="Read Reformatting Options" expanded="false">
15 <repeat name="readtags" title="Strip read tags from output">
16 <param name="readtag" type="text" argument="-x" label="Read tag to strip" help="Read tag to exclude from output."/>
17 </repeat>
18 <param name="collapsecigar" type="boolean" argument="-B" truevalue="-B" falsevalue="" checked="false" label="Collapse backward CIGAR operation" help="Collapse the backward CIGAR operation." />
19 </section>
20 </xml>
<!-- output_format_selector: SAM/BAM/CRAM chooser with BAM preselected. Each branch carries a hidden fmtopt parameter holding the matching samtools flag (empty for SAM, -b for BAM, -C for CRAM), which the command template inserts verbatim. The yield slot exists only in the SAM branch, so parameters supplied by the expanding element (with_header) are available for SAM output only. -->
21 <xml name="output_format_selector">
22 <conditional name="output_format">
23 <param name="oformat" type="select" label="Output format">
24 <option value="sam">SAM</option>
25 <option value="bam" selected="True">BAM (-b)</option>
26 <option value="cram">CRAM (-C)</option>
27 </param>
28 <when value="sam">
29 <yield />
30 <param name="fmtopt" type="hidden" value="" />
31 </when>
32 <when value="bam">
33 <param name="fmtopt" type="hidden" value="-b" />
34 </when>
35 <when value="cram">
36 <param name="fmtopt" type="hidden" value="-C" />
37 </when>
38 </conditional>
39 </xml>
5 </macros> 40 </macros>
6 <expand macro="requirements"> 41 <expand macro="requirements">
7 <requirement type="package">gawk</requirement> 42 <requirement type="package">gawk</requirement>
8 </expand> 43 </expand>
9 <expand macro="stdio"/> 44 <expand macro="stdio"/>
11 <command><![CDATA[ 46 <command><![CDATA[
12 @ADDTHREADS@ 47 @ADDTHREADS@
13 ## prepare reference data 48 ## prepare reference data
14 @PREPARE_FASTA_IDX@ 49 @PREPARE_FASTA_IDX@
15 @PREPARE_IDX@ 50 @PREPARE_IDX@
16 #if $cond_subsample.select_subsample == 'target': 51
17 ##this must be done before the main command because we don't know the total # reads until execution time 52 #if str($mode.outtype) == 'header':
18 #if str($cond_subsample.seed) == '': 53 ## call samtools view and be done
19 #set seed_expr="int(rand() * 32768)" 54 samtools view
20 #else 55 -H ${mode.output_options.output_format.fmtopt} -o outfile
21 #set seed_expr=$cond_subsample.seed 56 @REF_DATA@
57 infile
58 #else:
59 ## are filtering and/or subsampling in effect?
60 #set $with_filtering = False
61 #set $with_subsampling = False
62 #if str($mode.outtype) == 'selected_reads':
63 #set $with_filtering = True
64 #if str($mode.subsample_config.subsampling_mode.select_subsample) != 'fraction' or $mode.subsample_config.subsampling_mode.factor > 1:
65 #set $with_subsampling = True
66 #end if
22 #end if 67 #end if
23 #if $input.is_of_type('sam'): 68
24 sample_fragment=`samtools view -c infile | awk '{s=\$1} END {frac=$cond_subsample.target/s; print(frac < 1 ? "-s " $seed_expr+frac : "")}'` && 69 ## are we producing one or two outputs?
25 #else 70 #if str($mode.outtype) == 'selected_reads' and str($mode.output_options.reads_report_type) != 'count' and str($mode.output_options.complementary_output) == 'yes':
26 sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=$cond_subsample.target/s; print(frac < 1 ? "-s " $seed_expr+frac : "")}'` && 71 #set $with_non_selected_reads_output = True
72 #else:
73 #set $with_non_selected_reads_output = False
74 #end if
75
76 #set $std_filters = ''
77 #set $reg_filters = ''
78 #if $with_filtering:
79 ## build string of all user-configured filter options
80 #if str($mode.filter_config.cond_region.select_region) == 'bed':
## BED intervals are an ordinary option, so they join the other filters
81 #set $std_filters = $std_filters + " -L '%s'" % str($mode.filter_config.cond_region.bedfile)
82 #elif str($mode.filter_config.cond_region.select_region) == 'text':
## The field may hold several whitespace-separated regions. Quote each one
## on its own so that samtools receives one argument per region instead of
## a single argument containing spaces. A single region yields the same
## string as before.
83 #set $reg_filters = "'%s'" % "' '".join(str($mode.filter_config.cond_region.regions).split())
84 #end if
85
86 #if $mode.filter_config.cond_rg.select_rg == 'text':
87 #set $std_filters = $std_filters + " -r '%s'" % str($mode.filter_config.cond_rg.readgr)
88 #else if $mode.filter_config.cond_rg.select_rg == 'file':
89 #set $std_filters = $std_filters + " -R '%s'" % str($mode.filter_config.cond_rg.rgfile)
90 #end if
91 #if str($mode.filter_config.quality) != '' and int($mode.filter_config.quality) > 0:
92 #set $std_filters = $std_filters + " -q %s" % str($mode.filter_config.quality)
93 #end if
94 #if str($mode.filter_config.library):
95 #set $std_filters = $std_filters + " -l '%s'" % str($mode.filter_config.library)
96 #end if
97 #if str($mode.filter_config.cigarcons):
98 #set $std_filters = $std_filters + " -m %s" % str($mode.filter_config.cigarcons)
99 #end if
100 #if str($mode.filter_config.inclusive_filter) != 'None':
101 #set $filter = str($mode.filter_config.inclusive_filter)
102 @FLAGS@
103 #set $std_filters = $std_filters + " -f %s" % str($flags)
104 #end if
105 #if str($mode.filter_config.exclusive_filter) != 'None':
106 #set $filter = str($mode.filter_config.exclusive_filter)
107 @FLAGS@
108 #set $std_filters = $std_filters + " -F %s" % str($flags)
109 #end if
110 #if str($mode.filter_config.exclusive_filter_all) != 'None':
111 #set $filter = str($mode.filter_config.exclusive_filter_all)
112 @FLAGS@
113 #set $std_filters = $std_filters + " -G %s" % str($flags)
114 #end if
27 #end if 115 #end if
28 #end if 116
29 ##call samtools view 117 #if $with_subsampling:
30 samtools view 118 ## handle seed and fraction calculation for subsampling
31 -@ \$addthreads 119 #import random
32 120 #if str($mode.subsample_config.subsampling_mode.seed):
33 #if $outtype == 'count': 121 #set $seed = int($mode.subsample_config.subsampling_mode.seed)
34 -c 122 #else:
35 #elif $outtype == 'bam': 123 #set $seed = random.randrange(32768)
36 -b 124 #end if
37 #elif $outtype == 'cram': 125
38 -C 126 #if $mode.subsample_config.subsampling_mode.select_subsample == 'target':
39 #end if 127 ##this must be done before the main command because we don't know the total # reads until execution time
40 128 #if $input.is_of_type('sam') or $std_filters or $reg_filters:
41 ## filter options (regions filter is the last parameter) 129 ## There is no index or we cannot use it because we are
42 #if $cond_filter.select_filter == 'yes': 130 ## not dealing with all of the reads in the indexed
43 #if $cond_filter.cond_region.select_region == 'bed' and str( $cond_filter.cond_region.bedfile ) != 'None' 131 ## file. We have to do an extra pass over the input to
44 -L '$cond_filter.cond_region.bedfile' 132 ## count the reads to subsample. The count s can be 0 (nothing passes the filters), so awk must not divide by it; ".0" is printed in that case, as it is when the target is not below the count.
133 sample_fragment=`samtools view -c $std_filters infile $reg_filters | awk '{s=\$1} END {frac=(s > 0 ? ${mode.subsample_config.subsampling_mode.target}/s : 1); print(frac < 1 ? $seed+frac : ".0")}'` &&
134 #else:
135 ## We can get the count of reads to subsample using
136 ## an inexpensive call to idxstats. An empty input gives s = 0, so the same guard against dividing by zero applies.
137 sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=(s > 0 ? ${mode.subsample_config.subsampling_mode.target}/s : 1); print(frac < 1 ? $seed+frac : ".0")}'` &&
138 #end if
139 #end if
45 #end if 140 #end if
46 #if $cond_filter.cond_rg.select_rg == 'text': 141
47 -r '$cond_filter.cond_rg.readgr' 142 ## call samtools view
48 #else if $cond_filter.cond_rg.select_rg == 'file': 143 samtools view
49 -R '$cond_filter.cond_rg.rgfile' 144 -@ \$addthreads
145
146 #if str($mode.output_options.reads_report_type) == 'count':
147 -c
148 #else:
149 ${mode.output_options.output_format.fmtopt}
50 #end if 150 #end if
51 #if str($cond_filter.quality) != '' and int( $cond_filter.quality ) > 0 151
52 -q $cond_filter.quality 152 ## filter options (except regions filter, which is the last parameter)
153 $std_filters
154
155 #if $with_subsampling:
156 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target":
157 ##this is calculated at execution time before the main samtools command
158 -s \${sample_fragment}
159 #else:
160 #set $fraction = $seed + 1 / float($mode.subsample_config.subsampling_mode.factor)
161 -s $fraction
162 #end if
53 #end if 163 #end if
54 #if str( $cond_filter.library ) != '' 164
55 -l '$cond_filter.library' 165 ## output options
166 #if str($mode.output_options.reads_report_type) == 'count':
167 -o outfile
168 #else:
169 ## output options
170 #if str($mode.output_options.output_format.oformat) == 'sam':
171 ${mode.output_options.output_format.with_header}
172 #end if
173 ${mode.output_options.adv_output.collapsecigar}
174 #for $s in $mode.output_options.adv_output.readtags:
175 -x '${s.readtag}'
176 #end for
177 #if str($mode.output_options.reads_report_type) == 'retained'
178 -o outfile
179 #if $with_non_selected_reads_output:
180 -U inv_outfile
181 #end if
182 #else:
183 -U outfile
184 #if $with_non_selected_reads_output:
185 -o inv_outfile
186 #else:
187 -o /dev/null
188 #end if
189 #end if
190
191 ##currently reference based CRAM is disabled (see https://github.com/galaxyproject/tools-iuc/pull/1963)
192 #if $mode.output_options.output_format.oformat == 'cram':
193 --output-fmt-option no_ref
194 #end if
56 #end if 195 #end if
57 #if str( $cond_filter.cigarcons ) != '' 196
58 -m $cond_filter.cigarcons 197 @REF_DATA@
198
199 infile
200
201 ## region filter needs to be at the end
202 $reg_filters
203
204 #if str($mode.output_options.reads_report_type) != 'count':
205 ## if data is converted from an unsorted file (SAM, CRAM, or unsorted BAM) to BAM
206 ## then sort the output by coordinate,
207 #if not $input.is_of_type('bam') and str($mode.output_options.output_format.oformat) == 'bam':
208 && samtools sort
209 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}"
210 -O bam
211 -o tmpsam
212 outfile
213 && mv tmpsam outfile
214 #if $with_non_selected_reads_output:
215 && samtools sort
216 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}"
217 -O bam
218 -o tmpsam
219 inv_outfile
220 && mv tmpsam inv_outfile
221 #end if
222 #end if
59 #end if 223 #end if
60 #if str($cond_filter.inclusive_filter) != 'None': 224
61 #set $filter = $cond_filter.inclusive_filter 225 ##end of reads and count-specific section
62 @FLAGS@
63 -f $flags
64 #end if
65 #if str($cond_filter.exclusive_filter) != 'None':
66 #set $filter = $cond_filter.exclusive_filter
67 @FLAGS@
68 -F $flags
69 #end if
70 #if str($cond_filter.exclusive_filter_all) != 'None':
71 #set $filter = $cond_filter.exclusive_filter_all
72 @FLAGS@
73 -G $flags
74 #end if
75 #for $i, $s in enumerate($cond_filter.readtags)
76 -x '${s.readtag}'
77 #end for
78 #end if
79 #if $cond_subsample.select_subsample != 'no':
80 #if $cond_subsample.select_subsample == "target":
81 ##this is calculated at execution time before the main samtools command
82 "\${sample_fragment}"
83 #else
84 #set fraction=str($cond_subsample.fraction).split('.')[1]
85 #if str($cond_subsample.seed) == '':
86 -s "\${RANDOM}".$fraction
87 #else
88 -s $cond_subsample.seed.$fraction
89 #end if
90 #end if
91 #end if
92
93 ## output options
94 $adv_output.header
95 $adv_output.collapsecigar
96 #if $adv_output.outputpassing == 'yes'
97 -U inv_outfile
98 #end if
99 -o outfile
100
101 ## additional reference data
102 #if $reffa!=None:
103 -T '$reffa'
104 -t '$reffai'
105 #end if
106
107 ##currently reference based CRAM is disabled (see https://github.com/galaxyproject/tools-iuc/pull/1963)
108 #if $outtype=='cram':
109 --output-fmt-option no_ref
110 #end if
111
112 infile
113
114 ## region filter need to be at the end
115 #if $cond_filter.select_filter == 'yes' and $cond_filter.cond_region.select_region == 'text':
116 '$cond_filter.cond_region.regions'
117 #end if
118
119 ## if data is converted from an unsorted file (SAM, CRAM, or unsorted BAM) to BAM
120 ## then sort the output by coordinate,
121 #if not $input.is_of_type('bam') and $outtype == 'bam':
122 && samtools sort
123 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}"
124 -O bam
125 -o 'tmpsam'
126 outfile
127 && mv tmpsam outfile
128 #if $adv_output.outputpassing == 'yes':
129 && samtools sort
130 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}"
131 -O bam
132 -o 'tmpsam'
133 inv_outfile
134 && mv tmpsam inv_outfile
135 #end if
136 #end if 226 #end if
137 ]]></command> 227 ]]></command>
138 <inputs> 228 <inputs>
139 <!-- note unsorted bam includes all derived bam types (including bam which is sorted) --> 229 <!-- note unsorted bam includes all derived bam types (including bam which is sorted) -->
140 <param name="input" format="sam,unsorted.bam,cram" type="data" label="SAM/BAM/CRAM data set" /> 230 <param name="input" format="sam,unsorted.bam,cram" type="data" label="SAM/BAM/CRAM data set" />
141 <param name="outtype" type="select" argument="-b/-C/-c" label="Output type"> 231 <conditional name="mode">
142 <help>Select output type. In case of counts only the total number of alignments is returned. All filters are taken into account</help> 232 <param name="outtype" type="select" label="What would you like to look at?">
143 <option value="sam">SAM</option> 233 <option value="all_reads">All reads in the input dataset</option>
144 <option value="bam">BAM (-b)</option> 234 <option value="selected_reads">A filtered/subsampled selection of reads</option>
145 <option value="cram">CRAM (-C)</option> 235 <option value="header">Just the input header (-H)</option>
146 <option value="count">Count of alignments (-c)</option>
147 </param>
148 <conditional name="cond_filter">
149 <param name="select_filter" type="select" label="Filter alignment">
150 <option value="yes">Yes</option>
151 <option value="no" selected="True">No</option>
152 </param> 236 </param>
153 <when value="yes"> 237 <when value="all_reads">
154 <conditional name="cond_region"> 238 <conditional name="output_options">
155 <param name="select_region" type="select" label="Filter by regions"> 239 <param name="reads_report_type" type="select" label="What would you like to have reported?">
156 <option value="no" selected="True">No</option> 240 <option value="retained">The actual reads</option>
157 <option value="text">Manualy specify regions</option> 241 <option value="count">The count of reads (-c)</option>
158 <option value="bed">Regions from BED file</option>
159 </param> 242 </param>
160 <when value="no"/> 243 <when value="retained">
161 <when value="text"> 244 <expand macro="read_output_formatting" />
162 <param name="regions" type="text" optional="false" label="Filter by regions" help="One or more space-separated region specifications to restrict output to only those alignments which overlap the specified region(s)."/> 245 <expand macro="output_format_selector">
246 <param name="with_header" type="boolean" truevalue="-h" falsevalue="" checked="True"
247 label="Include SAM header in the output?" />
248 </expand>
163 </when> 249 </when>
164 <when value="bed"> 250 <when value="count" />
165 <param name="bedfile" format="bed" argument="-L" optional="false" type="data" label="Filter by intervals in a bed file" help="Only output alignments overlapping the intervals in the input bed file." /> 251 </conditional>
252 </when>
253 <when value="selected_reads">
254 <section name="filter_config" title="Configure filters" expanded="false">
<!-- cond_region: optional restriction to genomic regions. Regions typed as text are placed after the input file by the command template, while a BED dataset is passed with -L. -->
255 <conditional name="cond_region">
256 <param name="select_region" type="select" label="Filter by regions">
257 <option value="no" selected="True">No</option>
258 <option value="text">Manually specify regions</option>
259 <option value="bed">Regions from BED file</option>
260 </param>
261 <when value="no"/>
262 <when value="text">
263 <param name="regions" type="text" optional="false" label="Filter by regions" help="One or more space-separated region specifications to restrict output to only those alignments which overlap the specified region(s)."/>
264 </when>
265 <when value="bed">
266 <param name="bedfile" format="bed" argument="-L" optional="false" type="data" label="Filter by intervals in a bed file" help="Only output alignments overlapping the intervals in the input bed file." />
267 </when>
268 </conditional>
269 <conditional name="cond_rg">
270 <param name="select_rg" type="select" label="Filter by readgroup">
271 <option value="no" selected="True">No</option>
272 <option value="text">Single read group </option>
273 <option value="file">Read groups from file</option>
274 </param>
275 <when value="no"/>
276 <when value="text">
277 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." />
278 </when>
279 <when value="file">
280 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." />
281 </when>
282 </conditional>
283 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." />
284 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" />
285 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." />
286 <param name="inclusive_filter" argument="-f" type="select" multiple="True" label="Require that these flags are set">
287 <expand macro="flag_options" />
288 </param>
289 <param name="exclusive_filter" argument="-F" type="select" multiple="True" label="Exclude reads with any of the following flags set">
290 <expand macro="flag_options" />
291 </param>
292 <param name="exclusive_filter_all" argument="-G" type="select" multiple="True" label="Exclude reads with all of the following flags set">
293 <expand macro="flag_options" />
294 </param>
295 </section>
<!-- subsample_config: settings behind the -s option. In fraction mode the command template subsamples only when factor is greater than 1 and then passes seed + 1/factor. In target mode the fraction is worked out at run time from a read count taken just before the main samtools call. NOTE(review): the seed_input macro is defined outside this file; the template reads a parameter named seed from this conditional, to be confirmed in macros.xml. -->
296 <section name="subsample_config" title="Configure subsampling" expanded="false">
297 <conditional name="subsampling_mode">
298 <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
299 <option value="fraction">Specify a downsampling factor</option>
300 <option value="target">Specify a target # of reads</option>
301 </param>
302 <when value="fraction">
303 <param name="factor" type="float" optional="False" value="1" min="1" label="Downsampling factor" help="The factor by which to downsample the input reads. A fraction of approx. 1/factor of the reads will be kept (default: 1 = no downsampling)." />
304 <expand macro="seed_input" />
305 </when>
306 <when value="target">
307 <param name="target" type="integer" optional="False" min="0" value="" label="Target # of reads" help="Sets the approx. target number of reads to subsample." />
308 <expand macro="seed_input" />
309 </when>
310 </conditional>
311 </section>
312 <conditional name="output_options">
313 <param name="reads_report_type" type="select"
314 label="What would you like to have reported?"
315 help="Hint: To invert all of the filtering/subsampling logic configured above, choose 'Reads dropped during filtering and subsampling'.">
316 <option value="retained">All reads retained after filtering and subsampling</option>
317 <option value="dropped">Reads dropped during filtering and subsampling</option>
318 <option value="count">The count of retained reads (-c)</option>
319 </param>
320 <when value="retained">
321 <expand macro="read_output_formatting">
322 <param name="complementary_output" type="boolean" truevalue="yes" falsevalue="no" checked="false"
323 label="Produce extra dataset with dropped reads?" />
324 </expand>
325 <expand macro="output_format_selector">
326 <param name="with_header" type="boolean" truevalue="-h" falsevalue="" checked="True"
327 label="Include SAM header in the output?" />
328 </expand>
166 </when> 329 </when>
167 </conditional> 330 <when value="dropped">
168 <conditional name="cond_rg"> 331 <expand macro="read_output_formatting">
169 <param name="select_rg" type="select" label="Filter by readgroup"> 332 <param name="complementary_output" type="boolean" truevalue="yes" falsevalue="no" checked="false"
170 <option value="no" selected="True">No</option> 333 label="Produce extra dataset with retained reads?" />
171 <option value="text">Single read group </option> 334 </expand>
172 <option value="file">Read groups from file</option> 335 <expand macro="output_format_selector">
336 <param name="with_header" type="boolean" truevalue="-h" falsevalue="" checked="True"
337 label="Include SAM header in the output?" />
338 </expand>
339 </when>
340 <when value="count" />
341 </conditional>
342 </when>
343 <when value="header">
344 <conditional name="output_options">
345 <param name="reads_report_type" type="select" label="What would you like to have reported?">
346 <option value="">The header in ...</option>
173 </param> 347 </param>
174 <when value="no"/> 348 <when value="">
175 <when value="text"> 349 <expand macro="output_format_selector" />
176 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." />
177 </when> 350 </when>
178 <when value="file"> 351 </conditional>
179 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." />
180 </when>
181 </conditional>
182 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." />
183 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" />
184 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." />
185 <param name="inclusive_filter" argument="-f" type="select" multiple="True" label="Require that these flags are set">
186 <expand macro="flag_options" />
187 </param>
188 <param name="exclusive_filter" argument="-F" type="select" multiple="True" label="Exclude reads with any of the following flags set">
189 <expand macro="flag_options" />
190 </param>
191 <param name="exclusive_filter_all" argument="-G" type="select" multiple="True" label="Exclude reads with all of the following flags set">
192 <expand macro="flag_options" />
193 </param>
194 <!-- TODO could also make this a text field (comma/space separated) -->
195 <repeat name="readtags" title="Filter by read tags">
196 <param name="readtag" type="text" argument="-x" label="Filter by read tag" help="Read tag to exclude from output."/>
197 </repeat>
198 </when> 352 </when>
199 <when value="no"/>
200 </conditional>
201 <conditional name="cond_subsample">
202 <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
203 <option value="fraction">Specify a fraction to keep</option>
204 <option value="target">Specify a target # of reads</option>
205 <option value="no" selected="True">No downsampling</option>
206 </param>
207 <when value="fraction">
208 <param name="fraction" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
209 <expand macro="seed_input" />
210 </when>
211 <when value='target'>
212 <param name="target" type="integer" optional="False" value="" min="0" label="Target # of reads" help="sets the target number of reads to subsample." />
213 <expand macro="seed_input" />
214 </when>
215 <when value="no"/>
216 </conditional> 353 </conditional>
217 <conditional name="addref_cond"> 354 <conditional name="addref_cond">
218 <param name="addref_select" type="select" label="Reference data"> 355 <param name="addref_select" type="select" label="Reference data">
219 <help>Reference data as fasta(.gz). Required for SAM input without @SQ headers and useful/required for writing CRAM output (see help).</help> 356 <help>Reference data as fasta(.gz). Required for SAM input without @SQ headers and useful/required for writing CRAM output (see help).</help>
220 <option value="no" selected="True">No, see help (-output-fmt-option no_ref)</option> 357 <option value="no" selected="True">No, see help (-output-fmt-option no_ref)</option>
232 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> 369 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
233 </options> 370 </options>
234 </param> 371 </param>
235 </when> 372 </when>
236 </conditional> 373 </conditional>
237 <section name="adv_output" title="Output Options" expanded="false">
238 <param name="header" type="select" argument="-h/-H" label="Header options">
239 <option value="-h" selected="True">Include header in SAM output (-h)</option>
240 <option value="-H">Return header only (-H)</option>
241 <option value="">Exclude header</option>
242 </param>
243 <param name="outputpassing" type="boolean" argument="-U" truevalue="yes" falsevalue="no" checked="false" label="Output alignments not passing the filter" help="Write alignments that are not selected by the various filter options to an extra data set. When this option is used, all alignments (or all alignments intersecting the regions specified) are written to either the output data set or this extra output data set, but never both." />
244 <param name="collapsecigar" type="boolean" argument="-B" truevalue="-B" falsevalue="" checked="false" label="Collapse backward CIGAR operation" help="Collapse the backward CIGAR operation." />
245 </section>
246 </inputs> 374 </inputs>
247 <outputs> 375 <outputs>
248 <!-- TODO do I need an action for dbkey? --> 376 <!-- TODO do I need an action for dbkey? -->
249 <data name="outputsam" format_source="input" from_work_dir="outfile" label="${tool.name} on ${on_string}: filtered alignments"> 377 <data name="outputsam" format_source="input" from_work_dir="outfile" label="${tool.name} on ${on_string}: filtered alignments">
250 <filter>outtype != 'count'</filter> 378 <filter>mode['outtype'] == 'header' or mode['output_options']['reads_report_type'] != 'count'</filter>
251 <!-- TODO is change_format deprecated? how can I modify the type for the different bam types? -->
252 <change_format> 379 <change_format>
253 <when input="outtype" value="sam" format="sam" /> 380 <when input="mode.output_options.output_format.oformat" value="sam" format="sam" />
254 <when input="outtype" value="bam" format="bam" /> 381 <when input="mode.output_options.output_format.oformat" value="bam" format="bam" />
255 <when input="outtype" value="cram" format="cram" /> 382 <when input="mode.output_options.output_format.oformat" value="cram" format="cram" />
256 </change_format> 383 </change_format>
257 </data> 384 </data>
258 <data name="invoutputsam" format_source="input" from_work_dir="inv_outfile" label="${tool.name} on ${on_string}: unfiltered alignments"> 385 <data name="invoutputsam" format_source="input" from_work_dir="inv_outfile" label="${tool.name} on ${on_string}: unfiltered alignments">
259 <filter>adv_output['outputpassing'] == 'yes' and outtype != 'count'</filter> 386 <filter>mode['outtype'] == 'selected_reads' and mode['output_options']['reads_report_type'] != 'count' and mode['output_options']['complementary_output']</filter>
260 <change_format> 387 <change_format>
261 <when input="outtype" value="sam" format="sam" /> 388 <when input="mode.output_options.output_format.oformat" value="sam" format="sam" />
262 <when input="outtype" value="bam" format="bam" /> 389 <when input="mode.output_options.output_format.oformat" value="bam" format="bam" />
263 <when input="outtype" value="cram" format="cram" /> 390 <when input="mode.output_options.output_format.oformat" value="cram" format="cram" />
264 </change_format> 391 </change_format>
265 </data> 392 </data>
266 <data name="outputcnt" format="txt" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts"> 393 <data name="outputcnt" format="txt" from_work_dir="outfile" label="${tool.name} on ${on_string}: Counts">
267 <filter>outtype == 'count'</filter> 394 <filter>mode['outtype'] != 'header' and mode['output_options']['reads_report_type'] == 'count'</filter>
268 </data> 395 </data>
269 </outputs> 396 </outputs>
270 <tests> 397 <tests>
271 <!-- sam to bam (copied from the sam_to_bam tool) --> 398 <!-- sam to bam (copied from the sam_to_bam tool) -->
272 <test> 399 <test>
273 <param name="input" ftype="sam" value="sam_to_bam_in1.sam" /> 400 <param name="input" ftype="sam" value="sam_to_bam_in1.sam" />
274 <param name="outtype" value="bam" /> 401 <output name="outputsam" ftype="bam" file="sam_to_bam_out1.bam" />
402 </test>
403 <test>
404 <param name="input" ftype="sam" dbkey="equCab2" value="sam_to_bam_in1.sam" />
405 <conditional name="addref_cond">
406 <param name="addref_select" value="cached" />
407 <param name="ref" value="equCab2chrM" />
408 </conditional>
409 <output name="outputsam" ftype="bam" file="sam_to_bam_out2.bam" />
410 </test>
411 <test>
412 <param name="input" ftype="sam" value="sam_to_bam_noheader_in2.sam" />
275 <conditional name="addref_cond"> 413 <conditional name="addref_cond">
276 <param name="addref_select" value="history" /> 414 <param name="addref_select" value="history" />
277 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" /> 415 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" />
278 </conditional> 416 </conditional>
279 <output name="outputsam" ftype="bam" file="sam_to_bam_out1.bam" /> 417 <output name="outputsam" ftype="bam" file="sam_to_bam_out3.bam" />
280 </test> 418 </test>
281 <test> 419 <!-- bam to cram + region filter (adapted from bam_to_cram tool)-->
282 <param name="input" ftype="sam" dbkey="equCab2" value="sam_to_bam_in1.sam" /> 420 <test>
283 <param name="outtype" value="bam" /> 421 <param name="input" value="test.bam" ftype="bam" />
422 <conditional name="mode">
423 <param name="outtype" value="selected_reads" />
424 <section name="filter_config">
425 <conditional name="cond_region">
426 <param name="select_region" value="no"/>
427 </conditional>
428 </section>
429 <conditional name="output_options">
430 <conditional name="output_format">
431 <param name="oformat" value="cram" />
432 </conditional>
433 </conditional>
434 </conditional>
435 <conditional name="addref_cond">
436 <param name="addref_select" value="history" />
437 <param name="ref" value="test.fa" />
438 </conditional>
439 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
440 </test>
441 <!-- count alignments -->
442 <test>
443 <param name="input" value="test.bam" ftype="bam" />
444 <conditional name="mode">
445 <param name="outtype" value="all_reads" />
446 <conditional name="output_options">
447 <param name="reads_report_type" value="count" />
448 </conditional>
449 </conditional>
450 <output name="outputcnt" file="test_counts.tab" ftype="txt" />
451 </test>
<!-- sam to cram through the selected_reads branch, with the region filter switched off and the reference taken from the history; the result is compared to test.cram by similar size (delta 250) -->
452 <test>
453 <param name="input" value="test.sam" ftype="sam" />
454 <conditional name="mode">
455 <param name="outtype" value="selected_reads" />
456 <section name="filter_config">
457 <conditional name="cond_region">
458 <param name="select_region" value="no"/>
459 </conditional>
460 </section>
461 <conditional name="output_options">
462 <conditional name="output_format">
463 <param name="oformat" value="cram" />
464 </conditional>
465 </conditional>
466 </conditional>
467 <conditional name="addref_cond">
468 <param name="addref_select" value="history" />
469 <param name="ref" value="test.fa" />
470 </conditional>
471 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
472 </test>
473 <test>
474 <param name="input" value="test.bam" ftype="bam" />
475 <conditional name="mode">
476 <param name="outtype" value="selected_reads" />
477 <section name="filter_config">
478 <conditional name="cond_region">
479 <param name="select_region" value="text"/>
480 <param name="regions" value="CHROMOSOME_I" />
481 </conditional>
482 </section>
483 <conditional name="output_options">
484 <conditional name="output_format">
485 <param name="oformat" value="cram" />
486 </conditional>
487 </conditional>
488 </conditional>
489 <conditional name="addref_cond">
490 <param name="addref_select" value="history" />
491 <param name="ref" value="test.fa" />
492 </conditional>
493 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
494 </test>
495 <!-- bam to sam + header options (adapted from bam_to_sam tool)-->
496 <test>
497 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
498 <conditional name="mode">
499 <conditional name="output_options">
500 <conditional name="output_format">
501 <param name="oformat" value="sam" />
502 <param name="with_header" value="true" />
503 </conditional>
504 </conditional>
505 </conditional>
506 <output file="bam_to_sam_out1.sam" ftype="sam" name="outputsam" />
507 </test>
508 <test>
509 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
510 <conditional name="mode">
511 <param name="outtype" value="header" />
512 <conditional name="output_options">
513 <conditional name="output_format">
514 <param name="oformat" value="sam" />
515 </conditional>
516 </conditional>
517 </conditional>
518 <output file="bam_to_sam_out2.sam" ftype="sam" name="outputsam" />
519 </test>
520 <test>
521 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
522 <conditional name="mode">
523 <conditional name="output_options">
524 <conditional name="output_format">
525 <param name="oformat" value="sam" />
526 <param name="with_header" value="false" />
527 </conditional>
528 </conditional>
529 </conditional>
530 <output file="bam_to_sam_out3.sam" ftype="sam" name="outputsam" />
531 </test>
532 <!-- cram to bam + region (adapted from cram_to_bam tool)-->
533 <test>
534 <param name="input" value="test.cram" ftype="cram" />
535 <conditional name="addref_cond">
536 <param name="addref_select" value="history" />
537 <param name="ref" value="test.fa" />
538 </conditional>
539 <output name="outputsam" file="test.bam" ftype="bam" />
540 </test>
541 <test>
542 <param name="input" value="test.cram" ftype="cram" />
543 <conditional name="mode">
544 <param name="outtype" value="selected_reads" />
545 <section name="filter_config">
546 <conditional name="cond_region">
547 <param name="select_region" value="text"/>
548 <param name="regions" value="CHROMOSOME_I" />
549 </conditional>
550 </section>
551 <conditional name="output_options">
552 <conditional name="output_format">
553 <param name="oformat" value="bam" />
554 </conditional>
555 </conditional>
556 </conditional>
557 <conditional name="addref_cond">
558 <param name="addref_select" value="history" />
559 <param name="ref" value="test.fa" />
560 </conditional>
561 <output name="outputsam" file="test.bam" ftype="bam" />
562 </test>
563 <test>
564 <param name="input" value="test.cram" ftype="cram" />
565 <conditional name="mode">
566 <param name="outtype" value="selected_reads" />
567 <section name="filter_config">
568 <conditional name="cond_region">
569 <param name="select_region" value="bed" />
570 <param name="bedfile" value="test.bed" ftype="bed" />
571 </conditional>
572 </section>
573 <conditional name="output_options">
574 <conditional name="output_format">
575 <param name="oformat" value="bam" />
576 </conditional>
577 </conditional>
578 </conditional>
579 <conditional name="addref_cond">
580 <param name="addref_select" value="history" />
581 <param name="ref" value="test.fa" />
582 </conditional>
583 <output name="outputsam" file="test.bam" ftype="bam" />
584 </test>
585 <test>
586 <param name="input" value="test2.cram" dbkey="equCab2" ftype="cram" />
587 <conditional name="mode">
588 <param name="outtype" value="selected_reads" />
589 <section name="filter_config">
590 <conditional name="cond_region">
591 <param name="select_region" value="no"/>
592 </conditional>
593 </section>
594 <conditional name="output_options">
595 <conditional name="output_format">
596 <param name="oformat" value="bam" />
597 </conditional>
598 </conditional>
599 </conditional>
284 <conditional name="addref_cond"> 600 <conditional name="addref_cond">
285 <param name="addref_select" value="cached" /> 601 <param name="addref_select" value="cached" />
286 <param name="ref" value="equCab2chrM" /> 602 <param name="ref" value="equCab2chrM" />
287 </conditional> 603 </conditional>
288 <output name="outputsam" ftype="bam" file="sam_to_bam_out2.bam" /> 604 <output name="outputsam" file="sam_to_bam_out2.bam" ftype="bam" />
289 </test> 605 </test>
290 <test> 606 <!-- sampling options-->
291 <param name="input" ftype="sam" value="sam_to_bam_noheader_in2.sam" /> 607 <test>
292 <param name="outtype" value="bam" /> 608 <param name="input" value="test.sam" ftype="sam" />
293 <conditional name="addref_cond"> 609 <conditional name="mode">
294 <param name="addref_select" value="history" /> 610 <param name="outtype" value="selected_reads" />
295 <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" /> 611 <section name="subsample_config">
296 </conditional> 612 <conditional name="subsampling_mode">
297 <output name="outputsam" ftype="bam" file="sam_to_bam_out3.bam" /> 613 <param name="select_subsample" value="target" />
298 </test> 614 <param name="target" value="2" />
299 <!-- bam to cram + region filter (adapted from bam_to_cram tool)--> 615 </conditional>
616 </section>
617 <conditional name="output_options">
618 <conditional name="output_format">
619 <param name="oformat" value="sam" />
620 </conditional>
621 </conditional>
622 </conditional>
623 <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
624 </test>
625 <test>
626 <param name="input" value="test.sam" ftype="sam" />
627 <conditional name="mode">
628 <param name="outtype" value="selected_reads" />
629 <section name="subsample_config">
630 <conditional name="subsampling_mode">
631 <param name="select_subsample" value="target" />
632 <param name="target" value="20" />
633 </conditional>
634 </section>
635 <conditional name="output_options">
636 <conditional name="output_format">
637 <param name="oformat" value="sam" />
638 </conditional>
639 </conditional>
640 </conditional>
641 <output name="outputsam" file="test.sam" ftype="sam" />
642 </test>
643 <test>
644 <param name="input" value="test.sam" ftype="sam" />
645 <conditional name="mode">
646 <param name="outtype" value="selected_reads" />
647 <section name="subsample_config">
648 <conditional name="subsampling_mode">
649 <param name="select_subsample" value="target" />
650 <param name="seed" value="7" />
651 <param name="target" value="2" />
652 </conditional>
653 </section>
654 <conditional name="output_options">
655 <conditional name="output_format">
656 <param name="oformat" value="sam" />
657 </conditional>
658 </conditional>
659 </conditional>
660 <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
661 </test>
300 <test> 662 <test>
301 <param name="input" value="test.bam" ftype="bam" /> 663 <param name="input" value="test.bam" ftype="bam" />
302 <param name="outtype" value="cram" /> 664 <conditional name="mode">
303 <conditional name="addref_cond"> 665 <param name="outtype" value="selected_reads" />
304 <param name="addref_select" value="history" /> 666 <section name="subsample_config">
305 <param name="ref" value="test.fa" /> 667 <conditional name="subsampling_mode">
306 </conditional> 668 <param name="select_subsample" value="target" />
307 <conditional name="cond_filter"> 669 <param name="seed" value="7" />
308 <param name="select_filter" value="yes" /> 670 <param name="target" value="2" />
309 <conditional name="cond_region"> 671 </conditional>
310 <param name="select_region" value="no"/> 672 </section>
311 </conditional> 673 <conditional name="output_options">
312 </conditional> 674 <conditional name="output_format">
313 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" /> 675 <param name="oformat" value="bam" />
314 </test> 676 </conditional>
315 <!-- count alignments --> 677 </conditional>
316 <test> 678 </conditional>
679 <output name="outputsam" file="test_ds.bam" ftype="bam" />
680 </test>
681 <test>
317 <param name="input" value="test.bam" ftype="bam" /> 682 <param name="input" value="test.bam" ftype="bam" />
318 <param name="outtype" value="count" /> 683 <conditional name="mode">
319 <output name="outputcnt" file="test_counts.tab" ftype="txt" /> 684 <param name="outtype" value="selected_reads" />
320 </test> 685 <section name="subsample_config">
321 <test> 686 <conditional name="subsampling_mode">
322 <param name="input" value="test.sam" ftype="sam" /> 687 <param name="select_subsample" value="target" />
323 <param name="outtype" value="cram" /> 688 <param name="seed" value="7" />
324 <conditional name="addref_cond"> 689 <param name="target" value="20" />
325 <param name="addref_select" value="history" /> 690 </conditional>
326 <param name="ref" value="test.fa" /> 691 </section>
327 </conditional> 692 <conditional name="output_options">
328 <conditional name="cond_filter"> 693 <conditional name="output_format">
329 <param name="select_filter" value="yes" /> 694 <param name="oformat" value="bam" />
330 <conditional name="cond_region"> 695 </conditional>
331 <param name="select_region" value="no"/> 696 </conditional>
332 </conditional> 697 </conditional>
333 </conditional> 698 <output name="outputsam" file="test.bam" ftype="bam" />
334 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" />
335 </test> 699 </test>
336 <test> 700 <test>
337 <param name="input" value="test.bam" ftype="bam" /> 701 <param name="input" value="test.bam" ftype="bam" />
338 <param name="outtype" value="cram" /> 702 <conditional name="mode">
339 <conditional name="addref_cond"> 703 <param name="outtype" value="selected_reads" />
340 <param name="addref_select" value="history" /> 704 <section name="subsample_config">
341 <param name="ref" value="test.fa" /> 705 <conditional name="subsampling_mode">
342 </conditional> 706 <param name="select_subsample" value="fraction" />
343 <conditional name="cond_filter"> 707 <param name="seed" value="7" />
344 <param name="select_filter" value="yes" /> 708 <param name="factor" value="5" />
345 <conditional name="cond_region"> 709 </conditional>
346 <param name="select_region" value="text"/> 710 </section>
347 <param name="regions" value="CHROMOSOME_I" /> 711 <conditional name="output_options">
348 </conditional> 712 <conditional name="output_format">
349 </conditional> 713 <param name="oformat" value="bam" />
350 <output name="outputsam" file="test.cram" ftype="cram" compare="sim_size" delta="250" /> 714 </conditional>
351 </test> 715 </conditional>
352 <!-- bam to sam + header options (adapted from bam_to_sam tool)-->
353 <test>
354 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
355 <param name="outtype" value="sam" />
356 <param name="header" value="-h" />
357 <output file="bam_to_sam_out1.sam" ftype="sam" name="outputsam" />
358 </test>
359 <test>
360 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
361 <param name="outtype" value="sam" />
362 <param name="header" value="-H" />
363 <output file="bam_to_sam_out2.sam" ftype="sam" name="outputsam" />
364 </test>
365 <test>
366 <param ftype="bam" name="input" value="bam_to_sam_in1.bam" />
367 <param name="outtype" value="sam" />
368 <param name="header" value="" />
369 <output file="bam_to_sam_out3.sam" ftype="sam" name="outputsam" />
370 </test>
371 <!-- cram to bam + region (adapted from cram_to_bam tool)-->
372 <test>
373 <param name="input" value="test.cram" ftype="cram" />
374 <param name="outtype" value="bam" />
375 <conditional name="addref_cond">
376 <param name="addref_select" value="history" />
377 <param name="ref" value="test.fa" />
378 </conditional>
379 <output name="outputsam" file="test.bam" ftype="bam" />
380 </test>
381 <test>
382 <param name="input" value="test.cram" ftype="cram" />
383 <param name="outtype" value="bam" />
384 <conditional name="addref_cond">
385 <param name="addref_select" value="history" />
386 <param name="ref" value="test.fa" />
387 </conditional>
388 <param name="target_region" value="region" />
389 <param name="region_string" value="CHROMOSOME_I" />
390 <output name="outputsam" file="test.bam" ftype="bam" />
391 </test>
392 <test>
393 <param name="input" value="test.cram" ftype="cram" />
394 <param name="outtype" value="bam" />
395 <conditional name="addref_cond">
396 <param name="addref_select" value="history" />
397 <param name="ref" value="test.fa" />
398 </conditional>
399 <param name="target_region" value="regions_bed_file" />
400 <param name="regions_bed_file" value="test.bed" ftype="bed" />
401
402 <output name="outputsam" file="test.bam" ftype="bam" />
403 </test>
404 <test>
405 <param name="input" value="test2.cram" dbkey="equCab2" ftype="cram" />
406 <param name="outtype" value="bam" />
407 <conditional name="addref_cond">
408 <param name="addref_select" value="cached" />
409 <param name="ref" value="equCab2chrM" />
410 </conditional>
411 <conditional name="cond_region">
412 <param name="select_region" value="no"/>
413 </conditional>
414 <output name="outputsam" file="sam_to_bam_out2.bam" ftype="bam" />
415 </test>
416 <!-- sampling options-->
417 <test>
418 <param name="input" value="test.sam" ftype="sam" />
419 <param name="outtype" value="sam" />
420 <conditional name="cond_subsample">
421 <param name="select_subsample" value="target" />
422 <param name="target" value="2" />
423 </conditional>
424 <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
425 </test>
426 <test>
427 <param name="input" value="test.sam" ftype="sam" />
428 <param name="outtype" value="sam" />
429 <conditional name="cond_subsample">
430 <param name="select_subsample" value="target" />
431 <param name="seed" value="7" />
432 <param name="target" value="2" />
433 </conditional>
434 <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
435 </test>
436 <test>
437 <param name="input" value="test.bam" ftype="bam" />
438 <param name="outtype" value="bam" />
439 <conditional name="cond_subsample">
440 <param name="select_subsample" value="target" />
441 <param name="seed" value="7" />
442 <param name="target" value="2" />
443 </conditional> 716 </conditional>
444 <output name="outputsam" file="test_ds.bam" ftype="bam" /> 717 <output name="outputsam" file="test_ds.bam" ftype="bam" />
445 </test> 718 </test>
446 <test> 719 <test>
447 <param name="input" value="test.bam" ftype="bam" /> 720 <param name="input" value="test.bam" ftype="bam" />
448 <param name="outtype" value="bam" /> 721 <conditional name="mode">
449 <conditional name="cond_subsample"> 722 <param name="outtype" value="selected_reads" />
450 <param name="select_subsample" value="fraction" /> 723 <section name="subsample_config">
451 <param name="seed" value="7" /> 724 <conditional name="subsampling_mode">
452 <param name="fraction" value=".2" /> 725 <param name="select_subsample" value="fraction" />
726 <param name="seed" value="7" />
727 <param name="factor" value="1.25" />
728 </conditional>
729 </section>
730 <conditional name="output_options">
731 <param name="reads_report_type" value="dropped" />
732 <conditional name="output_format">
733 <param name="oformat" value="bam" />
734 </conditional>
735 </conditional>
453 </conditional> 736 </conditional>
454 <output name="outputsam" file="test_ds.bam" ftype="bam" /> 737 <output name="outputsam" file="test_ds.bam" ftype="bam" />
455 </test> 738 </test>
456 </tests> 739 </tests>
457 <help> 740 <help>
458 **What it does** 741 **What it does**
459 742
460 Samtools view can: 743 Samtools view can:
461 744
462 1. filter alignments according to various criteria 745 1. convert between alignment formats (SAM, BAM, CRAM)
463 2. convert between alignment formats (SAM, BAM, CRAM) 746 2. filter and subsample alignments according to user-specified criteria
464 747 3. count the reads in the input dataset or those retained after filtering
465 With no options or regions specified, prints all alignments in the specified input alignment file (in SAM, BAM, or CRAM format) to standard output in SAM format (with no header). 748 and subsampling
749 4. obtain just the header of the input in any supported format
750
751 In addition, the tool has (limited) options to modify read records during conversion and/or filtering by:
752
753 - stripping them of user-specified tags
754 - collapsing backward CIGAR operations if they are specified in their CIGAR
755 fields
756
757 With default settings, the tool generates a BAM dataset with the header and
758 reads found in the input dataset (which can be in SAM, BAM, or CRAM format).
466 759
467 **Alignment format conversion** 760 **Alignment format conversion**
468 761
762 By changing the *Output format* it is possible to convert an input dataset to
763 another format.
469 Inputs of type SAM, BAM, and CRAM are accepted and can be converted to each of these formats (alternatively alignment counts can be computed) by selecting the appropriate "Output type". 764 Inputs of type SAM, BAM, and CRAM are accepted and can be converted to each of these formats (alternatively alignment counts can be computed) by selecting the appropriate "Output type".
470 765
471 .. class:: infomark 766 .. class:: infomark
472 767
473 samtools view allows to specify a reference sequence. This is required for SAM input with missing @SQ headers (which include sequence names, length, md5, etc) and useful (and sometimes necessary) for CRAM input and output. In the following the use of reference sequence in the CRAM format is detailed. 768 The tool allows you to specify a reference sequence. This is required for SAM input with missing @SQ headers (which include sequence names, length, md5, etc) and useful (and sometimes necessary) for CRAM input and output. In the following the use of the reference sequence in the CRAM format is detailed.
474 CRAM is (intended as a primarily) a reference-based compressed format, i.e. only differences between the stored sequences and the reference are stored. As a consequence the reference that was used to generate the alignemnts is always needed in order to interpret the alignments (a checksum stored in the CRAM file is used to verify that the only the correct sequence can be used), i.e. the CRAM file on its own is not useful per default. This allows for a more space efficient storage compared to BAM. 769 CRAM is (primarily) a reference-based compressed format, i.e. only sequence differences between aligned reads and the reference are stored. As a consequence, the reference that was used during read mapping is needed in order to interpret the alignment records (a checksum stored in the CRAM file is used to verify that only the correct reference sequence can be used). This allows for more space-efficient storage than with BAM format, but such a CRAM file is not usable without its reference.
475 But it is also possible to use CRAM without a reference with the disadvantage that the reference is stored explicitely (as in SAM and BAM). 770 It is also possible, however, to use CRAM without a reference, with the disadvantage that the reference sequence then gets stored explicitly (as in SAM and BAM).
476 771
477 The Galaxy tool **currently generates only CRAM without reference sequence**. 772 The Galaxy tool **currently generates only CRAM without reference sequence**.
478 773
479 For reference-based CRAM input the correct reference sequence needs to be specified. 774 For reference-based CRAM input the correct reference sequence needs to be specified.
480 775
481 **Filtering alignments** 776 **Filtering alignments**
482 777
483 samtools view allows to filter alignements based on various criteria, i.e. the output will contain only alignemnts matching all criteria (an additional output containing the remaining alignments can be created additionally, see "Output alignments not passing the filter" in "output options"): e.g. by regions (see below), alignment quality (see below), and tags or flags set in the alignments. 778 If you ask for *A filtered/subsampled selection of reads*, the tool will allow
484 779 you to specify filter conditions and/or to choose a subsampling strategy, and
780 the output will contain one of the following depending on your choice under
781 *What would you like to have reported?*:
782
783 - All reads retained after filtering and subsampling
784 - Reads dropped during filtering and subsampling
785
786 If instead you want to *split* the input reads based on your criteria and
787 obtain *two* datasets, one with the retained and one with the dropped reads, check
788 the *Produce extra dataset with dropped/retained reads?* option.
485 789
486 790
487 **Filtering by regions** 791 **Filtering by regions**
488 792
489 You may specify one or more space-separated region specifications after the input filename to restrict output to only those alignments which overlap the specified region(s). Use of region specifications requires a coordinate-sorted and indexed input file (in BAM or CRAM format). 793 You may specify one or more space-separated region specifications after the input filename to restrict output to only those alignments which overlap the specified region(s). Use of region specifications requires a coordinate-sorted and indexed input file (in BAM or CRAM format).
490 794
491 Regions can be specified as: RNAME[:STARTPOS[-ENDPOS]] and all position coordinates are 1-based. 795 Regions can be specified as: RNAME[:STARTPOS[-ENDPOS]] and all position coordinates are 1-based.
492 796
493 Important note: when multiple regions are given, some alignments may be output multiple times if they overlap more than one of the specified regions. 797 .. class:: warningmark
798
799 When multiple regions are given, some alignments may be output multiple times if they overlap more than one of the specified regions.
494 800
495 Examples of region specifications: 801 Examples of region specifications:
496 802
497 - chr1 Output all alignments mapped to the reference sequence named 'chr1' (i.e. @SQ SN:chr1). 803 ``chr1``
498 - chr2:1000000 The region on chr2 beginning at base position 1,000,000 and ending at the end of the chromosome. 804 Output all alignments mapped to the reference sequence named 'chr1' (i.e. @SQ SN:chr1).
499 - chr3:1000-2000 The 1001bp region on chr3 beginning at base position 1,000 and ending at base position 2,000 (including both end positions). 805
500 - '*' Output the unmapped reads at the end of the file. (This does not include any unmapped reads placed on a reference sequence alongside their mapped mates.) 806 ``chr2:1000000``
501 - . Output all alignments. (Mostly unnecessary as not specifying a region at all has the same effect.) 807 The region on chr2 beginning at base position 1,000,000 and ending at the end of the chromosome.
808
809 ``chr3:1000-2000``
810 The 1001bp region on chr3 beginning at base position 1,000 and ending at base position 2,000 (including both end positions).
811
812 ``*``
813 Output the unmapped reads at the end of the file. (This does not include any unmapped reads placed on a reference sequence alongside their mapped mates.)
814
815 ``.``
816 Output all alignments. (Mostly unnecessary as not specifying a region at all has the same effect.)
502 817
503 **Filtering by quality** 818 **Filtering by quality**
504 819
505 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignemnt. Note that aligners do not follow a consistent definition. 820 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition.
506
507 The -x, -B, and -s options modify the data which is contained in each alignment.
508 </help> 821 </help>
509 <expand macro="citations"/> 822 <expand macro="citations"/>
510 </tool> 823 </tool>