view samtools_view.xml @ 14:5826298f6a73 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit 5cb103832529f17e5c72e7f122758c13519fbe5e
author iuc
date Mon, 15 Aug 2022 09:19:43 +0000
parents 0dbf49c414ae
children 6be888be75f9
line wrap: on
line source

<tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <description>- reformat, filter, or subsample SAM, BAM or CRAM</description>
    <macros>
        <import>macros.xml</import>

        <!-- Command-line fragment handing reference data to samtools view.
             It only emits -T/-t when $use_ref evaluates to true in the command template;
             the shell variables reffa and reffai are expected to be defined by then. -->
        <token name="@REF_DATA@">
            ## additional reference data
            #if $use_ref:
                -T "\$reffa"
                -t "\$reffai"
            #end if
        </token>

        <!-- Read reformatting options shared by every mode that reports reads.
             Params supplied by the caller are injected at the yield point,
             i.e. ahead of the adv_output section. -->
        <xml name="read_output_formatting">
            <yield />
            <section name="adv_output"
                     title="Read Reformatting Options"
                     expanded="false">
                <repeat name="readtags" title="Strip read tags from output">
                    <param name="readtag"
                           type="text"
                           argument="-x"
                           label="Read tag to strip"
                           help="Read tag to exclude from output." />
                </repeat>
                <param name="collapsecigar"
                       type="boolean"
                       argument="-B"
                       truevalue="-B"
                       falsevalue=""
                       checked="false"
                       label="Collapse backward CIGAR operation"
                       help="Collapse the backward CIGAR operation." />
            </section>
        </xml>

        <!-- Output format chooser. Params supplied by the caller are injected
             into the SAM branch only (used for the SAM header switch). -->
        <xml name="output_format_selector">
            <conditional name="output_format">
                <param name="oformat"
                       type="select"
                       label="Output format"
                       help="Note on BAM output format: The tool will generate coordinate-sorted BAM, i.e., may change the order of reads compared to the input. For BAM input, select 'Same as input' to produce BAM output with the read order retained.">
                    <option value="input">Same as input</option>
                    <option value="sam">SAM</option>
                    <option value="bam" selected="True">BAM (-b)</option>
                    <option value="cram">CRAM (-C)</option>
                </param>
                <when value="input" />
                <when value="sam">
                    <yield />
                </when>
                <when value="bam" />
                <when value="cram" />
            </conditional>
        </xml>
    </macros>
    <!-- Tool dependencies: the shared "requirements" macro (defined in macros.xml, not shown
         here) extended with gawk, because the command section post-processes read counts
         with awk when subsampling to a target number of reads. -->
    <expand macro="requirements">
        <requirement type="package" version="5.1.0">gawk</requirement>
    </expand>
    <!-- Further shared macros from macros.xml; presumably they provide the error detection
         rules and the version command (to be confirmed against macros.xml). -->
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
        ## Overview of the generated command line:
        ##   1. set up threads/memory and reference/index files (shared macros)
        ##   2. derive the samtools view format flag from the requested output
        ##   3. header mode: emit only the header and stop
        ##   4. otherwise: assemble filter options, optionally pre-compute the
        ##      subsampling fraction, run samtools view and, if a BAM is produced
        ##      from a non-coordinate-sorted input, sort the result(s)
        @ADDTHREADS@
        @ADDMEMORY@
        ## prepare reference data
        @PREPARE_FASTA_IDX@
        @PREPARE_IDX@

        ## determine the output format flag to pass to samtools view
        ## -c for count mode
        ## -b to produce BAM-formatted output
        ## -C to produce CRAM-formatted output
        ## SAM is the default output format
        #set $fmtopt = ''
        #if str($mode.output_options.reads_report_type) == 'count':
            #set $fmtopt = '-c'
        #else:
            #if str($mode.output_options.output_format.oformat) == 'bam':
                #set $fmtopt = '-b'
            #elif str($mode.output_options.output_format.oformat) == 'cram':
                #set $fmtopt = '-C'
            #elif str($mode.output_options.output_format.oformat) == 'input':
                ## 'unsorted.bam' is the base BAM datatype accepted by the input
                ## param; it covers coordinate-sorted 'bam' as well as
                ## 'qname_sorted.bam' and plain 'unsorted.bam'. All of them have
                ## to be written back as BAM for "Same as input" to hold.
                #if $input.is_of_type('unsorted.bam'):
                    #set $fmtopt = '-b'
                #elif $input.is_of_type('cram'):
                    #set $fmtopt = '-C'
                #else:
                    ## input in SAM format, make sure to keep header if present
                    #set $fmtopt = '-h'
                #end if
            #end if
        #end if

        #if str($mode.outtype) == 'header':
            ## call samtools view and be done
            samtools view
            -H $fmtopt -o outfile
            @REF_DATA@
            infile
        #else:
            ## are filtering and/or subsampling in effect?
            #set $with_filtering = False
            #set $with_subsampling = False
            #if str($mode.outtype) == 'selected_reads':
                #set $with_filtering = True
                ## a downsampling factor of 1 (the default) means "keep everything"
                #if str($mode.subsample_config.subsampling_mode.select_subsample) != 'fraction' or $mode.subsample_config.subsampling_mode.factor > 1:
                    #set $with_subsampling = True
                #end if
            #end if

            ## are we producing one or two outputs?
            #if str($mode.outtype) == 'selected_reads' and str($mode.output_options.reads_report_type) != 'count' and str($mode.output_options.complementary_output) == 'yes':
                #set $with_non_selected_reads_output = True
            #else:
                #set $with_non_selected_reads_output = False
            #end if

            ## $std_filters collects regular options, $reg_filters the positional
            ## region arguments that have to follow the input file name
            #set $std_filters = ''
            #set $reg_filters = ''
            #if $with_filtering:
                ## build string of all user-configured filter options
                #if str($mode.filter_config.cond_region.select_region) == 'bed':
                    #set $std_filters = $std_filters + " -L '%s'" % str($mode.filter_config.cond_region.bedfile)
                #elif str($mode.filter_config.cond_region.select_region) == 'text':
                    #set $reg_filters = "'%s'" % str($mode.filter_config.cond_region.regions).strip()
                #end if

                #if str($mode.filter_config.cond_rg.select_rg) == 'text':
                    #set $std_filters = $std_filters + " -r '%s'" % str($mode.filter_config.cond_rg.readgr)
                #elif str($mode.filter_config.cond_rg.select_rg) == 'file':
                    #set $std_filters = $std_filters + " -R '%s'" % str($mode.filter_config.cond_rg.rgfile)
                #end if
                #if str($mode.filter_config.quality) != '' and int($mode.filter_config.quality) > 0:
                    #set $std_filters = $std_filters + " -q %s" % str($mode.filter_config.quality)
                #end if
                #if str($mode.filter_config.library):
                    #set $std_filters = $std_filters + " -l '%s'" % str($mode.filter_config.library)
                #end if
                #if str($mode.filter_config.cigarcons):
                    #set $std_filters = $std_filters + " -m %s" % str($mode.filter_config.cigarcons)
                #end if
                ## @FLAGS@ (defined in macros.xml) reads $filter and provides $flags
                ## NOTE(review): -f/-F/-G are appended unconditionally, so
                ## $std_filters is never empty in selected_reads mode and the
                ## idxstats shortcut further down looks unreachable - confirm
                ## against the @FLAGS@ definition before relying on it.
                #set $filter = $mode.filter_config.inclusive_filter
                @FLAGS@
                #set $std_filters = $std_filters + " -f %s" % str($flags)
                #set $filter = $mode.filter_config.exclusive_filter
                @FLAGS@
                #set $std_filters = $std_filters + " -F %s" % str($flags)
                #set $filter = $mode.filter_config.exclusive_filter_all
                @FLAGS@
                #set $std_filters = $std_filters + " -G %s" % str($flags)
                #if $mode.filter_config.tag:
                    #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag
                #end if
                #if $mode.filter_config.qname_file:
                    #set $std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file
                #end if
            #end if

            #if $with_subsampling:
                ## handle seed and fraction calculation for subsampling
                ## (samtools view -s takes SEED.FRACTION as a single number)
                #import random
                #if str($mode.subsample_config.subsampling_mode.seed):
                    #set $seed = int($mode.subsample_config.subsampling_mode.seed)
                #else:
                    #set $seed = random.randrange(32768)
                #end if

                #if str($mode.subsample_config.subsampling_mode.select_subsample) == 'target':
                    ##this must be done before the main command because we don't know the total # reads until execution time
                    #if $input.is_of_type('sam') or $std_filters or $reg_filters:
                        ## There is no index or we cannot use it because we are
                        ## not dealing with all of the reads in the indexed
                        ## file. We have to do an extra pass over the input to
                        ## count the reads to subsample.
                        sample_fragment=`samtools view -c $std_filters infile $reg_filters | awk '{s=\$1} END {frac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", frac > 1 ? $seed+1/frac : ".0")}'` &&
                    #else:
                        ## We can get the count of reads to subsample using
                        ## an inexpensive call to idxstats.
                        sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=s/${mode.subsample_config.subsampling_mode.target}; printf("%.8f\n", frac > 1 ? $seed+1/frac : ".0")}'` &&
                    #end if
                #end if
            #end if

            ## call samtools view
            samtools view
            -@ \$addthreads
            $fmtopt

            ## filter options (except regions filter, which is the last parameter)
            $std_filters

            #if $with_subsampling:
                #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target":
                    ##this is calculated at execution time before the main samtools command
                    -s \${sample_fragment}
                #else:
                    #set $fraction = $seed + 1 / float($mode.subsample_config.subsampling_mode.factor)
                    -s $fraction
                #end if
            #end if

            ## output options
            #if str($mode.output_options.reads_report_type) == 'count':
                -o outfile
            #else:
                #if str($mode.output_options.output_format.oformat) == 'sam':
                    ${mode.output_options.output_format.with_header}
                #end if
                ${mode.output_options.adv_output.collapsecigar}
                #for $s in $mode.output_options.adv_output.readtags:
                    -x '${s.readtag}'
                #end for
                ## -o receives the reads passing the filters, -U those failing them;
                ## for the 'dropped' report the two destinations are swapped
                #if str($mode.output_options.reads_report_type) == 'retained':
                    -o outfile
                    #if $with_non_selected_reads_output:
                        -U inv_outfile
                    #end if
                #else:
                    -U outfile
                    #if $with_non_selected_reads_output:
                        -o inv_outfile
                    #else:
                        -o /dev/null
                    #end if
                #end if

                ##currently reference based CRAM is disabled (see https://github.com/galaxyproject/tools-iuc/pull/1963)
                #if str($mode.output_options.output_format.oformat) == 'cram':
                    --output-fmt-option no_ref
                #end if
            #end if

            @REF_DATA@

            infile

            ## region filter needs to be at the end
            $reg_filters

            #if str($mode.output_options.reads_report_type) != 'count':
                ## if data is converted from an unsorted file (SAM, CRAM, or unsorted BAM) to BAM
                ## then sort the output by coordinate,
                #if not $input.is_of_type('bam') and str($mode.output_options.output_format.oformat) == 'bam':
                    && samtools sort
                        -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}"
                        -O bam
                        -o tmpsam
                        outfile
                        && mv tmpsam outfile
                    #if $with_non_selected_reads_output:
                        && samtools sort
                            -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}"
                            -O bam
                            -o tmpsam
                            inv_outfile
                        && mv tmpsam inv_outfile
                    #end if
                #end if
            #end if

            ##end of reads and count-specific section
        #end if
    ]]></command>
    <inputs>
        <!-- note unsorted bam includes all derived bam types (including bam which is sorted) -->
        <param name="input" format="sam,unsorted.bam,cram" type="data" label="SAM/BAM/CRAM data set" />
        <!-- Top-level mode switch. Every branch defines its own "output_options" conditional
             with a "reads_report_type" param, so the command template can address
             mode.output_options.reads_report_type regardless of the selected mode. -->
        <conditional name="mode">
            <param name="outtype" type="select" label="What would you like to look at?">
                <option value="all_reads">All reads in the input dataset</option>
                <option value="selected_reads">A filtered/subsampled selection of reads</option>
                <option value="header">Just the input header (-H)</option>
            </param>
            <!-- all reads: plain format conversion or counting, no filters -->
            <when value="all_reads">
                <conditional name="output_options">
                    <param name="reads_report_type" type="select" label="What would you like to have reported?">
                        <option value="retained">The actual reads</option>
                        <option value="count">The count of reads (-c)</option>
                    </param>
                    <when value="retained">
                        <expand macro="read_output_formatting" />
                        <expand macro="output_format_selector">
                            <param name="with_header" type="boolean" truevalue="-h" falsevalue="" checked="True"
                            label="Include SAM header in the output?" />
                        </expand>
                    </when>
                    <when value="count" />
                </conditional>
            </when>
            <!-- selected reads: filters, subsampling and optional complementary output -->
            <when value="selected_reads">
                <section name="filter_config" title="Configure filters" expanded="false">
                    <conditional name="cond_region">
                        <param name="select_region" type="select" label="Filter by regions">
                            <option value="no" selected="True">No</option>
                            <option value="text">Manually specify regions</option>
                            <option value="bed">Regions from BED file</option>
                        </param>
                        <when value="no"/>
                        <when value="text">
                            <param name="regions" type="text" optional="false" label="Filter by regions" help="One or more space-separated region specifications to restrict output to only those alignments which overlap the specified region(s)."/>
                        </when>
                        <when value="bed">
                            <param name="bedfile" format="bed" argument="-L" optional="false" type="data" label="Filter by intervals in a bed file" help="Only output alignments overlapping the intervals in the input bed file." />
                        </when>
                    </conditional>
                    <conditional name="cond_rg">
                        <param name="select_rg" type="select" label="Filter by readgroup">
                            <option value="no" selected="True">No</option>
                            <option value="text">Single read group</option>
                            <option value="file">Read groups from file</option>
                        </param>
                        <when value="no"/>
                        <when value="text">
                            <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." />
                        </when>
                        <when value="file">
                            <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." />
                        </when>
                    </conditional>
                    <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." />
                    <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR." />
                    <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." />
                    <!-- the three flag selectors below are each reduced to one numeric
                         flag value by the command template -->
                    <param name="inclusive_filter" argument="-f" type="select" multiple="True" label="Require that these flags are set">
                        <expand macro="flag_options" />
                    </param>
                    <param name="exclusive_filter" argument="-F" type="select" multiple="True" label="Exclude reads with any of the following flags set">
                        <expand macro="flag_options" />
                    </param>
                    <param name="exclusive_filter_all" argument="-G" type="select" multiple="True" label="Exclude reads with all of the following flags set">
                        <expand macro="flag_options" />
                    </param>
                    <!-- no explicit name on the next two params: the command template reads
                         them as "tag" and "qname_file", i.e. the names come from argument -->
                    <param argument="--tag" type="text" optional="true" label="Filter by tag" help="Only include reads with tag STR1 and associated value STR2. Write in the format STR1:STR2 (see help for more details)." />
                    <!-- TODO implement -D -->
                    <param argument="--qname-file" format="txt" optional="true" type="data" label="Filter by readnames" help="Only output alignments with readnames that are listed in this file." />
                </section>
                <section name="subsample_config" title="Configure subsampling" expanded="false">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
                            <option value="fraction">Specify a downsampling factor</option>
                            <option value="target">Specify a target # of reads</option>
                        </param>
                        <when value="fraction">
                            <!-- the default factor of 1 leaves subsampling switched off in the command template -->
                            <param name="factor" type="float" optional="False" value="1" min="1" label="Downsampling factor" help="The factor by which to downsample the input reads. A fraction of approx. 1/factor of the reads will be kept (default: 1 = no downsampling)." />
                            <expand macro="seed_input" />
                        </when>
                        <when value="target">
                            <param name="target" type="integer" optional="False" min="1" value="" label="Target # of reads" help="Sets the approx. target number of reads to subsample." />
                            <expand macro="seed_input" />
                        </when>
                    </conditional>
                </section>
                <conditional name="output_options">
                    <param name="reads_report_type" type="select"
                    label="What would you like to have reported?"
                    help="Hint: To invert all of the filtering/subsampling logic configured above, choose 'Reads dropped during filtering and subsampling'.">
                        <option value="retained">All reads retained after filtering and subsampling</option>
                        <option value="dropped">Reads dropped during filtering and subsampling</option>
                        <option value="count">The count of retained reads (-c)</option>
                    </param>
                    <when value="retained">
                        <expand macro="read_output_formatting">
                            <param name="complementary_output" type="boolean" truevalue="yes" falsevalue="no" checked="false"
                            label="Produce extra dataset with dropped reads?" />
                        </expand>
                        <expand macro="output_format_selector">
                            <param name="with_header" type="boolean" truevalue="-h" falsevalue="" checked="True"
                            label="Include SAM header in the output?" />
                        </expand>
                    </when>
                    <when value="dropped">
                        <expand macro="read_output_formatting">
                            <param name="complementary_output" type="boolean" truevalue="yes" falsevalue="no" checked="false"
                            label="Produce extra dataset with retained reads?" />
                        </expand>
                        <expand macro="output_format_selector">
                            <param name="with_header" type="boolean" truevalue="-h" falsevalue="" checked="True"
                            label="Include SAM header in the output?" />
                        </expand>
                    </when>
                    <when value="count" />
                </conditional>
            </when>
            <!-- header only: the single-option select exists so that
                 mode.output_options.reads_report_type is defined in this branch too -->
            <when value="header">
                <conditional name="output_options">
                    <param name="reads_report_type" type="select" label="What would you like to have reported?">
                        <option value="">The header in ...</option>
                    </param>
                    <when value="">
                        <expand macro="output_format_selector" />
                    </when>
                </conditional>
            </when>
        </conditional>
        <expand macro="optional_reference" argument="-t" help="Reference data as fasta(.gz). Required for SAM input without @SQ headers and useful/required for writing CRAM output (see help)."/>
    </inputs>
    <outputs>
        <!-- TODO do I need an action for dbkey? -->

        <!-- Primary reads (or header) output, collected from "outfile".
             The datatype follows the input unless an explicit output format was chosen. -->
        <data name="outputsam"
              format_source="input"
              from_work_dir="outfile"
              label="${tool.name} on ${on_string}: filtered alignments">
            <filter>mode['outtype'] == 'header' or mode['output_options']['reads_report_type'] != 'count'</filter>
            <change_format>
                <when input="mode.output_options.output_format.oformat" value="sam" format="sam" />
                <when input="mode.output_options.output_format.oformat" value="bam" format="bam" />
                <when input="mode.output_options.output_format.oformat" value="cram" format="cram" />
            </change_format>
        </data>

        <!-- Complementary reads output, collected from "inv_outfile";
             only created in selected_reads mode when the extra dataset was requested. -->
        <data name="invoutputsam"
              format_source="input"
              from_work_dir="inv_outfile"
              label="${tool.name} on ${on_string}: unfiltered alignments">
            <filter>mode['outtype'] == 'selected_reads' and mode['output_options']['reads_report_type'] != 'count' and mode['output_options']['complementary_output']</filter>
            <change_format>
                <when input="mode.output_options.output_format.oformat" value="sam" format="sam" />
                <when input="mode.output_options.output_format.oformat" value="bam" format="bam" />
                <when input="mode.output_options.output_format.oformat" value="cram" format="cram" />
            </change_format>
        </data>

        <!-- Count output; in count mode "outfile" holds the read count instead of reads. -->
        <data name="outputcnt"
              format="tabular"
              from_work_dir="outfile"
              label="${tool.name} on ${on_string}: Counts">
            <filter>mode['outtype'] != 'header' and mode['output_options']['reads_report_type'] == 'count'</filter>
        </data>
    </outputs>
    <tests>
<!-- 1) sam to bam (copied from the sam_to_bam tool) -->
        <test>
            <!-- all tool defaults: all reads, BAM output, no reference -->
            <param name="input" ftype="sam" value="in_test_1.sam" />
            <output name="outputsam" ftype="bam" file="test_1.bam" lines_diff="4" />
        </test>
        <!-- 2) sam to bam using a cached reference -->
        <test>
            <param name="input" ftype="sam" dbkey="equCab2" value="in_test_1.sam" />
            <conditional name="addref_cond">
                <param name="addref_select" value="cached" />
                <param name="ref" value="equCab2chrM" />
            </conditional>
            <output name="outputsam" ftype="bam" file="test_2.bam" lines_diff="4" />
        </test>
        <!-- 3) sam to bam using a reference fasta from the history -->
        <test>
            <param name="input" ftype="sam" value="in_test_3.sam" />
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" ftype="fasta" dbkey="equCab2" value="chr_m.fasta" />
            </conditional>
            <output name="outputsam" ftype="bam" file="test_3.bam" lines_diff="4" />
        </test>
        <!-- 4) cram to bam (reference fasta from the history) -->
        <test>
            <param name="input" value="in_test_4.cram" ftype="cram" />
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" value="test.fa" />
            </conditional>
            <output name="outputsam" file="test_4.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 5) within-bam operations that are expected to trigger sorting or not
             (tests 5 to 10); this one: sorted bam to bam with default settings -->
        <test >
            <!-- sorted bam should always result in unmodified output -->
            <param name="input" ftype="bam" value="in_test_5.bam" />
            <assert_command>
                <not_has_text text="samtools sort" />
            </assert_command>
            <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/>
        </test>
        <!-- 6) sorted bam with "Same as input" output format -->
        <test>
            <!-- sorted bam should always result in unmodified output -->
            <param name="input" ftype="bam" value="in_test_5.bam" />
            <conditional name="mode">
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <assert_command>
                <not_has_text text="samtools sort" />
            </assert_command>
            <output name="outputsam" ftype="bam" file="test_5.bam" lines_diff="2"/>
        </test>
        <!-- 7) qname_sorted.bam to bam -->
        <test>
            <!-- qname_sorted.bam should get sorted during "conversion" to bam ... -->
            <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" />
            <assert_command>
                <has_text text="samtools sort" />
            </assert_command>
            <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" />
        </test>
        <!-- 8) qname_sorted.bam with "Same as input" output format -->
        <test>
            <!-- ... but should be emitted unmodified when using input format -->
            <param name="input" ftype="qname_sorted.bam" value="in_test_7.bam" />
            <conditional name="mode">
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <assert_command>
                <not_has_text text="samtools sort" />
            </assert_command>
            <output name="outputsam" ftype="qname_sorted.bam" file="test_8.bam" lines_diff="2"/>
        </test>
        <!-- 9) unsorted.bam to bam -->
        <test>
            <!-- unsorted.bam should get sorted during "conversion" to bam ... -->
            <param name="input" ftype="unsorted.bam" value="in_test_7.bam" />
            <assert_command>
                <has_text text="samtools sort" />
            </assert_command>
            <output name="outputsam" ftype="bam" file="test_7.bam" lines_diff="4" />
        </test>
        <!-- 10) unsorted.bam with "Same as input" output format -->
        <test>
            <!-- ... but should be emitted unmodified when using input format -->
            <param name="input" ftype="unsorted.bam" value="in_test_7.bam" />
            <conditional name="mode">
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <assert_command>
                <not_has_text text="samtools sort" />
            </assert_command>
            <output name="outputsam" ftype="unsorted.bam" file="test_8.bam" lines_diff="2" />
        </test>
        <!-- 11) bam to sam + header options (adapted from bam_to_sam tool);
             this one: SAM output including the header -->
        <test>
            <param ftype="bam" name="input" value="in_test_11.bam" />
            <conditional name="mode">
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="sam" />
                        <param name="with_header" value="true" />
                    </conditional>
                </conditional>
            </conditional>
            <output file="test_11.sam" ftype="sam" name="outputsam" lines_diff="2" />
        </test>
        <!-- 12) header-only mode with SAM output -->
        <test>
            <param ftype="bam" name="input" value="in_test_11.bam" />
            <conditional name="mode">
                <param name="outtype" value="header" />
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="sam" />
                    </conditional>
                </conditional>
            </conditional>
            <output file="test_12.sam" ftype="sam" name="outputsam" lines_diff="2" />
        </test>
        <!-- 13) SAM output without the header -->
        <test>
            <param ftype="bam" name="input" value="in_test_11.bam" />
            <conditional name="mode">
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="sam" />
                        <param name="with_header" value="false" />
                    </conditional>
                </conditional>
            </conditional>
            <output file="test_13.sam" ftype="sam" name="outputsam" lines_diff="2" />
        </test>
        <!-- 14) count alignments: all_reads mode with the count report,
             which yields the tabular "outputcnt" dataset instead of reads -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="all_reads" />
                <conditional name="output_options">
                    <param name="reads_report_type" value="count" />
                </conditional>
            </conditional>
            <output name="outputcnt" file="test_14.tab" ftype="tabular" lines_diff="2" />
        </test>
        <!-- 15) region filters -->
        <test>
            <param name="input" value="in_test_15.sam" ftype="sam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <conditional name="cond_region">
                        <param name="select_region" value="no"/>
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="cram" />
                    </conditional>
                </conditional>
            </conditional>
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" value="test.fa" />
            </conditional>
            <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" />
        </test>
        <!-- 16) BAM to CRAM conversion using a reference from the history,
             without region selection; the result is compared by size against
             the same expected file as test 15 (test_15.cram) -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <conditional name="cond_region">
                        <param name="select_region" value="no" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="cram" />
                    </conditional>
                </conditional>
            </conditional>
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" value="test.fa" />
            </conditional>
            <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" />
        </test>
        <!-- 17) CRAM (dbkey equCab2) to BAM conversion using a cached
             reference (equCab2chrM), without region selection -->
        <test>
            <param name="input" value="in_test_17.cram" dbkey="equCab2" ftype="cram" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <conditional name="cond_region">
                        <param name="select_region" value="no" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="bam" />
                    </conditional>
                </conditional>
            </conditional>
            <conditional name="addref_cond">
                <param name="addref_select" value="cached" />
                <param name="ref" value="equCab2chrM" />
            </conditional>
            <output name="outputsam" file="test_17.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 18) BAM to CRAM conversion restricted to a region given as text
             (CHROMOSOME_I), using a reference from the history; the result is
             compared by size against test_15.cram -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <conditional name="cond_region">
                        <param name="select_region" value="text" />
                        <param name="regions" value="CHROMOSOME_I" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="cram" />
                    </conditional>
                </conditional>
            </conditional>
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" value="test.fa" />
            </conditional>
            <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" />
        </test>
        <!-- 19) CRAM to BAM conversion restricted to a region given as text
             (CHROMOSOME_I), using a reference from the history -->
        <test>
            <param name="input" value="test_15.cram" ftype="cram" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <conditional name="cond_region">
                        <param name="select_region" value="text" />
                        <param name="regions" value="CHROMOSOME_I" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="bam" />
                    </conditional>
                </conditional>
            </conditional>
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" value="test.fa" />
            </conditional>
            <output name="outputsam" file="test_19.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 20) CRAM to BAM conversion restricted to the regions listed in a
             BED file (test.bed), using a reference from the history -->
        <test>
            <param name="input" value="test_15.cram" ftype="cram" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <conditional name="cond_region">
                        <param name="select_region" value="bed" />
                        <param name="bedfile" value="test.bed" ftype="bed" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="bam" />
                    </conditional>
                </conditional>
            </conditional>
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" value="test.fa" />
            </conditional>
            <output name="outputsam" file="test_20.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 21) sampling options target < total reads: SAM input subsampled
             to a target of 2 reads, output in the input format. No seed
             param is set here; the comparison tolerates 10 differing lines. -->
        <test>
            <param name="input" value="in_test_15.sam" ftype="sam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="target" value="2" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_21.sam" ftype="sam" compare="diff" lines_diff="10" />
        </test>
        <!-- 22) target > total reads: SAM input subsampled to a target of
             20 reads, output in the input format -->
        <test>
            <param name="input" value="in_test_15.sam" ftype="sam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="target" value="20" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_22.sam" ftype="sam" lines_diff="2" />
        </test>
        <!-- 23) subsampling SAM input without reads (target of 20 reads,
             output in the input format) -->
        <test>
            <param name="input" value="in_test_23.sam" ftype="sam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="target" value="20" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_23.sam" ftype="sam" lines_diff="2" />
        </test>
        <!-- 24) BAM counterpart of test 23: target-based subsampling
             (target of 20 reads), output in the input format -->
        <test>
            <!-- subsampling BAM input without reads -->
            <param name="input" value="in_test_24.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="target" value="20" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_24.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- 25) seeded target subsampling of SAM input: seed 7, target of
             2 reads, output in the input format -->
        <test>
            <param name="input" value="in_test_15.sam" ftype="sam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="seed" value="7" />
                        <param name="target" value="2" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_25.sam" ftype="sam" compare="diff" lines_diff="2" />
        </test>
        <!-- 26) seeded target subsampling of BAM input: seed 7, target of
             2 reads, output in the input format -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="seed" value="7" />
                        <param name="target" value="2" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_26.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- 27) seeded target subsampling of BAM input with a larger target:
             seed 7, target of 20 reads, output in the input format -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="target" />
                        <param name="seed" value="7" />
                        <param name="target" value="20" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_27.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- 28) seeded fraction-based subsampling of BAM input: seed 7,
             factor 5, output in the input format -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="fraction" />
                        <param name="seed" value="7" />
                        <param name="factor" value="5" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_28.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- 29) seeded fraction-based subsampling of BAM input (seed 7,
             factor 1.25) with the "dropped" report type, i.e. the output
             holds the reads that were dropped rather than those retained -->
        <test>
            <param name="input" value="in_test_14.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="subsample_config">
                    <conditional name="subsampling_mode">
                        <param name="select_subsample" value="fraction" />
                        <param name="seed" value="7" />
                        <param name="factor" value="1.25" />
                    </conditional>
                </section>
                <conditional name="output_options">
                    <param name="reads_report_type" value="dropped" />
                    <conditional name="output_format">
                        <param name="oformat" value="input" />
                    </conditional>
                </conditional>
            </conditional>
            <output name="outputsam" file="test_29.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- 30) testing tag filtering: select reads by tag XS with value -18
             (BAM in, BAM out) and check that the tag option shows up in the
             generated command line -->
        <test>
            <param name="input" value="in_test_30.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <param name="tag" value="XS:-18" />
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="bam" />
                    </conditional>
                </conditional>
            </conditional>
            <assert_command>
                <has_text text="--tag 'XS:-18'" />
            </assert_command>
            <output name="outputsam" file="test_30.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- 31) testing readname filtering: the qname_file param is set to
             readnames.txt (BAM in, BAM out) and the generated command line
             must contain the qname-file option -->
        <test>
            <param name="input" value="in_test_30.bam" ftype="bam" />
            <conditional name="mode">
                <param name="outtype" value="selected_reads" />
                <section name="filter_config">
                    <param name="qname_file" value="readnames.txt" />
                </section>
                <conditional name="output_options">
                    <conditional name="output_format">
                        <param name="oformat" value="bam" />
                    </conditional>
                </conditional>
            </conditional>
            <assert_command>
                <has_text text="--qname-file" />
            </assert_command>
            <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" />
        </test>
    </tests>
    <help>
**What it does**

Samtools view can:

1. convert between alignment formats (SAM, BAM, CRAM)
2. filter and subsample alignments according to user-specified criteria
3. count the reads in the input dataset or those retained after filtering
   and subsampling
4. obtain just the header of the input in any supported format

In addition, the tool has (limited) options to modify read records during conversion and/or filtering by:

- stripping them of user-specified tags
- collapsing backward CIGAR operations if they are specified in their CIGAR
  fields

With default settings, the tool generates a BAM dataset with the header and
reads found in the input dataset (which can be in SAM, BAM, or CRAM format).

**Alignment format conversion**

By changing the *Output format* it is possible to convert an input dataset to
another format.
Inputs of type SAM, BAM, and CRAM are accepted and can be converted to each of these formats (alternatively alignment counts can be computed) by selecting the appropriate "Output type".

.. class:: infomark

The tool allows you to specify a reference sequence. This is required for SAM input with missing @SQ headers (which include sequence names, length, md5, etc) and useful (and sometimes necessary) for CRAM input and output. In the following the use of the reference sequence in the CRAM format is detailed.
CRAM is (primarily) a reference-based compressed format, i.e. only sequence differences between aligned reads and the reference are stored. As a consequence, the reference that was used during read mapping is needed in order to interpret the alignment records (a checksum stored in the CRAM file is used to verify that only the correct reference sequence can be used). This allows for more space-efficient storage than with BAM format, but such a CRAM file is not usable without its reference.
It is also possible, however, to use CRAM without a reference, with the disadvantage that the reference sequence then gets stored explicitly (as in SAM and BAM).

The Galaxy tool **currently generates only CRAM without reference sequence**.

For reference-based CRAM input, the correct reference sequence needs to be specified.

**Filtering alignments**

If you ask for *A filtered/subsampled selection of reads*, the tool will allow
you to specify filter conditions and/or to choose a subsampling strategy, and
the output will contain one of the following depending on your choice under
*What would you like to have reported?*:

- All reads retained after filtering and subsampling
- Reads dropped during filtering and subsampling

If instead you want to *split* the input reads based on your criteria and
obtain *two* datasets, one with the retained and one with the dropped reads, check
the *Produce extra dataset with dropped/retained reads?* option.


**Filtering by regions**

You may specify one or more space-separated region specifications after the input filename to restrict output to only those alignments which overlap the specified region(s). Use of region specifications requires a coordinate-sorted and indexed input file (in BAM or CRAM format).

Regions can be specified as: RNAME[:STARTPOS[-ENDPOS]] and all position coordinates are 1-based.

.. class:: warningmark

When multiple regions are given, some alignments may be output multiple times if they overlap more than one of the specified regions.

Examples of region specifications:

``chr1``
   Output all alignments mapped to the reference sequence named 'chr1' (i.e. @SQ SN:chr1).

``chr2:1000000``
   The region on chr2 beginning at base position 1,000,000 and ending at the end of the chromosome.

``chr3:1000-2000``
   The 1001bp region on chr3 beginning at base position 1,000 and ending at base position 2,000 (including both end positions).

``*``
   Output the unmapped reads at the end of the file. (This does not include any unmapped reads placed on a reference sequence alongside their mapped mates.)

``.``
   Output all alignments. (Mostly unnecessary as not specifying a region at all has the same effect.)

**Filtering by quality**

This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition.

**Filtering by Tag**

This filter allows you to select reads based on tool- or user-specific tags, e.g., XS:i:-18, the alignment score tag of bowtie.
Thus, to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18.
You can also just write STR1 without the value STR2, in which case the filter selects all reads with the tag STR1, e.g., XS.

    </help>
    <expand macro="citations"/>
</tool>