view samtools_markdup.xml @ 8:a389f74c3630 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_markdup commit cd62639660bef041ba14ecff337fb98e84e75d8a
author iuc
date Mon, 20 Nov 2023 22:15:54 +0000
parents ce32171c6d44
children
line wrap: on
line source

<tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
    <description>marks duplicate alignments</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
@ADDTHREADS@

## Step 1: provide coordinate sorted input as coordsort.sam.
## Every input that Galaxy does not type as 'bam' (SAM, CRAM, unsorted or
## query name sorted BAM) is sorted here; a 'bam' input is only symlinked.
## The file name coordsort.sam is used in both cases.
#if not $bamfile.is_of_type('bam'):
    ## -m is a per-thread limit in samtools sort, so hand it the per-slot share
    ## of the job memory instead of the total to stay within the allocation
    ## when additional threads are used
    samtools sort
    -@ \$addthreads -m \${GALAXY_MEMORY_MB_PER_SLOT:-768}M -T "\${TMPDIR:-.}"
    -O sam
    -o coordsort.sam
    '$bamfile' &&
#else:
    ln -s '$bamfile' coordsort.sam &&
#end if

## Step 2: link the fasta reference into the working directory if needed
## because samtools will try to write a .fai file next to it
#if $output_options.output_format.select_oformat == "CRAM":
    ln -s '$output_options.output_format.ref_file' ref_file.fa &&
#end if

## Step 3: mark (or remove) the duplicates
samtools markdup

-@ \$addthreads
## optional integers are compared as strings so that an empty field is
## skipped while an explicit 0 is still passed on
#if str($maxlen) != '':
    -l $maxlen
#end if
$remove
$supp
#if str($odist) != '':
    -d $odist
#end if
$existing_tags
-m $mode
$include_fails
## statistics are written to their own dataset when requested
#if $output_options.stats == 'yes':
    -s
    -f '$stats_output'
#end if
-O $output_options.output_format.select_oformat
#if $output_options.output_format.select_oformat == "CRAM":
    --reference ref_file.fa
#end if
coordsort.sam
'$output'
    ]]></command>
    <inputs>
        <!-- alignment file to process -->
        <param name="bamfile" type="data" format="sam,unsorted.bam,cram" optional="false" label="Alignment"/>

        <!-- on/off switches: the true value is the literal flag inserted into the command line -->
        <param name="remove" argument="-r" type="boolean" truevalue="-r" falsevalue="" label="Remove duplicate reads"/>
        <param name="supp" argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Mark supplementary reads of duplicates as duplicates"/>
        <param name="existing_tags" argument="-c" type="boolean" truevalue="-c" falsevalue="" label="Clear previous duplicate settings and tags."/>

        <!-- optional numeric settings: when left empty the option is not added to the command line -->
        <param name="maxlen" argument="-l" type="integer" min="0" optional="true" label="Expected maximum read length of INT bases. (default 300, min=0)"/>
        <param name="odist" argument="-d" type="integer" min="1" optional="true" label="Optical distance (if set, marks with dt tag, min=1)"/>

        <param argument="--mode" type="select" label="Duplicate decision method for paired reads.">
            <option value="t" selected="true">(t) measure positions based on template start/end.</option>
            <option value="s">(s) measure positions based on sequence start.  </option>
        </param>
        <param argument="--include-fails" type="boolean" truevalue="--include-fails" falsevalue="" label="Include quality check failed reads."/>

        <section name="output_options" title="Output Options" expanded="true">
            <!-- "yes" adds a second output dataset holding the statistics -->
            <param name="stats" argument="-s" type="select" label="Print basic statistics">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <!-- the selected value is passed to samtools via -O; only CRAM needs a reference -->
            <conditional name="output_format">
                <param name="select_oformat" type="select" label="Output format" help="Specify output format">
                    <option value="SAM">SAM</option>
                    <option value="BAM" selected="true">BAM</option>
                    <option value="CRAM">CRAM</option>
                </param>
                <when value="SAM"/>
                <when value="BAM"/>
                <when value="CRAM">
                    <param name="ref_file" type="data" format="fasta" label="Reference FASTA file"/>
                </when>
            </conditional>
        </section>
    </inputs>
    <outputs>
        <!-- duplicate-marked alignments: the command writes them directly to $output,
             so no from_work_dir is needed; the datatype follows the selected output
             format and defaults to bam -->
        <data name="output" format="bam" label="${tool.name} on ${on_string}">
            <change_format>
                <when input="output_options.output_format.select_oformat" value="SAM" format="sam" />
                <when input="output_options.output_format.select_oformat" value="BAM" format="bam" />
                <when input="output_options.output_format.select_oformat" value="CRAM" format="cram" />
            </change_format>
        </data>
        <!-- statistics file given to markdup with -f; only present when statistics are requested -->
        <data name="stats_output" format="txt" label="${tool.name} on ${on_string}: statistics">
            <filter>(output_options['stats'] == 'yes')</filter>
        </data>
    </outputs>
    <tests>
        <!-- 1) defaults: SAM input, BAM output -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="1_markdup.sam" ftype="sam" />
            <output name="output" file="1_markdup.expected.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 2) remove duplicates (-r) instead of only marking them -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="2_remove_dups.sam" ftype="sam" />
            <param name="remove" value="-r" />
            <output name="output" file="2_remove_dups.expected.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 3) mark supplementary reads (-S); the input is typed as bam,
                so the command must not contain a sort step -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="3_mark_supp_dup.bam" ftype="bam" />
            <param name="supp" value="-S" />
            <output name="output" file="3_mark_supp_dup.expected.bam" ftype="bam" lines_diff="4" />
            <assert_command>
                <has_text text="samtools sort" negate="true" />
            </assert_command>
        </test>
        <!-- 4) test stats output: a second dataset with the statistics is expected -->
        <test expect_num_outputs="2">
            <param name="bamfile" value="1_markdup.sam" ftype="sam" />
            <param name="stats" value="yes" />
            <output name="output" file="1_markdup.expected.bam" ftype="bam" lines_diff="4" />
            <output name="stats_output" file="stats.txt" lines_diff="2" />
        </test>
        <!-- 5) check that stderr is not swallowed, using test data from fixmate -->
        <test expect_exit_code="1" expect_failure="true">
            <param name="bamfile" value="3_two_read_mapped.sam" ftype="sam" />
            <param name="stats" value="yes" />
            <assert_stderr>
                <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first." />
            </assert_stderr>
        </test>
        <!-- 6) check optical distance (-d) together with the -c option -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="1_markdup.sam" ftype="sam" />
            <param name="odist" value="10" />
            <param name="existing_tags" value="-c" />
            <output name="output" file="6_markdup.expected.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 7) check mode s -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="1_markdup.sam" ftype="sam" />
            <param name="mode" value="s" />
            <output name="output" file="7_markdup.expected.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 8) check include-fails -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="1_markdup.sam" ftype="sam" />
            <param name="include_fails" value="true" />
            <output name="output" file="8_markdup.expected.bam" ftype="bam" lines_diff="4" />
        </test>
        <!-- 9) test sam output format -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="1_markdup.sam" ftype="sam" />
            <param name="select_oformat" value="SAM" />
            <output name="output" file="9_markdup.expected.sam" ftype="sam" lines_diff="4" />
        </test>
        <!-- 10) essentially the same as test 9 (the input was converted to a sorted bam,
                 but Galaxy is told it is qname sorted) to test the qname sorted bam
                 format and to ensure that the sorting happens in the tool, i.e. the
                 qname_sorted bam is not converted implicitly; the output datatype is
                 checked like in test 9 -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="1_markdup.qname_sorted.bam" ftype="qname_sorted.bam" />
            <param name="select_oformat" value="SAM" />
            <output name="output" file="9_markdup.expected.sam" ftype="sam" lines_diff="4" />
            <assert_command>
                <has_text text="samtools sort" />
            </assert_command>
        </test>

        <!-- 11) test cram output format (needs a reference); SAM input must be sorted by the tool -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="10_markdup.sam" ftype="sam" />
            <param name="select_oformat" value="CRAM" />
            <param name="ref_file" value="test.fa" />
            <output name="output" file="11_markdup.expected.cram" ftype="cram" compare="sim_size" delta="250" />
            <assert_command>
                <has_text text="samtools sort" />
            </assert_command>
        </test>
    </tests>
    <help>
Mark duplicate alignments from a coordinate sorted file that has been run through fixmate with the -m option. This program relies on the MC and ms tags that fixmate provides.

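Note: The Galaxy tool coordinate sorts the data automatically unless the input is already a coordinate sorted BAM, i.e. SAM, CRAM and unsorted or query name sorted BAM inputs are sorted first.
The output keeps this coordinate sorting (the original sort order of the input is not restored) and is written in the selected output format (SAM, BAM or CRAM; BAM by default).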

The optional basic statistics output of samtools markdup can be visualized with MultiQC.

    </help>
    <expand macro="citations"/>
</tool>