view samtools_markdup.xml @ 2:a312a0fdaf31 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_markdup commit e33502c1fc025859ccbcd2a273f837fac9d322e0"
author iuc
date Thu, 22 Apr 2021 12:58:55 +0000
parents 83b8e36e9cbe
children d0a568e1904b
line wrap: on
line source

<tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy3">
    <description>marks duplicate alignments</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
@ADDTHREADS@
## samtools markdup needs coordinate sorted input. Anything that is not a
## Galaxy bam dataset (i.e. SAM or CRAM) is coordinate sorted first.
## The -m option of samtools sort is a per thread limit, so it is given the
## per slot share of the job memory rather than the whole allocation.
#if not $bamfile.is_of_type('bam'):
    samtools sort
    -@ \$addthreads -m \${GALAXY_MEMORY_MB_PER_SLOT:-768}M -T "\${TMPDIR:-.}"
    -O sam
    -o coordsort.sam
    '$bamfile' &&
#else:
    ## BAM input is linked as is. samtools detects the format from the file
    ## content, so the .sam suffix of the link name does not matter.
    ln -s '$bamfile' coordsort.sam &&
#end if

## Mark (or, with -r, remove) duplicates. The boolean inputs expand to their
## flag or to an empty string.
samtools markdup
-@ \$addthreads
#if str($maxlen) != '':
    -l $maxlen
#end if
$remove
$stats
$supp
coordsort.sam
'$output'
#if $stats:
    ## Mirror stderr into the statistics dataset while still forwarding it
    ## to the stderr of the job, so error messages are not swallowed.
    2> >(tee -a '$stats_output' >&2)
#end if
    ]]></command>
    <inputs>
        <!-- Alignment dataset to scan for duplicates; SAM, BAM and CRAM are accepted. -->
        <param
            name="bamfile"
            type="data"
            format="sam,bam,cram"
            optional="false"
            label="Alignment"/>
        <!-- Checked: -r is placed on the samtools markdup command line; unchecked: nothing is added. -->
        <param
            name="remove"
            type="boolean"
            argument="-r"
            truevalue="-r"
            falsevalue=""
            label="Remove duplicate reads"/>
        <!-- Optional non-negative integer; when left empty the -l option is omitted from the command line. -->
        <param
            name="maxlen"
            type="integer"
            argument="-l"
            optional="true"
            min="0"
            label="Expected maximum read length of INT bases. (default 300)"/>
        <!-- Checked: -s is passed and the additional statistics output dataset is produced. -->
        <param
            name="stats"
            type="boolean"
            argument="-s"
            truevalue="-s"
            falsevalue=""
            label="Print basic statistics"/>
        <!-- Checked: -S is placed on the samtools markdup command line; unchecked: nothing is added. -->
        <param
            name="supp"
            type="boolean"
            argument="-S"
            truevalue="-S"
            falsevalue=""
            label="Mark supplementary reads of duplicates as duplicates"/>
    </inputs>
    <outputs>
        <!-- Alignments written by samtools markdup, registered as a BAM dataset.
             The command section does not re-sort the result after marking. -->
        <data format="bam" name="output"/>
        <!-- Statistics dataset; only created when the "stats" input is enabled. -->
        <data format="txt"
              name="stats_output"
              label="${tool.name} on ${on_string}: statistics">
            <filter>stats</filter>
        </data>
    </outputs>
    <tests>
        <!-- tests and data extracted from
             https://github.com/samtools/samtools/blob/6d79411685d8f0fbb34e123f52d72b63271f4dcb/test/test.pl#L2616
             TODO the 1st 4 upstream tests are negative tests and are not ported yet;
             they could be expressed with expect_failure / expect_exit_code like the
             last test of this section.
        -->
        <!--    test_cmd($opts, out=>'markdup/1_name_sort.expected.sam', err=>'1_name_sort.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/1_name_sort.sam -", expect_fail=>1);
                test_cmd($opts, out=>'markdup/2_bad_order.expected.sam', err=>'2_bad_order.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/2_bad_order.sam -", expect_fail=>1);
                test_cmd($opts, out=>'markdup/3_missing_mc.expected.sam', err=>'3_missing_mc.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/3_missing_mc.sam -", expect_fail=>1);
                test_cmd($opts, out=>'markdup/4_missing_ms.expected.sam', err=>'4_missing_ms.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/4_missing_ms.sam -", expect_fail=>1);-->
        <!--    test_cmd($opts, out=>'markdup/5_markdup.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/5_markdup.sam -");-->
        <!-- default settings: duplicates are marked only -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="5_markdup.sam" />
            <output name="output" file="5_markdup.expected.bam" />
        </test>
        <!--    test_cmd($opts, out=>'markdup/6_remove_dups.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam -r $$opts{path}/markdup/6_remove_dups.sam -");-->
        <!-- remove duplicates (-r) -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="6_remove_dups.sam" />
            <param name="remove" value="true" />
            <output name="output" file="6_remove_dups.expected.bam" />
        </test>
        <!-- test_cmd($opts, out=>'markdup/7_mark_supp_dup.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -S -O sam $$opts{path}/markdup/7_mark_supp_dup.sam -");-->
        <!-- mark supplementary reads of duplicates (-S), BAM input -->
        <test expect_num_outputs="1">
            <param name="bamfile" value="7_mark_supp_dup.bam" />
            <param name="supp" value="true" />
            <output name="output" file="7_mark_supp_dup.expected.bam" />
        </test>
        <!-- test stats output -->
        <test expect_num_outputs="2">
            <param name="bamfile" value="5_markdup.sam" />
            <param name="stats" value="true" />
            <output name="output" file="5_markdup.expected.bam" />
            <output name="stats_output" file="stats.txt" />
        </test>
        <!-- check that stderr is not swallowed w test data from fixmate.
             No expect_num_outputs here: the number of outputs of a job that is
             expected to fail should not be asserted. -->
        <test expect_exit_code="1" expect_failure="true">
            <param name="bamfile" value="7_two_read_mapped.sam" />
            <param name="stats" value="true"/>
            <!-- output assertions are not possible in a test that expects failure,
                 so the content of the statistics dataset cannot be checked here:
            <output name="stats_output">
                <assert_contents>
                    <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/>
                </assert_contents>
            </output> -->
            <assert_stderr>
                <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/>
            </assert_stderr>
        </test>
    </tests>
    <help>
Mark duplicate alignments from a coordinate sorted file that has been run through fixmate with the -m option. This program relies on the MC and ms tags that fixmate provides. 

Note: The Galaxy tool coordinate sorts the data automatically unless the input is a BAM dataset (i.e. SAM and CRAM inputs are always sorted first).
The output is always BAM; it is not sorted by query name again afterwards.

The optional basic statistics output of samtools markdup can be visualized with MultiQC.

	</help>
    <expand macro="citations"/>
</tool>