view lumpy_preprocessing.xml @ 0:21f2361c32c9 draft

"planemo upload for repository https://github.com/arq5x/lumpy-sv commit cce17262b21b0964c31eb983bac5e89ae92b8ee9"
author iuc
date Thu, 12 Nov 2020 16:48:34 +0000
parents
children 80adce82354c
line wrap: on
line source

<tool id="lumpy_prep" name="LUMPY preprocessing" version="@WRAPPER_VERSION@">
    <!-- One-line summary of the tool's purpose. -->
    <description>extracts discordant read pairs and split-read alignments from a BAM dataset</description>
    <!-- macros.xml is not visible from this file. It is presumably where the TOOL_VERSION and
         WRAPPER_VERSION tokens and the "citations" macro used in this wrapper are defined;
         TODO confirm there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Only lumpy-sv is requested explicitly. The command section also calls samtools,
         samblaster and extractSplitReads_BwaMem; presumably these are shipped with, or installed
         as dependencies of, the lumpy-sv package. TODO confirm against the package recipe. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">lumpy-sv</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Without pipefail the exit status of a pipeline is that of its last command only,
## so a failure of an upstream stage (samtools collate, samblaster or
## extractSplitReads_BwaMem) would not be seen by the exit-code based error
## detection configured on this command element.
set -o pipefail &&

## Step 1: group reads by name with samtools collate, let samblaster add mate tags
## and apply the selected duplicate handling, then store the stream as blasted.bam.
samtools collate -O --output-fmt sam '$bam_in' | samblaster $dup_handling --addMateTags | samtools view -bS - > blasted.bam &&

## Step 2: extract discordant read pairs. The filter -F 1294 drops every read that has
## any of these SAM flags set: properly paired (2), unmapped (4), mate unmapped (8),
## secondary alignment (256), duplicate (1024).
samtools view -b -F 1294 blasted.bam > discordants.unsorted.bam &&
## Step 3: extract split-read alignments by streaming the SAM records, header included,
## through extractSplitReads_BwaMem.
samtools view -h blasted.bam | extractSplitReads_BwaMem -i stdin | samtools view -Sb - > splitters.unsorted.bam &&

## Step 4: sort both extracts with the samtools sort default ordering (by coordinate).
samtools sort discordants.unsorted.bam > discordants.bam &&
samtools sort splitters.unsorted.bam > splitters.bam
    ]]></command>
    <inputs>
        <!-- Alignments to preprocess; referenced in the command template as $bam_in. -->
        <param name="bam_in"
               type="data"
               format="bam"
               label="BAM input dataset"/>
        <!-- Each option value is the literal flag string inserted into the samblaster call
             as $dup_handling; the empty value adds no duplicate-handling flags at all. -->
        <param name="dup_handling"
               type="select"
               label="Duplicate detection and removal"
               help="Duplicate reads can be detected and removed with the help of samblaster. If you have already marked duplicates in the input dataset with another tool, you can choose to skip duplicate detection and remove only pre-marked duplicates. Not handling duplicates is only recommended if you have already deduplicated your input with other tools.">
            <option value="--removeDups">Detect and remove duplicates (samblaster -r)</option>
            <option value="--acceptDupMarks --removeDups">Remove duplicates marked in input data (samblaster -a -r)</option>
            <option value="">Don't handle duplicates</option>
        </param>
    </inputs>
    <outputs>
        <!-- Each dataset is collected from a file that the command writes into the job
             working directory (see from_work_dir). -->
        <data name="blasted_bam"
              format="qname_sorted.bam"
              from_work_dir="blasted.bam"
              label="${tool.name}: Collated and groomed on ${on_string}"/>
        <data name="discordants_bam"
              format="bam"
              from_work_dir="discordants.bam"
              label="${tool.name}: Discordant read pairs on ${on_string}"/>
        <data name="splitters_bam"
              format="bam"
              from_work_dir="splitters.bam"
              label="${tool.name}: Split-read alignments on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Single scenario: preprocess sample.bam with the empty duplicate-handling option
             and compare all three outputs with the stored expected files. -->
        <test>
            <param name="bam_in" value="sample.bam" ftype="bam"/>
            <param name="dup_handling" value=""/>

            <output name="blasted_bam" file="blasted.bam" ftype="qname_sorted.bam"/>
            <output name="discordants_bam" file="discordants.bam" ftype="bam"/>
            <output name="splitters_bam" file="splitters.bam" ftype="bam"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool implements the preprocessing of mapped reads `suggested for LUMPY <https://github.com/arq5x/lumpy-sv#pre-processing>`__. It uses samblaster_, samtools and LUMPY's extractSplitReads_BwaMem script to collate and groom an input BAM dataset (preferably produced with bwa-mem), and to extract discordant read pairs and split-read alignments from it. The three output datasets can then serve as the three inputs to LUMPY.

.. _samblaster: https://github.com/GregoryFaust/samblaster
    ]]></help>
    <!-- The nested citation elements are handed to the "citations" macro, which is defined
         outside this file (presumably in macros.xml; TODO confirm what it adds on its own).
         NOTE(review): the DOI entry appears to be the samblaster publication; please confirm.
         The BibTeX entry points to the samtools GitHub repository. -->
    <expand macro="citations">
        <citation type="doi">10.1093/bioinformatics/btu314</citation>
        <citation type="bibtex">
            @misc{SamTools_github,
            title={SAMTools GitHub page},
            url = {https://github.com/samtools/samtools},}
        </citation>
    </expand>
</tool>