<!--
view lumpy_preprocessing.xml @ 1:80adce82354c draft default tip

"planemo upload for repository https://github.com/arq5x/lumpy-sv commit 41cb10683d89fa20e122f4c60b8f5bf0e3c390d8"
author iuc
date Sat, 10 Jul 2021 07:40:25 +0000
parents 21f2361c32c9
children
line wrap: on
line source
-->

<tool id="lumpy_prep" name="LUMPY preprocessing" version="@WRAPPER_VERSION@" profile="20.05">
    <description>extracts discordant read pairs and split-read alignments from a BAM dataset</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">lumpy-sv</requirement>
    </requirements>
    <!--
        Pre-processing pipeline. `set -o pipefail` makes a failure in any stage of a pipe fail
        the whole pipe, and the steps are chained with && so the first failure stops the job;
        detect_errors="exit_code" then reports the job as failed.

          1. Collate the input BAM, stream it as SAM through samblaster (using the flags chosen
             via the dup_handling parameter, plus mate tags) and store the result as blasted.bam.
          2. Copy the records of blasted.bam that have none of the flag bits in 1294 set
             (1294 = 1024 + 256 + 8 + 4 + 2, i.e. duplicate, secondary, mate unmapped, unmapped,
             proper pair in the SAM FLAG definition) to discordants.unsorted.bam.
          3. Stream blasted.bam, header included, through extractSplitReads_BwaMem and store the
             result as splitters.unsorted.bam.
          4. Sort both extracted files into discordants.bam and splitters.bam.

        NOTE(review): samtools presumably records each command line in an @PG header line of its
        output, so changing flags or redirections here may change the output headers that the
        tool tests compare. Confirm (and regenerate test data if needed) before editing.
    -->
    <command detect_errors="exit_code"><![CDATA[
set -o pipefail;

samtools collate -O --output-fmt sam '$bam_in' | samblaster $dup_handling --addMateTags | samtools view -bS - > blasted.bam &&

samtools view -b -F 1294 blasted.bam > discordants.unsorted.bam &&
samtools view -h blasted.bam | extractSplitReads_BwaMem -i stdin | samtools view -Sb - > splitters.unsorted.bam &&

samtools sort discordants.unsorted.bam > discordants.bam &&
samtools sort splitters.unsorted.bam > splitters.bam
    ]]></command>
    <inputs>
        <!-- Alignments to pre-process; referenced as $bam_in in the command. -->
        <param name="bam_in"
               type="data"
               format="bam"
               label="BAM input dataset"/>
        <!-- The value of the chosen option is placed verbatim into the samblaster call of the
             command (as $dup_handling); the empty value adds no flags at all. -->
        <param name="dup_handling"
               type="select"
               label="Duplicate detection and removal"
               help="Duplicate reads can be detected and removed with the help of samblaster. If you have already marked duplicates in the input dataset with another tool, you can choose to skip duplicate detection and remove only pre-marked duplicates. Not handling duplicates is only recommended if you have already deduplicated your input with other tools.">
            <option value="--removeDups">Detect and remove duplicates (samblaster -r)</option>
            <option value="--acceptDupMarks --removeDups">Remove duplicates marked in input data (samblaster -a -r)</option>
            <option value="">Don't handle duplicates</option>
        </param>
    </inputs>
    <outputs>
        <!-- Each dataset is collected from the file named in from_work_dir, which the command
             writes into the job working directory. -->

        <!-- Collated copy of the input after it has passed through samblaster. -->
        <data name="blasted_bam"
              format="qname_sorted.bam"
              from_work_dir="blasted.bam"
              label="${tool.name}: Collated and groomed on ${on_string}"/>

        <!-- Sorted discordant read pairs extracted from blasted.bam. -->
        <data name="discordants_bam"
              format="bam"
              from_work_dir="discordants.bam"
              label="${tool.name}: Discordant read pairs on ${on_string}"/>

        <!-- Sorted split-read alignments extracted from blasted.bam. -->
        <data name="splitters_bam"
              format="bam"
              from_work_dir="splitters.bam"
              label="${tool.name}: Split-read alignments on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Single test case: runs the tool on sample.bam with the empty dup_handling value
             (the "Don't handle duplicates" option) and compares all three outputs with the
             stored reference files, using a lines_diff tolerance of 2 for each. -->
        <test>
            <param name="bam_in" value="sample.bam" ftype="bam"/>
            <param name="dup_handling" value=""/>
            <output name="blasted_bam" file="blasted.bam" ftype="qname_sorted.bam" lines_diff="2"/>
            <output name="discordants_bam" file="discordants.bam" ftype="bam" lines_diff="2"/>
            <output name="splitters_bam" file="splitters.bam" ftype="bam" lines_diff="2"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool implements the mapped reads preprocessing `suggested for LUMPY <https://github.com/arq5x/lumpy-sv#pre-processing>`__. It uses samblaster_, samtools and LUMPY's extractSplitReads_BwaMem script to collate and groom an input BAM dataset (preferably produced with bwa-mem) and to extract discordant read pairs and split-read alignments from it. The three output datasets can serve as the three inputs to LUMPY.

.. _samblaster: https://github.com/GregoryFaust/samblaster
    ]]></help>
    <!--
        Expands the "citations" macro (presumably defined in the imported macros.xml; confirm)
        and hands it two additional citations: one given as a DOI and one given as a BibTeX
        @misc entry that points at the SAMtools GitHub page.
        NOTE(review): how these nested entries are merged with the macro's own citations depends
        on the macro definition, which is not visible in this file.
    -->
    <expand macro="citations">
        <citation type="doi">10.1093/bioinformatics/btu314</citation>
        <citation type="bibtex">
            @misc{SamTools_github,
            title={SAMTools GitHub page},
            url = {https://github.com/samtools/samtools},}
        </citation>
    </expand>
</tool>