view artbio_bam_cleaning.xml @ 6:999c2b871f36 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/artbio_bam_cleaning commit 587b99e907726b20f0cdd978be8f9ed257a68243"
author artbio
date Wed, 07 Apr 2021 01:31:51 +0000
parents ebdaf5b3d6a7
children 745f529127b8
line wrap: on
line source

<tool id="artbio_bam_cleaning" name="ARTbio bam cleaning" version="1.6+galaxy6">
    <!-- Short tagline shown next to the tool name. -->
    <description>on flags and PCR Duplicates and MD recalibration</description>
    <!-- Shared definitions. The pipefail and set_fasta_index tokens used in the
         command block and the reference_source_conditional macro used in the
         inputs are expected to be defined in macro.xml (confirm against that file). -->
    <macros>
        <import>macro.xml</import>
    </macros>
    <!-- Pinned packages providing the executables called in the command block. -->
    <requirements>
        <!-- samtools: used for the rmdup and calmd steps -->
        <requirement version="1.6" type="package">samtools</requirement>
        <!-- sambamba: used for the two "sambamba view" filters -->
        <requirement version="0.7.1" type="package">sambamba</requirement>
        <!-- freebayes: presumably the package that ships bamleftalign (confirm against package contents) -->
        <requirement version="1.3.2" type="package">freebayes</requirement>
    </requirements>
    <!-- Any exit code of 1 or above is reported as a fatal job error. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Error occurred" />
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
    ## The two macro tokens below are expanded from the imported macros before templating;
    ## the selected reference is expected to end up as reference.fa (confirm in macro.xml).
    @pipefail@
    @set_fasta_index@
    #set input_base = 'input'
    ## Stage the input BAM and its index under fixed names in the job working directory.
    ## Dataset paths are single-quoted so unusual characters cannot break the shell command.
    ln -f -s '$input_bam.metadata.bam_index' ${input_base}.bam.bai &&
    ln -f -s '$input_bam' ${input_base}.bam &&
    ## Step 1: keep reads with mapping quality of at least 1 whose read and mate are both mapped.
    sambamba view -h -t \${GALAXY_SLOTS:-2} --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' ${input_base}.bam
    #if $skip_rmdup == 'no':
        ## Step 2 (optional): remove duplicates; tee keeps a copy of the stream in the working directory.
        | samtools rmdup -s - - | tee ${input_base}.filt1.dedup.bam
    #end if
    ## Step 3: left-align with bamleftalign, then run samtools calmd against the reference.
    | bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
    | samtools calmd  -C 50 -b -@ \${GALAXY_SLOTS:-2} - reference.fa
    #if $skip_laststep == 'yes':
        ## Stop here: the calmd stream is the only output.
        > '$calmd'
    #else:
        ## Step 4: save the calmd stream and pass it through a final mapping-quality filter.
        | tee '$calmd'
        | sambamba view -h -t \${GALAXY_SLOTS:-2} --filter='mapping_quality <= 254' -f 'bam' /dev/stdin > '$fullfilter'
    #end if
    ]]></command>
    <inputs>
        <!-- Reference genome selection, supplied by the imported macros. -->
        <expand macro="reference_source_conditional" />
        <!-- Only BAM is accepted: the command block links this dataset's bam_index metadata. -->
        <param name="input_bam" type="data" format="bam" label="BAM file to process"/>
        <!-- When "yes", the samtools rmdup step is left out of the pipeline. -->
        <param name="skip_rmdup" type="select" label="skip remove pcr duplicate step ?" display="radio"
               help="useful if duplicates are already marked by other tools">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
        <!-- When "yes", the final sambamba view filter is skipped and only the calmd output is produced. -->
        <param name="skip_laststep" type="select" label="skip last sambamba view filter ?" display="radio"
               help="Only generate the calMD output">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
    </inputs>
    <outputs>
        <!-- Always produced: the stream written right after the samtools calmd step. -->
        <data format="bam" name="calmd" label="CalMD filter (for lumpy-smoove)" />
        <!-- Produced only when the final filter step is not skipped. -->
        <data format="bam" name="fullfilter" label="Full filtering (for somatic-varscan)">
            <filter>skip_laststep == "no"</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default options: duplicates removed, both outputs produced. -->
        <test expect_num_outputs="2">
            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="chr21.fa" />
            <output name="calmd" file="calmd.bam" ftype="bam" />
            <output name="fullfilter" file="full.bam" ftype="bam" />
        </test>
        <!-- Duplicate removal skipped, both outputs produced. -->
        <test expect_num_outputs="2">
            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
            <param name="reference_source_selector" value="history" />
            <param name="skip_rmdup" value="yes" />
            <param name="ref_file" value="chr21.fa" />
            <output name="calmd" file="normdup_calmd.bam" ftype="bam" />
            <output name="fullfilter" file="normdup_full.bam" ftype="bam" />
        </test>
        <!-- Final filter skipped: only the calmd output exists. The pipeline up to
             calmd is the same as in the default test, so the same expected file is reused. -->
        <test expect_num_outputs="1">
            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
            <param name="reference_source_selector" value="history" />
            <param name="skip_laststep" value="yes" />
            <param name="ref_file" value="chr21.fa" />
            <output name="calmd" file="calmd.bam" ftype="bam" />
        </test>
    </tests>
    <help>
ARTbio bam cleaning overview
============================

This tool wraps several cleaning steps to produce BAM files suitable for subsequent
analyses with lumpy-smoove (or other large structural variation callers) or with
somatic-varscan (or other small structural variation callers).


Workflow 
=============

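The tool runs the equivalent of the following command line for filtering (the
``samtools rmdup`` step and the final ``sambamba view`` filter are omitted when the
corresponding "skip" option is set to Yes):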

::

    sambamba view -h -t 8 --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
    &#124; samtools rmdup -s - -
    &#124; tee $input_base".filt1.dedup.bam" &#124; bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
    &#124; samtools calmd  -C 50 -b -@ 4 - reference.fa &gt; $input_base".filt1.dedup.bamleft.calmd.bam" ;
    sambamba view -h -t 8 --filter='mapping_quality &lt;&#61; 254' -f 'bam' -o $input_base".filt1.dedup.bamleft.calmd.filt2.bam" $input_base".filt1.dedup.bamleft.calmd.bam"
    
Purpose
--------

This "workflow" tool was created to limit the number of ``python metadata/set.py`` jobs
that run at each step of a standard Galaxy workflow. These jobs are poorly optimized and may take
a considerable amount of time when datasets are large; paying that cost at every step lowers the overall performance of the workflow.

    </help>
    <!-- Reference to cite for this tool, given as a DOI. -->
    <citations>
        <citation type="doi">10.1371/journal.pone.0168397</citation>
    </citations>
</tool>