view bcftools_concat.xml @ 22:74303445f6bd draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 868f4386992de55d25da197660a43a913a09c8df
author iuc
date Wed, 14 Aug 2024 16:40:38 +0000
parents 03e0645395d2
children
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Concatenate or combine VCF/BCF files</description>
    <!-- Macro setup: the EXECUTABLE token is defined locally as "concat" and is
         substituted into the tool name/id, the bcftools command line and the
         help text of this file. All other tokens and macros used below are
         not defined here; they come from the imported macros.xml. -->
    <macros>
        <token name="@EXECUTABLE@">concat</token>
        <import>macros.xml</import>
    </macros>
    <!-- Expansions of the macros named bio_tools, requirements and
         version_command; their content is not visible in this file
         (presumably shared across the bcftools wrappers; confirm in macros.xml). -->
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!-- Command template (Cheetah), in the order it is rendered:
           1. The PREPARE_ENV, PREPARE_INPUT_FILES and PREPARE_REGIONS_FILE tokens
              come first; their bodies are not defined in this file. $section is
              bound to sec_restrict just before the regions-file token
              (presumably because that token reads $section; confirm in macros.xml).
           2. The "bcftools concat" options are built from sec_default: either the
              naive flag alone, or, in non-naive mode, the optional
              allow-overlaps / rm-dups pair followed by the ligate flag and the
              optional ligate mode. compact_PS and min-PQ are emitted in both
              modes (min-PQ only when the field is not empty).
           3. $section is rebound to sec_restrict for the REGIONS token, then the
              OUTPUT_TYPE, THREADS and INPUT_FILES tokens follow.
           4. stdout is redirected to $output_file, except when naive mode is
              combined with output_type "v": there the stream is first saved as
              output.gz, indexed, and converted with "bcftools view -O v" into
              $output_file.
         Do not move the "#set $section" lines: the statements after each one
         depend on which section is currently bound. -->
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILES@
#set $section = $sec_restrict
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## Default section
#set $section = $sec_default

#if $section.mode.naive == "yes":
    --naive
#else:
    #if $section.mode.overlaps.allow_overlaps == 'yes':
        --allow-overlaps
        #if $section.mode.overlaps.rm_dups:
            --rm-dups $section.mode.overlaps.rm_dups
        #end if
    #end if
    ${section.mode.ligate}
    #if $section.mode.ligate_mode
        $section.mode.ligate_mode
    #end if
#end if
${section.compact_PS}
#if str($section.min_PQ):
    --min-PQ ${section.min_PQ}
#end if

#set $section = $sec_restrict
@REGIONS@

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILES@
#if $sec_default.mode.naive == "yes" and $output_type == 'v':
    > output.gz && bcftools index output.gz && bcftools view -O v -o '$output_file' output.gz
#else:
    > '$output_file'
#end if
    ]]></command>
    <!-- Tool form. Parameter names and option values are referenced by the
         command template and by the tests, so only labels/help text may be
         edited freely. -->
    <inputs>
        <expand macro="macro_inputs" />
        <!-- Region restriction options; the command template reads them via $sec_restrict -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_restrict" />
        </section>
        <!-- Options specific to concat; the command template reads them via $sec_default -->
        <section name="sec_default" expanded="true" title="Concat Options">
            <!-- mode: the overlap and ligate options exist only in the "no"
                 (non-naive) branch; the "yes" branch has no further parameters -->
            <conditional name="mode">
                <param argument="--naive" type="select" label="Naive concat">
                    <help><![CDATA[
--naive concatenates VCF or BCF files without recompression. This can be used to combine results that were generated separately for each chromosome.  This is very fast but requires that all files are of the same type (all VCF or all BCF) and have the same headers. This is because all tags and chromosome names in the BCF body rely on the implicit order of the contig and tag definitions in the header. Currently no sanity checks are in place. Dangerous, use with caution.
                    ]]></help>
                    <option value="no">No </option>
                    <option value="yes">Yes </option>
                </param>
                <when value="yes"/>
                <when value="no">
                    <!-- overlaps: "yes" is listed first and is therefore the default;
                         rm_dups is only offered when overlaps are allowed -->
                    <conditional name="overlaps">
                        <param name="allow_overlaps" type="select" label="Allow Overlaps">
                            <help>
                            First coordinate of the next file can precede last record of the current file.
                            </help>
                            <option value="yes">Yes </option>
                            <option value="no">No </option>
                        </param>
                        <when value="yes">
                            <param name="rm_dups" type="select" label="Remove duplicate" optional="true">
                                <help><![CDATA[
                                 Output duplicate records present in multiple files only once:
                                   rm-dups <snps|indels|both|all|none>
                                ]]></help>
                                <option value="snps">snps - SNP records</option>
                                <option value="indels">indels - indel records</option>
                                <option value="both">both - both SNP and indel records</option>
                                <option value="all">all - records</option>
                                <option value="none">none - output multiple records instead</option>
                            </param>
                        </when>
                        <when value="no"/>
                    </conditional>
                    <!-- The boolean's truevalue and the select's option values are the
                         literal command-line flags; the template emits them verbatim -->
                    <param name="ligate" type="boolean" truevalue="--ligate" falsevalue="" label="Ligate"
                           help="Ligate phased VCFs by matching phase at overlapping haplotypes" />
                    <param name="ligate_mode" type="select" optional="true" label="Ligate mode" help="It allows finer control of --ligate behavior">
                        <option value="--ligate-warn">Ligate warn: Drop sites that are present in one chunk but absent in the other (--ligate-warn)</option>
                        <option value="--ligate-force">Ligate force: Keep sites that are present only in one chunk (--ligate-force)</option>
                    </param>
                </when>
            </conditional>
            <!-- Emitted by the template in both naive and non-naive mode -->
            <param name="compact_PS" type="boolean" truevalue="--compact-PS" falsevalue="" label="Compact PS"
                   help="Do not output PS tag at each site, only at the start of a new phase set block." />
            <!-- Optional: when the field is left empty the template omits the flag -->
            <param name="min_PQ" type="integer" label="Min PQ" value="30" optional="true"
                   help="Break phase set if phasing quality is lower than &lt;int&gt;" />
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <!-- Output datasets come from the macro_vcf_output macro (not defined in this
         file). The command template writes to $output_file and the tests check
         an output named output_file, so the macro presumably declares that
         dataset; confirm in macros.xml. -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <!-- Functional tests. Parameters that live inside sec_default / mode /
         overlaps are addressed through the matching section and conditional
         elements in every test, so each reference is unambiguous. -->
    <tests>
        <!-- Defaults only: non-naive concatenation of two VCF files -->
        <test>
            <param name="input_files" ftype="vcf" value="concat.1.b.vcf,concat.1.a.vcf" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="1\t100"/>
                    <has_text_matching expression="3\t192"/>
                </assert_contents>
            </output>
        </test>
        <!-- Naive mode explicitly switched off -->
        <test>
            <param name="input_files" ftype="vcf" value="concat.1.b.vcf,concat.1.a.vcf" />
            <param name="output_type" value="v" />
            <section name="sec_default">
                <conditional name="mode">
                    <param name="naive" value="no" />
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="1\t100"/>
                    <has_text_matching expression="3\t192"/>
                </assert_contents>
            </output>
        </test>
        <!-- Overlapping inputs with overlaps allowed and no duplicate removal selected -->
        <test>
            <param name="input_files" ftype="vcf" value="concat.2.b.vcf,concat.2.a.vcf" />
            <param name="output_type" value="v" />
            <section name="sec_default">
                <conditional name="mode">
                    <conditional name="overlaps">
                        <param name="allow_overlaps" value="yes" />
                    </conditional>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="2\t140\t.\tA\tG"/>
                    <has_text_matching expression="2\t140\t.\tGT\tG"/>
                </assert_contents>
            </output>
        </test>
        <!-- Overlapping inputs with rm_dups set to "none" -->
        <test>
            <param name="input_files" ftype="vcf" value="concat.2.b.vcf,concat.2.a.vcf" />
            <param name="output_type" value="v" />
            <section name="sec_default">
                <conditional name="mode">
                    <conditional name="overlaps">
                        <param name="allow_overlaps" value="yes" />
                        <param name="rm_dups" value="none" />
                    </conditional>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="2\t160\t.\tTAAAA"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test region overlap: the regions-overlap flag must reach the command line -->
        <test>
            <param name="input_files" ftype="vcf" value="concat.1.b.vcf,concat.1.a.vcf" />
            <param name="output_type" value="v" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="1\t100"/>
                    <has_text_matching expression="3\t192"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
        <!-- Test ligate options: overlaps disabled, ligate enabled with the "warn" mode -->
        <test>
            <param name="input_files" ftype="vcf" value="concat.1.b.vcf,concat.1.a.vcf" />
            <param name="output_type" value="v" />
            <section name="sec_default">
                <conditional name="mode">
                    <conditional name="overlaps">
                        <param name="allow_overlaps" value="no"/>
                    </conditional>
                    <param name="ligate" value="true"/>
                    <param name="ligate_mode" value="--ligate-warn"/>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="1\t100"/>
                    <has_text_matching expression="3\t192"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--ligate-warn" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

Concatenate or combine VCF/BCF files. All source files must have the same sample columns appearing in the same order. The program can be used, for example, to concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel VCF into one. The input files must be sorted by chr and position. The files must be given in the correct order to produce sorted VCF on output unless the -a, --allow-overlaps option is specified.


Naive concatenation is useful when using a Galaxy workflow that splits a BAM file by chromosome, processes each in parallel, then bcftools concat merges the results into a single VCF file:

BAM -> bamtools split => bcftools mpileup => bcftools call => bcftools concat -> VCF


@REGIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
    ]]></help>
    <expand macro="citations" />
</tool>