view samtools_mpileup.xml @ 13:9da0311ab1dd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_mpileup commit 1354b1358df89a922c3243f7e27ce789094b8644"
author iuc
date Sun, 19 Dec 2021 15:57:35 +0000
parents 329deb17a9f7
children b61f5d6a7f25
line wrap: on
line source

<tool id="samtools_mpileup" name="Samtools mpileup" version="2.1.6" profile="@PROFILE@">
    <!-- Short description of the tool. -->
    <description>multi-way pileup of variants</description>
    <!-- Shared definitions are imported from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The three macros expanded below are defined outside this file; judging by
         their names they presumably supply the requirements, the stdio handling
         and the version command (confirm against macros.xml). -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
    ## Stage the inputs. The two tokens below are defined in macros.xml (not visible
    ## in this file). NOTE(review): the loop further down assumes the index-preparation
    ## token exposes the BAM files under the names '0', '1', ... and that the FASTA
    ## token defines the shell variable reffa - confirm against macros.xml.
    #set $input_bams = $input
    @PREPARE_IDX_MULTIPLE@
    @PREPARE_FASTA_IDX@

    ## samtools mpileup expects -G to name a FILE listing read-group IDs (one per line),
    ## so pasted read groups are written to a file in the working directory first.
    ## Galaxy may encode line breaks of text parameters as __cn__ / __cr__; both the
    ## encoded and the literal form are handled here.
    #set $excluded_rg_file = ''
    #if str( $advanced_options.advanced_options_selector ) == "advanced":
        #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
            #set $excluded_rg_file = 'excluded_read_groups.txt'
            #set $pasted_groups = str( $advanced_options.exclude_read_group.group_paste ).replace('__cr__', '').replace('__cn__', '\n')
            ## Create/truncate the file so it exists even when nothing was pasted.
            : > '$excluded_rg_file' &&
            #for $rg_line in $pasted_groups.splitlines():
                #set $rg_id = $rg_line.strip()
                #if $rg_id:
                    printf '%s\n' '$rg_id' >> '$excluded_rg_file' &&
                #end if
            #end for
        #end if
    #end if

    samtools mpileup
        -f "\$reffa"
    ## One positional argument per input BAM, referenced by its index.
    #for $i in range(len( $input_bams )):
        '${i}'
    #end for

    #if str( $advanced_options.advanced_options_selector ) == "advanced":
        #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
            ## The flags token (from macros.xml) reads $filter and sets $flags.
            #set $filter = $advanced_options.filter_by_flags.require_flags
            @FLAGS@
            --rf $flags
            #set $filter = $advanced_options.filter_by_flags.exclude_flags
            @FLAGS@
            --ff $flags
        #end if
        #if str( $advanced_options.limit_by_region.limit_by_regions ) == "limit":
            ## An optional text parameter may arrive as None or as an empty string;
            ## skip -r in both cases instead of passing an empty region.
            #set $region = str( $advanced_options.limit_by_region.region_paste ).strip()
            #if $region not in ("", "None"):
                -r '$region'
            #end if
            #if str( $advanced_options.limit_by_region.bed_regions ) != "None":
                -l '$advanced_options.limit_by_region.bed_regions'
            #end if
        #end if

        #if $excluded_rg_file:
            -G '$excluded_rg_file'
        #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history":
            -G '$advanced_options.exclude_read_group.read_groups'
        #end if
        ${advanced_options.ignore_overlaps}
        ${advanced_options.skip_anomalous_read_pairs}
        ${advanced_options.disable_probabilistic_realignment}
        -C ${advanced_options.coefficient_for_downgrading}
        -d ${advanced_options.max_reads_per_bam}
        ${advanced_options.extended_BAQ_computation}
        -q ${advanced_options.minimum_mapping_quality}
        -Q ${advanced_options.minimum_base_quality}
        ${advanced_options.qualities_illumina_onethree}
    #end if
    #if str( $output_options_cond.output_options_selector ) == 'advanced':
        ${output_options_cond.base_position_on_reads}
        ${output_options_cond.output_mapping_quality}
        ${output_options_cond.output_read_names}
        ${output_options_cond.output_all_pos}
        #if $output_options_cond.output_tags:
            --output-extra '$output_options_cond.output_tags'
        #end if
    #end if
    --output '$output_file_pu'
    ]]></command>
    <inputs>
        <!-- One or more BAM datasets; the metadata validator checks bam_index and
             reports the message below when it fails. -->
        <param name="input"
               type="data"
               format="bam"
               multiple="true"
               min="1"
               label="BAM file(s)">
            <validator type="metadata"
                       check="bam_index"
                       message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/>
        </param>
        <!-- Reference selection is provided by the mandatory_reference macro, which is defined outside this file. -->
        <expand macro="mandatory_reference" argument="-f"/>
        <!-- Advanced options: everything inside the "advanced" branch is only read by
             the command section when that branch is selected. -->
        <conditional name="advanced_options">
            <param name="advanced_options_selector" type="select" label="Set advanced options">
                <option selected="true" value="default">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="default" />
            <when value="advanced">
                <!-- Require/exclude reads by SAM flag; the option lists come from the flag_options macro. -->
                <conditional name="filter_by_flags">
                    <param name="filter_flags" type="select" label="Set filter by flags">
                        <option selected="true" value="nofilter">Do not filter</option>
                        <option value="filter">Filter by flags to exclude or require</option>
                    </param>
                    <when value="filter">
                        <param name="require_flags" argument="--rf/--incl-flags" type="select" multiple="true" display="checkboxes" label="Require">
                            <expand macro="flag_options" />
                        </param>
                        <param name="exclude_flags" argument="--ff/--excl-flags" type="select" multiple="true" display="checkboxes" label="Exclude">
                            <expand macro="flag_options" s4="true" s256="true" s512="true" s1024="true"/>
                        </param>
                    </when>
                    <when value="nofilter" />
                </conditional>
                <!-- Restrict the pileup to a BED file and/or a pasted region; both are optional. -->
                <conditional name="limit_by_region">
                    <param name="limit_by_regions" type="select" label="Select regions to call">
                        <option selected="true" value="no_limit">Do not limit</option>
                        <option value="limit">Specify regions</option>
                    </param>
                    <when value="limit">
                        <param name="bed_regions" argument="-l/--positions" type="data" format="bed" optional="true" label="skip unlisted positions (chr pos) or regions">
                            <validator type="dataset_ok_validator" />
                        </param>
                        <param name="region_paste" argument="-r/--region" type="text" optional="true" label="region in which pileup is generated" help="Format CHR:FROM-TO, e.g. 17:100-150. If used in conjunction with -l then considers the intersection of the two requests." />
                    </when>
                    <when value="no_limit" />
                </conditional>


                <!-- Read-group exclusion. The command section passes this selection to the
                     short option G, so the documented argument names that option
                     (the short option R is a different samtools switch). -->
                <conditional name="exclude_read_group">
                    <param name="exclude_read_groups" argument="-G/--exclude-RG" type="select" label="Select read groups to exclude">
                        <option selected="true" value="no_limit">Do not exclude</option>
                        <option value="history">From a text file</option>
                        <option value="paste">Paste a list of read groups</option>
                    </param>
                    <when value="history">
                        <param name="read_groups" type="data" format="txt" label="Text file">
                            <validator type="dataset_ok_validator" />
                        </param>
                    </when>
                    <when value="paste">
                        <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups" />
                    </when>
                    <when value="no_limit" />
                </conditional>
                <!-- Boolean switches render as their truevalue when checked and as an empty string otherwise. -->
                <param name="ignore_overlaps" argument="-x/--ignore-overlaps" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Disable read-pair overlap detection" />
                <param name="skip_anomalous_read_pairs" argument="-A/--count-orphans" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not discard anomalous read pairs" />
                <param name="disable_probabilistic_realignment" argument="-B/--no-BAQ" type="boolean" truevalue="-B" falsevalue="" checked="False" label="Disable BAQ (per-Base Alignment Quality), see below" />
                <param name="coefficient_for_downgrading" argument="-C/--adjust-MQ" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" help="Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50" />
                <param name="max_reads_per_bam" argument="-d/--max-depth" type="integer" min="0" value="8000" label="max per-file depth; avoids excessive memory usage" />
                <param name="extended_BAQ_computation" argument="-E/--redo-BAQ" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Recalculate BAQ on the fly" help="Ignore existing BQ tags" />
                <param name="minimum_mapping_quality" argument="-q/--min-MQ" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
                <param name="minimum_base_quality" argument="-Q/--min-BQ" type="integer" value="13" label="Minimum base quality for a base to be considered" />
                <param name="qualities_illumina_onethree" argument="-6/--illumina1.3+" type="boolean" truevalue="-6" falsevalue="" checked="False" label="quality is in the Illumina-1.3+ encoding"/>
            </when>
        </conditional>
        <!-- Output options: extra pileup columns and position reporting. The command
             section only reads these when the "advanced" branch is selected. -->
        <conditional name="output_options_cond">
            <param name="output_options_selector" type="select" label="Output options">
                <option selected="true" value="default">Default</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="default"/>
            <when value="advanced">
                <param name="base_position_on_reads" argument="-O/--output-BP" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" />
                <param name="output_mapping_quality" argument="-s/--output-MQ" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" />
                <param name="output_read_names" argument="--output-QNAME" type="boolean" truevalue="--output-QNAME" falsevalue="" checked="False" label="Output an extra column containing comma-separated read names. (--output-QNAME)" />
                <!-- The selected option value is inserted verbatim into the command line; "No" inserts nothing. -->
                <param name="output_all_pos" argument="-a" type="select" label="Output absolutely all positions" help="Output all positions, including those with zero depth. (-a) Output absolutely all positions, including unused reference sequences (-aa). Note that when used in conjunction with a BED file the -a option may sometimes operate as if -aa was specified if the reference sequence has coverage outside of the region specified in the BED file.">
                    <option selected="true" value="">No</option>
                    <option value="-a">all (including those with zero depth)</option>
                    <option value="-aa">absolutely all (including unused reference sequences)</option>
                </param>
                <param name="output_tags" type="text" argument="--output-extra" label="Add tags to output" help="Output extra read fields and read tag values, e.g., NM and XS (must be comma separated, e.g., NM,XS)" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single pileup dataset; the command section writes to it through the output_file_pu variable. -->
        <data format="pileup"
              name="output_file_pu"
              label="${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Three BAM inputs piled up over region 17:100-150, compared with mpileup.out.1.
             Derived from the samtools test at
             https://github.com/samtools/samtools/blob/4651d25f2b14cd68ffb0915a74b0c1b529b8cfa1/test/test.pl#L757 -->
        <test>
            <param name="input" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" ftype="bam"/>
            <conditional name="addref_cond">
                <param name="addref_select" value="history"/>
                <param name="ref" value="mpileup.ref.fa" ftype="fasta"/>
            </conditional>
            <conditional name="advanced_options">
                <param name="advanced_options_selector" value="advanced"/>
                <conditional name="limit_by_region">
                    <param name="limit_by_regions" value="limit"/>
                    <param name="region_paste" value="17:100-150"/>
                </conditional>
            </conditional>
            <output name="output_file_pu" ftype="pileup" file="mpileup.out.1"/>
        </test>
        <!-- Upstream invocation for the test above:
             test_cmd($opts,out=>'dat/mpileup.out.1',err=>'dat/mpileup.err.1',cmd=>"$$opts{bin}/samtools mpileup -b $$opts{tmp}/mpileup.bam.list -f $$opts{tmp}/mpileup.ref.fa.gz -r17:100-150"); -->
        <!-- Single BAM, region 17:1050-1060, BAQ disabled and flags 4 and 16 excluded
             (4 + 16 = 0x14, as in the upstream command quoted after this test). -->
        <test>
            <param name="input" ftype="bam" value="mpileup.1.bam" />
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" ftype="fasta" value="mpileup.ref.fa" />
            </conditional>
            <conditional name="advanced_options">
                <param name="advanced_options_selector" value="advanced" />
                <conditional name="filter_by_flags">
                    <param name="filter_flags" value="filter"/>
                    <param name="exclude_flags" value="4,16"/>
                </conditional>
                <conditional name="limit_by_region">
                    <param name="limit_by_regions" value="limit"/>
                    <param name="region_paste" value="17:1050-1060" />
                </conditional>
                <!-- Boolean parameters are set with true/false, like the other tests in this
                     file, rather than with the parameter's truevalue. -->
                <param name="disable_probabilistic_realignment" value="true" />
            </conditional>
            <output name="output_file_pu" file="mpileup.out.3" ftype="pileup" />
        </test>
<!--    test_cmd($opts,out=>'dat/mpileup.out.3',cmd=>"$$opts{bin}/samtools mpileup -B \-\-ff 0x14 -f $$opts{tmp}/mpileup.ref.fa.gz -r17:1050-1060 $$opts{tmp}/mpileup.1.bam | grep -v mpileup");
    -->
        <!-- Original test from the Galaxy tool: default advanced options, with the
             base-position and mapping-quality output columns switched on. -->
        <test>
            <param name="input" value="samtools_mpileup_in_1.bam" ftype="bam"/>
            <conditional name="addref_cond">
                <param name="addref_select" value="history"/>
                <param name="ref" value="phiX.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="output_options_cond">
                <param name="output_options_selector" value="advanced"/>
                <param name="base_position_on_reads" value="true"/>
                <param name="output_mapping_quality" value="true"/>
            </conditional>
            <conditional name="advanced_options">
                <param name="advanced_options_selector" value="default"/>
            </conditional>
            <output name="output_file_pu" ftype="pileup" file="samtools_mpileup_out_1.pileup"/>
        </test>
        <!-- Default output options; advanced branch with anomalous read pairs kept. -->
        <test>
            <param name="input" ftype="bam" value="phiX.bam" />
            <conditional name="addref_cond">
                <param name="addref_select" value="history" />
                <param name="ref" ftype="fasta" value="phiX.fasta" />
            </conditional>
            <conditional name="output_options_cond">
                <param name="output_options_selector" value="default" />
            </conditional>
            <conditional name="advanced_options">
                <param name="advanced_options_selector" value="advanced" />
                <!-- Boolean parameters are set with true/false, like the other tests in this
                     file, rather than with the parameter's truevalue. -->
                <param name="skip_anomalous_read_pairs" value="true" />
            </conditional>
            <output name="output_file_pu" file="samtools_mpileup_out_2.pileup" ftype="pileup" />
        </test>
        <!-- Same input and output columns as the original Galaxy test, but with the
             minimum base quality lowered to 0. -->
        <test>
            <param name="input" value="samtools_mpileup_in_1.bam" ftype="bam"/>
            <conditional name="addref_cond">
                <param name="addref_select" value="history"/>
                <param name="ref" value="phiX.fasta" ftype="fasta"/>
            </conditional>
            <conditional name="output_options_cond">
                <param name="output_options_selector" value="advanced"/>
                <param name="base_position_on_reads" value="true"/>
                <param name="output_mapping_quality" value="true"/>
            </conditional>
            <conditional name="advanced_options">
                <param name="advanced_options_selector" value="advanced"/>
                <!-- most reads have ultra low quality resulting in empty columns -->
                <param name="minimum_base_quality" value="0"/>
            </conditional>
            <output name="output_file_pu" ftype="pileup" file="samtools_mpileup_out_3.pileup"/>
        </test>
        <!-- Extra tag columns (NM and AM) requested through the output_tags parameter. -->
        <test>
            <param name="input" value="mpileup.1.bam" ftype="bam"/>
            <conditional name="addref_cond">
                <param name="addref_select" value="history"/>
                <param name="ref" value="mpileup.ref.fa" ftype="fasta"/>
            </conditional>
            <conditional name="output_options_cond">
                <param name="output_options_selector" value="advanced"/>
                <param name="output_tags" value="NM,AM"/>
            </conditional>
            <output name="output_file_pu" ftype="pileup" file="mpileup.out.4"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Generate pileup for one or multiple BAM files. Alignment records are grouped by sample (SM) identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.

Generation of VCF and BCF output is deprecated and is not available in this Galaxy tool. Please use bcftools mpileup instead.

In the pileup format (without -u or -g), each line represents a genomic position, consisting of chromosome name, 1-based coordinate, reference base, the number of reads covering the site, read bases, base qualities and alignment mapping qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand, a comma for a match on the reverse strand, a '>' or '<' for a reference skip, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the reverse strand. A pattern '\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence. Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference. The deleted bases will be presented as '*' in the following lines. Also at the read base column, a symbol '^' marks the start of a read. The ASCII of the character following '^' minus 33 gives the mapping quality. A symbol '$' marks the end of a read segment.

Note that there are two orthogonal ways to specify locations in the input file; via -r region and -l file. The former uses (and requires) an index to do random access while the latter streams through the file contents filtering out the specified regions, requiring no index. The two may be used in conjunction. For example a BED file containing locations of genes in chromosome 20 could be specified using -r 20 -l chr20.bed, meaning that the index is used to find chromosome 20 and then it is filtered for the regions listed in the bed file.

**BAQ (Base Alignment Quality)**

BAQ is the Phred-scaled probability of a read base being misaligned. It greatly helps to reduce false SNPs caused by misalignments. BAQ is calculated using the probabilistic realignment method described in the paper “Improving SNP discovery by base alignment quality”, Heng Li, Bioinformatics, Volume 27, Issue 8 <https://doi.org/10.1093/bioinformatics/btr076>

BAQ is turned on when a reference file is supplied using the -f option. To disable it, use the -B option.

It is possible to store pre-calculated BAQ values in a SAM BQ:Z tag. Samtools mpileup will use the precalculated values if it finds them. The -E option can be used to make it ignore the contents of the BQ:Z tag and force it to recalculate the BAQ scores by making a new alignment.
    ]]></help>
    <expand macro="citations" />
</tool>