<!--
view sam_pileup.xml @ 4:a3b4ad6858ff draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam_pileup commit 8301d37348be25a038b3c63b049b1178d05f5003"
author devteam
date Thu, 06 Feb 2020 07:11:33 -0500
parents 3ff8935743a9
children
line wrap: on
line source
-->

<tool id="sam_pileup" name="Generate pileup" version="1.1.3" profile="16.04">
    <description>from BAM dataset</description>
    <!-- NOTE(review): samtools is pinned to 0.1.16; the command section calls
         the "samtools pileup" subcommand, so confirm that subcommand still
         exists before bumping this version. -->
    <requirements>
        <requirement version="0.1.16" type="package">samtools</requirement>
    </requirements>
    <command><![CDATA[
## Stage the BAM and its index under fixed names in the job working directory.
## input1 lives inside the refOrHistory conditional, so it is referenced by its
## fully qualified name rather than relying on top-level lookup.
ln -s '${refOrHistory.input1}' input1.bam &&
ln -s '${refOrHistory.input1.metadata.bam_index}' 'input1.bam.bai' &&
#if $refOrHistory.reference == 'history':
    ## History FASTA: link it locally and index it with faidx before pileup reads it via -f.
    ln -s '${refOrHistory.ownFile}' reference.fasta &&
    samtools faidx reference.fasta &&
#end if
samtools pileup
#if $lastCol == 'yes':
    -s
#end if
#if $indels == 'yes':
    -i
#end if
-M $mapCap
## Consensus options are only emitted when consensus calling is switched on.
#if $c.consensus == 'yes':
    -c
    -T $c.theta
    -N $c.hapNum
    -r $c.fraction
    -I $c.phredProb
#end if
## -f takes the reference: the built-in index path or the staged history copy.
-f
#if $refOrHistory.reference == 'indexed':
    '${refOrHistory.index.fields.path}'
#else:
    reference.fasta
#end if
input1.bam
> '$output1'
    ]]></command>
    <inputs>
        <!-- Reference source. Both branches declare input1; only the built-in
             branch validates the BAM's dbkey against the fasta_indexes table. -->
        <conditional name="refOrHistory">
            <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
                <option value="indexed">Use a built-in index</option>
                <option value="history">Use one from the history</option>
            </param>
            <when value="indexed">
                <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
                    <validator type="unspecified_build" />
                    <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
                </param>
                <!-- Choices are limited to fasta_indexes rows whose column 1 matches input1's dbkey. -->
                <param name="index" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes">
                        <filter type="data_meta" ref="input1" key="dbkey" column="1" />
                        <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
                <param name="ownFile" argument="-f" type="data" format="fasta" label="Select a reference genome" />
            </when>
        </conditional>
        <!-- Output-shaping flags; "yes" adds the corresponding switch to the command line. -->
        <param name="lastCol" argument="-s" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
            <option value="no">Do not print the mapping quality as the last column</option>
            <option value="yes">Print the mapping quality as the last column</option>
        </param>
        <param name="indels" argument="-i" type="select" label="Whether or not to print only output pileup lines containing indels">
            <option value="no">Print all lines</option>
            <option value="yes">Print only lines containing indels</option>
        </param>
        <param name="mapCap" argument="-M" type="integer" value="60" label="Where to cap mapping quality" />
        <!-- MAQ consensus model parameters; only exposed when consensus calling is enabled. -->
        <conditional name="c">
            <param name="consensus" argument="-c" type="select" label="Call consensus according to MAQ model?">
                <option selected="true" value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="theta" argument="-T" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
                <!-- min enforces the lower bound that the help text already states. -->
                <param name="hapNum" argument="-N" type="integer" value="2" min="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
                <param name="fraction" argument="-r" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
                <param name="phredProb" argument="-I" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Receives the stdout of samtools pileup (redirected in the command section). -->
        <data
            name="output1"
            format="tabular"
            label="${tool.name} on ${on_string}: converted pileup" />
    </outputs>
    <tests>
        <!-- Reference taken from the history, all options at their defaults, no consensus calling. -->
        <test>
            <!--
            Bam to pileup command:
            samtools faidx chr_m.fasta
            samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
            chr_m.fasta is the prefix of the index
            -->
            <param name="reference" value="history" />
            <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
            <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
            <param name="lastCol" value="no" />
            <param name="indels" value="no" />
            <param name="mapCap" value="60" />
            <param name="consensus" value="no" />
            <output name="output1" file="sam_pileup_out1.pileup" />
        </test>
        <!--
        Disabled test: built-in index with consensus calling enabled.
        NOTE(review): presumably disabled because it needs a fasta_indexes
        data table entry for chr_m in the test environment. Confirm before
        re-enabling.
        <test>
            Bam to pileup command:
            samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
            chr_m.fasta is the prefix of the index
            <param name="reference" value="indexed" />
            <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
            <param name="index" value="chr_m" />
            <param name="lastCol" value="no" />
            <param name="indels" value="no" />
            <param name="mapCap" value="60" />
            <param name="consensus" value="yes" />
            <param name="theta" value="0.85" />
            <param name="hapNum" value="2" />
            <param name="fraction" value="0.001" />
            <param name="phredProb" value="40" />
            <output name="output1" file="sam_pileup_out2.pileup" />
        </test>
        -->
    </tests>
    <help><![CDATA[
**What it does**

Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates one of two types of pileup dataset, depending on the specified options. If the *Call consensus according to MAQ model?* option is set to **No**, the tool produces a simple six-column pileup. If the option is set to **Yes**, a ten-column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.

.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml

------

**Types of pileup datasets**

The description of the pileup format below is largely based on information found on the SAMTools Pileup_ documentation page. The 6- and 10-column variants are described below.

.. _Pileup: http://samtools.sourceforge.net/pileup.shtml

**Six column pileup**::

    1    2  3  4        5        6
 ---------------------------------
 chrM  412  A  2       .,       II
 chrM  413  G  4     ..t,     IIIH
 chrM  414  C  4     ...a     III2
 chrM  415  C  4     TTTt     III7

where::

  Column Definition
 ------- ----------------------------
       1 Chromosome
       2 Position (1-based)
       3 Reference base at that position
       4 Coverage (# reads aligning over that position)
       5 Bases within reads (see Galaxy wiki for more info)
       6 Quality values (phred33 scale, see Galaxy wiki for more)

**Ten column pileup**

The `ten-column` (consensus_) pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::


    1    2  3  4   5   6   7   8       9       10
 ------------------------------------------------
 chrM  412  A  A  75   0  25  2       .,       II
 chrM  413  G  G  72   0  25  4     ..t,     IIIH
 chrM  414  C  C  75   0  25  4     ...a     III2
 chrM  415  C  T  75  75  25  4     TTTt     III7

where::

  Column Definition
 ------- --------------------------------------------------------
       1 Chromosome
       2 Position (1-based)
       3 Reference base at that position
       4 Consensus bases
       5 Consensus quality
       6 SNP quality
       7 Maximum mapping quality
       8 Coverage (# reads aligning over that position)
       9 Bases within reads (see Galaxy wiki for more info)
      10 Quality values (phred33 scale, see Galaxy wiki for more)


.. _consensus: http://samtools.sourceforge.net/cns0.shtml
    ]]></help>
    <citations>
        <!-- NOTE(review): presumably the SAMtools publication; confirm the DOI target. -->
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
    </citations>
</tool>