view RPKM_count.xml @ 50:f242ee103277 draft

planemo upload for repository https://github.com/lparsons/galaxy_tools/tree/master/tools/rseqc commit 91ad241aa3f34b70649d13a5f18611da7577a5ee
author lparsons
date Tue, 03 May 2016 16:36:57 -0400
parents 6b33e31bda10
children
line wrap: on
line source

<tool id="rseqc_RPKM_count" name="RPKM Count" version="2.4galaxy1">
    <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>

    <!-- Shared macro definitions. The macros expanded in this file
         (requirement_package_numpy, requirement_package_rseqc, stdio, citations)
         are expected to come from this import; their bodies are not visible here. -->
    <macros>
        <import>rseqc_macros.xml</import>
    </macros>

    <!-- Software dependencies, declared through the shared macros. -->
    <requirements>
        <expand macro="requirement_package_numpy" />
        <expand macro="requirement_package_rseqc" />
    </requirements>

    <!-- Presumably the shared stdout/stderr and exit-code handling; confirm in rseqc_macros.xml. -->
    <expand macro="stdio" />

    <!-- Command used to report the version of the underlying RPKM_count.py script. -->
    <version_command><![CDATA[RPKM_count.py --version]]></version_command>

    <command><![CDATA[
        ## Link the BAM and its index into the working directory under matching
        ## names (local_input.bam / local_input.bam.bai) before running the script.
        ln -s '${input}' 'local_input.bam' &&
        ln -s '${input.metadata.bam_index}' 'local_input.bam.bai' &&
        RPKM_count.py -i 'local_input.bam' -o output -r '${refgene}'

        ## Strand rule (-d): only passed when a strand-specific library type is chosen.
        ## The two library types are mutually exclusive, hence if/elif.
        #if str($strand_type.strand_specific) == "pair"
            -d
            #if str($strand_type.pair_type) == "sd"
                '1++,1--,2+-,2-+'
            #else
                '1+-,1-+,2++,2--'
            #end if
        #elif str($strand_type.strand_specific) == "single"
            -d
            #if str($strand_type.single_type) == "s"
                '++,--'
            #else
                '+-,-+'
            #end if
        #end if

        ## The mapping-quality cutoff is only exposed (and only passed) together
        ## with the skip-multi-hits switch.
        #if $multihits.skipmultihits
            --skip-multi-hits
            --mapq=${multihits.mapq}
        #end if

        ## Expands to --only-exonic when checked, to nothing otherwise.
        ${onlyexonic}
        ]]>
    </command>

    <inputs>
        <!-- Alignment file and gene model handed to RPKM_count.py via -i and -r. -->
        <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/>
        <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/>

        <!-- Library strandedness; selects which strand rule (if any) the command passes via -d.
             Defaults are declared with selected="true" on the option itself. -->
        <conditional name="strand_type">
            <param name="strand_specific" type="select" label="Strand-specific?">
                <option value="none" selected="true">None</option>
                <option value="pair">Pair-End RNA-seq</option>
                <option value="single">Single-End RNA-seq</option>
            </param>
            <when value="pair">
                <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" help="(--strand)">
                    <option value="sd" selected="true">read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
                    <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
                </param>
            </when>
            <when value="single">
                <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" help="(--strand)">
                    <option value="s" selected="true">positive --> positive; negative --> negative</option>
                    <option value="d">positive --> negative; negative --> positive</option>
                </param>
            </when>
            <when value="none" />
        </conditional>

        <!-- The mapping-quality cutoff is only offered when multi-hit reads are skipped. -->
        <conditional name="multihits">
            <param name="skipmultihits" type="boolean" label="Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads" value="false" help="(--skip-multi-hits)" />
            <when value="true">
                <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquely mapped'" help="(--mapq)" />
            </when>
            <when value="false" />
        </conditional>

        <!-- The true/false values are inserted verbatim into the command line. -->
        <param name="onlyexonic" type="boolean" value="false" truevalue="--only-exonic" falsevalue="" label="Only use exonic (UTR exons and CDS exons) reads, otherwise use all reads" help="(--only-exonic)"/>
    </inputs>

    <outputs>
        <!-- Picks up output_read_count.xls from the job working directory
             (the command passes "output" as the output prefix). -->
        <data name="outputxls"
              format="xls"
              from_work_dir="output_read_count.xls" />
    </outputs>

    <tests>
        <!-- Single run with every option left at its default: no strand rule,
             multi-hit reads kept, exonic-only filter off. -->
        <test>
            <param name="input"
                   value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" />
            <param name="refgene"
                   value="hg19_RefSeq_chr1_1-100000.bed" />
            <output name="outputxls"
                    file="output_read_count.xls" />
        </test>
    </tests>

    <help><![CDATA[
RPKM_count.py
+++++++++++++

Given a BAM file and a reference gene model, this program will calculate the raw count and RPKM
values for transcripts at the exon, intron and mRNA level. For strand-specific RNA-seq data, the
program will assign each read to its parental gene according to the strand rule; if you don't know
the strand rule, run infer_experiment.py. Please note that chromosome IDs and genome coordinates
should be concordant between the BAM and BED files.

Inputs
++++++++++++++

Input BAM/SAM file
    Alignment file in BAM/SAM format.

Reference gene model
    Gene model in BED format.

Strand sequencing type (default=none)
    See Infer Experiment tool if uncertain.

Options
++++++++++++++

Skip Multiple Hit Reads
    Skip reads that map to multiple locations and use only uniquely mapped reads; otherwise multiple hit reads are used as well.

Only use exonic reads
    Makes the program use only exonic (UTR exons and CDS exons) reads; otherwise all reads are used.

Sample Output
++++++++++++++

=====   ========   ========   =====================    =====  ===========   =============   =============   ========  =========
chrom   start      end        accession                score  gene strand   tag count (+)   tag count (-)   RPKM (+)  RPKM (-)
=====   ========   ========   =====================    =====  ===========   =============   =============   ========  =========
chr1    29213722   29313959   NM_001166007_intron_1    0      '+'             431             4329            0.086     0.863
chr1    29314417   29319841   NM_001166007_intron_2    0      '+'             31              1               0.114     0.004
chr1    29320054   29323726   NM_001166007_intron_3    0      '+'             32              0               0.174     0.000
chr1    29213602   29213722   NM_001166007_exon_1      0      '+'             164             0               27.321    0.000
chr1    29313959   29314417   NM_001166007_exon_2      0      '+'            1699            4               74.158    0.175
chr1    29319841   29320054   NM_001166007_exon_3      0      '+'             528             1               49.554    0.094
=====   ========   ========   =====================    =====  ===========   =============   =============   ========  =========

-----

About RSeQC
+++++++++++

The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.

The RSeQC package is licensed under the GNU GPL v3 license.

.. image:: http://rseqc.sourceforge.net/_static/logo.png

.. _RSeQC: http://rseqc.sourceforge.net/
]]>
    </help>

    <!-- Citation block supplied by the "citations" macro; its definition is not
         visible here (presumably in rseqc_macros.xml). -->
    <expand macro="citations" />

</tool>