view signature.xml @ 10:68ee7c84d498 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 4e84386b619a7941f23d175d7fc86aba7990ac36"
author artbio
date Tue, 07 Jan 2020 06:59:56 -0500
parents 59ee49bfb7bb
children 8d3ca9652a5b
line wrap: on
line source

<tool id="signature" name="Small RNA Signatures" version="3.2.1">
    <description />
    <requirements>
        <!-- Runtime packages for signature.py (numpy, pysam) and signature.r (r-* packages).
             Each version is written as version=build; the part after '=' looks like a
             conda build string (TODO confirm before bumping any pin). -->
        <requirement version="1.11.2=py27_0" type="package">numpy</requirement>
        <requirement version="0.11.2.1=py27_0" type="package">pysam</requirement>
        <requirement version="1.6.1=r351h6115d3f_0" type="package">r-optparse</requirement>
        <requirement version="0.6_28=r351h6115d3f_0" type="package">r-latticeextra</requirement>
        <requirement version="2.3=r351h6115d3f_0" type="package">r-gridextra</requirement>
    </requirements>
    <stdio>
        <!-- Exit statuses in the range "1:" are reported as a fatal "Tool exception". -->
        <exit_code description="Tool exception" level="fatal" range="1:" />
    </stdio>
      <command detect_errors="exit_code"><![CDATA[
        ## Link the BAM and the index Galaxy already holds for it into the job
        ## working directory. Nothing is written next to the input dataset and
        ## no external indexing program is needed.
        ln -f -s '$input' input.bam &&
        ln -f -s '${input.metadata.bam_index}' input.bam.bai &&
        ## Step 1: compute the h and z signature tables from the alignments.
        python '$__tool_directory__'/signature.py
           --input input.bam
           --minquery '$minquery'
           --maxquery '$maxquery'
           --mintarget '$mintarget'
           --maxtarget '$maxtarget'
           --minscope '$minscope'
           --maxscope '$maxscope'
           --output_h '$h_dataframe'
           --output_z '$z_dataframe' &&
        ## Step 2: plot both tables into a single pdf.
        Rscript '$__tool_directory__'/signature.r
           --h_dataframe '$h_dataframe'
           --z_dataframe '$z_dataframe'
           --plot_method '$plot_method'
           --pdf '$pdf'
           --title "Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs"
    ]]></command>
    <inputs>
        <!-- Alignment file handed to signature.py -->
        <param name="input" type="data" format="bam" label="Compute signature from this bowtie standard output" />
        <!-- Size window (nt) of the query reads -->
        <param name="minquery" type="integer" value="23" size="3" label="Min size of query small RNAs" help="'23' = 23 nucleotides" />
        <param name="maxquery" type="integer" value="29" size="3" label="Max size of query small RNAs" help="'29' = 29 nucleotides" />
        <!-- Size window (nt) of the target reads -->
        <param name="mintarget" type="integer" value="23" size="3" label="Min size of target small RNAs" help="'23' = 23 nucleotides" />
        <param name="maxtarget" type="integer" value="29" size="3" label="Max size of target small RNAs" help="'29' = 29 nucleotides" />
        <!-- Range of overlaps (nt) over which the signatures are computed;
             each help example mirrors the default value of its parameter -->
        <param name="minscope" type="integer" value="1" size="3" label="Minimal relative overlap analyzed" help="'1' = 1 nucleotide overlap" />
        <param name="maxscope" type="integer" value="26" size="3" label="Maximal relative overlap analyzed" help="'26' = 26 nucleotides overlap" />
        <!-- Passed to signature.r as its plot_method option -->
        <param name="plot_method" type="select" label="Graph type" help="Signature can be computed globally or by item present in the alignment file">
            <option value="global" selected="True">Global</option>
            <option value="lattice">Lattice</option>
        </param>
    </inputs>
    <outputs>
        <!-- Table written by signature.py through its output_z option -->
        <data name="z_dataframe" format="tabular" label="z-signature data frame">
            <actions>
                <action type="metadata" name="column_names" default="Chromosome,Overlap,Number_of_pairs,z-score" />
            </actions>
        </data>
        <!-- Table written by signature.py through its output_h option -->
        <data name="h_dataframe" format="tabular" label="h-signature data frame">
            <actions>
                <action type="metadata" name="column_names" default="Chromosome,Overlap,overlap_probability,z-score" />
            </actions>
        </data>
        <!-- Plot written by signature.r through its pdf option -->
        <data name="pdf" format="pdf" label="Overlap probabilities" />
    </outputs>
    <tests>
        <!-- Global plot: 23-29 nt queries against 23-29 nt targets, overlaps 5 to 15 -->
        <test>
            <param name="input" value="sr_bowtie.bam" ftype="bam" />
            <param name="minquery" value="23" />
            <param name="maxquery" value="29" />
            <param name="mintarget" value="23" />
            <param name="maxtarget" value="29" />
            <param name="minscope" value="5" />
            <param name="maxscope" value="15" />
            <param name="plot_method" value="global" />
            <output name="h_dataframe" file="h.tab" ftype="tabular" />
            <output name="z_dataframe" file="z.tab" ftype="tabular" />
            <output name="pdf" file="global.pdf" ftype="pdf" />
        </test>
        <!-- Same parameters with the lattice plot; only the expected pdf differs -->
        <test>
            <param name="input" value="sr_bowtie.bam" ftype="bam" />
            <param name="minquery" value="23" />
            <param name="maxquery" value="29" />
            <param name="mintarget" value="23" />
            <param name="maxtarget" value="29" />
            <param name="minscope" value="5" />
            <param name="maxscope" value="15" />
            <param name="plot_method" value="lattice" />
            <output name="h_dataframe" file="h.tab" ftype="tabular" />
            <output name="z_dataframe" file="z.tab" ftype="tabular" />
            <output name="pdf" file="lattice.pdf" ftype="pdf" />
        </test>
    </tests>
    <help>

**What it does**

Compute small RNA (piRNA, siRNA, ...) signatures.

This tool computes (i) the number of pairs of **aligned** reads by overlap class (in nt) and associated z-scores,
and (ii) the ping-pong signal (`Brennecke et al, 2009`_) and associated z-scores.

**Note** that the number of pairs of aligned reads is distinct from the number of pairs of reads
when these reads can be aligned at multiple positions in the genome. The two values are equal only
when the analysis is restricted to uniquely mapping reads.

Options set the min and max size of both the query small RNA class and the target small RNA class,
the range of overlaps (in nt) over which to compute the signatures, and whether the signatures should be reported at
genome-wide level or by item (chromosomes, genes, etc.). For details on the computational algorithms
for piRNA and siRNA signatures, see `Antoniewski (2014)`_.

.. _Brennecke et al, 2009: http://dx.doi.org/10.1126/science.1165171
.. _Antoniewski (2014): https://link.springer.com/protocol/10.1007%2F978-1-4939-0931-5_12

**Input**

A **sorted** BAM alignment file.

**Outputs**

**Global**: The number of pairs found, the ping-pong signal and the associated z-scores 
are computed at genome-wide level and returned in a pdf file.

**Lattice**: The number of pairs found, the ping-pong signals and the associated z-scores
are computed for each item described in the BAM alignment input and returned in a pdf file as a lattice graph.


        </help>
    <citations>
        <!-- Same DOI as the "Antoniewski (2014)" link in the help text -->
        <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>
    </citations>
</tool>