view small_rna_maps.xml @ 16:600e2498bd21 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
author artbio
date Tue, 13 Nov 2018 17:03:46 -0500
parents 82fedc576024
children b28dcd4051e8
line wrap: on
line source

<tool id="small_rna_maps" name="small_rna_maps" version="2.7.0">
  <description></description>
  <requirements>
        <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
        <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
        <requirement type="package" version="0.6_28=r3.3.2_0">r-latticeextra</requirement>
        <requirement type="package" version="2.2.1=r3.3.2_0">r-gridextra</requirement>
        <requirement type="package" version="1.4.2=r3.3.2_0">r-reshape2</requirement>
        <requirement type="package" version="0.6.6">sambamba</requirement>
  </requirements>
  <stdio>
      <exit_code range="1:" level="fatal" description="Tool exception" />
  </stdio>
  <command detect_errors="exit_code"><![CDATA[
          #for $file in $series
              sambamba view -t \${GALAXY_SLOTS} -F "not unmapped and sequence_length >= ${minsize} and sequence_length <= ${maxsize}" -f bam '$file.inputs' >'$file.inputs.name' &&
              samtools index '$file.inputs.name' &&
          #end for
          python '$__tool_directory__'/small_rna_maps.py
              --inputs
              #for $file in $series
                  '$file.inputs.name'
              #end for
                  --sample_names
              #for $sample in $series
                  '$sample.inputs.name'
              #end for
              --minsize $minsize
              --maxsize $maxsize
              #if str($plots_options.plots_options_selector ) == "two_plot":
                  --plot_methods '${plots_options.first_plot}' '${plots_options.extra_plot}'
                  --outputs '$output_tab' '$extra_output_tab' &&
              #elif str($plots_options.plots_options_selector ) == "global":
                  --plot_methods 'Size'
                  --outputs '$output_tab' &&
              #elif str($plots_options.plots_options_selector ) == "cluster":
                  --plot_methods 'Counts'
                  --outputs '$output_tab'
                  --cluster ${plots_options.cluster} &&
              #else:
                  --plot_methods '${plots_options.first_plot}'
                  --outputs '$output_tab' &&
              #end if

          Rscript '$__tool_directory__'/small_rna_maps.r
              --first_dataframe '$output_tab'
              --extra_dataframe '$extra_output_tab'
              --normalization
              #set $norm = ""
              #for $file in $series
                     #set $norm += str($file.normalization)+' '
              #end for
              #set $norm = $norm[:-1]
              '$norm'
              #if $ylimits_cond.ylimits == "no":
                  --ymin '' --ymax ''
              #else:
                  --ymin '${ylimits_cond.ymin}' --ymax '${ylimits_cond.ymax}'
              #end if
              #if str($plots_options.plots_options_selector ) == "two_plot":
                  --first_plot_method '${plots_options.first_plot}'
                  --extra_plot_method '${plots_options.extra_plot}'
              #elif str($plots_options.plots_options_selector ) == "global":
                  --first_plot_method 'Size'
                  --extra_plot_method ''
                  --global '${plots_options.mergestrands}'
              #else:
                  --first_plot_method '${plots_options.first_plot}'
                  --extra_plot_method ''
              #end if
              --output_pdf '$output_pdf'
  ]]></command>
 <inputs>
    <repeat name="series" title="Input" min="1">
        <param name="inputs" type="data" format="bam" label="Select a single alignment file to parse" multiple="false"
               help="If 'multiple datasets' method is used, the same normalization factor will be applied to each dataset. Please refer to the help section for more information." />
        <param name="normalization" type="float" label="Enter a size/normalization factor."
               help="Enter only one value e.g. '0.75' or '1.23'" value="1.0"/>
    </repeat>
    <param name="minsize" type="integer" label="Minimal size of reads for inclusion in analysis"
           value="0" help="default value: 0" />
    <param name="maxsize" type="integer" label="Maximal size of reads for inclusion in analysis"
           value="10000" help="default value: 10000" />
    <conditional name="plots_options">
        <param name="plots_options_selector" type="select" display="radio" label="Plot Options">
            <option value="one_plot">Just one plot per chromosome</option>
            <option value="two_plot" selected="True">Two plots per chromosome</option>
            <option value="global">Global read size distributions of aligned reads</option>
            <option value="cluster">Map read clusters</option>
        </param>
        <when value="two_plot">
            <param name="first_plot" type="select" display="radio" label="Select the type of the top plot">
                <option value="Counts">Counts</option>
                <option value="Coverage">Coverage</option>
                <option value="Mean">Mean Sizes</option>
                <option value="Median">Median Sizes</option>
                <option value="Size">Size Distributions</option>
            </param>
            <param name="extra_plot" type="select" display="radio" label="Select the type of the bottom plot">
                <option value="Counts">Counts</option>
                <option value="Coverage">Coverage</option>
                <option value="Mean">Mean Sizes</option>
                <option value="Median">Median Sizes</option>
                <option value="Size">Size Distributions</option>
            </param>
        </when>
        <when value="one_plot">
            <param name="first_plot" type="select" display="radio" label="select the type of plot">
                <option value="Counts">Counts</option>
                <option value="Coverage">Coverage</option>
                <option value="Mean">Mean Sizes</option>
                <option value="Median">Median Sizes</option>
                <option value="Size">Size Distributions</option>
            </param>
        </when>
        <when value="global">
            <param name="first_plot" type="hidden" value="Size"/>
            <param name="mergestrands" type="select" display="radio" label="Whether forward and reverse aligned reads should be merged or not in the histogram">
                <option value="nomerge">Do not merge</option>
                <option value="merge">Merge forward and reverse reads</option>
            </param>
        </when>
        <when value="cluster">
            <param name="first_plot" type="hidden" value="Counts"/>
            <param name="cluster" type="integer" label="Aggregation distance in nucleotides" value="1"
                   help="Sets the distance (in nt) below which reads are clustered to a single median position" />
        </when>
    </conditional>
    <conditional name="ylimits_cond">
        <param name="ylimits" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Do you wish to set an y axis limit to the plots?"
               help="This limit won't be applied to size distribution plots"/>
        <when value="yes">
            <param name="ymin" type="float" label="Enter minimum value" value="0.0" help="e.g. '-5.0'"/>
            <param name="ymax" type="float" label="Enter maximum value" value="0.0" help="e.g. '5.0'"/>
        </when>
        <when value="no">
        </when>
    </conditional>
 </inputs>

 <outputs>
    <data format="tabular" name="output_tab" label="$plots_options.first_plot dataframe" />
    <data format="tabular" name="extra_output_tab" label="$plots_options.extra_plot dataframe">
        <filter>plots_options['plots_options_selector'] == 'two_plot'</filter>
    </data>
    <data format="pdf" name="output_pdf" label="small RNA maps" />
</outputs>

    <tests>
        <test> <!-- 1 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_new2.bam" ftype="bam" />
                <param name="normalization" value="2.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Counts" />
            <output file="input1_input2new_norm_1_2_counts.tab" name="output_tab" />
            <output file="input1_input2new_norm_1_2_single_plot_counts.pdf" name="output_pdf" />
        </test>
        <test> <!-- 2 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="ylimits" value="yes" />
            <param name="ymin" value="-5" />
            <param name="ymax" value="5" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Counts" />
            <output file="input1_counts_yminneg5_5.tab" name="output_tab" />
            <output file="input1_yminneg5_5_single_plot_counts.pdf" name="output_pdf" />
        </test>
        <test> <!-- 3 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="cluster" />
            <param name="first_plot" value="Counts" />
            <param name="cluster" value="5" />
            <output file="clustering.tab" name="output_tab" />
            <output file="clustering.pdf" name="output_pdf" />
        </test>
        <test> <!-- 4 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="20" />
            <param name="maxsize" value="30" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Size" />
            <output file="input1_min20_max30_size.tab" name="output_tab" />
            <output file="input1_min20_max30_single_plot_size.pdf" name="output_pdf" />
        </test>
        <test> <!-- 5 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Mean" />
            <output file="input1_mean.tab" name="output_tab" />
            <output file="input1__single_plot_mean.pdf" name="output_pdf" />
        </test>
        <test> <!-- 6 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Median" />
            <output file="input1_median.tab" name="output_tab" />
            <output file="input1_single_plot_median.pdf" name="output_pdf" />
        </test>
        <test> <!-- 7 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="2.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Counts" />
            <output file="input1_input2_norm_1_2_counts.tab" name="output_tab" />
            <output file="input1_input2_norm_1_2_single_plot_counts.pdf" name="output_pdf" />
        </test>
        <test> <!-- 8 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="ylimits" value="yes" />
            <param name="ymin" value="-5" />
            <param name="ymax" value="5" />
            <param name="plots_options_selector" value="two_plot" />
            <param name="first_plot" value="Counts" />
            <param name="extra_plot" value="Size" />
            <output file="input1_input2_counts.tab" name="output_tab" />
            <output file="input1_input2_size.tab" name="extra_output_tab" />
            <output file="input1_input2_double_plot_counts_size_ylimneg5_5.pdf" name="output_pdf" />
        </test>
        <test> <!-- 9 -->
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Coverage" />
            <output file="input_single_chr_x_6_single_plot_coverage.tab" name="output_tab" />
            <output file="input_single_chr_x_6_single_plot_coverage.pdf" name="output_pdf" />
        </test>
        <test> <!-- 10 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="global" />
            <param name="mergestrands" value="nomerge" />
            <param name="first_plot" value="Size" />
            <output file="size.tab" name="output_tab" />
            <output file="global_nomerge.pdf" name="output_pdf" />
        </test>
        <test> <!-- 11 -->
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="plots_options_selector" value="global" />
            <param name="mergestrands" value="merge" />
            <param name="first_plot" value="Size" />
            <output file="size.tab" name="output_tab" />
            <output file="global_merge.pdf" name="output_pdf" />
        </test>
    </tests>
<help>

**What it does**

Plots mapping statistics of an alignment along the reference chromosomes :

 - counts
 - mean sizes
 - median sizes
 - coverage depth
 - size distribution

Read counts, mean sizes and median sizes are computed by counting the number of 5' end of reads
in each position of a chromosome reference.
Coverage depths are computed from the input bam alignment files using the python pysam module.

The metrics mentioned above can be plotted either separately:

.. image:: one_plot.png

Or in all possible pairwise combinations:

.. image:: two_plot.png

For comparison purposes, values from bam alignment files can be normalized by a size factor
before plotting.

**Inputs**

bam alignment files that must be

  - single-read
  - sorted
  - mapped to the same reference

To plot 2 alignment files in the same PDF output the 'single dataset' method should be used.

.. class:: warningmark

If the 'multiple dataset' method is used the normalization factor will be applied to every file selected in the input list.
Additionally each file in the selected list will be plotted in a separate PDF file.

**Output**

A pdf file generated by the R package lattice and one or two dataframes used to plot the data.

</help>

<citations>
    <citation type="doi">10.1093/bioinformatics/btp352</citation>
     <citation type="bibtex">@Book{,
    title = {Lattice: Multivariate Data Visualization with R},
    author = {Deepayan Sarkar},
    publisher = {Springer},
    address = {New York},
    year = {2008},
    note = {ISBN 978-0-387-75968-5},
    url = {http://lmdvr.r-forge.r-project.org},
  }</citation>
</citations>
</tool>