view small_rna_clusters.xml @ 1:160e35e432a0 draft default tip

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_clusters commit 51dc6c56c7d95fc229ffee958354211cd454fd36"
author artbio
date Sun, 09 May 2021 17:10:29 +0000
parents 8028521b6e4f
children
line wrap: on
line source

<tool id="small_rna_clusters" name="small_rna_clusters" version="1.3.0">
  <description></description>
  <!-- Packages the tool needs at run time: pysam for the Python script, the r-* packages
       for the plotting script, sambamba/samtools for BAM filtering and indexing.
       NOTE(review): every version is written as "version=build"; the part after "=" looks
       like a conda build string pinning one exact build. Confirm before bumping, and confirm
       whether the explicit icu pin is still required. -->
  <requirements>
    <requirement type="package" version="0.16.0.1=py38hbdc2ae9_1">pysam</requirement>
    <requirement type="package" version="1.6.6=r40h6115d3f_1">r-optparse</requirement>
    <requirement type="package" version="0.6_29=r40h6115d3f_1">r-latticeextra</requirement>
    <requirement type="package" version="2.3=r40h6115d3f_1003">r-gridextra</requirement>
    <requirement type="package" version="1.4.4=r40h0357c0b_1">r-reshape2</requirement>
    <requirement type="package" version="0.8.0=h984e79f_0">sambamba</requirement>
    <requirement type="package" version="1.10=h2e538c0_3">samtools</requirement>
    <requirement type="package" version="68.1=h58526e2_0">icu</requirement>
  </requirements>
  <!-- Error handling: detect_errors="exit_code" on the command already treats any non-zero
       exit code as a failure, so no separate stdio block is declared. -->
  <command detect_errors="exit_code"><![CDATA[
          ## Step 1: size-filter every input BAM with sambamba and index the result.
          ## Working copies are named by position (input_0.bam, input_1.bam, ...) rather than
          ## by element identifier, so that two datasets sharing a name cannot overwrite each
          ## other and no user-controlled text is ever used as a file name.
          ## Sample labels are still taken from the element identifiers; they are wrapped in
          ## single quotes, with any embedded single quote rewritten as '"'"', so the shell
          ## passes them through literally (no $(...), backtick or variable expansion).
          #set $sq = chr(39)
          #set $escaped_sq = $sq + chr(34) + $sq + chr(34) + $sq
          #set $bam_files = []
          #set $labels = []
          #for $i, $file in enumerate($inputs)
              #set $bam = 'input_' + str($i) + '.bam'
              #silent $bam_files.append($bam)
              #silent $labels.append($sq + str($file.element_identifier).replace($sq, $escaped_sq) + $sq)
              sambamba view -t \${GALAXY_SLOTS:-1} -F "not unmapped and sequence_length >= ${minsize} and sequence_length <= ${maxsize}" -f bam '$file' -o '$bam' &&
              samtools index '$bam' &&
          #end for

          ## Step 2: cluster the filtered reads; writes the counts table and the bed file.
          ## NOTE(review): assumes the script only opens the paths given to --inputs and takes
          ## all labels from --sample_names (the script is not part of this file); confirm.
          python '$__tool_directory__/small_rna_clusters.py'
              --inputs ${ ' '.join($bam_files) }
              --sample_names ${ ' '.join($labels) }
              --minsize $minsize
              --maxsize $maxsize
              --outputs '$output_tab'
              --cluster $cluster
              --bed '$output_bed'
              --bed_skipsize $skip_size
              --bed_skipcounts $skip_counts
              --bed_skipdensity $skip_density
              $strandness &&

          ## Step 3: plot the counts table as a pdf.
          Rscript '$__tool_directory__/small_rna_clusters.r'
              --first_dataframe '$output_tab'
              --first_plot_method 'Counts'
              --output_pdf '$output_pdf'
  ]]></command>
 <inputs>
    <!-- One or more BAM datasets; all of them end up in the same table, bed and pdf outputs. -->
    <param name="inputs" type="data" format="bam" label="Select alignment files to parse" multiple="true"
           help="maps from these bam inputs will be collected in a single pdf output" />
    <!-- Read-length window applied when the BAM files are filtered. -->
    <param name="minsize" type="integer" label="Minimal size of reads for inclusion in analysis"
           value="19" help="default value: 19" />
    <param name="maxsize" type="integer" label="Maximal size of reads for inclusion in analysis"
           value="29" help="default value: 29" />
    <!-- Hidden constant; the command in this file passes the literal 'Counts' to the plotting
         script and does not reference this parameter. -->
    <param name="first_plot" type="hidden" value="Counts"/>
    <param name="cluster" type="integer" label="Clustering distance in nucleotides" value="1"
           help="Sets the distance (in nt) below which reads are clustered to a single median position" />
    <!-- When checked, the flag given in truevalue is appended to the clustering command. -->
    <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false"
           label="Ignore polarity of reads ?" help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/>
    <!-- The three skip_* thresholds are passed to the bed reporting options of the clustering script. -->
    <param name="skip_size" type="integer" label="do not report clusters whose size is less than the specified value" value="1"
           help="Cluster size threshold (in nucleotides) for reporting. Set to 1 (default) reports all clusters, including singlets" />
    <param name="skip_counts" type="integer" label="do not report clusters with a number of reads lower than the specified value" value="1"
           help="Number-of-reads threshold for cluster reporting. Set to 1 (default) reports all clusters, irrespective of their counts" />
    <param name="skip_density" type="float" label="do not report clusters with density equal or less than the specified value" value="0"
           help="Density threshold (in reads per nucleotide) for reporting. Set to 0 (default) reports all cluster densities" />
 </inputs>

 <outputs>
    <!-- Counts table written by the clustering script and read back by the plotting script. -->
    <data name="output_tab" format="tabular" label="Counts Dataframe" />
    <!-- Cluster coordinates, subject to the skip_* reporting thresholds. -->
    <data name="output_bed" format="bed" label="bed file for clusters" />
    <!-- Plot produced by the R script from the counts table. -->
    <data name="output_pdf" format="pdf" label="small RNA maps" />
 </outputs>

    <tests>
        <!-- All tests use the same two BAM inputs and a non-default clustering distance;
             they differ in read polarity handling and in the bed reporting thresholds. -->

        <!-- Test 0: polarity respected, 500 nt clustering distance, skip_size 1 -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
            <param name="cluster" value="500" />
            <param name="skip_size" value="1" />
            <param name="strandness" value="false" />
            <output name="output_tab" file="clustering_0.tab" />
            <output name="output_pdf" file="clustering_0.pdf" />
            <output name="output_bed" file="bed_0.bed" />
        </test>

        <!-- Test 1: same as test 0 but polarity ignored -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
            <param name="cluster" value="500" />
            <param name="skip_size" value="1" />
            <param name="strandness" value="true" />
            <output name="output_tab" file="clustering_1.tab" />
            <output name="output_pdf" file="clustering_1.pdf" />
            <output name="output_bed" file="bed_1.bed" />
        </test>

        <!-- Test 2: polarity respected, skip_size raised to 1000 -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
            <param name="cluster" value="500" />
            <param name="skip_size" value="1000" />
            <param name="strandness" value="false" />
            <output name="output_tab" file="clustering_2.tab" />
            <output name="output_pdf" file="clustering_2.pdf" />
            <output name="output_bed" file="bed_2.bed" />
        </test>

        <!-- Test 3: polarity respected, size, counts and density thresholds all set -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
            <param name="cluster" value="500" />
            <param name="skip_size" value="1000" />
            <param name="skip_counts" value="200" />
            <param name="skip_density" value="0.1" />
            <param name="strandness" value="false" />
            <output name="output_tab" file="clustering_3.tab" />
            <output name="output_pdf" file="clustering_3.pdf" />
            <output name="output_bed" file="bed_3.bed" />
        </test>

        <!-- Test 4: polarity ignored, 2000 nt clustering distance, all three thresholds set -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
            <param name="cluster" value="2000" />
            <param name="skip_size" value="2000" />
            <param name="skip_counts" value="100" />
            <param name="skip_density" value="0.1" />
            <param name="strandness" value="true" />
            <output name="output_tab" file="clustering_4.tab" />
            <output name="output_pdf" file="clustering_4.pdf" />
            <output name="output_bed" file="bed_4.bed" />
        </test>
    </tests>
<help>
**What it does**

Clusters of read alignments (provided as bam files) are aggregated along regions of
*variable* lengths. The Clustering algorithm works as follows:

A read is clustered with the next read on the genomic reference if the two reads are
separated by *at maximum* the clustering distance (set in nucleotides). If clustered, the
step is repeated with the following read until clustering fails. A new cluster is then
searched.

For clustering procedure, one has the possibility to consider the polarity of reads
(default setting, only forward reads or reverse reads can be clustered, separately), or to
ignore this polarity.

Clusters of reads are plotted as single bars, their coordinates being the medians of
the flanking coordinates of the clusters.

In addition, clusters are reported in a bed file. There, clusters can be filtered out based on
various parameters: cluster size, cluster read number or cluster read density (number of
reads divided by the length of the cluster).

Note that bed filtering options only affect the number of reported lines in the bed file.
All clusters are shown in the plot. **i.e. the only parameter that affects the number of
found clusters is the clustering distance.**

**Inputs**

bam alignment files that must be

  - single-read
  - sorted
  - mapped to the same reference

.. class:: warningmark

This tool follows a "map-reduce" procedure: multiple inputs, which can be arranged in a
data collection, are visualised side by side in a single pdf file and are reported in a
single bed file.
 
**Output**

A pdf file generated by the R package lattice, a dataframe used to plot the clusters, and
a bed file that reports significant clusters.
</help>

<citations>
    <!-- DOI citation, resolved from the identifier below. -->
    <citation type="doi">10.1093/bioinformatics/btp352</citation>
    <!-- Reference for the lattice plotting package. The entry carries an explicit citation
         key because a BibTeX entry without a key is rejected by strict BibTeX consumers. -->
    <citation type="bibtex">@Book{sarkar2008lattice,
    title = {Lattice: Multivariate Data Visualization with R},
    author = {Deepayan Sarkar},
    publisher = {Springer},
    address = {New York},
    year = {2008},
    note = {ISBN 978-0-387-75968-5},
    url = {http://lmdvr.r-forge.r-project.org},
  }</citation>
</citations>
</tool>