comparison small_rna_clusters.xml @ 0:8028521b6e4f draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_clusters commit f38805cf151cbda1cf7de0a92cdfeb5978f26547"
author artbio
date Mon, 07 Oct 2019 12:51:25 -0400
parents
children 160e35e432a0
comparison
equal deleted inserted replaced
-1:000000000000 0:8028521b6e4f
1 <tool id="small_rna_clusters" name="small_rna_clusters" version="1.0.0">
2 <description></description>
3 <requirements> <!-- Package dependencies; "version=build" values look like exact conda build pins (sambamba is pinned by version only) -->
4 <requirement type="package" version="0.15.3=py27hda2845c_1">pysam</requirement>
5 <requirement type="package" version="1.6.4=r36h6115d3f_0">r-optparse</requirement>
6 <requirement type="package" version="0.6_28=r36h6115d3f_1002">r-latticeextra</requirement>
7 <requirement type="package" version="2.3=r36h6115d3f_1002">r-gridextra</requirement>
8 <requirement type="package" version="1.4.3=r36h29659fb_0">r-reshape2</requirement>
9 <requirement type="package" version="0.6.6">sambamba</requirement>
10 <requirement type="package" version="1.9=h10a08f8_12">samtools</requirement>
11 <requirement type="package" version="64.2=he1b5a44_1">icu</requirement>
12 </requirements>
13 <stdio> <!-- Treats any exit code of 1 or above as fatal. NOTE(review): the command element also sets detect_errors="exit_code", so this block may be redundant; confirm -->
14 <exit_code range="1:" level="fatal" description="Tool exception" />
15 </stdio>
16 <command detect_errors="exit_code"><![CDATA[
17 ## Step 1: per input, sambamba keeps mapped reads with minsize <= length <= maxsize in a BAM named after the element identifier, then samtools indexes it.
18 ## Steps 2-3: small_rna_clusters.py clusters those BAMs, small_rna_clusters.r plots the table. NOTE: inputs sharing an element identifier would overwrite each other.
19 #for $file in $inputs
20 sambamba view -t \${GALAXY_SLOTS:-1} -F "not unmapped and sequence_length >= ${minsize} and sequence_length <= ${maxsize}" -f bam '$file' -o '$file.element_identifier' &&
21 samtools index '$file.element_identifier' &&
22 #end for
23
24 python '$__tool_directory__'/small_rna_clusters.py
25 --inputs ${ ' '.join(["'%s'" % x.element_identifier for x in $inputs]) }
26 #set $labels = list()
27 #for $file in $inputs:
28 #silent $labels.append(str($file.element_identifier))
29 #end for
30 --sample_names ${ ' '.join(["'%s'" % x for x in $labels]) }
31 --minsize $minsize
32 --maxsize $maxsize
33 --outputs '$output_tab'
34 --cluster $cluster
35 --bed '$output_bed'
36 --bed_skipsize $skip_size
37 --bed_skipcounts $skip_counts
38 --bed_skipdensity $skip_density
39 $strandness &&
40
41 Rscript '$__tool_directory__'/small_rna_clusters.r
42 --first_dataframe '$output_tab'
43 --first_plot_method 'Counts'
44 --output_pdf '$output_pdf'
45 ]]></command>
46 <inputs> <!-- Tool form parameters; names are referenced by the command template and by the tests -->
47 <param name="inputs" type="data" format="bam" label="Select alignment files to parse" multiple="true"
48 help="maps from these bam inputs will be collected in a single pdf output" />
49 <param name="minsize" type="integer" label="Minimal size of reads for inclusion in analysis"
50 value="19" help="default value: 19" />
51 <param name="maxsize" type="integer" label="Maximal size of reads for inclusion in analysis"
52 value="29" help="default value: 29" />
53 <param name="first_plot" type="hidden" value="Counts"/> <!-- not referenced by the command, which passes 'Counts' literally -->
54 <param name="cluster" type="integer" label="Clustering distance in nucleotides" value="1"
55 help="Sets the distance (in nt) below which reads are clustered to a single median position" />
56 <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false"
57 label="Ignore polarity of reads ?" help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/>
58 <param name="skip_size" type="integer" label="do not report clusters whose size is less than the specified value" value="1"
59 help="Cluster size threshold (in nucleotides) for reporting. Set to 1 (default) reports all clusters, including singlets" />
60 <param name="skip_counts" type="integer" label="do not report cluster with a number of reads lower than the specified value" value="1"
61 help="Number-of-reads threshold for cluster reporting. Set to 1 (default) reports all clusters, irrespective of their counts" />
62 <param name="skip_density" type="float" label="do not report cluster with density equal or less than the specified value" value="0"
63 help="Density threshold (in reads per nucleotide) for reporting. Set to 0 (default) reports all cluster densities" />
64 </inputs>
65
66 <outputs> <!-- output_tab and output_bed are passed to small_rna_clusters.py (its outputs and bed options); output_pdf is passed to small_rna_clusters.r, which reads output_tab -->
67 <data format="tabular" name="output_tab" label="Counts Dataframe" />
68 <data format="bed" name="output_bed" label="bed file for clusters" />
69 <data format="pdf" name="output_pdf" label="small RNA maps" />
70 </outputs>
71
72 <tests>
73 <test> <!-- 0: clustering distance 500, read polarity kept, bed thresholds left at values that report all clusters -->
74 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
75 <param name="cluster" value="500" />
76 <param name="skip_size" value="1" />
77 <param name="strandness" value="false" />
78 <output file="clustering_0.tab" name="output_tab" />
79 <output file="clustering_0.pdf" name="output_pdf" />
80 <output file="bed_0.bed" name="output_bed" />
81 </test>
82 <test> <!-- 1: same as test 0 but read polarity is ignored (nostrand flag set) -->
83 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
84 <param name="cluster" value="500" />
85 <param name="skip_size" value="1" />
86 <param name="strandness" value="true" />
87 <output file="clustering_1.tab" name="output_tab" />
88 <output file="clustering_1.pdf" name="output_pdf" />
89 <output file="bed_1.bed" name="output_bed" />
90 </test>
91 <test> <!-- 2: same as test 0 but the bed size threshold is raised to 1000 -->
92 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
93 <param name="cluster" value="500" />
94 <param name="skip_size" value="1000" />
95 <param name="strandness" value="false" />
96 <output file="clustering_2.tab" name="output_tab" />
97 <output file="clustering_2.pdf" name="output_pdf" />
98 <output file="bed_2.bed" name="output_bed" />
99 </test>
100 <test> <!-- 3: clustering distance 500, bed filtered on size (1000), read count (200) and density (0.1) -->
101 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
102 <param name="cluster" value="500" />
103 <param name="skip_size" value="1000" />
104 <param name="skip_counts" value="200" />
105 <param name="skip_density" value="0.1" />
106 <param name="strandness" value="false" />
107 <output file="clustering_3.tab" name="output_tab" />
108 <output file="clustering_3.pdf" name="output_pdf" />
109 <output file="bed_3.bed" name="output_bed" />
110 </test>
111 <test> <!-- 4: clustering distance 2000, polarity ignored, bed filtered on size (2000), read count (100) and density (0.1) -->
112 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" />
113 <param name="cluster" value="2000" />
114 <param name="skip_size" value="2000" />
115 <param name="skip_counts" value="100" />
116 <param name="skip_density" value="0.1" />
117 <param name="strandness" value="true" />
118 <output file="clustering_4.tab" name="output_tab" />
119 <output file="clustering_4.pdf" name="output_pdf" />
120 <output file="bed_4.bed" name="output_bed" />
121 </test>
122 </tests>
123 <help>
124 **What it does**
125
126 Clusters of read alignments (provided as bam files) are aggregated along regions of
127 *variable* lengths. The Clustering algorithm works as follows:
128
129 A read is clustered with the next read on the genomic reference if the two reads are
130 separated by *at most* the clustering distance (set in nucleotides). If clustered, the
131 step is repeated with the following read until clustering fails. A new cluster is then
132 searched.
133
134 For clustering procedure, one has the possibility to consider the polarity of reads
135 (default setting, only forward reads or reverse reads can be clustered, separately), or to
136 ignore this polarity.
137
138 Clusters of reads are plotted as single bars, their coordinates being the medians of
139 the flanking coordinates of the clusters.
140
141 In addition, clusters are reported in a bed file. There, clusters can be filtered out based on
142 various parameters: cluster size, cluster read number or cluster read density (number of
143 reads divided by the length of the cluster).
144
145 Note that bed filtering options only affect the number of reported lines in the bed file.
146 All clusters are shown in the plot. **i.e. the only parameter that affects the number of
147 found clusters is the clustering distance.**
148
149 **Inputs**
150
151 bam alignment files that must be
152
153 - single-read
154 - sorted
155 - mapped to the same reference
156
157 .. class:: warningmark
158
159 This tool follows a "map-reduce" procedure: multiple inputs, which can be arranged in a
160 data collection, are visualised side by side in a single pdf file and are reported in a
161 single bed file.
162
163 **Output**
164
165 A pdf file generated by the R package lattice, a dataframe used to plot the clusters, and
166 a bed file that reports significant clusters.
167 </help>
168
169 <citations> <!-- One DOI citation plus the lattice book as BibTeX; the BibTeX entry carries an explicit citation key -->
170 <citation type="doi">10.1093/bioinformatics/btp352</citation>
171 <citation type="bibtex">@Book{sarkar2008lattice,
172 title = {Lattice: Multivariate Data Visualization with R},
173 author = {Deepayan Sarkar},
174 publisher = {Springer},
175 address = {New York},
176 year = {2008},
177 note = {ISBN 978-0-387-75968-5},
178 url = {http://lmdvr.r-forge.r-project.org},
179 }</citation>
180 </citations>
181 </tool>