comparison size_histogram.xml @ 0:ef64759eb181 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
author drosofff
date Wed, 21 Oct 2015 11:38:40 -0400
parents
children 00852209fd9f
comparison
equal deleted inserted replaced
-1:000000000000 0:ef64759eb181
1 <tool id="Size_histogram" name="Generate size histograms from alignment files" version="0.9.7">
2 <description>from sRbowtie alignment</description>
3 <requirements>
4 <requirement type="package" version="0.12.7">bowtie</requirement>
5 <requirement type="package" version="0.7.7">pysam</requirement>
6 <requirement type="package" version="3.1.2">R</requirement>
7 <requirement type="package" version="2.14">biocbasics</requirement>
8 <requirement type="package" version="1.9">numpy</requirement>
9 </requirements>
10 <command interpreter="python">
11 size_histogram.py
12 #if $refGenomeSource.genomeSource == "history":
13 --reference_fasta ## reference supplied as a fasta dataset from the history
14 $refGenomeSource.ownFile ## index source
15 #else:
16 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
17 --reference_bowtie_index ## built-in index: last field of the bowtie_indexes row whose first field equals the dbkey of the first alignment
18 $reference
19 #end if
20 --rcode ## R plotting script rendered from the plotCode configfile
21 $plotCode
22 --output_size_distribution ## tabular output dataset; the plotCode script reads this same file
23 $size_distribution_dataframe
24 --minquery ## value of the "Min size of reads to plot" parameter
25 $minquery
26 --maxquery ## value of the "Max size of reads to plot" parameter
27 $maxquery
28 --input ## one dataset path per entry of the series repeat
29 #for $i in $refGenomeSource.series
30 $i.input
31 #end for
32 --ext ## datatype extension of each alignment, in the same order as --input
33 #for $i in $refGenomeSource.series
34 $i.input.ext
35 #end for
36 --label ## dataset name of each alignment; quoted, presumably because names may contain spaces
37 #for $i in $refGenomeSource.series
38 "$i.input.name"
39 #end for
40 --normalization_factor ## norm value of each series entry, in the same order as --input
41 #for $i in $refGenomeSource.series
42 $i.norm
43 #end for
44 #if $gff:
45 --gff ## only passed when the optional GFF dataset is set
46 $gff
47 #end if
48 #if $global.value == 'yes':
49 --global_size ## flag passed only when the global select is set to yes
50 #end if
51 #if $collapsestrands.value == 'yes':
52 --collapse ## flag passed only when the collapsestrands select is set to yes
53 #end if
54
55 </command>
56 <inputs>
57 <conditional name="refGenomeSource">
58 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
59 <option value="indexed">Use a built-in index</option>
60 <option value="history">Use one from the history</option>
61 </param>
62 <when value="indexed">
63 <repeat name="series" title="Add alignment files">
64 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">
65 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
66 </param>
67 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
68 </repeat>
69 </when>
70 <when value="history">
71 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
72 <repeat name="series" title="Add alignment files">
73 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>
74 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
75 </repeat>
76 </when>
77 </conditional>
78 <param name="gff" type="data" format="gff,gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
79 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
80 <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">
81 <option value="no">for each item</option>
82 <option value="yes">global</option>
83 </param>
84 <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">
85 <option value="no">Do not collapse</option>
86 <option value="yes">Collapse + and - reads</option>
87 </param>
88 <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/>
89 <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/>
90 <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>
91 <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>
92 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
93 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
94 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
95 </param>
96 </inputs>
97 <configfiles>
98 <configfile name="plotCode">
99 ## Send R error messages to stderr and quit with exit status 1 when an error occurs
100 options( show.error.messages=F,
101 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
102 library(RColorBrewer)
103 library(lattice)
104 library(latticeExtra)
105 library(grid)
106 library(gridExtra)
107 ## This script is a Cheetah template: the tool parameters (title, xlabel, ylabel, rows_per_page, global) and dataset paths are substituted before R runs
108 ## Read the size distribution table (the dataset passed to size_histogram.py as --output_size_distribution)
109 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
110 n_samples = length(unique (size\$sample))
111 n_genes = length (unique (levels(size\$gene)))
112 ## Lattice settings shared by the per-item plot: panel padding and alternating strip background colours
113 par.settings.size=list(layout.heights=list(top.padding=1, bottom.padding=1),
114 strip.background = list(col = c("lightblue", "lightgreen"))
115 )
116 ## Prepanel giving a y range symmetric around zero; defined but currently not passed to any plot
117 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} # use if one want y axis in the middle of the plot
118 ## Per-item plot: stacked bars of count by read size, grouped by polarity, one panel per sample and gene; axis labels and title come from the xlabel, ylabel and title tool parameters
119 plot_size_distribution= function(df, ...) {
120 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
121 horizontal=FALSE,
122 group=polarity,
123 stack=TRUE,
124 col=c('red', 'blue'),
125 cex=0.75,
126 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.5, alternating=T), x=list(cex=.6 ) ),
127 xlab = "${xlabel}",
128 ylab = "${ylabel}",
129 main="${title}" ,
130 par.strip.text = list(cex=0.75),
131 as.table=TRUE,
132 newpage = T,
133 ...)
134 ## Put the strips on the outer margins, lay pages out as n_samples columns by rows_per_page rows, then combine the panel limits
135 combineLimits(update(useOuterStrips(bc,
136 strip.left = strip.custom(par.strip.text = list(cex=0.5))
137 ),
138 layout=c(n_samples,${rows_per_page})),
139 margin.x=F, margin.y=1)
140 }
141
142 # per_gene_size=lapply(genes, function(x) subset(size, gene==x)) # no object in this script
143 ## The R variable global defaults to "no"; Cheetah emits the override below only when the global select parameter is yes
144 global = "no"
145 #if $global.value == 'yes':
146 global = "yes"
147 #end if
148
149 if (global=="no") {
150 ## Per-item mode: warnings are silenced and the page width scales with the number of samples
151 options(warn=-1)
152 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677*n_samples/4)
153 plot_size_distribution(size, par.settings=par.settings.size) # removed , prepanel=smR.prepanel
154
155 } else {
156 ## Global mode: one panel per sample on a fixed-width page, all panels sharing the same y scale
157 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677)
158 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0,
159 horizontal=FALSE,
160 group=polarity,
161 stack=TRUE,
162 col=c('red', 'blue'),
163 # par.settings=list(fontsize = list(text=8, points=8)),
164 scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1),
165 xlab = "${xlabel}",
166 ylab = "${ylabel}",
167 main="${title}" , as.table=TRUE, newpage = T,
168 aspect=0.5,
169 strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue")
170 )
171 bc
172 }
173 devname=dev.off()
174
175 </configfile>
176 </configfiles>
177
178 <outputs>
179 <data format="tabular" name="size_distribution_dataframe" label="Size_distribution_dataframe.tab"/>
180 <data format="pdf" name="size_PDF" label="Size_distribution.pdf"/>
181 </outputs>
182 <help>
183
184 **What it does**
185
186 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,
187 where by default for each "chromosome" a histogram of read sizes is drawn.
188 Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).
189
190
191 .. class:: warningmark
192
193 '''TIP''' The input data can be produced using the sRbowtie tool.
194
195 ----
196
197 '''Example'''
198
199 Query sequence::
200 For a SAM file as the following:
201
202 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
203
204 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
205
206 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
207
208 produce a plot like this:
209
210 ----
211
212 .. image:: static/images/size_histogram.png
213 :height: 800
214 :width: 500
215
216 </help>
217 <tests>
218 <test>
219 <param name="genomeSource" value="history" />
220 <param name="ownFile" value="transposons.fasta" ftype="fasta" />
221 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>
222 <param name="series_0|norm" value="1" />
223 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>
224 <param name="series_1|norm" value="1" />
225 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>
226 <param name="series_2|norm" value="1" />
227 <param name="global" value="no" />
228 <param name="collapsestrands" value="no" />
229 <param name="minquery" value="18"/>
230 <param name="maxquery" value="30"/>
231 <param name="title" value="Size distribution"/>
232 <param name="xlabel" value="Size in nucleotides"/>
233 <param name="ylabel" value="Number of reads"/>
234 <param name="rows_per_page" value="10"/>
235 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />
236 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" />
237 </test>
238 </tests>
239 </tool>
240