comparison readmap.xml @ 8:be0c6b6466cc draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 97b40d7a593cef6c3303f7baba781a84d242e454
author mvdbeek
date Mon, 19 Sep 2016 06:16:21 -0400
parents 68f58363f1c6
children 92898cc3ea19
comparison
equal deleted inserted replaced
7:c9e267cb84c0 8:be0c6b6466cc
1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.1.5"> 1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.2.0">
2 <description>from sRbowtie alignment</description> 2 <description>from sRbowtie alignment</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.12.7">bowtie</requirement> 4 <requirement type="package" version="1.0.0">bowtie</requirement>
5 <requirement type="package" version="0.7.7">pysam</requirement> 5 <requirement type="package" version="0.9.0">pysam</requirement>
6 <requirement type="package" version="3.1.2">R</requirement> 6 <requirement type="package" version="1.9.3">numpy</requirement>
7 <requirement type="package" version="2.14">biocbasics</requirement> 7 <requirement type="package" version="1.3.0">r-optparse</requirement>
8 <requirement type="package" version="1.9">numpy</requirement> 8 <requirement type="package" version="0.6_26">r-latticeextra</requirement>
9 </requirements> 9 <requirement type="package" version="2.0.0">r-gridextra</requirement>
10 <command interpreter="python"> 10 </requirements>
11 readmap.py 11 <command><![CDATA[
12 #if $refGenomeSource.genomeSource == "history": 12 python2 $__tool_directory__/readmap.py
13 --reference_fasta ## sys.argv[2] 13 #if $refGenomeSource.genomeSource == "history":
14 $refGenomeSource.ownFile ## index source 14 --reference_fasta
15 #else: 15 $refGenomeSource.ownFile ## index source
16 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1] 16 #else:
17 --reference_bowtie_index 17 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
18 $reference 18 --reference_bowtie_index
19 #end if 19 $reference
20 --rcode 20 #end if
21 $plotCode 21 --output_readmap
22 --output_readmap 22 "$readmap_dataframe"
23 $readmap_dataframe 23 --output_size_distribution
24 --output_size_distribution 24 "$size_distribution_dataframe"
25 $size_distribution_dataframe 25 --minquery $minquery
26 --minquery 26 --maxquery $maxquery
27 $minquery 27 --input
28 --maxquery 28 #for $i in $refGenomeSource.series
29 $maxquery 29 $i.input
30 --input 30 #end for
31 #for $i in $refGenomeSource.series 31 --ext
32 $i.input 32 #for $i in $refGenomeSource.series
33 #end for 33 $i.input.ext
34 --ext 34 #end for
35 #for $i in $refGenomeSource.series 35 --label
36 $i.input.ext 36 #for $i in $refGenomeSource.series
37 #end for 37 "$i.input.name"
38 --label 38 #end for
39 #for $i in $refGenomeSource.series 39 --normalization_factor
40 "$i.input.name" 40 #for $i in $refGenomeSource.series
41 #end for 41 $i.norm
42 --normalization_factor 42 #end for
43 #for $i in $refGenomeSource.series 43 #if $gff:
44 $i.norm 44 --gff
45 #end for 45 $gff
46 #if $gff: 46 #end if
47 --gff 47 ; Rscript $__tool_directory__/plot_size_readmap.r
48 $gff 48 --readmap_tab "$readmap_dataframe"
49 #end if 49 --size_distribution_tab "$size_distribution_dataframe"
50 50 --readmap_pdf "$readmap_PDF"
51 --size_distribution_pdf "$size_PDF"
52 --combi_pdf "$combi_PDF"
53 --title "$title"
54 --xlabel "$xlabel"
55 --ylabel "$ylabel"
56 --yrange "$yrange"
57 --rows_per_page "$rows_per_page"
58 ]]>
51 </command> 59 </command>
52 <inputs> 60 <inputs>
53 <conditional name="refGenomeSource"> 61 <conditional name="refGenomeSource">
54 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 62 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
55 <option value="indexed">Use a built-in index</option> 63 <option value="indexed">Use a built-in index</option>
56 <option value="history">Use one from the history</option> 64 <option value="history">Use one from the history</option>
57 </param> 65 </param>
58 <when value="indexed"> 66 <when value="indexed">
59 <repeat name="series" title="Add alignment files"> 67 <repeat name="series" title="Add alignment files">
60 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"> 68 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">
61 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/> 69 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
62 </param> 70 </param>
63 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/> 71 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
64 </repeat> 72 </repeat>
65 </when> 73 </when>
66 <when value="history"> 74 <when value="history">
67 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, that served as the reference index for the alignments" /> 75 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, that served as the reference index for the alignments" />
68 <repeat name="series" title="Add alignment files"> 76 <repeat name="series" title="Add alignment files">
69 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/> 77 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>
70 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/> 78 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
71 </repeat> 79 </repeat>
72 </when> 80 </when>
73 </conditional> 81 </conditional>
74 <param name="gff" type="data" format="gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/> 82 <param name="gff" type="data" format="gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
75 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> --> 83 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
76 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/> 84 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/>
77 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/> 85 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/>
78 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/> 86 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/>
79 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/> 87 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/>
80 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/> 88 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
81 <param name="yrange" type="integer" size="3" value="0" label="y axis range for readmaps. 0 means auto-scaling."/> 89 <param name="yrange" type="integer" size="3" value="0" label="y axis range for readmaps. 0 means auto-scaling."/>
82 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?"> 90 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
83 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/> 91 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
84 </param> 92 </param>
85 </inputs> 93 </inputs>
86 <configfiles> 94 <outputs>
87 <configfile name="plotCode"> 95 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/>
88 ## Setup R error handling to go to stderr 96 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/>
89 options( show.error.messages=F, 97 <data format="pdf" name="readmap_PDF" label="Readmaps"/>
90 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 98 <data format="pdf" name="size_PDF" label="Size distribution"/>
91 library(RColorBrewer) 99 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/>
92 library(lattice) 100 </outputs>
93 library(latticeExtra) 101 <help>
94 library(grid)
95 library(gridExtra)
96
97 ## data frames implementation
98
99 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL)
100 n_samples=length(unique(rm\$sample))
101 genes=unique(levels(rm\$gene))
102 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x)) ####### ?
103 n_genes=length(per_gene_readmap)
104
105 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
106 per_gene_size=lapply(genes, function(x) subset(size, gene==x)) ###### ?
107
108 ## end of data frames implementation
109
110 ## functions
111
112 plot_readmap=function(df, ...) {
113 combineLimits(xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))),
114 data=df,
115 type='h',
116 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
117 xlab=NULL, main=NULL, ylab=NULL,
118 as.table=T,
119 origin = 0,
120 horizontal=FALSE,
121 group=polarity,
122 col=c("red","blue"),
123 par.strip.text = list(cex=0.7),
124 ...))
125 }
126
127 plot_size_distribution= function(df, ...) {
128 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
129 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
130 horizontal=FALSE,
131 group=polarity,
132 stack=TRUE,
133 col=c('red', 'blue'),
134 cex=0.75,
135 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ),
136 prepanel=smR.prepanel,
137 xlab = NULL,
138 ylab = NULL,
139 main = NULL,
140 as.table=TRUE,
141 newpage = T,
142 par.strip.text = list(cex=0.7),
143 ...)
144 combineLimits(bc)
145 }
146
147 ## end of functions
148
149 ## function parameters'
150
151 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) )
152 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) )
153 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), strip.background=list(col=c("lightblue","lightgreen")) )
154 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), strip.background=list(col=c("lightblue", "lightgreen")) )
155
156 ## end of function parameters'
157
158 ## GRAPHS
159
160 if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=${rows_per_page}; extrarow=0 } else {
161 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2; extrarow=0 }
162 ## rows_per_page= 8; page_height_simple = 11.69/7*n_genes; page_height_combi=11.69/9*(n_genes*2); extrarow=0 }
163 ## rows_per_page= n_genes; page_height_simple = 11.69/n_genes/4; page_height_combi=11.69/(n_genes*2); extrarow=1 }
164 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test
165
166 pdf(file="${readmap_PDF}", paper="special", height=page_height_simple, width=page_width)
167 for (i in seq(1,n_genes,rows_per_page)) {
168 start=i
169 end=i+rows_per_page-1
170 if (end>n_genes) {end=n_genes}
171 if (${yrange} == 0) { readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else {
172 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-${yrange}, ${yrange}) , par.settings=par.settings.readmap)) }
173 args.list=c(readmap_plot.list, list(nrow=rows_per_page, ncol=1,
174 main=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"),
175 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
176 #sub=textGrob("readmap coordinates", gp=gpar(cex=.75), just="bottom")
177 )
178 )
179 do.call(grid.arrange, args.list)
180 }
181 devname=dev.off()
182
183
184 pdf(file="${size_PDF}", paper="special", height=page_height_simple, width=page_width)
185 for (i in seq(1,n_genes,rows_per_page)) {
186 start=i
187 end=i+rows_per_page-1
188 if (end>n_genes) {end=n_genes}
189 plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) )
190 args.list=c(plot.list, list(nrow=rows_per_page, ncol=1,
191 main=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"),
192 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
193 #sub="readsize in nucleotides"
194 )
195 )
196 do.call(grid.arrange, args.list)
197 }
198 devname=dev.off()
199
200 pdf(file="${combi_PDF}", paper="special", height=page_height_combi, width=page_width)
201 if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1}
202 for (i in seq(1,n_genes,rows_per_page/2)) {
203 start=i
204 end=i+rows_per_page/2-1
205 if (end>n_genes) {end=n_genes}
206 if (${yrange} == 0) {readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else {
207 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-${yrange}, ${yrange}), par.settings=par.settings.readmap)) }
208 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
209 plot.list=rbind(readmap_plot.list, size_plot.list )
210 args.list=c(plot.list, list(nrow=rows_per_page + extrarow, ncol=1,
211 main=textGrob("${title}", gp=gpar(cex=1), just="top"),
212 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90),
213 sub=textGrob("${xlabel}", gp=gpar(cex=1), just="bottom")
214 )
215 )
216 do.call(grid.arrange, args.list)
217 }
218 devname=dev.off()
219
220
221 </configfile>
222 </configfiles>
223
224 <outputs>
225 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/>
226 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/>
227 <data format="pdf" name="readmap_PDF" label="Readmaps"/>
228 <data format="pdf" name="size_PDF" label="Size distribution"/>
229 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/>
230 </outputs>
231 <help>
232 102
233 **What it does** 103 **What it does**
234 104
235 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap", 105 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap",
236 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates 106 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates
237 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom. 107 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom.
238 108
239 109
240 .. class:: warningmark 110 .. class:: warningmark
241 111
246 '''Example''' 116 '''Example'''
247 117
248 Query sequence:: 118 Query sequence::
249 For a SAM file such as the following, the tool will 119 For a SAM file such as the following, the tool will
250 120
251 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0 121 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
252 122
253 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0 123 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
254 124
255 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0 125 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
256 126
257 produce a plot like this: 127 produce a plot like this:
258 128
259 ---- 129 ----
260 130
261 .. image:: static/images/readmap.png 131 .. image:: static/images/readmap.png
262 :height: 800 132 :height: 800
263 :width: 500 133 :width: 500
264 134
265 </help> 135 </help>
266 <tests> 136 <tests>
267 <test> 137 <test>
268 <param name="genomeSource" value="history" /> 138 <param name="genomeSource" value="history" />
269 <param name="ownFile" value ="transposons.fasta" ftype="fasta" /> 139 <param name="ownFile" value ="transposons.fasta" ftype="fasta" />
270 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/> 140 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>
271 <param name="series_0|norm" value="1" /> 141 <param name="series_0|norm" value="1" />
272 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/> 142 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>
273 <param name="series_1|norm" value="1" /> 143 <param name="series_1|norm" value="1" />
274 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/> 144 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>
275 <param name="series_2|norm" value="1" /> 145 <param name="series_2|norm" value="1" />
276 <param name="minquery" value="20" /> 146 <param name="minquery" value="20" />
277 <param name="maxquery" value="30" /> 147 <param name="maxquery" value="30" />
278 <param name="title" value="Readmaps and size distributions" /> 148 <param name="title" value="Readmaps and size distributions" />
279 <param name="xlabel" value="Coordinates/read size" /> 149 <param name="xlabel" value="Coordinates/read size" />
280 <param name="ylabel" value="Number of reads" /> 150 <param name="ylabel" value="Number of reads" />
281 <param name="rows_per_page" value="8" /> 151 <param name="rows_per_page" value="8" />
282 <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" /> 152 <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" />
283 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" /> 153 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />
284 <output name="readmap_PDF" ftype="pdf" file="Readmaps.pdf" /> 154 </test>
285 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" /> 155 </tests>
286 <output name="combi_PDF" ftype="pdf" file="Size_distribution_and_Readmaps.pdf" />
287 </test>
288 </tests>
289 </tool> 156 </tool>