comparison small_rna_maps.r @ 5:12c14642e6ac draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 24a21619d79d83b38cef7f1a7b858c621e4c8449
author artbio
date Sun, 08 Oct 2017 17:56:13 -0400
parents 507383cce5a8
children a3be3601bcb3
comparison
equal deleted inserted replaced
4:a6b9a081064b 5:12c14642e6ac
1 ## Setup R error handling to go to stderr 1 ## Setup R error handling to go to stderr
2 options( show.error.messages=F, 2 options( show.error.messages=F,
3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
4 warnings() 4 # options(warn = -1)
5 library(RColorBrewer) 5 library(RColorBrewer)
6 library(lattice) 6 library(lattice)
7 library(latticeExtra) 7 library(latticeExtra)
8 library(grid) 8 library(grid)
9 library(gridExtra) 9 library(gridExtra)
10 library(optparse) 10 library(optparse)
11 11
12 option_list <- list( 12 option_list <- list(
13 make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"), 13 make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"),
14 make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"), 14 make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"),
15 make_option("--first_plot_method", type = "character", help="How additional data should be plotted"),
15 make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"), 16 make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"),
16 make_option("--output_pdf", type = "character", help="path to the pdf file with plots") 17 make_option("--output_pdf", type = "character", help="path to the pdf file with plots")
17 ) 18 )
18 19
19 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) 20 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
20 args = parse_args(parser) 21 args = parse_args(parser)
21 22
22 # data frames implementation 23 # data frames implementation
23 24 ## first table
24 Table = read.delim(args$first_dataframe, header=T, row.names=NULL) 25 Table = read.delim(args$first_dataframe, header=T, row.names=NULL)
25 Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) 26 if (args$first_plot_method == "Counts" | args$first_plot_method == "Size") {
27 Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
28 }
26 n_samples=length(unique(Table$Dataset)) 29 n_samples=length(unique(Table$Dataset))
27 genes=unique(levels(Table$Chromosome)) 30 genes=unique(levels(Table$Chromosome))
28 per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x)) 31 per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x))
29 per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) ) 32 per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) )
30 n_genes=length(per_gene_readmap) 33 n_genes=length(per_gene_readmap)
31 34 # second table
32 ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL) 35 if (args$extra_plot_method != '') {
33 if (args$extra_plot_method=='Size') { 36 ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL)
34 ExtraTable <- within(ExtraTable, Count[Polarity=="R"] <- (Count[Polarity=="R"]*-1)) 37 if (args$extra_plot_method == "Counts" | args$extra_plot_method=='Size') {
38 ExtraTable <- within(ExtraTable, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
39 }
40 per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x))
35 } 41 }
36 per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x))
37
38 ## end of data frames implementation
39 42
40 ## functions 43 ## functions
41 44
42 first_plot = function(df, ...) { 45 plot_unit = function(df, method=args$first_plot_method, ...) {
43 combineLimits(xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)), 46 if (method == 'Counts') {
44 data=df, 47 p = xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
45 type='h', 48 data=df,
46 lwd=1.5, 49 type='h',
47 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), 50 lwd=1.5,
48 xlab=NULL, main=NULL, ylab=NULL, 51 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
49 as.table=T, 52 xlab=NULL, main=NULL, ylab=NULL,
50 origin = 0, 53 as.table=T,
51 horizontal=FALSE, 54 origin = 0,
52 group=Polarity, 55 horizontal=FALSE,
53 col=c("red","blue"), 56 group=Polarity,
54 par.strip.text = list(cex=0.7), 57 col=c("red","blue"),
55 ...)) 58 par.strip.text = list(cex=0.7),
59 ...)
60 } else if (method != "Size") {
61 p = xyplot(eval(as.name(method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
62 data=df,
63 type='p',
64 pch=19,
65 cex=0.35,
66 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
67 xlab=NULL, main=NULL, ylab=NULL,
68 as.table=T,
69 origin = 0,
70 horizontal=FALSE,
71 group=Polarity,
72 col=c("red","blue"),
73 par.strip.text = list(cex=0.7),
74 ...)
75 } else {
76 p = barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
77 horizontal=FALSE,
78 group=Polarity,
79 stack=TRUE,
80 col=c('red', 'blue'),
81 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(rot=0, cex=0.7, axs="i", tck=0.5)),
82 xlab = NULL,
83 ylab = NULL,
84 main = NULL,
85 as.table=TRUE,
86 par.strip.text = list(cex=0.6),
87 ...)
56 } 88 }
57 89 combineLimits(p)
58 90 }
59 second_plot = function(df, ...) { 91
60 #smR.prepanel=function(x,y,...) {; yscale=c(y*0, max(abs(y)));list(ylim=yscale);} 92 plot_single <- function(df, method=args$first_plot_method, rows_per_page=rows_per_page, ...) {
61 sizeplot = xyplot(eval(as.name(args$extra_plot_method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)), 93 if (method == 'Counts') {
62 data=df, 94 p = xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
63 type='p', 95 data=df,
64 cex=0.35, 96 type='h',
65 pch=19, 97 lwd=1.5,
66 scales= list(relation="free", x=list(rot=0, cex=0, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), 98 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
67 xlab=NULL, main=NULL, ylab=NULL, 99 xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85),
68 as.table=T, 100 ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85),
69 origin = 0, 101 main=title_first_method[[args$first_plot_method]],
70 horizontal=FALSE, 102 origin = 0,
71 group=Polarity, 103 group=Polarity,
72 col=c("darkred","darkblue"), 104 col=c("red","blue"),
73 par.strip.text = list(cex=0.7), 105 par.strip.text = list(cex=0.7),
74 ...) 106 as.table=T,
75 combineLimits(sizeplot) 107 ...)
76 } 108 p = update(useOuterStrips(p, strip.left=strip.custom(par.strip.text = list(cex=0.5))), layout=c(n_samples, rows_per_page))
77 109 return(p)
78 second_plot_size = function(df, ...) { 110 } else if (method != "Size") {
79 # smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} 111 p = xyplot(eval(as.name(method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
80 bc= barchart(Count~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0, 112 data=df,
81 horizontal=FALSE, 113 type='p',
82 group=Polarity, 114 pch=19,
83 stack=TRUE, 115 cex=0.35,
84 col=c('red', 'blue'), 116 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
85 cex=0.75, 117 xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85),
86 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ), 118 ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85),
87 # prepanel=smR.prepanel, 119 main=title_first_method[[args$first_plot_method]],
88 xlab = NULL, 120 origin = 0,
89 ylab = NULL, 121 group=Polarity,
90 main = NULL, 122 col=c("red","blue"),
91 as.table=TRUE, 123 par.strip.text = list(cex=0.7),
92 newpage = T, 124 as.table=T,
93 par.strip.text = list(cex=0.7), 125 ...)
94 ...) 126 p = update(useOuterStrips(p, strip.left=strip.custom(par.strip.text = list(cex=0.5))), layout=c(n_samples, rows_per_page))
95 combineLimits(bc) 127 return(p)
96 } 128 } else {
97 129 p= barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
98 130 horizontal=FALSE,
99 ## end of functions 131 group=Polarity,
132 stack=TRUE,
133 col=c('red', 'blue'),
134 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.5, alternating=T), x=list(rot=0, cex=0.6, tck=0.5, alternating=c(3,3))),
135 xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85),
136 ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85),
137 main=title_first_method[[args$first_plot_method]],
138 par.strip.text = list(cex=0.7),
139 nrow = 8,
140 as.table=TRUE,
141
142 ...)
143 p = update(useOuterStrips(p, strip.left=strip.custom(par.strip.text = list(cex=0.5))), layout=c(n_samples, rows_per_page))
144
145 p = combineLimits(p, extend=TRUE)
146 return (p)
147 }
148 }
100 149
101 ## function parameters 150 ## function parameters
102 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=0), strip.background = list(col=c("lightblue","lightgreen")) ) 151
103 par.settings.size=list(layout.heights=list(top.padding=0, bottom.padding=0)) 152 #par.settings.firstplot = list(layout.heights=list(top.padding=11, bottom.padding = -14))
104 graph_title=list(Coverage="Read Maps and Coverages", Median="Read Maps and Median sizes", Mean="Read Maps and Mean sizes", SizeDistribution="Read Maps and Size Distributions") 153 #par.settings.secondplot=list(layout.heights=list(top.padding=11, bottom.padding = -15), strip.background=list(col=c("lavender","deepskyblue")))
105 graph_legend=list(Coverage="Read counts / Coverage", Median="Read counts / Median size", Mean="Read counts / Mean size", SizeDistribution="Read counts") 154 par.settings.firstplot = list(layout.heights=list(top.padding=-2, bottom.padding=-2))
106 graph_bottom=list(Coverage="Nucleotide coordinates", Median="Nucleotide coordinates", Mean="Nucleotide coordinates", Size="Read sizes / Nucleotide coordinates") 155 par.settings.secondplot=list(layout.heights=list(top.padding=-1, bottom.padding=-1), strip.background=list(col=c("lavender","deepskyblue")))
107 ## end of function parameters' 156 par.settings.single_plot=list(strip.background = list(col = c("lightblue", "lightgreen")))
108 157 title_first_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions")
109 ## GRAPHS 158 title_extra_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions")
110 159 legend_first_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count")
111 if (n_genes > 5) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=6} else { 160 legend_extra_method =list(Counts="Read count", Coverage="Coveragedepth", Median="Median size", Mean="Mean size", Size="Read count")
112 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2 } 161 bottom_first_method =list(Counts="Coordinates (nbre of bases)",Coverage="Coordinates (nbre of bases)", Median="Coordinates (nbre of bases)", Mean="Coordinates (nbre of bases)", Size="Sizes of reads")
113 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/2} # to test 162 bottom_extra_method =list(Counts="Coordinates (nbre of bases)",Coverage="Coordinates (nbre of bases)", Median="Coordinates (nbre of bases)", Mean="Coordinates (nbre of bases)", Size="Sizes of reads")
114 163
115 164 ## Plotting Functions
116 pdf(file=args$output_pdf, paper="special", height=page_height_simple, width=page_width) 165
117 if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1} 166 double_plot <- function(...) {
118 for (i in seq(1,n_genes,rows_per_page/2)) { 167 if (n_genes > 5) {page_height=15; rows_per_page=10} else {
119 start=i 168 rows_per_page= 2 * n_genes; page_height=1.5*n_genes}
120 end=i+rows_per_page/2-1 169 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 7 * n_samples/2}
121 if (end>n_genes) {end=n_genes} 170 pdf(file=args$output_pdf, paper="special", height=page_height, width=page_width)
122 first_plot.list=lapply(per_gene_readmap[start:end], function(x) first_plot(x, strip=FALSE, par.settings=par.settings.readmap)) 171 for (i in seq(1,n_genes,rows_per_page/2)) {
123 if (args$extra_plot_method == "Size") { 172 start=i
124 second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot_size(x, par.settings=par.settings.size)) 173 end=i+rows_per_page/2-1
125 } 174 if (end>n_genes) {end=n_genes}
126 else { 175 first_plot.list = lapply(per_gene_readmap[start:end], function(x) plot_unit(x, strip=FALSE, par.settings=par.settings.firstplot))
127 second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot(x, par.settings=par.settings.size)) 176 second_plot.list = lapply(per_gene_size[start:end], function(x) plot_unit(x, method=args$extra_plot_method, par.settings=par.settings.secondplot))
128 } 177 plot.list=rbind(second_plot.list, first_plot.list)
129 178 args_list=c(plot.list, list( nrow=rows_per_page+1, ncol=1, heights=unit(c(40,30,40,30,40,30,40,30,40,30,10), rep("mm", 11)),
130 179 top=textGrob(paste(title_first_method[[args$first_plot_method]], "and", title_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=0, just="top"),
131 plot.list=rbind(second_plot.list, first_plot.list) 180 left=textGrob(paste(legend_first_method[[args$first_plot_method]], "/", legend_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=2, rot=90),
132 args_list=c(plot.list, list(nrow=rows_per_page+1, ncol=1, 181 sub=textGrob(paste(bottom_first_method[[args$first_plot_method]], "/", bottom_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), just="bottom", vjust=2)
133 top=textGrob(graph_title[[args$extra_plot_method]], gp=gpar(cex=1), just="top"),
134 left=textGrob(graph_legend[[args$extra_plot_method]], gp=gpar(cex=1), vjust=1, rot=90),
135 sub=textGrob(graph_bottom[[args$extra_plot_method]], gp=gpar(cex=1), just="bottom")
136 ) 182 )
137 ) 183 )
138 do.call(grid.arrange, args_list) 184 do.call(grid.arrange, args_list)
139 } 185 }
140 devname=dev.off() 186 devname=dev.off()
141 187 }
188
189
190 single_plot <- function(...) {
191 width = 8.2677 * n_samples / 2
192 rows_per_page=8
193 pdf(file=args$output_pdf, paper="special", height=11.69, width=width)
194 for (i in seq(1,n_genes,rows_per_page)) {
195 start=i
196 end=i+rows_per_page-1
197 if (end>n_genes) {end=n_genes}
198 bunch = do.call(rbind, per_gene_readmap[start:end]) # sub dataframe from the list
199 p = plot_single(bunch, method=args$first_plot_method, par.settings=par.settings.single_plot, rows_per_page=rows_per_page)
200 plot(p)
201 }
202 devname=dev.off()
203 }
204
205 # main
206
207 if (args$extra_plot_method != '') { double_plot() }
208 if (args$extra_plot_method == '') {
209 single_plot()
210 }
211
212
213
214
215
216
217
218
219