diff small_rna_maps.r @ 5:12c14642e6ac draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 24a21619d79d83b38cef7f1a7b858c621e4c8449
author artbio
date Sun, 08 Oct 2017 17:56:13 -0400
parents 507383cce5a8
children a3be3601bcb3
line wrap: on
line diff
--- a/small_rna_maps.r	Fri Aug 25 12:22:03 2017 -0400
+++ b/small_rna_maps.r	Sun Oct 08 17:56:13 2017 -0400
@@ -1,7 +1,7 @@
 ## Setup R error handling to go to stderr
 options( show.error.messages=F,
        error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-warnings()
+# options(warn = -1)
 library(RColorBrewer)
 library(lattice)
 library(latticeExtra)
@@ -12,6 +12,7 @@
 option_list <- list(
     make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"),
     make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"),
+    make_option("--first_plot_method", type = "character", help="How data from the first dataframe should be plotted"),  # plot method applied to --first_dataframe
     make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"),
     make_option("--output_pdf", type = "character", help="path to the pdf file with plots")
     )
@@ -20,122 +21,199 @@
 args = parse_args(parser)
  
 # data frames implementation
-
+## first table
 Table = read.delim(args$first_dataframe, header=T, row.names=NULL)
-Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+if (args$first_plot_method %in% c("Counts", "Size")) {  # count-based plots: rows with Polarity "R" get negative Counts
+    Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+}
 n_samples=length(unique(Table$Dataset))
 genes=unique(levels(Table$Chromosome))
 per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x))
 per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) )
 n_genes=length(per_gene_readmap)
-
-ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL)
-if (args$extra_plot_method=='Size') {
-    ExtraTable <- within(ExtraTable, Count[Polarity=="R"] <- (Count[Polarity=="R"]*-1))
+## second table: only read when an extra plot method was requested
+if (args$extra_plot_method != '') {
+    ExtraTable <- read.delim(args$extra_dataframe, header=TRUE, row.names=NULL)
+    if (args$extra_plot_method %in% c("Counts", "Size")) {  # same sign convention as the first table
+        ExtraTable <- within(ExtraTable, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+        }
+    per_gene_size <- lapply(genes, function(x) subset(ExtraTable, Chromosome==x))  # one data frame per chromosome
     }
-per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x))
-    
-## end of data frames implementation
 
 ## functions
 
-first_plot = function(df, ...) {
-    combineLimits(xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
-    data=df,
-    type='h',
-    lwd=1.5,
-    scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
-    xlab=NULL, main=NULL, ylab=NULL,
-    as.table=T,
-    origin = 0,
-    horizontal=FALSE,
-    group=Polarity,
-    col=c("red","blue"),
-    par.strip.text = list(cex=0.7),
-    ...))
+plot_unit = function(df, method=args$first_plot_method, ...) {  # build the lattice plot of `df` for `method`; `...` is forwarded to xyplot()/barchart()
+    if (method == 'Counts') {  # 'Counts': Counts against Coordinate, drawn with type='h' from origin 0
+        p = xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+        data=df,
+        type='h',
+        lwd=1.5,
+        scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+        xlab=NULL, main=NULL, ylab=NULL,
+        as.table=T,
+        origin = 0,
+        horizontal=FALSE,
+        group=Polarity,
+        col=c("red","blue"),
+        par.strip.text = list(cex=0.7),
+        ...)
+    } else if (method != "Size") {  # any other method: the column named by `method` is plotted as points
+        p = xyplot(eval(as.name(method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),  # eval(as.name(method)) turns the method string into the column symbol
+        data=df,
+        type='p',
+        pch=19,
+        cex=0.35,
+        scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+        xlab=NULL, main=NULL, ylab=NULL,
+        as.table=T,
+        origin = 0,
+        horizontal=FALSE,
+        group=Polarity,
+        col=c("red","blue"),
+        par.strip.text = list(cex=0.7),
+        ...)
+    } else {  # 'Size': stacked bar chart of Counts per Size value
+        p = barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
+                     horizontal=FALSE,
+                     group=Polarity,
+                     stack=TRUE,
+                     col=c('red', 'blue'),
+                     scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(rot=0, cex=0.7, axs="i", tck=0.5)),
+        xlab = NULL,
+        ylab = NULL,
+        main = NULL,
+        as.table=TRUE,
+        par.strip.text = list(cex=0.6),
+        ...)
     }
+    combineLimits(p)  # latticeExtra helper applied to the free-scale plot; its result is the return value
+}
+
+# Build the trellis object for one page of the single-method layout.
+#   df            rows of the first table for the chromosomes shown on this page
+#   method        'Counts', 'Size', or the name of the column plotted against Coordinate
+#   rows_per_page number of layout rows per page (was a self-referencing default; now 8)
+#   ...           forwarded to xyplot() / barchart()
+# Returns the plot with outer strips, laid out as n_samples columns x rows_per_page rows.
+plot_single <- function(df, method=args$first_plot_method, rows_per_page=8, ...) {
+    # labels are looked up with `method` (not the global option) so that they
+    # always describe the data that is actually drawn
+    x_label <- list(label=bottom_first_method[[method]], cex=.85)
+    y_label <- list(label=legend_first_method[[method]], cex=.85)
+    main_title <- title_first_method[[method]]
+    if (method == 'Counts') {
+        p = xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+                   data=df,
+                   type='h',
+                   lwd=1.5,
+                   scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+                   xlab=x_label, ylab=y_label, main=main_title,
+                   origin = 0,
+                   group=Polarity,
+                   col=c("red","blue"),
+                   par.strip.text = list(cex=0.7),
+                   as.table=TRUE,
+                   ...)
+    } else if (method != "Size") {
+        p = xyplot(eval(as.name(method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+                   data=df,
+                   type='p',
+                   pch=19,
+                   cex=0.35,
+                   scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+                   xlab=x_label, ylab=y_label, main=main_title,
+                   origin = 0,
+                   group=Polarity,
+                   col=c("red","blue"),
+                   par.strip.text = list(cex=0.7),
+                   as.table=TRUE,
+                   ...)
+    } else {
+        p = barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
+                     horizontal=FALSE,
+                     group=Polarity,
+                     stack=TRUE,
+                     col=c('red', 'blue'),
+                     scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.5, alternating=TRUE), x=list(rot=0, cex=0.6, tck=0.5, alternating=c(3,3))),
+                     xlab=x_label, ylab=y_label, main=main_title,
+                     par.strip.text = list(cex=0.7),
+                     nrow = 8,
+                     as.table=TRUE,
+                     ...)
+    }
+    p = update(useOuterStrips(p, strip.left=strip.custom(par.strip.text = list(cex=0.5))), layout=c(n_samples, rows_per_page))
+    # only the size distribution additionally gets combined (extended) panel limits
+    if (method == "Size") { p = combineLimits(p, extend=TRUE) }
+    p
+}
+
+## function parameters
+# lattice par.settings for each plot kind, then title / y-label / x-label lookup tables keyed by plot method
+#par.settings.firstplot = list(layout.heights=list(top.padding=11, bottom.padding = -14))
+#par.settings.secondplot=list(layout.heights=list(top.padding=11, bottom.padding = -15), strip.background=list(col=c("lavender","deepskyblue")))
+par.settings.firstplot = list(layout.heights=list(top.padding=-2, bottom.padding=-2))
+par.settings.secondplot=list(layout.heights=list(top.padding=-1, bottom.padding=-1), strip.background=list(col=c("lavender","deepskyblue")))
+par.settings.single_plot=list(strip.background = list(col = c("lightblue", "lightgreen")))
+title_first_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions")
+title_extra_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions")
+legend_first_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count")
+legend_extra_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count")
+bottom_first_method =list(Counts="Coordinates (nbre of bases)",Coverage="Coordinates (nbre of bases)", Median="Coordinates (nbre of bases)", Mean="Coordinates (nbre of bases)", Size="Sizes of reads")
+bottom_extra_method =list(Counts="Coordinates (nbre of bases)",Coverage="Coordinates (nbre of bases)", Median="Coordinates (nbre of bases)", Mean="Coordinates (nbre of bases)", Size="Sizes of reads")
+
+## Plotting Functions
+
+double_plot <- function(...) {  # write the PDF that pairs, per chromosome, the extra-method plot with the first-method plot
+    if (n_genes > 5) {page_height=15; rows_per_page=10} else {  # more than 5 chromosomes: fixed page height, 10 plot rows per page
+                     rows_per_page= 2 * n_genes; page_height=1.5*n_genes}  # otherwise two plot rows per chromosome on one page
+    if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 7 * n_samples/2}  # page width scales with the number of datasets
+    pdf(file=args$output_pdf, paper="special", height=page_height, width=page_width)
+    for (i in seq(1,n_genes,rows_per_page/2)) {  # each page holds rows_per_page/2 chromosomes
+        start=i
+        end=i+rows_per_page/2-1
+        if (end>n_genes) {end=n_genes}  # last page may hold fewer chromosomes
+        first_plot.list = lapply(per_gene_readmap[start:end], function(x) plot_unit(x, strip=FALSE, par.settings=par.settings.firstplot))
+        second_plot.list = lapply(per_gene_size[start:end], function(x) plot_unit(x, method=args$extra_plot_method, par.settings=par.settings.secondplot))
+        plot.list=rbind(second_plot.list, first_plot.list)  # 2-row matrix; c() below flattens it column-wise: extra plot, then first plot, per chromosome
+        args_list=c(plot.list, list( nrow=rows_per_page+1, ncol=1,  heights=unit(c(40,30,40,30,40,30,40,30,40,30,10), rep("mm", 11)),  # NOTE(review): heights always has 11 entries while nrow is rows_per_page+1 - confirm behaviour when n_genes < 5
+                                    top=textGrob(paste(title_first_method[[args$first_plot_method]], "and", title_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=0, just="top"),
+                                    left=textGrob(paste(legend_first_method[[args$first_plot_method]], "/", legend_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=2, rot=90),
+                                    sub=textGrob(paste(bottom_first_method[[args$first_plot_method]], "/", bottom_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), just="bottom", vjust=2)
+                                    )
+                   )
+        do.call(grid.arrange, args_list)  # draws one page from the assembled argument list
+        }
+    devname=dev.off()  # close the PDF device
+}
 
 
-second_plot = function(df, ...) {
-    #smR.prepanel=function(x,y,...) {; yscale=c(y*0, max(abs(y)));list(ylim=yscale);}
-    sizeplot = xyplot(eval(as.name(args$extra_plot_method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
-    data=df,
-    type='p',
-    cex=0.35,
-    pch=19,
-    scales= list(relation="free", x=list(rot=0, cex=0, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
-    xlab=NULL, main=NULL, ylab=NULL,
-    as.table=T,
-    origin = 0,
-    horizontal=FALSE,
-    group=Polarity,
-    col=c("darkred","darkblue"),
-    par.strip.text = list(cex=0.7),
-    ...)
-    combineLimits(sizeplot)
-    }
+# Write the single-method PDF: up to 8 chromosomes per page, page width scaled by n_samples.
+single_plot <- function(...) {
+    genes_per_page <- 8
+    pdf(file=args$output_pdf, paper="special", height=11.69, width=8.2677 * n_samples / 2)
+    for (first_gene in seq(1, n_genes, genes_per_page)) {
+        last_gene <- min(first_gene + genes_per_page - 1, n_genes)
+        # stack the per-chromosome data frames shown on this page
+        page_df <- do.call(rbind, per_gene_readmap[first_gene:last_gene])
+        page_plot <- plot_single(page_df, method=args$first_plot_method,
+                                 par.settings=par.settings.single_plot, rows_per_page=genes_per_page)
+        plot(page_plot)
+    }
+    invisible(dev.off())
+}
 
-second_plot_size = function(df, ...) {
-#  smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
-  bc= barchart(Count~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
-    horizontal=FALSE,
-group=Polarity,
-stack=TRUE,
-    col=c('red', 'blue'),
-    cex=0.75,
-    scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ),
-#    prepanel=smR.prepanel,
-    xlab = NULL,
-    ylab = NULL,
-    main = NULL,
-    as.table=TRUE,
-    newpage = T,
-    par.strip.text = list(cex=0.7),
-    ...)
-  combineLimits(bc)
-  }
+# main: an empty extra plot method selects the single-plot layout
+if (args$extra_plot_method == '') {
+    single_plot()
+} else {
+    double_plot()
+}
 
 
-## end of functions
 
-## function parameters
-par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=0), strip.background = list(col=c("lightblue","lightgreen")) )
-par.settings.size=list(layout.heights=list(top.padding=0, bottom.padding=0))
-graph_title=list(Coverage="Read Maps and Coverages", Median="Read Maps and Median sizes", Mean="Read Maps and Mean sizes", SizeDistribution="Read Maps and Size Distributions")
-graph_legend=list(Coverage="Read counts / Coverage", Median="Read counts / Median size", Mean="Read counts / Mean size", SizeDistribution="Read counts")
-graph_bottom=list(Coverage="Nucleotide coordinates", Median="Nucleotide coordinates", Mean="Nucleotide coordinates", Size="Read sizes / Nucleotide coordinates")
-## end of function parameters'
 
-## GRAPHS
-
-if (n_genes > 5) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=6} else {
-                 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2 }
-if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/2} # to test
 
 
-pdf(file=args$output_pdf, paper="special", height=page_height_simple, width=page_width)
-if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1}
-for (i in seq(1,n_genes,rows_per_page/2)) {
-    start=i
-    end=i+rows_per_page/2-1
-    if (end>n_genes) {end=n_genes}
-    first_plot.list=lapply(per_gene_readmap[start:end], function(x) first_plot(x, strip=FALSE, par.settings=par.settings.readmap))
-    if (args$extra_plot_method == "Size") {
-        second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot_size(x, par.settings=par.settings.size))
-        }
-    else {
-        second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot(x, par.settings=par.settings.size))
-        }
-    
-        
-    plot.list=rbind(second_plot.list, first_plot.list)
-    args_list=c(plot.list, list(nrow=rows_per_page+1, ncol=1,
-                                    top=textGrob(graph_title[[args$extra_plot_method]], gp=gpar(cex=1), just="top"),
-                                    left=textGrob(graph_legend[[args$extra_plot_method]], gp=gpar(cex=1), vjust=1, rot=90),
-                                    sub=textGrob(graph_bottom[[args$extra_plot_method]], gp=gpar(cex=1), just="bottom")
-                                    )
-           )
-do.call(grid.arrange, args_list)
-}
-devname=dev.off()
+
 
+