Mercurial > repos > iuc > scater_plot_tsne
comparison scater-plot-dist-scatter.R @ 0:a30f4bfe8f01 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 61f3899168453092fd25691cf31871a3a350fd3b"
| author | iuc |
|---|---|
| date | Tue, 03 Sep 2019 14:30:21 -0400 |
| parents | |
| children | 2b09ca1c5e41 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a30f4bfe8f01 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 | |
| 3 # Plot the distributions of read counts and feature counts side by side, with a scatter plot of read counts vs feature counts and a plot of % MT genes below | |
| 4 | |
| 5 # Load optparse, which we need to check inputs, along with the other required libraries | |
| 6 | |
| 7 library(optparse) | |
| 8 library(workflowscriptscommon) | |
| 9 library(LoomExperiment) | |
| 10 library(scater) | |
| 11 library(ggpubr) | |
| 12 library(scales) | |
| 13 | |
| 14 # Command-line interface: input Loom file, output PDF path and log-scale flag | |
| 15 | |
| 16 option_list <- list( | |
| 17 make_option( | |
| 18 opt_str = c("-i", "--input-loom"), | |
| 19 type = "character", | |
| 20 action = "store", | |
| 21 default = NA, | |
| 22 help = "A SingleCellExperiment object file in Loom format." | |
| 23 ), | |
| 24 make_option( | |
| 25 opt_str = c("-o", "--output-plot-file"), | |
| 26 type = "character", | |
| 27 action = "store", | |
| 28 default = NA, | |
| 29 help = "Path of the PDF output file to save plot to." | |
| 30 ), | |
| 31 make_option( | |
| 32 opt_str = c("-l", "--log-scale"), | |
| 33 type = "logical", | |
| 34 action = "store_true", | |
| 35 default = FALSE, | |
| 36 help = "Plot on log scale (recommended for large datasets)." | |
| 37 ) | |
| 38 ) | |
| 39 | |
| 40 opt <- wsc_parse_args(option_list, mandatory = c("input_loom", "output_plot_file", "log_scale")) | |
| 41 | |
| 42 # Abort early if the input Loom file cannot be found | |
| 43 | |
| 44 if (!file.exists(opt$input_loom)) { | |
| 45 stop(paste("File", opt$input_loom, "does not exist")) | |
| 46 } | |
| 47 | |
| 48 # Read the experiment object from the Loom file | |
| 49 | |
| 50 scle <- import(opt$input_loom, format = "loom", type = "SingleCellLoomExperiment") | |
| 51 | |
| 52 # Gather per-cell read and feature totals into a data frame for the scatter plot | |
| 53 total_counts <- scle$total_counts | |
| 54 total_features <- scle$total_features_by_counts | |
| 55 count_feats <- cbind(total_counts, total_features) | |
| 56 cf_dm <- as.data.frame(count_feats) | |
| 57 | |
| 58 # Histogram binwidths: one twentieth of the maximum, i.e. 20 bins from 0 to the max | |
| 59 read_bins <- max(total_counts) / 1e6 / 20 | |
| 60 feat_bins <- max(total_features) / 20 | |
| 61 | |
| 62 # Build the four panels: reads-vs-features scatter, the two per-cell histograms, and % MT genes | |
| 63 plot <- ggplot(cf_dm, aes(x = total_counts / 1e6, y = total_features)) + geom_point(shape = 1) + geom_smooth() + | |
| 64 labs(x = "Read count (millions)", y = "Feature count", title = "Scatterplot of reads vs features") | |
| 65 plot1 <- qplot(total_counts / 1e6, geom = "histogram", binwidth = read_bins, xlab = "Read counts (millions)", ylab = "Number of cells", fill = I("darkseagreen3")) + ggtitle("Read counts per cell") | |
| 66 plot2 <- qplot(total_features, geom = "histogram", binwidth = feat_bins, xlab = "Feature counts", ylab = "Number of cells", fill = I("darkseagreen3")) + ggtitle("Feature counts per cell") | |
| 67 plot3 <- plotColData(scle, x = "total_features_by_counts", y = "pct_counts_MT") + ggtitle("% MT genes") + geom_point(shape = 1) + theme(text = element_text(size = 15), plot.title = element_text(size = 15)) | |
| 68 | |
| 69 if (opt$log_scale) { | |
| 70 plot_log_both <- plot + scale_x_continuous(trans = "log10") + scale_y_continuous(trans = "log10") | |
| 71 plot1_log <- plot1 + scale_y_continuous(trans = "log10") | |
| 72 plot2_log <- plot2 + scale_y_continuous(trans = "log10") | |
| 73 plot3_log <- plot3 + scale_y_log10(labels = number) | |
| 74 final_plot_log <- ggarrange(plot1_log, plot2_log, plot_log_both, plot3_log, ncol = 2, nrow = 2) | |
| 75 ggsave(opt$output_plot_file, final_plot_log, device = "pdf") | |
| 76 } else { | |
| 77 final_plot <- ggarrange(plot1, plot2, plot, plot3, ncol = 2, nrow = 2) | |
| 78 ggsave(opt$output_plot_file, final_plot, device = "pdf") | |
| 79 } |
