Previous changeset 0:e6ca62ac65c6 (2019-07-18) Next changeset 2:7a365ec81b52 (2021-09-09) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 61f3899168453092fd25691cf31871a3a350fd3b" |
modified:
README.md macros.xml scater-filter.xml scater-manual-filter.R scater-plot-dist-scatter.R |
added:
environment.yml scater-plot-tsne.R test-data/scater_reads_genes_dist_log.pdf test-data/scater_tsne_plot.pdf |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f README.md --- a/README.md Thu Jul 18 11:13:41 2019 -0400 +++ b/README.md Tue Sep 03 14:27:39 2019 -0400 |
[ |
@@ -20,6 +20,13 @@ ## Command-line usage +The scripts require the installation of scater and a few other R/Bioconductor packages. An easy way to install them is to create a [conda](https://conda.io/) environment using the `environment.yml` file distributed together with these wrappers: + +``` +conda env create -f environment.yml +conda activate scater +``` + For help with any of the following scripts, run: `<script-name> --help` |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f environment.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/environment.yml Tue Sep 03 14:27:39 2019 -0400 |
b |
@@ -0,0 +1,14 @@ +name: scater +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconductor-loomexperiment=1.2.0 + - bioconductor-scater=1.12.2 + - r-ggpubr=0.2.2 + - r-mvoutlier=2.0.9 + - r-optparse=1.6.2 + - r-rtsne=0.15 + - r-scales=1.0.0 + - r-workflowscriptscommon=0.0.4 |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f macros.xml --- a/macros.xml Thu Jul 18 11:13:41 2019 -0400 +++ b/macros.xml Tue Sep 03 14:27:39 2019 -0400 |
b |
@@ -1,11 +1,11 @@ <macros> - <token name="@TOOL_VERSION@">1.10.1</token> + <token name="@TOOL_VERSION@">1.12.2</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">bioconductor-scater</requirement> <requirement type="package" version="1.6.2">r-optparse</requirement> <requirement type="package" version="0.0.4">r-workflowscriptscommon</requirement> - <requirement type="package" version="1.0.4">bioconductor-loomexperiment</requirement> + <requirement type="package" version="1.2.0">bioconductor-loomexperiment</requirement> <yield /> </requirements> </xml> |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-filter.xml --- a/scater-filter.xml Thu Jul 18 11:13:41 2019 -0400 +++ b/scater-filter.xml Tue Sep 03 14:27:39 2019 -0400 |
b |
@@ -16,6 +16,9 @@ #if str($filter_type.library_size): --library-size ${filter_type.library_size} #end if + #if str($filter_type.expressed_genes): + --expressed-genes ${filter_type.expressed_genes} + #end if #if str($filter_type.percent_counts_MT): --percent-counts-MT ${filter_type.percent_counts_MT} #end if @@ -35,6 +38,7 @@ <when value="manual"> <param name="detection_limit" argument="--detection-limit" type="float" optional="true" label="Number of reads mapped to a gene for it to be counted as expressed" help="Raising this number will raise the stringency and may lower the number of expressed genes" /> <param name="library_size" argument="--library-size" type="integer" optional="true" label="Minimum library size (mapped reads) to filter cells on" help="Raising this number will raise the stringency and may lower the number of included cells" /> + <param name="expressed_genes" argument="--expressed-genes" type="integer" optional="true" label="Minimum number of expressed genes to filter cells on" /> <param name="percent_counts_MT" argument="--percent-counts-MT" type="float" optional="true" label="Maximum % of mitochondrial genes expressed per cell" help="Cells that exceed this value will be filtered out" /> </when> <when value="pca" /> @@ -49,6 +53,7 @@ <param name="filter_type_selector" value="manual" /> <param name="detection_limit" value="4" /> <param name="library_size" value="100000" /> + <param name="expressed_genes" value="400" /> <param name="percent_counts_MT" value="33.0" /> <output name="output_loom" file="scater_manual_filtered.loom" compare="sim_size" /> </test> |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-manual-filter.R --- a/scater-manual-filter.R Thu Jul 18 11:13:41 2019 -0400 +++ b/scater-manual-filter.R Tue Sep 03 14:27:39 2019 -0400 |
[ |
@@ -31,6 +31,13 @@ help = "Minimum library size (mapped reads) to filter cells on" ), make_option( + c("-e", "--expressed-genes"), + action = "store", + default = 0, + type = 'numeric', + help = "Minimum number of expressed genes to filter cells on" + ), + make_option( c("-m", "--percent-counts-MT"), action = "store", default = 100, @@ -71,6 +78,14 @@ print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features.")) + +# Filter low expressed genes +to_keep <- scle$total_features_by_counts > opt$expressed_genes +scle <- scle[, to_keep] + +print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features.")) + + # Filter out high MT counts to_keep <- scle$pct_counts_MT < opt$percent_counts_MT scle <- scle[, to_keep] |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-plot-dist-scatter.R --- a/scater-plot-dist-scatter.R Thu Jul 18 11:13:41 2019 -0400 +++ b/scater-plot-dist-scatter.R Tue Sep 03 14:27:39 2019 -0400 |
b |
@@ -9,6 +9,7 @@ library(LoomExperiment) library(scater) library(ggpubr) +library(scales) # parse options @@ -26,10 +27,17 @@ default = NA, type = 'character', help = "Path of the PDF output file to save plot to." + ), + make_option( + c("-l", "--log-scale"), + action="store_true", + default=FALSE, + type = 'logical', + help = "Plot on log scale (recommended for large datasets)." ) ) -opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file')) +opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file', 'log_scale')) # Check parameter values @@ -51,12 +59,21 @@ read_bins <- max(total_counts / 1e6) / 20 feat_bins <- max(total_features) / 20 -#make the plots +# Make the plots plot <- ggplot(cf_dm, aes(x=total_counts / 1e6, y=total_features)) + geom_point(shape=1) + geom_smooth() + xlab("Read count (millions)") + ylab("Feature count") + ggtitle("Scatterplot of reads vs features") plot1 <- qplot(total_counts / 1e6, geom="histogram", binwidth = read_bins, ylab="Number of cells", xlab = "Read counts (millions)", fill=I("darkseagreen3")) + ggtitle("Read counts per cell") plot2 <- qplot(total_features, geom="histogram", binwidth = feat_bins, ylab="Number of cells", xlab = "Feature counts", fill=I("darkseagreen3")) + ggtitle("Feature counts per cell") plot3 <- plotColData(scle, y="pct_counts_MT", x="total_features_by_counts") + ggtitle("% MT genes") + geom_point(shape=1) + theme(text = element_text(size=15)) + theme(plot.title = element_text(size=15)) -final_plot <- ggarrange(plot1, plot2, plot, plot3, ncol=2, nrow=2) -ggsave(opt$output_plot_file, final_plot, device="pdf") +if (! 
opt$log_scale){ + final_plot <- ggarrange(plot1, plot2, plot, plot3, ncol=2, nrow=2) + ggsave(opt$output_plot_file, final_plot, device="pdf") +} else { + plot_log_both <- plot + scale_x_continuous(trans = 'log10') + scale_y_continuous(trans = 'log10') + plot1_log <- plot1 + scale_y_continuous(trans = 'log10') + plot2_log <- plot2 + scale_y_continuous(trans = 'log10') + plot3_log <- plot3 + scale_y_log10(labels=number) + final_plot_log <- ggarrange(plot1_log, plot2_log, plot_log_both, plot3_log, ncol=2, nrow=2) + ggsave(opt$output_plot_file, final_plot_log, device="pdf") +} |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-plot-tsne.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scater-plot-tsne.R Tue Sep 03 14:27:39 2019 -0400 |
b |
@@ -0,0 +1,69 @@ +#!/usr/bin/env Rscript + +# Creates a t-SNE plot of a normalised SingleCellExperiment object. + +# Load optparse we need to check inputs + +library(optparse) +library(workflowscriptscommon) +library(LoomExperiment) +library(scater) +library(Rtsne) + +# parse options + +option_list = list( + make_option( + c("-i", "--input-loom"), + action = "store", + default = NA, + type = 'character', + help = "A SingleCellExperiment object file in Loom format." + ), + make_option( + c("-c", "--colour-by"), + action = "store", + default = NULL, + type = 'character', + help = "Feature (from annotation file) to colour t-SNE plot points by. The values represented in this options should be categorical" + ), + make_option( + c("-s", "--size-by"), + action = "store", + default = NULL, + type = 'character', + help = "Feature (from annotation file) to size t-SNE plot points by. The values represented in this options should be numerical and not categorical" + ), + make_option( + c("-p", "--shape-by"), + action = "store", + default = NULL, + type = 'character', + help = "Feature (from annotation file) to shape t-SNE plot points by. The values represented in this options should be categorical" + ), + make_option( + c("-o", "--output-plot-file"), + action = "store", + default = NA, + type = 'character', + help = "Path of the PDF output file to save plot to." + ) +) + +opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file')) +# Check parameter values + +if ( ! file.exists(opt$input_loom)){ + stop((paste('File', opt$input_loom, 'does not exist'))) +} + + +# Input from Loom format + +scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment') +scle <- normalize(scle, exprs_values = 1) +scle <- runTSNE(scle, perplexity=10) +plot <- plotTSNE(scle, colour_by = opt$colour_by, size_by = opt$size_by, shape_by = opt$shape_by) + + +ggsave(opt$output_plot_file, plot, device="pdf") |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f test-data/scater_reads_genes_dist_log.pdf |
b |
Binary file test-data/scater_reads_genes_dist_log.pdf has changed |
b |
diff -r e6ca62ac65c6 -r b7ea9f09c02f test-data/scater_tsne_plot.pdf |
b |
Binary file test-data/scater_tsne_plot.pdf has changed |