Repository 'scater_filter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/scater_filter

Changeset 1:b7ea9f09c02f (2019-09-03)
Previous changeset 0:e6ca62ac65c6 (2019-07-18) Next changeset 2:7a365ec81b52 (2021-09-09)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 61f3899168453092fd25691cf31871a3a350fd3b"
modified:
README.md
macros.xml
scater-filter.xml
scater-manual-filter.R
scater-plot-dist-scatter.R
added:
environment.yml
scater-plot-tsne.R
test-data/scater_reads_genes_dist_log.pdf
test-data/scater_tsne_plot.pdf
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f README.md
--- a/README.md Thu Jul 18 11:13:41 2019 -0400
+++ b/README.md Tue Sep 03 14:27:39 2019 -0400
[
@@ -20,6 +20,13 @@
 
 ## Command-line usage
 
+The scripts require the installation of scater and a few other R/Bioconductor packages. An easy way to install them is to create a [conda](https://conda.io/) environment using the `environment.yml` file distributed together with these wrappers:
+
+```
+conda env create -f environment.yml
+conda activate scater
+```
+
 For help with any of the following scripts, run:
  `<script-name> --help`
 
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f environment.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/environment.yml Tue Sep 03 14:27:39 2019 -0400
b
@@ -0,0 +1,14 @@
+name: scater
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconductor-loomexperiment=1.2.0
+  - bioconductor-scater=1.12.2
+  - r-ggpubr=0.2.2
+  - r-mvoutlier=2.0.9
+  - r-optparse=1.6.2
+  - r-rtsne=0.15
+  - r-scales=1.0.0
+  - r-workflowscriptscommon=0.0.4
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f macros.xml
--- a/macros.xml Thu Jul 18 11:13:41 2019 -0400
+++ b/macros.xml Tue Sep 03 14:27:39 2019 -0400
b
@@ -1,11 +1,11 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.10.1</token>
+    <token name="@TOOL_VERSION@">1.12.2</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">bioconductor-scater</requirement>
             <requirement type="package" version="1.6.2">r-optparse</requirement>
             <requirement type="package" version="0.0.4">r-workflowscriptscommon</requirement>
-            <requirement type="package" version="1.0.4">bioconductor-loomexperiment</requirement>
+            <requirement type="package" version="1.2.0">bioconductor-loomexperiment</requirement>
             <yield />
         </requirements>
     </xml>
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-filter.xml
--- a/scater-filter.xml Thu Jul 18 11:13:41 2019 -0400
+++ b/scater-filter.xml Tue Sep 03 14:27:39 2019 -0400
b
@@ -16,6 +16,9 @@
     #if str($filter_type.library_size):
         --library-size ${filter_type.library_size}
     #end if
+    #if str($filter_type.expressed_genes):
+        --expressed-genes ${filter_type.expressed_genes}
+    #end if
     #if str($filter_type.percent_counts_MT):
         --percent-counts-MT ${filter_type.percent_counts_MT}
     #end if
@@ -35,6 +38,7 @@
             <when value="manual">
                 <param name="detection_limit" argument="--detection-limit" type="float" optional="true" label="Number of reads mapped to a gene for it to be counted as expressed" help="Raising this number will raise the stringency and may lower the number of expressed genes" />
                 <param name="library_size" argument="--library-size" type="integer" optional="true" label="Minimum library size (mapped reads) to filter cells on" help="Raising this number will raise the stringency and may lower the number of included cells" />
+                <param name="expressed_genes" argument="--expressed-genes" type="integer" optional="true" label="Minimum number of expressed genes to filter cells on" />
                 <param name="percent_counts_MT" argument="--percent-counts-MT" type="float" optional="true" label="Maximum % of mitochondrial genes expressed per cell" help="Cells that exceed this value will be filtered out" />
             </when>
             <when value="pca" />
@@ -49,6 +53,7 @@
             <param name="filter_type_selector" value="manual" />
             <param name="detection_limit" value="4" />
             <param name="library_size" value="100000" />
+            <param name="expressed_genes" value="400" />
             <param name="percent_counts_MT" value="33.0" />
             <output name="output_loom" file="scater_manual_filtered.loom" compare="sim_size" />
         </test>
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-manual-filter.R
--- a/scater-manual-filter.R Thu Jul 18 11:13:41 2019 -0400
+++ b/scater-manual-filter.R Tue Sep 03 14:27:39 2019 -0400
[
@@ -31,6 +31,13 @@
     help = "Minimum library size (mapped reads) to filter cells on"
   ),
   make_option(
+    c("-e", "--expressed-genes"),
+    action = "store",
+    default = 0,
+    type = 'numeric',
+    help = "Minimum number of expressed genes to filter cells on"
+  ),
+  make_option(
     c("-m", "--percent-counts-MT"),
     action = "store",
     default = 100,
@@ -71,6 +78,14 @@
 
 print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features."))
 
+
+# Filter out cells with too few expressed genes
+to_keep <- scle$total_features_by_counts > opt$expressed_genes
+scle <- scle[, to_keep]
+
+print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features."))
+
+
 # Filter out high MT counts
 to_keep <- scle$pct_counts_MT < opt$percent_counts_MT
 scle <- scle[, to_keep]
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-plot-dist-scatter.R
--- a/scater-plot-dist-scatter.R Thu Jul 18 11:13:41 2019 -0400
+++ b/scater-plot-dist-scatter.R Tue Sep 03 14:27:39 2019 -0400
b
@@ -9,6 +9,7 @@
 library(LoomExperiment)
 library(scater)
 library(ggpubr)
+library(scales)
 
 # parse options
 
@@ -26,10 +27,17 @@
     default = NA,
     type = 'character',
     help = "Path of the PDF output file to save plot to."
+  ),
+  make_option(
+    c("-l", "--log-scale"),
+    action="store_true",
+    default=FALSE,
+    type = 'logical',
+    help = "Plot on log scale (recommended for large datasets)."
   )
 )
 
-opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file'))
+opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file', 'log_scale'))
 
 # Check parameter values
 
@@ -51,12 +59,21 @@
 read_bins <- max(total_counts / 1e6) / 20
 feat_bins <- max(total_features) / 20
 
-#make the plots
+# Make the plots
 plot <- ggplot(cf_dm, aes(x=total_counts / 1e6, y=total_features)) + geom_point(shape=1) + geom_smooth() + xlab("Read count (millions)") +
    ylab("Feature count") + ggtitle("Scatterplot of reads vs features")
 plot1 <- qplot(total_counts / 1e6, geom="histogram", binwidth = read_bins, ylab="Number of cells", xlab = "Read counts (millions)", fill=I("darkseagreen3")) + ggtitle("Read counts per cell")
 plot2 <- qplot(total_features, geom="histogram", binwidth = feat_bins, ylab="Number of cells", xlab = "Feature counts", fill=I("darkseagreen3")) + ggtitle("Feature counts per cell")
 plot3 <- plotColData(scle, y="pct_counts_MT", x="total_features_by_counts") + ggtitle("% MT genes") + geom_point(shape=1) + theme(text = element_text(size=15)) + theme(plot.title = element_text(size=15))
 
-final_plot <- ggarrange(plot1, plot2, plot, plot3, ncol=2, nrow=2)
-ggsave(opt$output_plot_file, final_plot, device="pdf")
+if (! opt$log_scale){
+  final_plot <- ggarrange(plot1, plot2, plot, plot3, ncol=2, nrow=2)
+  ggsave(opt$output_plot_file, final_plot, device="pdf")
+} else {
+  plot_log_both <- plot + scale_x_continuous(trans = 'log10') + scale_y_continuous(trans = 'log10')
+  plot1_log <- plot1 + scale_y_continuous(trans = 'log10')
+  plot2_log <- plot2 + scale_y_continuous(trans = 'log10')
+  plot3_log <- plot3 + scale_y_log10(labels=number)
+  final_plot_log <- ggarrange(plot1_log, plot2_log, plot_log_both, plot3_log, ncol=2, nrow=2)
+  ggsave(opt$output_plot_file, final_plot_log, device="pdf")
+}
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f scater-plot-tsne.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scater-plot-tsne.R Tue Sep 03 14:27:39 2019 -0400
b
@@ -0,0 +1,69 @@
+#!/usr/bin/env Rscript
+
+# Reads a SingleCellExperiment from a Loom file, normalises it, runs t-SNE and saves the t-SNE plot as a PDF.
+
+# Load optparse (command-line option parsing) and the packages used below
+
+library(optparse)
+library(workflowscriptscommon)
+library(LoomExperiment)
+library(scater)
+library(Rtsne)
+
+# Define the command-line options
+
+option_list = list(
+  make_option(
+    c("-i", "--input-loom"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "A SingleCellExperiment object file in Loom format."
+  ),
+  make_option(
+    c("-c", "--colour-by"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Feature (from annotation file) to colour t-SNE plot points by. The values represented in this options should be categorical"
+  ),
+  make_option(
+    c("-s", "--size-by"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Feature (from annotation file) to size t-SNE plot points by. The values represented in this options should be numerical and not categorical"
+  ),
+  make_option(
+    c("-p", "--shape-by"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Feature (from annotation file) to shape t-SNE plot points by. The values represented in this options should be categorical"
+  ),
+  make_option(
+    c("-o", "--output-plot-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "Path of the PDF output file to save plot to."
+  )
+)
+
+opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file'))  # parsed values are read back with underscores, e.g. opt$input_loom
+# Check parameter values: stop early if the input Loom file is missing
+
+if ( ! file.exists(opt$input_loom)){
+  stop((paste('File', opt$input_loom, 'does not exist')))
+}
+
+
+# Read the input Loom file as a SingleCellLoomExperiment, then normalise, embed and plot
+
+scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment')
+scle <- normalize(scle, exprs_values = 1)  # NOTE(review): exprs_values = 1 presumably selects the first assay; confirm against scater docs
+scle <- runTSNE(scle, perplexity=10)  # perplexity is hard-coded to 10
+plot <- plotTSNE(scle, colour_by = opt$colour_by, size_by = opt$size_by, shape_by = opt$shape_by)  # the *_by options default to NULL when not given
+
+
+ggsave(opt$output_plot_file, plot, device="pdf")
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f test-data/scater_reads_genes_dist_log.pdf
b
Binary file test-data/scater_reads_genes_dist_log.pdf has changed
b
diff -r e6ca62ac65c6 -r b7ea9f09c02f test-data/scater_tsne_plot.pdf
b
Binary file test-data/scater_tsne_plot.pdf has changed