view scater-manual-filter.R @ 1:946179ef029c draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 61f3899168453092fd25691cf31871a3a350fd3b"
author iuc
date Tue, 03 Sep 2019 14:28:53 -0400
parents 87757f7b9974
children
line wrap: on
line source

#!/usr/bin/env Rscript
# Manually filter SingleCellExperiment with user-defined parameters

# Load optparse we need to check inputs
library(optparse)
library(workflowscriptscommon)
library(LoomExperiment)
library(scater)

# parse options
option_list = list(
  make_option(
    c("-i", "--input-loom"),
    action = "store",
    default = NA,
    type = 'character',
    help = "A SingleCellExperiment object file in Loom format."
  ),
  make_option(
    c("-d", "--detection-limit"),
    action = "store",
    default = 0,
    type = 'numeric',
    help = "Numeric scalar providing the value above which observations are deemed to be expressed"
  ),
  make_option(
    c("-l", "--library-size"),
    action = "store",
    default = 0,
    type = 'numeric',
    help = "Minimum library size (mapped reads) to filter cells on"
  ),
  make_option(
    c("-e", "--expressed-genes"),
    action = "store",
    default = 0,
    type = 'numeric',
    help = "Minimum number of expressed genes to filter cells on"
  ),
  make_option(
    c("-m", "--percent-counts-MT"),
    action = "store",
    default = 100,
    type = 'numeric',
    help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out."
  ),
  make_option(
    c("-o", "--output-loom"),
    action = "store",
    default = NA,
    type = 'character',
    help = "File name in which to store the SingleCellExperiment object in Loom format."
  )
)

opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_loom'))

# Check parameter values

if ( ! file.exists(opt$input_loom)){
  stop((paste('File', opt$input_loom, 'does not exist')))
}

# Filter out unexpressed features

scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment')
print(paste("Starting with", ncol(scle), "cells and", nrow(scle), "features."))

# Create a logical vector of features that are expressed (above detection_limit)
feature_expressed <- nexprs(scle, detection_limit = opt$detection_limit, exprs_values = 1, byrow=TRUE) > 0
scle <- scle[feature_expressed, ]

print(paste("After filtering out unexpressed features: ", ncol(scle), "cells and", nrow(scle), "features."))

# Filter low library sizes
to_keep <- scle$total_counts > opt$library_size
scle <- scle[, to_keep]

print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features."))


# Filter low expressed genes
to_keep <- scle$total_features_by_counts > opt$expressed_genes
scle <- scle[, to_keep]

print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features."))


# Filter out high MT counts
to_keep <- scle$pct_counts_MT < opt$percent_counts_MT
scle <- scle[, to_keep]

print(paste("After filtering out high MT gene counts: ", ncol(scle), "cells and", nrow(scle), "features."))

# Output to a Loom file
if (file.exists(opt$output_loom)) {
  file.remove(opt$output_loom)
}
export(scle, opt$output_loom, format='loom')