Mercurial > repos > iuc > dropletutils
view scripts/dropletutils.Rscript @ 6:8855361fcfc5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738"
author | iuc |
---|---|
date | Thu, 10 Dec 2020 13:50:06 +0000 |
parents | cdf4443d5625 |
children | 2c1200fba922 |
line wrap: on
line source
## Load in data args <- commandArgs(trailingOnly = T) if (length(args) != 1) { stop("Please provide the config file") } suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) suppressWarnings(suppressPackageStartupMessages(require(Matrix))) suppressWarnings(suppressPackageStartupMessages(require(scater))) source(args[1]) ## Helper functions set_sparse <- function(obj) { return(as(obj, "dgCMatrix")) } write_tsv <- function(fileout, obj) { write.table(as.matrix(obj), file = fileout, col.names = NA, sep = "\t", quote = FALSE) } determine_geneids <- function(object) { if (!is.null(rowData(object)$Symbol)) { return(rowData(object)$Symbol) } return(rownames(object)) } get_counts <- function(object) { return(Matrix(counts(object), sparse = TRUE)) } write_out <- function(object, fileout, typeout) { if (typeout == "tsv") { write_tsv(fileout, get_counts(object)) } else if (typeout == "h5") { write10xCounts(fileout, get_counts(object), type = "HDF5", gene.symbol = determine_geneids(object), overwrite = TRUE) } else if (typeout == "directory") { write10xCounts(fileout, get_counts(object), type = "sparse", gene.symbol = determine_geneids(object), overwrite = TRUE) } } read_10x_files <- function(filein, typein) { sce <- NULL if (typein == "tsv") { ## Exploding memory problems occured here ## - solution is to use the readSparseCounts function from scater sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) } else if (typein == "h5") { # use barcodes.tsv as column names sce <- read10xCounts(filein, col.names = T, type = "HDF5") } else if (typein == "directory") { sce <- read10xCounts(filein, col.names = T, type = "sparse") } counts(sce) <- set_sparse(counts(sce)) return(sce) } ## Methods do_empty_drops <- function(files, eparams, intype = "directory", outtype = "h5", fdr_threshold = 0.01) { sce <- read_10x_files(files$infile, intype) eparams$... <- NULL ## hack to remove other parameters from being eparams$m <- Matrix(counts(sce), sparse = TRUE) ## Determine sensible lowerbound m_stats <- summary(colSums(counts(sce))) print("Cell Library Size Distribution:") print(m_stats) if (m_stats["Min."] > eparams$lower) { print(paste0("CAUTION: Min. Lib. Size (", m_stats["Min."] , ") < requested lowerbound (", eparams$lower, ")")) message(paste0("Setting lowerbound to Mean: ", m_stats["Mean"])) eparams$lower <- m_stats["Mean"] } e_out <- do.call(emptyDrops, c(eparams)) bar_names <- colnames(sce) if (length(bar_names) != nrow(e_out)) { stop("Length of barcodes and output metrics don't match.") } e_out <- cbind(bar_names, e_out) e_out$is_cell <- e_out$FDR <= fdr_threshold e_out$is_cellandlimited <- e_out$is_cell & e_out$Limited ## Write to Plot e_out$is_cell[is.na(e_out$is_cell)] <- FALSE xlim_dat <- e_out[complete.cases(e_out), ]$Total ## Write to table write_tsv(files$table, e_out[complete.cases(e_out), ]) png(files$plot) plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell, "red", "black"), xlab = "Total UMI count", ylab = "-Log Probability", xlim = c(min(xlim_dat), max(xlim_dat))) dev.off() ## Filtered called <- NULL if (fdr_threshold != 0) { called <- e_out$is_cellandlimited } else { called <- e_out$is_cell } called[is.na(called)] <- FALSE # replace NA's with FALSE sce_filtered <- sce[, called] write_out(sce_filtered, files$out, outtype) message(paste("Cells:", sum(na.omit(e_out$is_cell)))) message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited)))) } do_default_drops <- function(files, dparams, intype = "directory", outtype = "h5") { sce <- read_10x_files(files$infile, intype) dparams$m <- counts(sce) called <- do.call(defaultDrops, c(dparams)) # Filtered sce_filtered <- sce[, called] write_out(sce_filtered, files$out, outtype) message(paste("Cells:", sum(called))) } do_barcode_rankings <- function(files, bparams, intype = "directory") { sce <- read_10x_files(files$infile, intype) bparams$... <- NULL ## hack bparams$m <- counts(sce) brout <- do.call(barcodeRanks, c(bparams)) png(files$plot) plot(brout$rank, brout$total, log = "xy", xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes") o <- order(brout$rank) lines(brout$rank[o], brout$fitted[o], col = "red") abline(h = brout$knee, col = "dodgerblue", lty = 2) abline(h = brout$inflection, col = "forestgreen", lty = 2) legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"), legend = c("knee", "inflection")) dev.off() print(paste("knee =", brout$knee, ", inflection = ", brout$inflection)) } ## Main set.seed(seed.val) if (do.method == "barcodeRankings") { do_barcode_rankings(files, bparams, intype) } else if (do.method == "defaultDrops") { do_default_drops(files, dparams, intype, outtype) } else if (do.method == "emptyDrops") { do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold) }