Mercurial > repos > iuc > dropletutils
view scripts/dropletutils.Rscript @ 1:cfe1e6c28d95 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit 40d4a92c62d75fe931baba8657cde006a26d84cf"
author | iuc |
---|---|
date | Mon, 26 Aug 2019 05:06:39 -0400 |
parents | 4cd9f0008d9c |
children | a8aa294401be |
line wrap: on
line source
## Load in data args = commandArgs(trailingOnly = T) if (length(args) != 1){ stop("Please provide the config file") } suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) suppressWarnings(suppressPackageStartupMessages(require(Matrix))) suppressWarnings(suppressPackageStartupMessages(require(scater))) source(args[1]) ## Helper functions setSparse <- function(obj){ return(as(obj, "dgCMatrix")) } writeTSV <- function(fileout, obj){ write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE) } writeOut <- function(counts, fileout, typeout){ if (typeout == "tsv"){ writeTSV(fileout, counts) } else if (typeout == "h5ad"){ write10xCounts(fileout, counts, type="HDF5", overwrite=TRUE) } else if (typeout == "directory"){ write10xCounts(fileout, counts, type="sparse", overwrite=TRUE) } } read10xFiles <- function(filein, typein){ sce <- NULL if (typein == "tsv"){ ## Exploding memory problems occured here ## - solution is to use the readSparseCounts function from scater sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) } else if (typein == "h5ad"){ sce <- read10xCounts(filein, col.names=T, type="HDF5") # use barcodes.tsv as column names } else if (typein == "directory"){ sce <- read10xCounts(filein, col.names=T, type="sparse") } counts(sce) <- setSparse(counts(sce)) return(sce) } ## Methods doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5ad", fdr_threshold = 0.01){ sce <- read10xFiles(files$infile, in.type) eparams$... <- NULL ## hack eparams$m = Matrix(counts(sce), sparse=TRUE) e.out <- do.call(emptyDrops, c(eparams)) bar.names <- colnames(sce) if (length(bar.names) != nrow(e.out)){ stop("Length of barcodes and output metrics don't match.") } e.out <- cbind(bar.names, e.out) e.out$is.Cell <- e.out$FDR <= fdr_threshold e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited # Write to table writeTSV(files$table, e.out) # Print to log print(table(Limited=e.out$Limited, Significant=e.out$is.Cell)) # Write to Plot png(files$plot) plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), xlab="Total UMI count", ylab="-Log Probability") dev.off() # Filtered called <- e.out$is.CellAndLimited called[is.na(called)] <- FALSE # replace NA's with FALSE sce.filtered <- sce[,called] writeOut(counts(sce.filtered), files$out, out.type) } doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5ad"){ sce <- read10xFiles(files$infile, in.type) dparams$m = counts(sce) called <- do.call(defaultDrops, c(dparams)) print(table(called)) # Filtered sce.filtered <- sce[,called] writeOut(Matrix(counts(sce.filtered),sparse=TRUE), files$out, out.type) } doBarcodeRankings <- function(files, bparams, in.type="directory"){ sce <- read10xFiles(files$infile, in.type) bparams$... <- NULL ## hack bparams$m = counts(sce) br.out <- do.call(barcodeRanks, c(bparams)) png(files$plot) plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") o <- order(br.out$rank) lines(br.out$rank[o], br.out$fitted[o], col="red") abline(h=br.out$knee, col="dodgerblue", lty=2) abline(h=br.out$inflection, col="forestgreen", lty=2) legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) dev.off() print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) } ## Main set.seed(seed.val) if (do.method == "barcodeRankings") { doBarcodeRankings(files, bparams, in.type) } else if (do.method == "defaultDrops") { doDefaultDrops(files, dparams, in.type, out.type) } else if (do.method == "emptyDrops") { doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold) }