view scripts/dropletutils.Rscript @ 3:f0de368eabca draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit a4cee4cac5097029188d836d5b904b605dbb943d"
author iuc
date Tue, 15 Oct 2019 09:03:56 -0400
parents a8aa294401be
children cdf4443d5625
line wrap: on
line source

## Load in data
args = commandArgs(trailingOnly = T)
if (length(args) != 1){
    stop("Please provide the config file")
}

suppressWarnings(suppressPackageStartupMessages(require(DropletUtils)))
suppressWarnings(suppressPackageStartupMessages(require(Matrix)))
suppressWarnings(suppressPackageStartupMessages(require(scater)))

source(args[1])

## Helper functions
setSparse <- function(obj){
    return(as(obj, "dgCMatrix"))
}

writeTSV <- function(fileout, obj){
    write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE)
}

determineGeneIDs <- function(object){
    if (!is.null(rowData(object)$Symbol)){
        return(rowData(object)$Symbol)
    }
    return(rownames(object))
}

getCounts <- function(object){
    return(Matrix(counts(object), sparse=TRUE))
}

writeOut <- function(object, fileout, typeout){
    if (typeout == "tsv"){
        writeTSV(fileout, getCounts(object))
    }
    else if (typeout == "h5ad"){
        write10xCounts(fileout, getCounts(object),
                       type="HDF5",
                       gene.symbol=determineGeneIDs(object),
                       overwrite=TRUE)
    }
    else if (typeout == "directory"){
        write10xCounts(fileout, getCounts(object),
                       type="sparse",
                       gene.symbol=determineGeneIDs(object),
                       overwrite=TRUE)
    }
}

read10xFiles <- function(filein, typein){
    sce <- NULL
    if (typein == "tsv"){
        ## Exploding memory problems occured here
        ## - solution is to use the readSparseCounts function from scater
        sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein)))
    }
    else if (typein == "h5ad"){
        sce <- read10xCounts(filein, col.names=T, type="HDF5")   # use barcodes.tsv as column names
    }
    else if (typein == "directory"){
        sce <- read10xCounts(filein, col.names=T, type="sparse")
    }
    counts(sce) <- setSparse(counts(sce))
    return(sce)
}


## Methods


doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5ad", fdr_threshold = 0.01){
    sce <- read10xFiles(files$infile, in.type)

    eparams$... <- NULL ## hack
    eparams$m = Matrix(counts(sce), sparse=TRUE)

    e.out <- do.call(emptyDrops, c(eparams))

    bar.names <- colnames(sce)
    if (length(bar.names) != nrow(e.out)){
        stop("Length of barcodes and output metrics don't match.")
    }
    e.out <- cbind(bar.names, e.out)
    e.out$is.Cell <- e.out$FDR <= fdr_threshold
    e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited

    ## Write to Plot
    e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE
    xlim.dat <- e.out[complete.cases(e.out),]$Total

    ## Write to table
    writeTSV(files$table, e.out[complete.cases(e.out),])

    png(files$plot)
    plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"),
         xlab="Total UMI count", ylab="-Log Probability",
         xlim=c(min(xlim.dat),max(xlim.dat)))
    dev.off()

    ## Filtered
    called <- NULL
    if (fdr_threshold != 0){
        called <- e.out$is.CellAndLimited
    } else {
        called <- e.out$is.Cell
    }
    called[is.na(called)] <- FALSE    # replace NA's with FALSE
    sce.filtered <- sce[,called]

    writeOut(sce.filtered, files$out, out.type)

    message(paste("Cells:", sum(na.omit(e.out$is.Cell))))
    message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited))))
}


doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5ad"){
    sce <- read10xFiles(files$infile, in.type)

    dparams$m = counts(sce)
    called <- do.call(defaultDrops, c(dparams))

    # Filtered
    sce.filtered <- sce[,called]

    writeOut(sce.filtered, files$out, out.type)

    message(paste("Cells:", sum(called)))
}


doBarcodeRankings <- function(files, bparams, in.type="directory"){
    sce <- read10xFiles(files$infile, in.type)

    bparams$... <- NULL ## hack
    bparams$m = counts(sce)

    br.out <- do.call(barcodeRanks, c(bparams))

    png(files$plot)
    plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes")
    o <- order(br.out$rank)
    lines(br.out$rank[o], br.out$fitted[o], col="red")

    abline(h=br.out$knee, col="dodgerblue", lty=2)
    abline(h=br.out$inflection, col="forestgreen", lty=2)
    legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection"))
    dev.off()

    print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection))
}

## Main
set.seed(seed.val)

if (do.method == "barcodeRankings") {
    doBarcodeRankings(files, bparams, in.type)

} else if (do.method == "defaultDrops") {
    doDefaultDrops(files, dparams, in.type, out.type)

} else if (do.method == "emptyDrops") {
    doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold)
}