comparison scripts/dropletutils.Rscript @ 6:8855361fcfc5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738"
author iuc
date Thu, 10 Dec 2020 13:50:06 +0000
parents cdf4443d5625
children 2c1200fba922
comparison
equal deleted inserted replaced
5:cdf4443d5625 6:8855361fcfc5
1 ## Load in data 1 ## Load in data
2 args = commandArgs(trailingOnly = T) 2 args <- commandArgs(trailingOnly = T)
3 if (length(args) != 1){ 3 if (length(args) != 1) {
4 stop("Please provide the config file") 4 stop("Please provide the config file")
5 } 5 }
6 6
7 suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) 7 suppressWarnings(suppressPackageStartupMessages(require(DropletUtils)))
8 suppressWarnings(suppressPackageStartupMessages(require(Matrix))) 8 suppressWarnings(suppressPackageStartupMessages(require(Matrix)))
9 suppressWarnings(suppressPackageStartupMessages(require(scater))) 9 suppressWarnings(suppressPackageStartupMessages(require(scater)))
10 10
11 source(args[1]) 11 source(args[1])
12 12
13 ## Helper functions 13 ## Helper functions
14 setSparse <- function(obj){ 14 set_sparse <- function(obj) {
15 return(as(obj, "dgCMatrix")) 15 return(as(obj, "dgCMatrix"))
16 } 16 }
17 17
18 writeTSV <- function(fileout, obj){ 18 write_tsv <- function(fileout, obj) {
19 write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE) 19 write.table(as.matrix(obj), file = fileout,
20 col.names = NA, sep = "\t", quote = FALSE)
20 } 21 }
21 22
22 determineGeneIDs <- function(object){ 23 determine_geneids <- function(object) {
23 if (!is.null(rowData(object)$Symbol)){ 24 if (!is.null(rowData(object)$Symbol)) {
24 return(rowData(object)$Symbol) 25 return(rowData(object)$Symbol)
25 } 26 }
26 return(rownames(object)) 27 return(rownames(object))
27 } 28 }
28 29
29 getCounts <- function(object){ 30 get_counts <- function(object) {
30 return(Matrix(counts(object), sparse=TRUE)) 31 return(Matrix(counts(object), sparse = TRUE))
31 } 32 }
32 33
33 writeOut <- function(object, fileout, typeout){ 34 write_out <- function(object, fileout, typeout) {
34 if (typeout == "tsv"){ 35 if (typeout == "tsv") {
35 writeTSV(fileout, getCounts(object)) 36 write_tsv(fileout, get_counts(object))
36 } 37 }
37 else if (typeout == "h5"){ 38 else if (typeout == "h5") {
38 write10xCounts(fileout, getCounts(object), 39 write10xCounts(fileout, get_counts(object),
39 type="HDF5", 40 type = "HDF5",
40 gene.symbol=determineGeneIDs(object), 41 gene.symbol = determine_geneids(object),
41 overwrite=TRUE) 42 overwrite = TRUE)
42 } 43 }
43 else if (typeout == "directory"){ 44 else if (typeout == "directory") {
44 write10xCounts(fileout, getCounts(object), 45 write10xCounts(fileout, get_counts(object),
45 type="sparse", 46 type = "sparse",
46 gene.symbol=determineGeneIDs(object), 47 gene.symbol = determine_geneids(object),
47 overwrite=TRUE) 48 overwrite = TRUE)
48 } 49 }
49 } 50 }
50 51
51 read10xFiles <- function(filein, typein){ 52 read_10x_files <- function(filein, typein) {
52 sce <- NULL 53 sce <- NULL
53 if (typein == "tsv"){ 54 if (typein == "tsv") {
54 ## Exploding memory problems occured here 55 ## Exploding memory problems occured here
55 ## - solution is to use the readSparseCounts function from scater 56 ## - solution is to use the readSparseCounts function from scater
56 sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) 57 sce <- SingleCellExperiment(assays =
58 list(counts = readSparseCounts(filein)))
57 } 59 }
58 else if (typein == "h5"){ 60 else if (typein == "h5") {
59 sce <- read10xCounts(filein, col.names=T, type="HDF5") # use barcodes.tsv as column names 61 # use barcodes.tsv as column names
62 sce <- read10xCounts(filein, col.names = T, type = "HDF5")
60 } 63 }
61 else if (typein == "directory"){ 64 else if (typein == "directory") {
62 sce <- read10xCounts(filein, col.names=T, type="sparse") 65 sce <- read10xCounts(filein, col.names = T, type = "sparse")
63 } 66 }
64 counts(sce) <- setSparse(counts(sce)) 67 counts(sce) <- set_sparse(counts(sce))
65 return(sce) 68 return(sce)
66 } 69 }
67 70
68 71
69 ## Methods 72 ## Methods
70 73
71 74
72 doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5", fdr_threshold = 0.01){ 75 do_empty_drops <- function(files, eparams, intype = "directory",
73 sce <- read10xFiles(files$infile, in.type) 76 outtype = "h5", fdr_threshold = 0.01) {
77 sce <- read_10x_files(files$infile, intype)
74 78
75 eparams$... <- NULL ## hack 79 eparams$... <- NULL ## hack to remove other parameters from being
76 eparams$m = Matrix(counts(sce), sparse=TRUE) 80 eparams$m <- Matrix(counts(sce), sparse = TRUE)
77 81
78 e.out <- do.call(emptyDrops, c(eparams)) 82 ## Determine sensible lowerbound
83 m_stats <- summary(colSums(counts(sce)))
84 print("Cell Library Size Distribution:")
85 print(m_stats)
79 86
80 bar.names <- colnames(sce) 87 if (m_stats["Min."] > eparams$lower) {
81 if (length(bar.names) != nrow(e.out)){ 88 print(paste0("CAUTION: Min. Lib. Size (", m_stats["Min."]
89 , ") < requested lowerbound (", eparams$lower, ")"))
90 message(paste0("Setting lowerbound to Mean: ", m_stats["Mean"]))
91 eparams$lower <- m_stats["Mean"]
92 }
93
94 e_out <- do.call(emptyDrops, c(eparams))
95
96 bar_names <- colnames(sce)
97 if (length(bar_names) != nrow(e_out)) {
82 stop("Length of barcodes and output metrics don't match.") 98 stop("Length of barcodes and output metrics don't match.")
83 } 99 }
84 e.out <- cbind(bar.names, e.out) 100 e_out <- cbind(bar_names, e_out)
85 e.out$is.Cell <- e.out$FDR <= fdr_threshold 101 e_out$is_cell <- e_out$FDR <= fdr_threshold
86 e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited 102 e_out$is_cellandlimited <- e_out$is_cell & e_out$Limited
87 103
88 ## Write to Plot 104 ## Write to Plot
89 e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE 105 e_out$is_cell[is.na(e_out$is_cell)] <- FALSE
90 xlim.dat <- e.out[complete.cases(e.out),]$Total 106 xlim_dat <- e_out[complete.cases(e_out), ]$Total
91 107
92 ## Write to table 108 ## Write to table
93 writeTSV(files$table, e.out[complete.cases(e.out),]) 109 write_tsv(files$table, e_out[complete.cases(e_out), ])
94 110
95 png(files$plot) 111 png(files$plot)
96 plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), 112 plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell,
97 xlab="Total UMI count", ylab="-Log Probability", 113 "red", "black"),
98 xlim=c(min(xlim.dat),max(xlim.dat))) 114 xlab = "Total UMI count", ylab = "-Log Probability",
115 xlim = c(min(xlim_dat), max(xlim_dat)))
99 dev.off() 116 dev.off()
100 117
101 ## Filtered 118 ## Filtered
102 called <- NULL 119 called <- NULL
103 if (fdr_threshold != 0){ 120 if (fdr_threshold != 0) {
104 called <- e.out$is.CellAndLimited 121 called <- e_out$is_cellandlimited
105 } else { 122 } else {
106 called <- e.out$is.Cell 123 called <- e_out$is_cell
107 } 124 }
108 called[is.na(called)] <- FALSE # replace NA's with FALSE 125 called[is.na(called)] <- FALSE # replace NA's with FALSE
109 sce.filtered <- sce[,called] 126 sce_filtered <- sce[, called]
110 127
111 writeOut(sce.filtered, files$out, out.type) 128 write_out(sce_filtered, files$out, outtype)
112 129
113 message(paste("Cells:", sum(na.omit(e.out$is.Cell)))) 130 message(paste("Cells:", sum(na.omit(e_out$is_cell))))
114 message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited)))) 131 message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited))))
115 } 132 }
116 133
117 134
118 doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5"){ 135 do_default_drops <- function(files, dparams,
119 sce <- read10xFiles(files$infile, in.type) 136 intype = "directory", outtype = "h5") {
137 sce <- read_10x_files(files$infile, intype)
120 138
121 dparams$m = counts(sce) 139 dparams$m <- counts(sce)
122 called <- do.call(defaultDrops, c(dparams)) 140 called <- do.call(defaultDrops, c(dparams))
123 141
124 # Filtered 142 # Filtered
125 sce.filtered <- sce[,called] 143 sce_filtered <- sce[, called]
126 144
127 writeOut(sce.filtered, files$out, out.type) 145 write_out(sce_filtered, files$out, outtype)
128 146
129 message(paste("Cells:", sum(called))) 147 message(paste("Cells:", sum(called)))
130 } 148 }
131 149
132 150 do_barcode_rankings <- function(files, bparams, intype = "directory") {
133 doBarcodeRankings <- function(files, bparams, in.type="directory"){ 151 sce <- read_10x_files(files$infile, intype)
134 sce <- read10xFiles(files$infile, in.type)
135 152
136 bparams$... <- NULL ## hack 153 bparams$... <- NULL ## hack
137 bparams$m = counts(sce) 154 bparams$m <- counts(sce)
138 155
139 br.out <- do.call(barcodeRanks, c(bparams)) 156 brout <- do.call(barcodeRanks, c(bparams))
140 157
141 png(files$plot) 158 png(files$plot)
142 plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") 159 plot(brout$rank, brout$total, log = "xy",
143 o <- order(br.out$rank) 160 xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes")
144 lines(br.out$rank[o], br.out$fitted[o], col="red") 161 o <- order(brout$rank)
162 lines(brout$rank[o], brout$fitted[o], col = "red")
145 163
146 abline(h=br.out$knee, col="dodgerblue", lty=2) 164 abline(h = brout$knee, col = "dodgerblue", lty = 2)
147 abline(h=br.out$inflection, col="forestgreen", lty=2) 165 abline(h = brout$inflection, col = "forestgreen", lty = 2)
148 legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) 166 legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"),
167 legend = c("knee", "inflection"))
149 dev.off() 168 dev.off()
150 169
151 print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) 170 print(paste("knee =", brout$knee, ", inflection = ", brout$inflection))
152 } 171 }
153 172
154 ## Main 173 ## Main
155 set.seed(seed.val) 174 set.seed(seed.val)
156 175
157 if (do.method == "barcodeRankings") { 176 if (do.method == "barcodeRankings") {
158 doBarcodeRankings(files, bparams, in.type) 177 do_barcode_rankings(files, bparams, intype)
159 178
160 } else if (do.method == "defaultDrops") { 179 } else if (do.method == "defaultDrops") {
161 doDefaultDrops(files, dparams, in.type, out.type) 180 do_default_drops(files, dparams, intype, outtype)
162 181
163 } else if (do.method == "emptyDrops") { 182 } else if (do.method == "emptyDrops") {
164 doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold) 183 do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold)
165 } 184 }