Mercurial > repos > iuc > dropletutils
comparison scripts/dropletutils.Rscript @ 6:8855361fcfc5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738"
author | iuc |
---|---|
date | Thu, 10 Dec 2020 13:50:06 +0000 |
parents | cdf4443d5625 |
children | 2c1200fba922 |
comparison
equal
deleted
inserted
replaced
5:cdf4443d5625 | 6:8855361fcfc5 |
---|---|
1 ## Load in data | 1 ## Load in data |
2 args = commandArgs(trailingOnly = T) | 2 args <- commandArgs(trailingOnly = T) |
3 if (length(args) != 1){ | 3 if (length(args) != 1) { |
4 stop("Please provide the config file") | 4 stop("Please provide the config file") |
5 } | 5 } |
6 | 6 |
7 suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) | 7 suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) |
8 suppressWarnings(suppressPackageStartupMessages(require(Matrix))) | 8 suppressWarnings(suppressPackageStartupMessages(require(Matrix))) |
9 suppressWarnings(suppressPackageStartupMessages(require(scater))) | 9 suppressWarnings(suppressPackageStartupMessages(require(scater))) |
10 | 10 |
11 source(args[1]) | 11 source(args[1]) |
12 | 12 |
13 ## Helper functions | 13 ## Helper functions |
14 setSparse <- function(obj){ | 14 set_sparse <- function(obj) { |
15 return(as(obj, "dgCMatrix")) | 15 return(as(obj, "dgCMatrix")) |
16 } | 16 } |
17 | 17 |
18 writeTSV <- function(fileout, obj){ | 18 write_tsv <- function(fileout, obj) { |
19 write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE) | 19 write.table(as.matrix(obj), file = fileout, |
20 col.names = NA, sep = "\t", quote = FALSE) | |
20 } | 21 } |
21 | 22 |
22 determineGeneIDs <- function(object){ | 23 determine_geneids <- function(object) { |
23 if (!is.null(rowData(object)$Symbol)){ | 24 if (!is.null(rowData(object)$Symbol)) { |
24 return(rowData(object)$Symbol) | 25 return(rowData(object)$Symbol) |
25 } | 26 } |
26 return(rownames(object)) | 27 return(rownames(object)) |
27 } | 28 } |
28 | 29 |
29 getCounts <- function(object){ | 30 get_counts <- function(object) { |
30 return(Matrix(counts(object), sparse=TRUE)) | 31 return(Matrix(counts(object), sparse = TRUE)) |
31 } | 32 } |
32 | 33 |
33 writeOut <- function(object, fileout, typeout){ | 34 write_out <- function(object, fileout, typeout) { |
34 if (typeout == "tsv"){ | 35 if (typeout == "tsv") { |
35 writeTSV(fileout, getCounts(object)) | 36 write_tsv(fileout, get_counts(object)) |
36 } | 37 } |
37 else if (typeout == "h5"){ | 38 else if (typeout == "h5") { |
38 write10xCounts(fileout, getCounts(object), | 39 write10xCounts(fileout, get_counts(object), |
39 type="HDF5", | 40 type = "HDF5", |
40 gene.symbol=determineGeneIDs(object), | 41 gene.symbol = determine_geneids(object), |
41 overwrite=TRUE) | 42 overwrite = TRUE) |
42 } | 43 } |
43 else if (typeout == "directory"){ | 44 else if (typeout == "directory") { |
44 write10xCounts(fileout, getCounts(object), | 45 write10xCounts(fileout, get_counts(object), |
45 type="sparse", | 46 type = "sparse", |
46 gene.symbol=determineGeneIDs(object), | 47 gene.symbol = determine_geneids(object), |
47 overwrite=TRUE) | 48 overwrite = TRUE) |
48 } | 49 } |
49 } | 50 } |
50 | 51 |
51 read10xFiles <- function(filein, typein){ | 52 read_10x_files <- function(filein, typein) { |
52 sce <- NULL | 53 sce <- NULL |
53 if (typein == "tsv"){ | 54 if (typein == "tsv") { |
54 ## Exploding memory problems occured here | 55 ## Exploding memory problems occured here |
55 ## - solution is to use the readSparseCounts function from scater | 56 ## - solution is to use the readSparseCounts function from scater |
56 sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) | 57 sce <- SingleCellExperiment(assays = |
58 list(counts = readSparseCounts(filein))) | |
57 } | 59 } |
58 else if (typein == "h5"){ | 60 else if (typein == "h5") { |
59 sce <- read10xCounts(filein, col.names=T, type="HDF5") # use barcodes.tsv as column names | 61 # use barcodes.tsv as column names |
62 sce <- read10xCounts(filein, col.names = T, type = "HDF5") | |
60 } | 63 } |
61 else if (typein == "directory"){ | 64 else if (typein == "directory") { |
62 sce <- read10xCounts(filein, col.names=T, type="sparse") | 65 sce <- read10xCounts(filein, col.names = T, type = "sparse") |
63 } | 66 } |
64 counts(sce) <- setSparse(counts(sce)) | 67 counts(sce) <- set_sparse(counts(sce)) |
65 return(sce) | 68 return(sce) |
66 } | 69 } |
67 | 70 |
68 | 71 |
69 ## Methods | 72 ## Methods |
70 | 73 |
71 | 74 |
72 doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5", fdr_threshold = 0.01){ | 75 do_empty_drops <- function(files, eparams, intype = "directory", |
73 sce <- read10xFiles(files$infile, in.type) | 76 outtype = "h5", fdr_threshold = 0.01) { |
77 sce <- read_10x_files(files$infile, intype) | |
74 | 78 |
75 eparams$... <- NULL ## hack | 79 eparams$... <- NULL ## hack to remove other parameters from being |
76 eparams$m = Matrix(counts(sce), sparse=TRUE) | 80 eparams$m <- Matrix(counts(sce), sparse = TRUE) |
77 | 81 |
78 e.out <- do.call(emptyDrops, c(eparams)) | 82 ## Determine sensible lowerbound |
83 m_stats <- summary(colSums(counts(sce))) | |
84 print("Cell Library Size Distribution:") | |
85 print(m_stats) | |
79 | 86 |
80 bar.names <- colnames(sce) | 87 if (m_stats["Min."] > eparams$lower) { |
81 if (length(bar.names) != nrow(e.out)){ | 88 print(paste0("CAUTION: Min. Lib. Size (", m_stats["Min."] |
89 , ") < requested lowerbound (", eparams$lower, ")")) | |
90 message(paste0("Setting lowerbound to Mean: ", m_stats["Mean"])) | |
91 eparams$lower <- m_stats["Mean"] | |
92 } | |
93 | |
94 e_out <- do.call(emptyDrops, c(eparams)) | |
95 | |
96 bar_names <- colnames(sce) | |
97 if (length(bar_names) != nrow(e_out)) { | |
82 stop("Length of barcodes and output metrics don't match.") | 98 stop("Length of barcodes and output metrics don't match.") |
83 } | 99 } |
84 e.out <- cbind(bar.names, e.out) | 100 e_out <- cbind(bar_names, e_out) |
85 e.out$is.Cell <- e.out$FDR <= fdr_threshold | 101 e_out$is_cell <- e_out$FDR <= fdr_threshold |
86 e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited | 102 e_out$is_cellandlimited <- e_out$is_cell & e_out$Limited |
87 | 103 |
88 ## Write to Plot | 104 ## Write to Plot |
89 e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE | 105 e_out$is_cell[is.na(e_out$is_cell)] <- FALSE |
90 xlim.dat <- e.out[complete.cases(e.out),]$Total | 106 xlim_dat <- e_out[complete.cases(e_out), ]$Total |
91 | 107 |
92 ## Write to table | 108 ## Write to table |
93 writeTSV(files$table, e.out[complete.cases(e.out),]) | 109 write_tsv(files$table, e_out[complete.cases(e_out), ]) |
94 | 110 |
95 png(files$plot) | 111 png(files$plot) |
96 plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), | 112 plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell, |
97 xlab="Total UMI count", ylab="-Log Probability", | 113 "red", "black"), |
98 xlim=c(min(xlim.dat),max(xlim.dat))) | 114 xlab = "Total UMI count", ylab = "-Log Probability", |
115 xlim = c(min(xlim_dat), max(xlim_dat))) | |
99 dev.off() | 116 dev.off() |
100 | 117 |
101 ## Filtered | 118 ## Filtered |
102 called <- NULL | 119 called <- NULL |
103 if (fdr_threshold != 0){ | 120 if (fdr_threshold != 0) { |
104 called <- e.out$is.CellAndLimited | 121 called <- e_out$is_cellandlimited |
105 } else { | 122 } else { |
106 called <- e.out$is.Cell | 123 called <- e_out$is_cell |
107 } | 124 } |
108 called[is.na(called)] <- FALSE # replace NA's with FALSE | 125 called[is.na(called)] <- FALSE # replace NA's with FALSE |
109 sce.filtered <- sce[,called] | 126 sce_filtered <- sce[, called] |
110 | 127 |
111 writeOut(sce.filtered, files$out, out.type) | 128 write_out(sce_filtered, files$out, outtype) |
112 | 129 |
113 message(paste("Cells:", sum(na.omit(e.out$is.Cell)))) | 130 message(paste("Cells:", sum(na.omit(e_out$is_cell)))) |
114 message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited)))) | 131 message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited)))) |
115 } | 132 } |
116 | 133 |
117 | 134 |
118 doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5"){ | 135 do_default_drops <- function(files, dparams, |
119 sce <- read10xFiles(files$infile, in.type) | 136 intype = "directory", outtype = "h5") { |
137 sce <- read_10x_files(files$infile, intype) | |
120 | 138 |
121 dparams$m = counts(sce) | 139 dparams$m <- counts(sce) |
122 called <- do.call(defaultDrops, c(dparams)) | 140 called <- do.call(defaultDrops, c(dparams)) |
123 | 141 |
124 # Filtered | 142 # Filtered |
125 sce.filtered <- sce[,called] | 143 sce_filtered <- sce[, called] |
126 | 144 |
127 writeOut(sce.filtered, files$out, out.type) | 145 write_out(sce_filtered, files$out, outtype) |
128 | 146 |
129 message(paste("Cells:", sum(called))) | 147 message(paste("Cells:", sum(called))) |
130 } | 148 } |
131 | 149 |
132 | 150 do_barcode_rankings <- function(files, bparams, intype = "directory") { |
133 doBarcodeRankings <- function(files, bparams, in.type="directory"){ | 151 sce <- read_10x_files(files$infile, intype) |
134 sce <- read10xFiles(files$infile, in.type) | |
135 | 152 |
136 bparams$... <- NULL ## hack | 153 bparams$... <- NULL ## hack |
137 bparams$m = counts(sce) | 154 bparams$m <- counts(sce) |
138 | 155 |
139 br.out <- do.call(barcodeRanks, c(bparams)) | 156 brout <- do.call(barcodeRanks, c(bparams)) |
140 | 157 |
141 png(files$plot) | 158 png(files$plot) |
142 plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") | 159 plot(brout$rank, brout$total, log = "xy", |
143 o <- order(br.out$rank) | 160 xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes") |
144 lines(br.out$rank[o], br.out$fitted[o], col="red") | 161 o <- order(brout$rank) |
162 lines(brout$rank[o], brout$fitted[o], col = "red") | |
145 | 163 |
146 abline(h=br.out$knee, col="dodgerblue", lty=2) | 164 abline(h = brout$knee, col = "dodgerblue", lty = 2) |
147 abline(h=br.out$inflection, col="forestgreen", lty=2) | 165 abline(h = brout$inflection, col = "forestgreen", lty = 2) |
148 legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) | 166 legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"), |
167 legend = c("knee", "inflection")) | |
149 dev.off() | 168 dev.off() |
150 | 169 |
151 print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) | 170 print(paste("knee =", brout$knee, ", inflection = ", brout$inflection)) |
152 } | 171 } |
153 | 172 |
154 ## Main | 173 ## Main |
155 set.seed(seed.val) | 174 set.seed(seed.val) |
156 | 175 |
157 if (do.method == "barcodeRankings") { | 176 if (do.method == "barcodeRankings") { |
158 doBarcodeRankings(files, bparams, in.type) | 177 do_barcode_rankings(files, bparams, intype) |
159 | 178 |
160 } else if (do.method == "defaultDrops") { | 179 } else if (do.method == "defaultDrops") { |
161 doDefaultDrops(files, dparams, in.type, out.type) | 180 do_default_drops(files, dparams, intype, outtype) |
162 | 181 |
163 } else if (do.method == "emptyDrops") { | 182 } else if (do.method == "emptyDrops") { |
164 doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold) | 183 do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold) |
165 } | 184 } |