comparison scripts/dropletutils.Rscript @ 2:a8aa294401be draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit 4d89eb1eb951ef094d1f77c46824d9c38be4445b"
author iuc
date Fri, 06 Sep 2019 10:56:16 -0400
parents cfe1e6c28d95
children f0de368eabca
comparison
equal deleted inserted replaced
1:cfe1e6c28d95 2:a8aa294401be
56 doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5ad", fdr_threshold = 0.01){ 56 doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5ad", fdr_threshold = 0.01){
57 sce <- read10xFiles(files$infile, in.type) 57 sce <- read10xFiles(files$infile, in.type)
58 58
59 eparams$... <- NULL ## hack 59 eparams$... <- NULL ## hack
60 eparams$m = Matrix(counts(sce), sparse=TRUE) 60 eparams$m = Matrix(counts(sce), sparse=TRUE)
61 61
62 e.out <- do.call(emptyDrops, c(eparams)) 62 e.out <- do.call(emptyDrops, c(eparams))
63 63
64 bar.names <- colnames(sce) 64 bar.names <- colnames(sce)
65 if (length(bar.names) != nrow(e.out)){ 65 if (length(bar.names) != nrow(e.out)){
66 stop("Length of barcodes and output metrics don't match.") 66 stop("Length of barcodes and output metrics don't match.")
67 } 67 }
68 e.out <- cbind(bar.names, e.out) 68 e.out <- cbind(bar.names, e.out)
69 e.out$is.Cell <- e.out$FDR <= fdr_threshold 69 e.out$is.Cell <- e.out$FDR <= fdr_threshold
70 e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited 70 e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited
71 71
72 # Write to table 72 ## Write to Plot
73 writeTSV(files$table, e.out) 73 e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE
74 74 xlim.dat <- e.out[complete.cases(e.out),]$Total
75 # Print to log 75
76 print(table(Limited=e.out$Limited, Significant=e.out$is.Cell)) 76 ## Write to table
77 77 writeTSV(files$table, e.out[complete.cases(e.out),])
78 # Write to Plot 78
79 png(files$plot) 79 png(files$plot)
80 plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), 80 plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"),
81 xlab="Total UMI count", ylab="-Log Probability") 81 xlab="Total UMI count", ylab="-Log Probability",
82 xlim=c(min(xlim.dat),max(xlim.dat)))
82 dev.off() 83 dev.off()
83 84
84 # Filtered 85 ## Filtered
85 called <- e.out$is.CellAndLimited 86 called <- NULL
87 if (fdr_threshold != 0){
88 called <- e.out$is.CellAndLimited
89 } else {
90 called <- e.out$is.Cell
91 }
86 called[is.na(called)] <- FALSE # replace NA's with FALSE 92 called[is.na(called)] <- FALSE # replace NA's with FALSE
87 sce.filtered <- sce[,called] 93 sce.filtered <- sce[,called]
88 94
89 writeOut(counts(sce.filtered), files$out, out.type) 95 writeOut(counts(sce.filtered), files$out, out.type)
96
97 message(paste("Cells:", sum(na.omit(e.out$is.Cell))))
98 message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited))))
90 } 99 }
91 100
92 101
93 doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5ad"){ 102 doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5ad"){
94 sce <- read10xFiles(files$infile, in.type) 103 sce <- read10xFiles(files$infile, in.type)
95 104
96 dparams$m = counts(sce) 105 dparams$m = counts(sce)
97 called <- do.call(defaultDrops, c(dparams)) 106 called <- do.call(defaultDrops, c(dparams))
98 print(table(called)) 107
99
100 # Filtered 108 # Filtered
101 sce.filtered <- sce[,called] 109 sce.filtered <- sce[,called]
102 110
103 writeOut(Matrix(counts(sce.filtered),sparse=TRUE), files$out, out.type) 111 writeOut(Matrix(counts(sce.filtered),sparse=TRUE), files$out, out.type)
112
113 message(paste("Cells:", sum(called)))
104 } 114 }
105 115
106 116
107 doBarcodeRankings <- function(files, bparams, in.type="directory"){ 117 doBarcodeRankings <- function(files, bparams, in.type="directory"){
108 sce <- read10xFiles(files$infile, in.type) 118 sce <- read10xFiles(files$infile, in.type)
109 119
110 bparams$... <- NULL ## hack 120 bparams$... <- NULL ## hack
111 bparams$m = counts(sce) 121 bparams$m = counts(sce)
112 122
113 br.out <- do.call(barcodeRanks, c(bparams)) 123 br.out <- do.call(barcodeRanks, c(bparams))
114 124
115 png(files$plot) 125 png(files$plot)
116 plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") 126 plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes")
117 o <- order(br.out$rank) 127 o <- order(br.out$rank)
118 lines(br.out$rank[o], br.out$fitted[o], col="red") 128 lines(br.out$rank[o], br.out$fitted[o], col="red")
119 129
120 abline(h=br.out$knee, col="dodgerblue", lty=2) 130 abline(h=br.out$knee, col="dodgerblue", lty=2)
121 abline(h=br.out$inflection, col="forestgreen", lty=2) 131 abline(h=br.out$inflection, col="forestgreen", lty=2)
122 legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) 132 legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection"))
123 dev.off() 133 dev.off()
124 134
125 print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) 135 print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection))
126 } 136 }
127 137
128 ## Main 138 ## Main
129 set.seed(seed.val) 139 set.seed(seed.val)