comparison scripts/cluster.R @ 6:41f34e925bd5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit 53916f6803b93234f992f5fd4fad61d7013d82af"
author iuc
date Thu, 15 Apr 2021 18:59:31 +0000
parents 20f522154663
children f3eb2291da05
comparison
equal deleted inserted replaced
5:37a47c4fd84d 6:41f34e925bd5
1 #!/usr/bin/env R 1 #!/usr/bin/env R
2 VERSION = "0.5" 2 VERSION <- "0.5" # nolint
3 3
4 args = commandArgs(trailingOnly = T) 4 args <- commandArgs(trailingOnly = T)
5 5
6 if (length(args) != 1){ 6 if (length(args) != 1) {
7 message(paste("VERSION:", VERSION)) 7 message(paste("VERSION:", VERSION))
8 stop("Please provide the config file") 8 stop("Please provide the config file")
9 } 9 }
10 10
11 suppressWarnings(suppressPackageStartupMessages(require(RaceID))) 11 suppressWarnings(suppressPackageStartupMessages(require(RaceID)))
12 suppressWarnings(suppressPackageStartupMessages(require(scran))) 12 ## suppressWarnings(suppressPackageStartupMessages(require(scran))) # nolint
13 source(args[1]) 13 source(args[1])
14 14
15 15
16 do.filter <- function(sc){ 16 do.filter <- function(sc) { # nolint
17 if (!is.null(filt.lbatch.regexes)){ 17 if (!is.null(filt.lbatch.regexes)) {
18 lar <- filt.lbatch.regexes 18 lar <- filt.lbatch.regexes
19 nn <- colnames(sc@expdata) 19 nn <- colnames(sc@expdata)
20 filt$LBatch <- lapply(1:length(lar), function(m){ return( nn[grep(lar[[m]], nn)] ) }) 20 filt$LBatch <- lapply(1:length(lar), function(m) { # nolint
21 return(nn[grep(lar[[m]], nn)])})
21 } 22 }
22 23
23 sc <- do.call(filterdata, c(sc, filt)) 24 sc <- do.call(filterdata, c(sc, filt))
24 25
25 ## Get histogram metrics for library size and number of features 26 ## Get histogram metrics for library size and number of features
26 raw.lib <- log10(colSums(as.matrix(sc@expdata))) 27 raw_lib <- log10(colSums(as.matrix(sc@expdata)))
27 raw.feat <- log10(colSums(as.matrix(sc@expdata)>0)) 28 raw_feat <- log10(colSums(as.matrix(sc@expdata) > 0))
28 filt.lib <- log10(colSums(getfdata(sc))) 29 filt_lib <- log10(colSums(as.matrix(getfdata(sc))))
29 filt.feat <- log10(colSums(getfdata(sc)>0)) 30 filt_feat <- log10(colSums(as.matrix(getfdata(sc) > 0)))
30 31
31 if (filt.geqone){ 32 if (filt.geqone) {
32 filt.feat <- log10(colSums(getfdata(sc)>=1)) 33 filt_feat <- log10(colSums(as.matrix(getfdata(sc) >= 1))) # nolint
33 } 34 }
34 35
35 br <- 50 36 br <- 50
36 ## Determine limits on plots based on the unfiltered data 37 par(mfrow = c(2, 2))
37 ## (doesn't work, R rejects limits and norm data is too different to compare to exp data 38 print(hist(raw_lib, breaks = br, main = "RawData Log10 LibSize"))
38 ## so let them keep their own ranges) 39 print(hist(raw_feat, breaks = br, main = "RawData Log10 NumFeat"))
39 40 print(hist(filt_lib, breaks = br, main = "FiltData Log10 LibSize"))
40 ## betterrange <- function(floatval){ 41 tmp <- hist(filt_feat, breaks = br, main = "FiltData Log10 NumFeat")
41 ## return(10 * (floor(floatval / 10) + 1))
42 ## }
43
44 ## tmp.lib <- hist(raw.lib, breaks=br, plot=F)
45 ## tmp.feat <- hist(raw.feat, breaks=br, plot=F)
46
47 ## lib.y_lim <- c(0,betterrange(max(tmp.lib$counts)))
48 ## lib.x_lim <- c(0,betterrange(max(tmp.lib$breaks)))
49
50 ## feat.y_lim <- c(0,betterrange(max(tmp.feat$counts)))
51 ## feat.x_lim <- c(0,betterrange(max(tmp.feat$breaks)))
52
53 par(mfrow=c(2,2))
54 print(hist(raw.lib, breaks=br, main="RawData Log10 LibSize")) # , xlim=lib.x_lim, ylim=lib.y_lim)
55 print(hist(raw.feat, breaks=br, main="RawData Log10 NumFeat")) #, xlim=feat.x_lim, ylim=feat.y_lim)
56 print(hist(filt.lib, breaks=br, main="FiltData Log10 LibSize")) # , xlim=lib.x_lim, ylim=lib.y_lim)
57 tmp <- hist(filt.feat, breaks=br, main="FiltData Log10 NumFeat") # , xlim=feat.x_lim, ylim=feat.y_lim)
58 print(tmp) 42 print(tmp)
59 ## required, for extracting midpoint 43 ## required, for extracting midpoint
60 unq <- unique(filt.feat) 44 unq <- unique(filt_feat)
61 if (length(unq) == 1){ 45 if (length(unq) == 1) {
62 abline(v=unq, col="red", lw=2) 46 abline(v = unq, col = "red", lw = 2)
63 text(tmp$mids, table(filt.feat)[[1]] - 100, pos=1, paste(10^unq, "\nFeatures\nin remaining\nCells", sep=""), cex=0.8) 47 text(tmp$mids, table(filt_feat)[[1]] - 100, pos = 1,
48 paste(10^unq, "\nFeatures\nin remaining\nCells",
49 sep = ""), cex = 0.8)
64 } 50 }
65 51
66 if (filt.use.ccorrect){ 52 if (filt.use.ccorrect) {
67 par(mfrow=c(2,2)) 53 par(mfrow = c(2, 2))
68 sc <- do.call(CCcorrect, c(sc, filt.ccc)) 54 sc <- do.call(CCcorrect, c(sc, filt.ccc))
69 print(plotdimsat(sc, change=T)) 55 print(plotdimsat(sc, change = T))
70 print(plotdimsat(sc, change=F)) 56 print(plotdimsat(sc, change = F))
71 } 57 }
72 return(sc) 58 return(sc)
73 } 59 }
74 60
75 do.cluster <- function(sc){ 61 do.cluster <- function(sc) { # nolint
76 sc <- do.call(compdist, c(sc, clust.compdist)) 62 sc <- do.call(compdist, c(sc, clust.compdist))
77 sc <- do.call(clustexp, c(sc, clust.clustexp)) 63 sc <- do.call(clustexp, c(sc, clust.clustexp))
78 if (clust.clustexp$sat){ 64 if (clust.clustexp$sat) {
79 print(plotsaturation(sc, disp=F)) 65 print(plotsaturation(sc, disp = F))
80 print(plotsaturation(sc, disp=T)) 66 print(plotsaturation(sc, disp = T))
81 } 67 }
82 print(plotjaccard(sc)) 68 print(plotjaccard(sc))
83 return(sc) 69 return(sc)
84 } 70 }
85 71
86 do.outlier <- function(sc){ 72 do.outlier <- function(sc) { # nolint
87 sc <- do.call(findoutliers, c(sc, outlier.findoutliers)) 73 sc <- do.call(findoutliers, c(sc, outlier.findoutliers))
88 if (outlier.use.randomforest){ 74 if (outlier.use.randomforest) {
89 sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect)) 75 sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect))
90 } 76 }
91 print(plotbackground(sc)) 77 print(plotbackground(sc))
92 print(plotsensitivity(sc)) 78 print(plotsensitivity(sc))
93 print(plotoutlierprobs(sc)) 79 print(plotoutlierprobs(sc))
94 ## Heatmaps 80 ## Heatmaps
95 test1 <- list() 81 test1 <- list()
96 test1$side = 3 82 test1$side <- 3
97 test1$line = 0 #1 #3 83 test1$line <- 0 #1 #3
98 84
99 x <- clustheatmap(sc, final=FALSE) 85 x <- clustheatmap(sc, final = FALSE)
100 print(do.call(mtext, c(paste("(Initial)"), test1))) ## spacing is a hack 86 print(do.call(mtext, c(paste("(Initial)"), test1)))
101 x <- clustheatmap(sc, final=TRUE) 87 x <- clustheatmap(sc, final = TRUE)
102 print(do.call(mtext, c(paste("(Final)"), test1))) ## spacing is a hack 88 print(do.call(mtext, c(paste("(Final)"), test1)))
103 return(sc) 89 return(sc)
104 } 90 }
105 91
106 do.clustmap <- function(sc){ 92 do.clustmap <- function(sc) { # nolint
107 sc <- do.call(comptsne, c(sc, cluster.comptsne)) 93 sc <- do.call(comptsne, c(sc, cluster.comptsne))
108 sc <- do.call(compfr, c(sc, cluster.compfr)) 94 sc <- do.call(compfr, c(sc, cluster.compfr))
95 sc <- do.call(compumap, c(sc, cluster.compumap))
109 return(sc) 96 return(sc)
110 } 97 }
111 98
112 99
113 mkgenelist <- function(sc){ 100 mkgenelist <- function(sc) {
114 ## Layout 101 ## Layout
115 test <- list() 102 test <- list()
116 test$side = 3 103 test$side <- 4
117 test$line = 0 #1 #3 104 test$line <- -2
118 test$cex = 0.8 105 test$cex <- 0.8
119 106
120 df <- c() 107 df <- c()
121 options(cex = 1) 108 options(cex = 1)
122 lapply(unique(sc@cpart), function(n){ 109 plot.new()
123 dg <- clustdiffgenes(sc, cl=n, pvalue=genelist.pvalue) 110 lapply(unique(sc@cpart), function(n) {
111 dg <- clustdiffgenes(sc, cl = n, pvalue = genelist.pvalue)$dg
124 112
125 dg.goi <- dg[dg$fc > genelist.foldchange,] 113 dg_goi <- dg[dg$fc > genelist.foldchange, ]
126 dg.goi.table <- head(dg.goi, genelist.tablelim) 114 dg_goi_table <- head(dg_goi, genelist.tablelim)
127 df <<- rbind(df, cbind(n, dg.goi.table)) 115 df <<- rbind(df, cbind(n, dg_goi_table))
128 116
129 goi <- head(rownames(dg.goi.table), genelist.plotlim) 117 goi <- head(rownames(dg_goi_table), genelist.plotlim)
118
130 print(plotmarkergenes(sc, goi)) 119 print(plotmarkergenes(sc, goi))
131 buffer <- paste(rep("", 36), collapse=" ") 120 buffer <- paste(rep("", 36), collapse = " ")
132 print(do.call(mtext, c(paste(buffer, "Cluster ",n), test))) ## spacing is a hack 121 print(do.call(mtext, c(paste(buffer, "Cluster ", n), test)))
133 test$line=-1 122 test$line <- -1
134 print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test))) ## spacing is a hack 123 print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test)))
135 test$line=-2 124 test$line <- 0
136 print(do.call(mtext, c(paste(buffer, "(fc > ", genelist.foldchange,")"), test))) ## spacing is a hack 125 print(do.call(mtext, c(paste(buffer, "(fc > ",
137 126 genelist.foldchange, ")"), test)))
138 }) 127 })
139 write.table(df, file=out.genelist, sep="\t", quote=F) 128 write.table(df, file = out.genelist, sep = "\t", quote = F)
140 } 129 }
141 130
142 131
143 writecellassignments <- function(sc){ 132 writecellassignments <- function(sc) {
144 dat <- sc@cluster$kpart 133 dat <- sc@cluster$kpart
145 tab <- data.frame(row.names = NULL, 134 tab <- data.frame(row.names = NULL,
146 cells = names(dat), 135 cells = names(dat),
147 cluster.initial = dat, 136 cluster.initial = dat,
148 cluster.final = sc@cpart, 137 cluster.final = sc@cpart,
149 is.outlier = names(dat) %in% sc@out$out) 138 is.outlier = names(dat) %in% sc@out$out)
150 139
151 write.table(tab, file=out.assignments, sep="\t", quote=F, row.names = F) 140 write.table(tab, file = out.assignments, sep = "\t",
141 quote = F, row.names = F)
152 } 142 }
153 143
154 144
155 pdf(out.pdf) 145 pdf(out.pdf)
156 146
157 if (use.filtnormconf){ 147 if (use.filtnormconf) {
158 sc <- do.filter(sc) 148 sc <- do.filter(sc)
159 message(paste(" - Source:: genes:",nrow(sc@expdata),", cells:",ncol(sc@expdata))) 149 message(paste(" - Source:: genes:", nrow(sc@expdata),
160 message(paste(" - Filter:: genes:",nrow(getfdata(sc)),", cells:",ncol(getfdata(sc)))) 150 ", cells:", ncol(sc@expdata)))
151 message(paste(" - Filter:: genes:", nrow(as.matrix(getfdata(sc))),
152 ", cells:", ncol(as.matrix(getfdata(sc)))))
161 message(paste(" :: ", 153 message(paste(" :: ",
162 sprintf("%.1f", 100 * nrow(getfdata(sc))/nrow(sc@expdata)), "% of genes remain,", 154 sprintf("%.1f", 100 * nrow(as.matrix(
163 sprintf("%.1f", 100 * ncol(getfdata(sc))/ncol(sc@expdata)), "% of cells remain")) 155 getfdata(sc))) / nrow(sc@expdata)),
164 write.table(as.matrix(sc@ndata), file=out.table, col.names=NA, row.names=T, sep="\t", quote=F) 156 "% of genes remain,",
157 sprintf("%.1f", 100 * ncol(as.matrix(
158 getfdata(sc))) / ncol(sc@expdata)),
159 "% of cells remain"))
160 write.table(as.matrix(sc@ndata), file = out.table, col.names = NA,
161 row.names = T, sep = "\t", quote = F)
165 } 162 }
166 163
167 if (use.cluster){ 164 if (use.cluster) {
168 par(mfrow=c(2,2)) 165 par(mfrow = c(2, 2))
169 sc <- do.cluster(sc) 166 sc <- do.cluster(sc)
170 167
171 par(mfrow=c(2,2)) 168 par(mfrow = c(2, 2))
172 sc <- do.outlier(sc) 169 sc <- do.outlier(sc)
173 170
174 par(mfrow=c(2,2), mar=c(1,1,6,1)) 171 par(mfrow = c(2, 2), mar = c(1, 1, 6, 1))
175 sc <- do.clustmap(sc) 172 sc <- do.clustmap(sc)
176 173
177 mkgenelist(sc) 174 mkgenelist(sc)
178 writecellassignments(sc) 175 writecellassignments(sc)
179 } 176 }