raceid_inspectclusters: scripts/cluster.R comparison

comparison scripts/cluster.R @ 6:41f34e925bd5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit 53916f6803b93234f992f5fd4fad61d7013d82af"

author	iuc
date	Thu, 15 Apr 2021 18:59:31 +0000
parents	20f522154663
children	f3eb2291da05

comparison

equal deleted inserted replaced

-:37a47c4fd84d
+:41f34e925bd5
 #!/usr/bin/env R
-VERSION = "0.5"
+VERSION <- "0.5" # nolint
-args = commandArgs(trailingOnly = T)
+args <- commandArgs(trailingOnly = T)
-if (length(args) != 1){
+if (length(args) != 1) {
 message(paste("VERSION:", VERSION))
 stop("Please provide the config file")
 }
 suppressWarnings(suppressPackageStartupMessages(require(RaceID)))
-suppressWarnings(suppressPackageStartupMessages(require(scran)))
+## suppressWarnings(suppressPackageStartupMessages(require(scran)))  # nolint
 source(args[1])
-do.filter <- function(sc){
+do.filter <- function(sc) { # nolint
-if (!is.null(filt.lbatch.regexes)){
+if (!is.null(filt.lbatch.regexes)) {
 lar <- filt.lbatch.regexes
 nn <- colnames(sc@expdata)
-filt$LBatch <- lapply(1:length(lar), function(m){ return( nn[grep(lar[[m]], nn)] ) })
+filt$LBatch <- lapply(1:length(lar), function(m) {  # nolint
+return(nn[grep(lar[[m]], nn)])})
 }
 sc <- do.call(filterdata, c(sc, filt))
 ## Get histogram metrics for library size and number of features
-raw.lib <- log10(colSums(as.matrix(sc@expdata)))
+raw_lib <- log10(colSums(as.matrix(sc@expdata)))
-raw.feat <- log10(colSums(as.matrix(sc@expdata)>0))
+raw_feat <- log10(colSums(as.matrix(sc@expdata) > 0))
-filt.lib <- log10(colSums(getfdata(sc)))
+filt_lib <- log10(colSums(as.matrix(getfdata(sc))))
-filt.feat <- log10(colSums(getfdata(sc)>0))
+filt_feat <- log10(colSums(as.matrix(getfdata(sc) > 0)))
-if (filt.geqone){
+if (filt.geqone) {
-filt.feat <- log10(colSums(getfdata(sc)>=1))
+filt_feat <- log10(colSums(as.matrix(getfdata(sc) >= 1))) # nolint
 }
 br <- 50
-## Determine limits on plots based on the unfiltered data
+par(mfrow = c(2, 2))
-## (doesn't work, R rejects limits and norm data is too different to compare to exp data
+print(hist(raw_lib, breaks = br, main = "RawData Log10 LibSize"))
-##  so let them keep their own ranges)
+print(hist(raw_feat, breaks = br, main = "RawData Log10 NumFeat"))
+print(hist(filt_lib, breaks = br, main = "FiltData Log10 LibSize"))
-## betterrange <- function(floatval){
+tmp <- hist(filt_feat, breaks = br, main = "FiltData Log10 NumFeat")
-##     return(10 * (floor(floatval / 10) + 1))
-## }
-## tmp.lib <- hist(raw.lib, breaks=br, plot=F)
-## tmp.feat <- hist(raw.feat, breaks=br, plot=F)
-## lib.y_lim <- c(0,betterrange(max(tmp.lib$counts)))
-## lib.x_lim <- c(0,betterrange(max(tmp.lib$breaks)))
-## feat.y_lim <- c(0,betterrange(max(tmp.feat$counts)))
-## feat.x_lim <- c(0,betterrange(max(tmp.feat$breaks)))
-par(mfrow=c(2,2))
-print(hist(raw.lib, breaks=br, main="RawData Log10 LibSize")) # , xlim=lib.x_lim, ylim=lib.y_lim)
-print(hist(raw.feat, breaks=br, main="RawData Log10 NumFeat")) #, xlim=feat.x_lim, ylim=feat.y_lim)
-print(hist(filt.lib, breaks=br, main="FiltData Log10 LibSize")) # , xlim=lib.x_lim, ylim=lib.y_lim)
-tmp <- hist(filt.feat, breaks=br, main="FiltData Log10 NumFeat") # , xlim=feat.x_lim, ylim=feat.y_lim)
 print(tmp)
 ## required, for extracting midpoint
-unq <- unique(filt.feat)
+unq <- unique(filt_feat)
-if (length(unq) == 1){
+if (length(unq) == 1) {
-abline(v=unq, col="red", lw=2)
+abline(v = unq, col = "red", lw = 2)
-text(tmp$mids, table(filt.feat)[[1]] - 100, pos=1, paste(10^unq, "\nFeatures\nin remaining\nCells", sep=""), cex=0.8)
+text(tmp$mids, table(filt_feat)[[1]] - 100, pos = 1,
+paste(10^unq, "\nFeatures\nin remaining\nCells",
+sep = ""), cex = 0.8)
 }
-if (filt.use.ccorrect){
+if (filt.use.ccorrect) {
-par(mfrow=c(2,2))
+par(mfrow = c(2, 2))
 sc <- do.call(CCcorrect, c(sc, filt.ccc))
-print(plotdimsat(sc, change=T))
+print(plotdimsat(sc, change = T))
-print(plotdimsat(sc, change=F))
+print(plotdimsat(sc, change = F))
 }
 return(sc)
 }
-do.cluster <- function(sc){
+do.cluster <- function(sc) { # nolint
 sc <- do.call(compdist, c(sc, clust.compdist))
 sc <- do.call(clustexp, c(sc, clust.clustexp))
-if (clust.clustexp$sat){
+if (clust.clustexp$sat) {
-print(plotsaturation(sc, disp=F))
+print(plotsaturation(sc, disp = F))
-print(plotsaturation(sc, disp=T))
+print(plotsaturation(sc, disp = T))
 }
 print(plotjaccard(sc))
 return(sc)
 }
-do.outlier <- function(sc){
+do.outlier <- function(sc) { # nolint
 sc <- do.call(findoutliers, c(sc, outlier.findoutliers))
-if (outlier.use.randomforest){
+if (outlier.use.randomforest) {
 sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect))
 }
 print(plotbackground(sc))
 print(plotsensitivity(sc))
 print(plotoutlierprobs(sc))
 ## Heatmaps
 test1 <- list()
-test1$side = 3
+test1$side <- 3
-test1$line = 0  #1 #3
+test1$line <- 0  #1 #3
-x <- clustheatmap(sc, final=FALSE)
+x <- clustheatmap(sc, final = FALSE)
-print(do.call(mtext, c(paste("(Initial)"), test1)))  ## spacing is a hack
+print(do.call(mtext, c(paste("(Initial)"), test1)))
-x <- clustheatmap(sc, final=TRUE)
+x <- clustheatmap(sc, final = TRUE)
-print(do.call(mtext, c(paste("(Final)"), test1)))  ## spacing is a hack
+print(do.call(mtext, c(paste("(Final)"), test1)))
 return(sc)
 }
-do.clustmap <- function(sc){
+do.clustmap <- function(sc) { # nolint
 sc <- do.call(comptsne, c(sc, cluster.comptsne))
 sc <- do.call(compfr, c(sc, cluster.compfr))
+sc <- do.call(compumap, c(sc, cluster.compumap))
 return(sc)
 }
-mkgenelist <- function(sc){
+mkgenelist <- function(sc) {
 ## Layout
 test <- list()
-test$side = 3
+test$side <- 4
-test$line = 0  #1 #3
+test$line <- -2
-test$cex = 0.8
+test$cex <- 0.8
 df <- c()
 options(cex = 1)
-lapply(unique(sc@cpart), function(n){
+plot.new()
-dg <- clustdiffgenes(sc, cl=n, pvalue=genelist.pvalue)
+lapply(unique(sc@cpart), function(n) {
+dg <- clustdiffgenes(sc, cl = n, pvalue = genelist.pvalue)$dg
-dg.goi <- dg[dg$fc > genelist.foldchange,]
+dg_goi <- dg[dg$fc > genelist.foldchange, ]
-dg.goi.table <- head(dg.goi, genelist.tablelim)
+dg_goi_table <- head(dg_goi, genelist.tablelim)
-df <<- rbind(df, cbind(n, dg.goi.table))
+df <<- rbind(df, cbind(n, dg_goi_table))
-goi <- head(rownames(dg.goi.table), genelist.plotlim)
+goi <- head(rownames(dg_goi_table), genelist.plotlim)
 print(plotmarkergenes(sc, goi))
-buffer <- paste(rep("", 36), collapse=" ")
+buffer <- paste(rep("", 36), collapse = " ")
-print(do.call(mtext, c(paste(buffer, "Cluster ",n), test)))  ## spacing is a hack
+print(do.call(mtext, c(paste(buffer, "Cluster ", n), test)))
-test$line=-1
+test$line <- -1
-print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test)))  ## spacing is a hack
+print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test)))
-test$line=-2
+test$line <- 0
-print(do.call(mtext, c(paste(buffer, "(fc > ", genelist.foldchange,")"), test)))  ## spacing is a hack
+print(do.call(mtext, c(paste(buffer, "(fc > ",
+genelist.foldchange, ")"), test)))
 })
-write.table(df, file=out.genelist, sep="\t", quote=F)
+write.table(df, file = out.genelist, sep = "\t", quote = F)
 }
-writecellassignments <- function(sc){
+writecellassignments <- function(sc) {
 dat <- sc@cluster$kpart
 tab <- data.frame(row.names = NULL,
 cells = names(dat),
 cluster.initial = dat,
 cluster.final = sc@cpart,
 is.outlier = names(dat) %in% sc@out$out)
-write.table(tab, file=out.assignments, sep="\t", quote=F, row.names = F)
+write.table(tab, file = out.assignments, sep = "\t",
+quote = F, row.names = F)
 }
 pdf(out.pdf)
-if (use.filtnormconf){
+if (use.filtnormconf) {
 sc <- do.filter(sc)
-message(paste(" - Source:: genes:",nrow(sc@expdata),", cells:",ncol(sc@expdata)))
+message(paste(" - Source:: genes:", nrow(sc@expdata),
-message(paste(" - Filter:: genes:",nrow(getfdata(sc)),", cells:",ncol(getfdata(sc))))
+", cells:", ncol(sc@expdata)))
+message(paste(" - Filter:: genes:", nrow(as.matrix(getfdata(sc))),
+", cells:", ncol(as.matrix(getfdata(sc)))))
 message(paste("         :: ",
-sprintf("%.1f", 100 * nrow(getfdata(sc))/nrow(sc@expdata)), "% of genes remain,",
+sprintf("%.1f", 100 * nrow(as.matrix(
-sprintf("%.1f", 100 * ncol(getfdata(sc))/ncol(sc@expdata)), "% of cells remain"))
+getfdata(sc))) / nrow(sc@expdata)),
-write.table(as.matrix(sc@ndata), file=out.table, col.names=NA, row.names=T, sep="\t", quote=F)
+"% of genes remain,",
+sprintf("%.1f", 100 * ncol(as.matrix(
+getfdata(sc))) / ncol(sc@expdata)),
+"% of cells remain"))
+write.table(as.matrix(sc@ndata), file = out.table, col.names = NA,
+row.names = T, sep = "\t", quote = F)
 }
-if (use.cluster){
+if (use.cluster) {
-par(mfrow=c(2,2))
+par(mfrow = c(2, 2))
 sc <- do.cluster(sc)
-par(mfrow=c(2,2))
+par(mfrow = c(2, 2))
 sc <- do.outlier(sc)
-par(mfrow=c(2,2), mar=c(1,1,6,1))
+par(mfrow = c(2, 2), mar = c(1, 1, 6, 1))
 sc <- do.clustmap(sc)
 mkgenelist(sc)
 writecellassignments(sc)
 }

Mercurial > repos > iuc > raceid_inspectclusters

comparison scripts/cluster.R @ 6:41f34e925bd5 draft