Mercurial > repos > goeckslab > clustering_spatialge
diff spatialGE_clust.R @ 0:555ca19d07e6 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author | goeckslab |
---|---|
date | Wed, 13 Aug 2025 19:32:19 +0000 |
parents | |
children |
line wrap: on
line diff
# ------------------------------
# Spatially-Informed Clustering
# ------------------------------

# Purpose:
# Group cells into tissue domains using hierarchical clustering on a weighted
# similarity matrix
#
# Inputs (command line, see option_list below):
#   --file       STlist .rds produced by the preprocessing step (required)
#   --visium / --raw / --cosmx
#                exactly one should be given; it selects which column of the
#                STlist sample metadata holds the sample names
#   --samples    optional comma-separated subset of samples
#   --dslogical + --logical <TRUE|FALSE>   pass deepSplit as a logical
#   --dsnumeric + --numeric <0-4>          pass deepSplit as a number
#
# Outputs:
#   One PNG per sample, written to ./cluster_plots/clustered_<sample>.png

library(spatialGE)
library(optparse)
library(ggplot2)
library(tools)
library(dplyr)

### Command Line Options

option_list <- list(
  make_option(c("-f", "--file"), action = "store", default = NA,
              type = "character",
              help = "STlist .rds file path from preprocessing"),
  make_option(c("--visium"), action = "store_true", type = "logical",
              default = FALSE,
              help = "Flag for Visium data"),
  make_option(c("--raw"), action = "store_true", type = "logical",
              default = FALSE,
              help = "Flag for raw data"),
  make_option(c("--cosmx"), action = "store_true", type = "logical",
              default = FALSE,
              help = "Flag for CosMX data"),
  make_option(c("-s", "--samples"), action = "store", default = NULL,
              type = "character",
              help = "Sample subset to perform clustering on"),
  make_option(c("-w", "--weight"), action = "store", default = 0.025,
              type = "numeric",
              help = "Weight to be applied to spatial distances between 0-1"),
  make_option(c("-d", "--dist"), action = "store", default = "euclidean",
              type = "character",
              help = "Distance metric to be used, all methods found in wordspace::dist.matrix"),
  make_option(c("--dslogical"), action = "store_true", type = "logical",
              default = FALSE,
              help = "Deepsplit logical flag"),
  make_option(c("--logical"), action = "store", default = TRUE,
              type = "logical",
              help = "Control cluster resolution, true will produce more clusters"),
  make_option(c("--dsnumeric"), action = "store_true", type = "logical",
              default = FALSE,
              help = "Deepsplit numeric flag"),
  make_option(c("--numeric"), action = "store", default = 0,
              type = "numeric",
              help = "control cluster resolution between 0-4, higher number will produce more clusters"),
  make_option(c("-p", "--ptsize"), action = "store", default = 2.75,
              type = "numeric",
              help = "Size of points on cluster plot")
)

### Helpers

# transform S4 to list for easier slot access
S4toList <- function(obj) {
  slot_names <- slotNames(obj)
  structure(lapply(slot_names, slot, object = obj), names = slot_names)
}

### Main

# parse args
opt <- parse_args(OptionParser(option_list = option_list))

# validate the input path up front so the failure message is actionable
if (is.null(opt$file) || is.na(opt$file)) {
  stop("No input provided: pass the STlist .rds path with -f/--file",
       call. = FALSE)
}
if (!file.exists(opt$file)) {
  stop("Input file not found: ", opt$file, call. = FALSE)
}

# depending on data input type, pick the sample metadata column.
# Resolved before clustering so a missing platform flag fails fast instead of
# surfacing as an undefined variable after the expensive step.
if (opt$visium) {
  sample_col <- "sample_id"
} else if (opt$raw) {
  sample_col <- "sampleID"
} else if (opt$cosmx) {
  sample_col <- "sample_name"
} else {
  stop("One of --visium, --raw or --cosmx must be supplied to identify ",
       "the sample name column", call. = FALSE)
}

# Resolve deepSplit: --dslogical / --dsnumeric select WHICH kind of value is
# used, --logical / --numeric carry the value itself. With neither selector
# the value is FALSE.
if (opt$dslogical && opt$dsnumeric) {
  warning("Both --dslogical and --dsnumeric were given; using --logical",
          call. = FALSE)
}
if (opt$dslogical) {
  if (is.na(opt$logical)) {
    stop("--logical must be TRUE or FALSE", call. = FALSE)
  }
  deepsplit <- opt$logical
} else if (opt$dsnumeric) {
  if (is.na(opt$numeric) || opt$numeric < 0 || opt$numeric > 4) {
    stop("--numeric must be a number between 0 and 4", call. = FALSE)
  }
  deepsplit <- opt$numeric
} else {
  deepsplit <- FALSE
}

# parse the optional comma-separated sample subset; NULL means "all samples"
sample_subset <- NULL
if (!is.null(opt$samples)) {
  parsed <- trimws(strsplit(opt$samples, ",", fixed = TRUE)[[1]])
  parsed <- parsed[nzchar(parsed)]
  if (length(parsed) > 0) {
    sample_subset <- parsed
  }
}

# read in ST data from spatialGE preprocessing
STdata <- readRDS(opt$file)

message("Rds object successfully loaded")

# perform data clustering on transformed data
clusters <- STclust(
  x = STdata,
  samples = sample_subset,
  ws = opt$weight,
  dist_metric = opt$dist,
  deepSplit = deepsplit
)

message("Unsupervised spatially-informed clustering has been performed")

# expose the S4 slots as a plain list to reach the sample metadata
st_slots <- S4toList(STdata)

message("Transformed S4 object to list for slot name access")

sample_meta <- st_slots$sample_meta
if (!sample_col %in% names(sample_meta)) {
  stop("Column '", sample_col, "' not found in the STlist sample metadata; ",
       "check the --visium/--raw/--cosmx flag", call. = FALSE)
}
sample_names <- as.character(sample_meta[[sample_col]])

# restrict to the requested subset (metadata order is kept)
if (is.null(sample_subset)) {
  samples <- sample_names
} else {
  samples <- intersect(sample_names, sample_subset)
}

if (length(samples) == 0) {
  warning("No samples matched the sample metadata; no plots will be written",
          call. = FALSE)
}

message("Sample names identified")

# create cluster plot directory
if (!dir.exists("cluster_plots")) dir.create("cluster_plots")

# iterate through each sample, generate a plot, and save
for (s in samples) {

  plots <- STplot(
    x = clusters,
    ws = opt$weight,
    ptsize = opt$ptsize,
    deepSplit = deepsplit,
    samples = s
  )

  # NOTE(review): STplot is documented as returning a list of plots; the last
  # element is saved, which is presumably what ggsave()'s implicit
  # last_plot() picked up before - confirm against the installed spatialGE.
  if (inherits(plots, "ggplot")) {
    cluster_plot <- plots
  } else if (length(plots) > 0) {
    cluster_plot <- plots[[length(plots)]]
  } else {
    warning("STplot returned no plot for sample ", s, call. = FALSE)
    next
  }

  message("Cluster plots generated")

  # create unique plot file names based on sample name
  filename <- paste0("clustered_", s, ".png")

  # save plot to subdir; the plot is passed explicitly rather than relying on
  # ggplot2's global last_plot() state
  ggsave(
    filename = filename,
    plot = cluster_plot,
    path = "./cluster_plots",
    bg = "white",
    width = 12
  )
}

message("Cluster plots saved")