diff spatialGE_clust.R @ 0:555ca19d07e6 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:19 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spatialGE_clust.R	Wed Aug 13 19:32:19 2025 +0000
@@ -0,0 +1,125 @@
+# ------------------------------
+# Spatially-Informed Clustering
+# ------------------------------
+
+# Purpose:
+# Group cells into tissue domains using hierarchical clustering on a weighted similarity matrix
+
+library(spatialGE)
+library(optparse)
+library(ggplot2)
+library(tools)
+library(dplyr)
+
+### Command Line Options
+# Each option is declared with optparse::make_option(). The store_true flags pick a data
+# type or deepSplit mode; the remaining options carry the values used further down.
+option_list <- list(
+  make_option(c("-f", "--file"), type = "character", default = NA, action = "store",
+              help = "STlist .rds file path from preprocessing"),
+  make_option("--visium", type = "logical", default = FALSE, action = "store_true",
+              help = "Flag for Visium data"),
+  make_option("--raw", type = "logical", default = FALSE, action = "store_true",
+              help = "Flag for raw data"),
+  make_option("--cosmx", type = "logical", default = FALSE, action = "store_true",
+              help = "Flag for CosMX data"),
+  make_option(c("-s", "--samples"), type = "character", default = NULL, action = "store",
+              help = "Sample subset to perform clustering on"),
+  make_option(c("-w", "--weight"), type = "numeric", default = 0.025, action = "store",
+              help = "Weight to be applied to spatial distances between 0-1"),
+  make_option(c("-d", "--dist"), type = "character", default = "euclidean", action = "store",
+              help = "Distance metric to be used, all methods found in wordspace::dist.matrix"),
+  make_option("--dslogical", type = "logical", default = FALSE, action = "store_true",
+              help = "Deepsplit logical flag"),
+  make_option("--logical", type = "logical", default = TRUE, action = "store",
+              help = "Control cluster resolution, true will produce more clusters"),
+  make_option("--dsnumeric", type = "logical", default = FALSE, action = "store_true",
+              help = "Deepsplit numeric flag"),
+  make_option("--numeric", type = "numeric", default = 0, action = "store",
+              help = "control cluster resolution between 0-4, higher number will produce more clusters"),
+  make_option(c("-p", "--ptsize"), type = "numeric", default = 2.75, action = "store",
+              help = "Size of points on cluster plot")
+)
+
+### Main
+# parse command-line arguments into a named list
+opt <- parse_args(OptionParser(option_list = option_list))
+# --file defaults to NA: fail clearly instead of with an opaque readRDS() error
+if (is.na(opt$file) || !file.exists(opt$file)) {
+  stop("An existing STlist .rds file must be supplied via --file", call. = FALSE)
+}
+STdata <- readRDS(opt$file) # object saved by the spatialGE preprocessing step
+message("Rds object successfully loaded")
+
+# deepSplit: a mode flag picks which value option applies (--dslogical first); FALSE if none
+deepsplit <- FALSE
+if (opt$dslogical) {
+  deepsplit <- opt$logical
+} else if (opt$dsnumeric) {
+  deepsplit <- opt$numeric
+}
+# --samples is comma-separated: split it, trim spaces, drop empty entries (NULL stays NULL)
+if (!is.null(opt$samples)) {
+  requested <- trimws(strsplit(opt$samples, ",", fixed = TRUE)[[1]])
+  opt$samples <- requested[nzchar(requested)]
+}
+# perform data clustering on transformed data
+clusters <- STclust(
+  x = STdata, samples = opt$samples, ws = opt$weight, dist_metric = opt$dist, deepSplit = deepsplit
+)
+message("Unsupervised spatially-informed clustering has been performed")
+
+# Convert an S4 object into a named list holding one element per slot
+S4toList <- function(obj) {
+  slots <- slotNames(obj)
+  stats::setNames(lapply(slots, function(nm) slot(obj, nm)), slots)
+}
+
+# replace the loaded S4 object with a plain named list so its slots can be read with `$`
+STdata <- S4toList(STdata)
+# only this copy is flattened; `clusters` is left as returned by STclust()
+message("Transformed S4 object to list for slot name access")
+# depending on data input type, pull correct sample column
+if (opt$visium) {
+  sample_col <- "sample_id"
+} else if (opt$raw) {
+  sample_col <- "sampleID"
+} else if (opt$cosmx) {
+  sample_col <- "sample_name"
+} else {
+  stop("One of --visium, --raw or --cosmx must be supplied", call. = FALSE)
+}
+# sample names come from the metadata column chosen above; narrow to --samples if given
+if (!sample_col %in% names(STdata$sample_meta)) {
+  stop("Column '", sample_col, "' not found in sample metadata", call. = FALSE)
+}
+samples <- STdata$sample_meta[[sample_col]]
+if (!is.null(opt$samples)) samples <- intersect(samples, opt$samples)
+if (length(samples) == 0) warning("No samples matched; no cluster plots will be written", call. = FALSE)
+message("Sample names identified")
+
+# create cluster plot directory
+if (!dir.exists("cluster_plots")) dir.create("cluster_plots")
+
+# iterate through each sample, generate a plot, and save
+for (s in samples) {
+  # STplot() is documented to return a list of plots; wrap a bare ggplot
+  # so the result can always be indexed as a list
+  sample_plots <- STplot(
+    x = clusters, ws = opt$weight, ptsize = opt$ptsize, deepSplit = deepsplit, samples = s
+  )
+  if (inherits(sample_plots, "ggplot")) sample_plots <- list(sample_plots)
+  if (length(sample_plots) == 0) {
+    warning("No cluster plot was produced for sample ", s, call. = FALSE)
+    next
+  }
+  message("Cluster plots generated")
+  # pass the plot explicitly; without `plot =` ggsave() saves last_plot(), i.e.
+  # whichever ggplot was built most recently, which is fragile in a script
+  ggsave(
+    filename = paste0("clustered_", s, ".png"), # unique file name per sample
+    plot = sample_plots[[length(sample_plots)]],
+    path = "./cluster_plots", bg = "white", width = 12
+  )
+}
+message("Cluster plots saved")