# Source: spatialGE_clust.R @ 0:555ca19d07e6 (draft, default, tip)
#
# planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
# author: goeckslab
# date: Wed, 13 Aug 2025 19:32:19 +0000

# ------------------------------
# Spatially-Informed Clustering
# ------------------------------

# Purpose:
# Group cells into tissue domains using hierarchical clustering on a weighted similarity matrix

library(spatialGE)
library(optparse)
library(ggplot2)
library(tools)
library(dplyr)

### Command Line Options


# Command-line specification for the clustering step. The two helpers below
# exist only inside local() so the repeated make_option() arguments are
# written once and nothing but `option_list` is left in the global scope.
option_list <- local({
  # boolean switch: FALSE unless the flag is present on the command line
  flag <- function(opt_str, help) {
    make_option(opt_str, action = "store_true", type = "logical",
                default = FALSE, help = help)
  }

  # option that stores the value supplied after it
  value <- function(opt_str, type, default, help) {
    make_option(opt_str, action = "store", type = type,
                default = default, help = help)
  }

  list(
    # input object
    value(c("-f", "--file"), "character", NA,
          "STlist .rds file path from preprocessing"),

    # platform of the input data
    flag("--visium", "Flag for Visium data"),
    flag("--raw", "Flag for raw data"),
    flag("--cosmx", "Flag for CosMX data"),

    # clustering parameters
    value(c("-s", "--samples"), "character", NULL,
          "Sample subset to perform clustering on"),
    value(c("-w", "--weight"), "numeric", 0.025,
          "Weight to be applied to spatial distances between 0-1"),
    value(c("-d", "--dist"), "character", "euclidean",
          "Distance metric to be used, all methods found in wordspace::dist.matrix"),

    # deepSplit, given either as a logical or as a number
    flag("--dslogical", "Deepsplit logical flag"),
    value("--logical", "logical", TRUE,
          "Control cluster resolution, true will produce more clusters"),
    flag("--dsnumeric", "Deepsplit numeric flag"),
    value("--numeric", "numeric", 0,
          "control cluster resolution between 0-4, higher number will produce more clusters"),

    # plotting
    value(c("-p", "--ptsize"), "numeric", 2.75,
          "Size of points on cluster plot")
  )
})

### Main

# parse args
opt <- parse_args(OptionParser(option_list = option_list))

# --file has no usable default (NA), so check it here and fail with a clear
# message instead of letting readRDS() raise an opaque error
if (is.null(opt$file) || is.na(opt$file)) {
  stop("No input provided: pass the STlist .rds file with -f/--file",
       call. = FALSE)
}
if (!file.exists(opt$file)) {
  stop("Input file not found: ", opt$file, call. = FALSE)
}

# read in ST data from spatialGE preprocessing
STdata <- readRDS(opt$file)

message("Rds object successfully loaded")

# Resolve the deepSplit value handed to STclust()/STplot().
# --dslogical / --dsnumeric are switches that select which kind of value to
# use; --logical / --numeric carry the value itself. When neither switch is
# given, fall back to FALSE so `deepsplit` is always defined.
if (opt$dslogical) {
  deepsplit <- opt$logical
} else if (opt$dsnumeric) {
  deepsplit <- opt$numeric
} else {
  deepsplit <- FALSE
}

# Turn the comma-separated --samples string into a character vector.
# Surrounding whitespace is stripped and empty entries are dropped, so
# "a, b" and "a,b," both yield c("a", "b"). If nothing usable remains the
# option is cleared (NULL), i.e. treated as if --samples was not given.
if (!is.null(opt$samples)) {
  requested <- trimws(strsplit(opt$samples, ",", fixed = TRUE)[[1]])
  requested <- requested[nzchar(requested)]
  opt$samples <- if (length(requested) > 0) requested else NULL
}

# run STclust on the loaded object with the weight, distance metric,
# sample subset and deepSplit setting resolved above
clusters <- STclust(
  x = STdata,
  samples = opt$samples,
  ws = opt$weight,
  dist_metric = opt$dist,
  deepSplit = deepsplit
)

message("Unsupervised spatially-informed clustering has been performed")

# Copy every slot of an S4 object into a plain list named after the slots,
# so slot contents can be reached with `$` / `[[`.
#   obj: an S4 object
#   returns: a list with one element per slot, in slotNames() order
S4toList <- function(obj) {
  fields <- slotNames(obj)
  contents <- lapply(fields, function(field) slot(obj, field))
  setNames(contents, fields)
}

# replace the S4 object with its list form; from here on slots are read
# with `$`
STdata <- S4toList(STdata)

# the conversion goes from S4 to a list, so the status message says so
message("Transformed S4 object to list for slot name access")

# depending on data input type, pull correct sample column
# (the first flag that is set wins: --visium, then --raw, then --cosmx)
if (opt$visium) {
  sample_col <- "sample_id"
} else if (opt$raw) {
  sample_col <- "sampleID"
} else if (opt$cosmx) {
  sample_col <- "sample_name"
} else {
  # without a platform flag there is no column to read sample names from;
  # stop here rather than failing later on an undefined `sample_col`
  stop("No data type flag given: pass one of --visium, --raw or --cosmx",
       call. = FALSE)
}


# sample names recorded in the object's sample metadata
available_samples <- pull(STdata$sample_meta, .data[[sample_col]])

# keep only the requested samples when a subset was given, otherwise all
samples <- if (is.null(opt$samples)) {
  available_samples
} else {
  intersect(available_samples, opt$samples)
}


message("Sample names identified")

# create cluster plot directory
plot_dir <- "cluster_plots"
if (!dir.exists(plot_dir)) dir.create(plot_dir)

# iterate through each sample, generate a plot, and save
for (s in samples) {

  sample_plots <- STplot(
    x = clusters,
    ws = opt$weight,
    ptsize = opt$ptsize,
    deepSplit = deepsplit,
    samples = s
  )

  # normalise to a list so a bare ggplot object is handled the same way
  if (inherits(sample_plots, "ggplot")) {
    sample_plots <- list(sample_plots)
  }

  # nothing to save for this sample: skip it instead of letting ggsave()
  # fall back to last_plot(), which would write the previous sample's
  # figure under this sample's file name
  if (length(sample_plots) == 0) {
    warning("No cluster plot was produced for sample ", s, "; skipping",
            call. = FALSE)
    next
  }

  message("Cluster plots generated")

  # create unique plot file names based on sample name
  filename <- paste0("clustered_", s, ".png")

  # save plot to subdir; the plot is passed explicitly rather than relying
  # on ggplot2's global last_plot() state.
  # NOTE(review): when several plots come back, the last one is saved,
  # presumably the one last_plot() would have picked - confirm against STplot
  ggsave(
    filename = filename,
    plot = sample_plots[[length(sample_plots)]],
    path = plot_dir,
    bg = "white",
    width = 12
  )
}

message("Cluster plots saved")