view spatialGE_single_input.R @ 0:555ca19d07e6 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:19 +0000
parents
children
line wrap: on
line source

# -------------
# Data Cleaning
# -------------

# SINGLE INPUT SCRIPT:
# Accepts single raw data sample and single cosmx sample
# Does not accept single visium sample due to spatial subdirectory

# Purpose:
# Transform data into STlist, perform QC, log transform

library(spatialGE)
library(optparse)
library(ggplot2)
library(tools)
library(fs)


### Command line options

option_list <- list(
  make_option(c("-c", "--counts"), action = "store", default = NA, type = "character",
              help = "Path to count data file(s)"),
  make_option(c("-s", "--spots"), action = "store", default = NULL, type = "character",
              help = "Path to cell coordinates file(s), not required for Visium or Xenium"),
  make_option(c("-m", "--meta"), action = "store", default = NA, type = "character",
              help = "Path to metadata file"),
  make_option(c("-n", "--names"), action = "store", default = NA, type = "character",
              help = "Specific sample names"),
  make_option(c("--plotmeta"), action = "store", default = NULL, type = "character",
              help = "Plots counts per cell or genes per cell"),
  make_option(c("--samples"), action = "store", default = NULL, type = "character",
              help = "Samples to include in plots, defaults to all"),
  make_option(c("--sminreads"), action = "store", default = 0, type = "integer",
              help = "Minimum number of total reads for a spot to be retained"),
  make_option(c("--smaxreads"), action = "store", default = NULL, type = "integer",
              help = "Maximum number of total reads for a spot to be retained"),
  make_option(c("--smingenes"), action = "store", default = 0, type = "integer",
              help = "Minimum number of non-zero counts for a spot to be retained"),
  make_option(c("--smaxgenes"), action = "store", default = NULL, type = "integer",
              help = "Maximum number of non-zero counts for a spot to be retained"),
  make_option(c("--gminreads"), action = "store", default = 0, type = "integer",
              help = "Minimum number of total reads for a gene to be retained"),
  make_option(c("--gmaxreads"), action = "store", default = NULL, type = "integer",
              help = "Maximum number of total reads for a gene to be retained"),
  make_option(c("--gminspots"), action = "store", default = 0, type = "integer",
              help = "Minimum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--gmaxspots"), action = "store", default = NULL, type = "integer",
              help = "Maximum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--distplot"), action = "store_true", type = "logical", default = FALSE,
              help = "If set, generate unfiltered distribution plot"),
  make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE,
              help = "If set, apply filtering before transformation"),
  make_option(c("--filterplot"), action = "store_true", type = "logical", default = FALSE,
              help = "If set, generate filtered distribution plot"),
  make_option(c("-t", "--type"), action = "store_true", default = "log", type = "character",
              help = "Type of transformation to apply: log or sct")
)

### Main

#parse args
opt <- parse_args(OptionParser(option_list = option_list))

#check if metadata or sample names were provided
#need metadata for raw, sample names for cosmx
if (!is.na(opt$meta) && is.na(opt$names)) {
  samples_input <- opt$meta
} else if (is.na(opt$meta) && !is.na(opt$names)) {
  samples_input <- opt$names
} else {
  stop("Please only specify either --metadata OR --names")
}

#create STlist with single input flags
st_data <- STlist(rnacounts = opt$counts, spotcoords = opt$spots, samples = samples_input)

message("STlist has been created")

#distribution plot

#create distribution plot if flag is included
if (opt$distplot) {

  #if sample names are provided, separate the character string
  #probably don't need strsplit, keeping for safety
  if (!is.null(opt$samples) && opt$samples != "") {
    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
  } else {
    sample_names <- NULL
  }

  #generate distribution plot
  dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1)

  #create unique plot file names based on sample name
  base_input <- basename(opt$counts)
  base_name <- file_path_sans_ext(base_input)

  filename <- paste0("unfiltered_", base_name, ".png")

  #create output directory for cluster plots
  dir.create("./unfiltered_distribution_plots", showWarnings = FALSE, recursive = TRUE)

  #save plot to subdir
  ggsave(
    path = "./unfiltered_distribution_plots",
    filename = filename,
    bg = "white",
    width = 12
  )

  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
}

#spot/cell filtering

#filter spots if flag is included
if (opt$filter) {

  #filter out spots or genes based on minimum and maximum counts
  st_data <- filter_data(x = st_data, spot_minreads = opt$sminreads, spot_maxreads = opt$smaxreads, spot_mingenes = opt$smingenes,
                         spot_maxgenes = opt$smaxgenes, gene_minreads = opt$gminreads)

  message("Data filtering completed & saved to STlist")
}

#filtered data plot

#create filtered distribution plot if flag is included
if (opt$filterplot) {

  #if sample names are provided, separate the character string
  #probably don't need strsplit, keeping for safety
  if (!is.null(opt$samples) && opt$samples != "") {
    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
  } else {
    sample_names <- NULL
  }

  #generate filtered distribution plot
  filter_dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1)

  #create unique plot file names based on sample name
  base_input_2 <- basename(opt$counts)
  base_name_2 <- file_path_sans_ext(base_input_2)

  filename_2 <- paste0("filtered_", base_name_2, ".png")

  #create output directory for cluster plots
  dir.create("./filtered_distribution_plots", showWarnings = FALSE, recursive = TRUE)

  #save plot to subdir
  ggsave(
    path = "./filtered_distribution_plots",
    filename = filename_2,
    bg = "white",
    width = 12
  )

  message("Filtered distribution plot saved to ./filtered_distribution_plots")
}

#transform data, defaults to log transformation

STobj <- transform_data(x = st_data, method = opt$type)

message("Data has been log transformed, unless otherwise specified")

#save transformed data to .rds

saveRDS(STobj, file = "STobj.rds")

message("STlist has been saved as .rds file")