diff spatialGE_single_input.R @ 0:c84663d92248 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:05 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spatialGE_single_input.R	Wed Aug 13 19:32:05 2025 +0000
@@ -0,0 +1,174 @@
+# -------------
+# Data Cleaning
+# -------------
+
+# SINGLE INPUT SCRIPT:
+# Accepts single raw data sample and single cosmx sample
+# Does not accept single visium sample due to spatial subdirectory
+
+# Purpose:
+# Transform data into STlist, perform QC, log transform
+
+library(spatialGE)
+library(optparse)
+library(ggplot2)
+library(tools)
+library(fs)
+
+
+### Command line options
+
+option_list <- list(
+  make_option(c("-c", "--counts"), action = "store", default = NA, type = "character",
+              help = "Path to count data file(s)"),
+  make_option(c("-s", "--spots"), action = "store", default = NULL, type = "character",
+              help = "Path to cell coordinates file(s), not required for Visium or Xenium"),
+  make_option(c("-m", "--meta"), action = "store", default = NA, type = "character",
+              help = "Path to metadata file"),
+  make_option(c("-n", "--names"), action = "store", default = NA, type = "character",
+              help = "Specific sample names"),
+  make_option(c("--plotmeta"), action = "store", default = NULL, type = "character",
+              help = "Plots counts per cell or genes per cell"),
+  make_option(c("--samples"), action = "store", default = NULL, type = "character",
+              help = "Samples to include in plots, defaults to all"),
+  make_option(c("--sminreads"), action = "store", default = 0, type = "integer",
+              help = "Minimum number of total reads for a spot to be retained"),
+  make_option(c("--smaxreads"), action = "store", default = NULL, type = "integer",
+              help = "Maximum number of total reads for a spot to be retained"),
+  make_option(c("--smingenes"), action = "store", default = 0, type = "integer",
+              help = "Minimum number of non-zero counts for a spot to be retained"),
+  make_option(c("--smaxgenes"), action = "store", default = NULL, type = "integer",
+              help = "Maximum number of non-zero counts for a spot to be retained"),
+  make_option(c("--gminreads"), action = "store", default = 0, type = "integer",
+              help = "Minimum number of total reads for a gene to be retained"),
+  make_option(c("--gmaxreads"), action = "store", default = NULL, type = "integer",
+              help = "Maximum number of total reads for a gene to be retained"),
+  make_option(c("--gminspots"), action = "store", default = 0, type = "integer",
+              help = "Minimum number of spots with non-zero counts for a gene to be retained"),
+  make_option(c("--gmaxspots"), action = "store", default = NULL, type = "integer",
+              help = "Maximum number of spots with non-zero counts for a gene to be retained"),
+  make_option(c("--distplot"), action = "store_true", type = "logical", default = FALSE,
+              help = "If set, generate unfiltered distribution plot"),
+  make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE,
+              help = "If set, apply filtering before transformation"),
+  make_option(c("--filterplot"), action = "store_true", type = "logical", default = FALSE,
+              help = "If set, generate filtered distribution plot"),
+  make_option(c("-t", "--type"), action = "store_true", default = "log", type = "character",
+              help = "Type of transformation to apply: log or sct")
+)
+
+### Main
+
+#parse args
+opt <- parse_args(OptionParser(option_list = option_list))
+
+#check if metadata or sample names were provided
+#need metadata for raw, sample names for cosmx
+if (!is.na(opt$meta) && is.na(opt$names)) {
+  samples_input <- opt$meta
+} else if (is.na(opt$meta) && !is.na(opt$names)) {
+  samples_input <- opt$names
+} else {
+  stop("Please only specify either --metadata OR --names")
+}
+
+#create STlist with single input flags
+st_data <- STlist(rnacounts = opt$counts, spotcoords = opt$spots, samples = samples_input)
+
+message("STlist has been created")
+
+#distribution plot
+
+#create distribution plot if flag is included
+if (opt$distplot) {
+
+  #if sample names are provided, separate the character string
+  #probably don't need strsplit, keeping for safety
+  if (!is.null(opt$samples) && opt$samples != "") {
+    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
+  } else {
+    sample_names <- NULL
+  }
+
+  #generate distribution plot
+  dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1)
+
+  #create unique plot file names based on sample name
+  base_input <- basename(opt$counts)
+  base_name <- file_path_sans_ext(base_input)
+
+  filename <- paste0("unfiltered_", base_name, ".png")
+
+  #create output directory for cluster plots
+  dir.create("./unfiltered_distribution_plots", showWarnings = FALSE, recursive = TRUE)
+
+  #save plot to subdir
+  ggsave(
+    path = "./unfiltered_distribution_plots",
+    filename = filename,
+    bg = "white",
+    width = 12
+  )
+
+  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
+}
+
+#spot/cell filtering
+
+#filter spots if flag is included
+if (opt$filter) {
+
+  #filter out spots or genes based on minimum and maximum counts
+  st_data <- filter_data(x = st_data, spot_minreads = opt$sminreads, spot_maxreads = opt$smaxreads, spot_mingenes = opt$smingenes,
+                         spot_maxgenes = opt$smaxgenes, gene_minreads = opt$gminreads)
+
+  message("Data filtering completed & saved to STlist")
+}
+
+#filtered data plot
+
+#create filtered distribution plot if flag is included
+if (opt$filterplot) {
+
+  #if sample names are provided, separate the character string
+  #probably don't need strsplit, keeping for safety
+  if (!is.null(opt$samples) && opt$samples != "") {
+    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
+  } else {
+    sample_names <- NULL
+  }
+
+  #generate filtered distribution plot
+  filter_dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1)
+
+  #create unique plot file names based on sample name
+  base_input_2 <- basename(opt$counts)
+  base_name_2 <- file_path_sans_ext(base_input_2)
+
+  filename_2 <- paste0("filtered_", base_name_2, ".png")
+
+  #create output directory for cluster plots
+  dir.create("./filtered_distribution_plots", showWarnings = FALSE, recursive = TRUE)
+
+  #save plot to subdir
+  ggsave(
+    path = "./filtered_distribution_plots",
+    filename = filename_2,
+    bg = "white",
+    width = 12
+  )
+
+  message("Filtered distribution plot saved to ./filtered_distribution_plots")
+}
+
+#transform data, defaults to log transformation
+
+STobj <- transform_data(x = st_data, method = opt$type)
+
+message("Data has been log transformed, unless otherwise specified")
+
+#save transformed data to .rds
+
+saveRDS(STobj, file = "STobj.rds")
+
+message("STlist has been saved as .rds file")