Mercurial > repos > goeckslab > cleaning_spatialge
diff spatialGE_single_input.R @ 0:c84663d92248 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author | goeckslab |
---|---|
date | Wed, 13 Aug 2025 19:32:05 +0000 |
parents | |
children |
line wrap: on
line diff
# -------------
# Data Cleaning
# -------------

# SINGLE INPUT SCRIPT:
# Accepts single raw data sample and single cosmx sample
# Does not accept single visium sample due to spatial subdirectory

# Purpose:
# Transform data into STlist, perform QC, log transform

library(spatialGE)
library(optparse)
library(ggplot2)
library(tools)
library(fs)


### Command line options

option_list <- list(
  make_option(c("-c", "--counts"), action = "store", default = NA,
              type = "character",
              help = "Path to count data file(s)"),
  make_option(c("-s", "--spots"), action = "store", default = NULL,
              type = "character",
              help = "Path to cell coordinates file(s), not required for Visium or Xenium"),
  make_option(c("-m", "--meta"), action = "store", default = NA,
              type = "character",
              help = "Path to metadata file"),
  make_option(c("-n", "--names"), action = "store", default = NA,
              type = "character",
              help = "Specific sample names"),
  make_option(c("--plotmeta"), action = "store", default = NULL,
              type = "character",
              help = "Plots counts per cell or genes per cell"),
  make_option(c("--samples"), action = "store", default = NULL,
              type = "character",
              help = "Samples to include in plots, defaults to all"),
  make_option(c("--sminreads"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of total reads for a spot to be retained"),
  make_option(c("--smaxreads"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of total reads for a spot to be retained"),
  make_option(c("--smingenes"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of non-zero counts for a spot to be retained"),
  make_option(c("--smaxgenes"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of non-zero counts for a spot to be retained"),
  make_option(c("--gminreads"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of total reads for a gene to be retained"),
  make_option(c("--gmaxreads"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of total reads for a gene to be retained"),
  make_option(c("--gminspots"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--gmaxspots"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--distplot"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, generate unfiltered distribution plot"),
  make_option(c("--filter"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, apply filtering before transformation"),
  make_option(c("--filterplot"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, generate filtered distribution plot"),
  # "store" (not "store_true"): --type takes a value such as "log" or "sct".
  make_option(c("-t", "--type"), action = "store", default = "log",
              type = "character",
              help = "Type of transformation to apply: log or sct")
)


### Helpers

# Split the comma-separated --samples value into a character vector.
# Returns NULL when the option was omitted or given as an empty string.
split_sample_names <- function(samples) {
  if (is.null(samples) || samples == "") {
    return(NULL)
  }
  strsplit(samples, split = ",", fixed = TRUE)[[1]]
}

# Build the distribution plot for `st_data` and save it as
# "<prefix><counts file name without extension>.png" inside `out_dir`.
#   st_data: STlist to plot
#   opt:     parsed command line options (uses plotmeta, samples, counts)
#   prefix:  file name prefix, e.g. "unfiltered_"
#   out_dir: output directory, created if it does not exist
# Invisibly returns the path of the saved file.
save_distribution_plot <- function(st_data, opt, prefix, out_dir) {
  distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = split_sample_names(opt$samples),
    ptsize = 1
  )

  # create unique plot file name based on the counts file name
  filename <- paste0(prefix, file_path_sans_ext(basename(opt$counts)), ".png")

  # create output directory for distribution plots
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

  # ggsave() is called without `plot`, so it saves ggplot2's last plot,
  # i.e. the one produced by the distribution_plots() call above.
  ggsave(
    path = out_dir,
    filename = filename,
    bg = "white",
    width = 12
  )

  invisible(file.path(out_dir, filename))
}


### Main

# parse args
opt <- parse_args(OptionParser(option_list = option_list))

# the counts file is mandatory; fail early with a clear message
if (is.na(opt$counts)) {
  stop("Please specify --counts", call. = FALSE)
}

# check if metadata or sample names were provided
# need metadata for raw, sample names for cosmx
has_meta <- !is.na(opt$meta)
has_names <- !is.na(opt$names)

# exactly one of the two must be given (both or neither is an error)
if (has_meta == has_names) {
  stop("Please specify exactly one of --meta or --names", call. = FALSE)
}
samples_input <- if (has_meta) opt$meta else opt$names

# create STlist with single input flags
st_data <- STlist(
  rnacounts = opt$counts,
  spotcoords = opt$spots,
  samples = samples_input
)

message("STlist has been created")

# unfiltered distribution plot, only if flag is included
if (opt$distplot) {
  save_distribution_plot(
    st_data, opt,
    prefix = "unfiltered_",
    out_dir = "./unfiltered_distribution_plots"
  )

  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
}

# spot/cell and gene filtering, only if flag is included
if (opt$filter) {

  # filter out spots or genes based on minimum and maximum counts;
  # every threshold exposed on the command line is forwarded
  st_data <- filter_data(
    x = st_data,
    spot_minreads = opt$sminreads,
    spot_maxreads = opt$smaxreads,
    spot_mingenes = opt$smingenes,
    spot_maxgenes = opt$smaxgenes,
    gene_minreads = opt$gminreads,
    gene_maxreads = opt$gmaxreads,
    gene_minspots = opt$gminspots,
    gene_maxspots = opt$gmaxspots
  )

  message("Data filtering completed & saved to STlist")
}

# filtered distribution plot, only if flag is included
if (opt$filterplot) {
  save_distribution_plot(
    st_data, opt,
    prefix = "filtered_",
    out_dir = "./filtered_distribution_plots"
  )

  message("Filtered distribution plot saved to ./filtered_distribution_plots")
}

# transform data, defaults to log transformation
STobj <- transform_data(x = st_data, method = opt$type)

message("Data has been log transformed, unless otherwise specified")

# save transformed data to .rds
saveRDS(STobj, file = "STobj.rds")

message("STlist has been saved as .rds file")