Mercurial > repos > goeckslab > clustering_spatialge
diff spatialGE_multiple_input.R @ 0:555ca19d07e6 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author | goeckslab |
---|---|
date | Wed, 13 Aug 2025 19:32:19 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spatialGE_multiple_input.R Wed Aug 13 19:32:19 2025 +0000 @@ -0,0 +1,197 @@ +# ------------- +# Data Cleaning +# ------------- + +# MULTIPLE INPUT SCRIPT: +# Accepts multiple sample input for raw data and cosmx +# Accepts single and multiple sample input for Visium, due to spatial subdirectory + +# Purpose: +# Transform data into STlist, perform QC, log transform + +library(spatialGE) +library(optparse) +library(ggplot2) +library(tools) +library(fs) + + +### Command Line Options + +option_list <- list( + make_option(c("-c", "--counts"), action = "store", default = NA, type = "character", + help = "Path to count data file(s)"), + make_option(c("-s", "--spots"), action = "store", default = NULL, type = "character", + help = "Path to cell coordinates file(s), not required for Visium or Xenium"), + make_option(c("-m", "--meta"), action = "store", default = NA, type = "character", + help = "Path to metadata file"), + make_option(c("-n", "--names"), action = "store", default = NA, type = "character", + help = "Specific sample names"), + make_option(c("--plotmeta"), action = "store", default = NULL, type = "character", + help = "Plots counts per cell or genes per cell"), + make_option(c("--samples"), action = "store", default = NULL, type = "character", + help = "Samples to include in plots, defaults to all"), + make_option(c("--sminreads"), action = "store", default = 0, type = "integer", + help = "Minimum number of total reads for a spot to be retained"), + make_option(c("--smaxreads"), action = "store", default = NULL, type = "integer", + help = "Maximum number of total reads for a spot to be retained"), + make_option(c("--smingenes"), action = "store", default = 0, type = "integer", + help = "Minimum number of non-zero counts for a spot to be retained"), + make_option(c("--smaxgenes"), action = "store", default = NULL, type = "integer", + help = "Maximum number of non-zero counts for a spot to be retained"), + make_option(c("--gminreads"), action = "store", default = 0, type = "integer", + help = "Minimum number of total reads for a gene to be retained"), + make_option(c("--gmaxreads"), action = "store", default = NULL, type = "integer", + help = "Maximum number of total reads for a gene to be retained"), + make_option(c("--gminspots"), action = "store", default = 0, type = "integer", + help = "Minimum number of spots with non-zero counts for a gene to be retained"), + make_option(c("--gmaxspots"), action = "store", default = NULL, type = "integer", + help = "Maximum number of spots with non-zero counts for a gene to be retained"), + make_option(c("--distplot"), action = "store_true", type = "logical", default = FALSE, + help = "If set, generate unfiltered distribution plot"), + make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, + help = "If set, apply filtering before transformation"), + make_option(c("--filterplot"), action = "store_true", type = "logical", default = FALSE, + help = "If set, generate filtered distribution plot"), + make_option(c("-t", "--type"), action = "store_true", default = "log", type = "character", + help = "Type of transformation to apply: log or sct") +) + +### Main + +#parse args +opt <- parse_args(OptionParser(option_list = option_list)) + +#check if metadata or sample names were provided +#need metadata for raw and visium data, sample names for cosmx +if (!is.na(opt$meta) && is.na(opt$names)) { + samples_input <- opt$meta +} else if (is.na(opt$meta) && !is.na(opt$names)) { + samples_input <- unlist(strsplit(opt$names, split = ",")) +} else { + stop("Please only specify either --metadata OR --names") +} + +#create temporary directory to hold count data +count_dir <- tempdir() +unlink(count_dir, recursive = TRUE) +dir.create(count_dir) + +#create temporary directory to hold coord data +coord_dir <- tempdir() +unlink(coord_dir, recursive = TRUE) +dir.create(coord_dir) + +#if spotcoords were provided, load in count and coord data +#if spotcoords were not provided (visium input), only load the count file +if (!is.null(opt$spots)) { + coord_dir <- as.character(opt$spots) + coord_file <- fs::dir_ls(coord_dir) + count_dir <- as.character(opt$counts) + count_file <- fs::dir_ls(count_dir) +} else { + count_dir <- as.character(opt$counts) + count_file <- fs::dir_ls(count_dir) +} + +#if spotcoords are present, include in stlist input +if (!is.null(opt$spots)) { + st_data <- STlist(rnacounts = count_file, spotcoords = coord_file, samples = samples_input) +} else { + st_data <- STlist(rnacounts = count_file, samples = samples_input) +} + +message("STList has been created") + +#distribution plot + +#create distribution plot if flag is included +if (opt$distplot) { + + #if sample names are provided, separate the character string + if (!is.null(opt$samples) && opt$samples != "") { + sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]] + } else { + sample_names <- NULL + } + + #generate distribution plot + dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1) + + #create unique plot file names based on sample name + base_input <- basename(opt$counts) + base_name <- file_path_sans_ext(base_input) + + filename <- paste0("unfiltered_", base_name, ".png") + + #create output directory for distribution plots + dir.create("./unfiltered_distribution_plots", showWarnings = FALSE, recursive = TRUE) + + #save plot to subdir + ggsave( + path = "./unfiltered_distribution_plots", + filename = filename, + bg = "white", + width = 12 + ) + + message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots") +} + +#spot/cell filtering + +#filter spots if flag is included +if (opt$filter) { + + #filter out spots or genes based on minimum and maximum counts + st_data <- filter_data(x = st_data, spot_minreads = opt$sminreads, spot_maxreads = opt$smaxreads, spot_mingenes = opt$smingenes, + spot_maxgenes = opt$smaxgenes, gene_minreads = opt$gminreads) + message("Data filtering completed & saved to STlist") +} + +#filtered data plot + +#create filtered distribution plot if flag is included +if (opt$filterplot) { + + #if sample names are provided, separate the character string + if (!is.null(opt$samples) && opt$samples != "") { + sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]] + } else { + sample_names <- NULL + } + + #generate filtered distribution plot + filter_dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1) + + #create unique plot file names based on sample name + base_input_2 <- basename(opt$counts) + base_name_2 <- file_path_sans_ext(base_input_2) + + filename_2 <- paste0("filtered_", base_name_2, ".png") + + #create output directory for cluster plots + dir.create("./filtered_distribution_plots", showWarnings = FALSE, recursive = TRUE) + + #save plot to subdir + ggsave( + path = "./filtered_distribution_plots", + filename = filename_2, + bg = "white", + width = 12 + ) + + message("Filtered distribution plot saved to ./filtered_distribution_plots") +} + +#transform data, defaults to log transformation + +STobj <- transform_data(x = st_data, method = opt$type) + +message("Data has been log transformed, unless otherwise specified") + +#save transformed data to .rds + +saveRDS(STobj, file = "STobj.rds") + +message("STlist has been saved as .rds file")