Mercurial > repos > goeckslab > clustering_spatialge
view spatialGE_multiple_input.R @ 0:555ca19d07e6 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author | goeckslab |
---|---|
date | Wed, 13 Aug 2025 19:32:19 +0000 |
parents | |
children |
line wrap: on
line source
# -------------
# Data Cleaning
# -------------

# MULTIPLE INPUT SCRIPT:
# Accepts multiple sample input for raw data and cosmx
# Accepts single and multiple sample input for Visium, due to spatial
# subdirectory

# Purpose:
# Transform data into STlist, perform QC, transform (log by default)

library(spatialGE)
library(optparse)
library(ggplot2)
library(tools)
library(fs)

### Command Line Options

option_list <- list(
  make_option(c("-c", "--counts"),
    action = "store", default = NA, type = "character",
    help = "Path to count data file(s)"
  ),
  make_option(c("-s", "--spots"),
    action = "store", default = NULL, type = "character",
    help = "Path to cell coordinates file(s), not required for Visium or Xenium"
  ),
  make_option(c("-m", "--meta"),
    action = "store", default = NA, type = "character",
    help = "Path to metadata file"
  ),
  make_option(c("-n", "--names"),
    action = "store", default = NA, type = "character",
    help = "Specific sample names"
  ),
  make_option(c("--plotmeta"),
    action = "store", default = NULL, type = "character",
    help = "Plots counts per cell or genes per cell"
  ),
  make_option(c("--samples"),
    action = "store", default = NULL, type = "character",
    help = "Samples to include in plots, defaults to all"
  ),
  make_option(c("--sminreads"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of total reads for a spot to be retained"
  ),
  make_option(c("--smaxreads"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of total reads for a spot to be retained"
  ),
  make_option(c("--smingenes"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of non-zero counts for a spot to be retained"
  ),
  make_option(c("--smaxgenes"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of non-zero counts for a spot to be retained"
  ),
  make_option(c("--gminreads"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of total reads for a gene to be retained"
  ),
  make_option(c("--gmaxreads"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of total reads for a gene to be retained"
  ),
  make_option(c("--gminspots"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of spots with non-zero counts for a gene to be retained"
  ),
  make_option(c("--gmaxspots"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of spots with non-zero counts for a gene to be retained"
  ),
  make_option(c("--distplot"),
    action = "store_true", type = "logical", default = FALSE,
    help = "If set, generate unfiltered distribution plot"
  ),
  make_option(c("--filter"),
    action = "store_true", type = "logical", default = FALSE,
    help = "If set, apply filtering before transformation"
  ),
  make_option(c("--filterplot"),
    action = "store_true", type = "logical", default = FALSE,
    help = "If set, generate filtered distribution plot"
  ),
  # "store" (not "store_true") so that the value following -t/--type is
  # consumed as the transformation method.
  make_option(c("-t", "--type"),
    action = "store", default = "log", type = "character",
    help = "Type of transformation to apply: log or sct"
  )
)

### Helpers

# Split a comma-separated sample string into a character vector.
# Returns NULL when `samples` is NULL or the empty string, which is what is
# passed on to distribution_plots() when no subset was requested.
parse_sample_names <- function(samples) {
  if (!is.null(samples) && samples != "") {
    strsplit(samples, split = ",", fixed = TRUE)[[1]]
  } else {
    NULL
  }
}

# Generate a distribution plot for `st_data` and save it as
# "<prefix><counts basename without extension>.png" inside `out_dir`
# (created if missing). Returns the distribution_plots() result invisibly.
save_distribution_plot <- function(st_data, opt, prefix, out_dir) {
  dist_plot <- distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = parse_sample_names(opt$samples),
    ptsize = 1
  )

  # create unique plot file names based on the counts input name
  base_name <- file_path_sans_ext(basename(opt$counts))
  filename <- paste0(prefix, base_name, ".png")

  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

  # NOTE(review): no `plot` argument is given, so ggsave() writes ggplot2's
  # last_plot() -- presumably the last plot built by distribution_plots();
  # confirm this is the intended plot.
  ggsave(
    path = out_dir,
    filename = filename,
    bg = "white",
    width = 12
  )

  invisible(dist_plot)
}

### Main

# parse args
opt <- parse_args(OptionParser(option_list = option_list))

# the counts path is required by every input type
if (is.na(opt$counts)) {
  stop("Please specify the count data path with --counts", call. = FALSE)
}

# check if metadata or sample names were provided
# need metadata for raw and visium data, sample names for cosmx
has_meta <- !is.na(opt$meta)
has_names <- !is.na(opt$names)

if (has_meta && !has_names) {
  samples_input <- opt$meta
} else if (has_names && !has_meta) {
  samples_input <- unlist(strsplit(opt$names, split = ","))
} else {
  stop("Please specify exactly one of --meta OR --names", call. = FALSE)
}

# list the count files found under the counts path
count_dir <- as.character(opt$counts)
count_file <- fs::dir_ls(count_dir)

# if spotcoords were provided, include them in the STlist input
# if spotcoords were not provided (visium input), only the counts are used
if (!is.null(opt$spots)) {
  coord_dir <- as.character(opt$spots)
  coord_file <- fs::dir_ls(coord_dir)

  st_data <- STlist(
    rnacounts = count_file,
    spotcoords = coord_file,
    samples = samples_input
  )
} else {
  st_data <- STlist(rnacounts = count_file, samples = samples_input)
}

message("STList has been created")

# distribution plot
# create distribution plot if flag is included
if (opt$distplot) {
  save_distribution_plot(
    st_data = st_data,
    opt = opt,
    prefix = "unfiltered_",
    out_dir = "./unfiltered_distribution_plots"
  )

  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
}

# spot/cell filtering
# filter spots and genes if flag is included
if (opt$filter) {
  # filter out spots or genes based on minimum and maximum counts;
  # every gene-level threshold exposed on the command line is forwarded
  st_data <- filter_data(
    x = st_data,
    spot_minreads = opt$sminreads,
    spot_maxreads = opt$smaxreads,
    spot_mingenes = opt$smingenes,
    spot_maxgenes = opt$smaxgenes,
    gene_minreads = opt$gminreads,
    gene_maxreads = opt$gmaxreads,
    gene_minspots = opt$gminspots,
    gene_maxspots = opt$gmaxspots
  )

  message("Data filtering completed & saved to STlist")
}

# filtered data plot
# create filtered distribution plot if flag is included
if (opt$filterplot) {
  save_distribution_plot(
    st_data = st_data,
    opt = opt,
    prefix = "filtered_",
    out_dir = "./filtered_distribution_plots"
  )

  message("Filtered distribution plot saved to ./filtered_distribution_plots")
}

# transform data, defaults to log transformation
STobj <- transform_data(x = st_data, method = opt$type)

message("Data has been log transformed, unless otherwise specified")

# save transformed data to .rds
saveRDS(STobj, file = "STobj.rds")

message("STlist has been saved as .rds file")