comparison spatialGE_single_input.R @ 0:555ca19d07e6 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:19 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:555ca19d07e6
# -------------
# Data Cleaning
# -------------

# SINGLE INPUT SCRIPT:
# Accepts a single raw data sample or a single CosMx sample
# Does not accept a single Visium sample due to its spatial subdirectory

# Purpose:
# Transform data into an STlist, perform QC, log transform
11
12 library(spatialGE)
13 library(optparse)
14 library(ggplot2)
15 library(tools)
16 library(fs)
17
18
19 ### Command line options
20
# Command line interface.
#
# Input options (--counts, --spots, --meta, --names) describe the sample,
# the --s*/--g* options are filtering thresholds, the three logical flags
# (--distplot, --filter, --filterplot) switch optional steps on, and --type
# selects the transformation method.
option_list <- list(
  # Inputs
  make_option(c("-c", "--counts"), action = "store", default = NA,
              type = "character",
              help = "Path to count data file(s)"),
  make_option(c("-s", "--spots"), action = "store", default = NULL,
              type = "character",
              help = "Path to cell coordinates file(s), not required for Visium or Xenium"),
  make_option(c("-m", "--meta"), action = "store", default = NA,
              type = "character",
              help = "Path to metadata file"),
  make_option(c("-n", "--names"), action = "store", default = NA,
              type = "character",
              help = "Specific sample names"),

  # Plot configuration
  make_option(c("--plotmeta"), action = "store", default = NULL,
              type = "character",
              help = "Plots counts per cell or genes per cell"),
  make_option(c("--samples"), action = "store", default = NULL,
              type = "character",
              help = "Samples to include in plots, defaults to all"),

  # Spot/cell level thresholds
  make_option(c("--sminreads"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of total reads for a spot to be retained"),
  make_option(c("--smaxreads"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of total reads for a spot to be retained"),
  make_option(c("--smingenes"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of non-zero counts for a spot to be retained"),
  make_option(c("--smaxgenes"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of non-zero counts for a spot to be retained"),

  # Gene level thresholds
  make_option(c("--gminreads"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of total reads for a gene to be retained"),
  make_option(c("--gmaxreads"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of total reads for a gene to be retained"),
  make_option(c("--gminspots"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--gmaxspots"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of spots with non-zero counts for a gene to be retained"),

  # Step toggles (plain flags, no value)
  make_option(c("--distplot"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, generate unfiltered distribution plot"),
  make_option(c("--filter"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, apply filtering before transformation"),
  make_option(c("--filterplot"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, generate filtered distribution plot"),

  # --type carries a value ("log" or "sct"), so it must use action "store";
  # "store_true" would turn it into a value-less flag and the requested
  # method could never be passed through.
  make_option(c("-t", "--type"), action = "store", default = "log",
              type = "character",
              help = "Type of transformation to apply: log or sct")
)
59
60 ### Main
61
# Parse the command line into a named list of option values
opt <- parse_args(OptionParser(option_list = option_list))

# Exactly one of --meta / --names must be supplied
# (metadata is needed for raw data, sample names for cosmx).
# Both options default to NA, so a non-NA value means the user provided it.
has_meta <- !is.na(opt$meta)
has_names <- !is.na(opt$names)

# Equal flags means either both were given or neither was; reject both cases
if (has_meta == has_names) {
  stop("Please specify exactly one of --meta OR --names", call. = FALSE)
}

# Whichever one was given becomes the sample specification
samples_input <- if (has_meta) opt$meta else opt$names
74
# Build the STlist from the counts file, the (optional) coordinates file
# and the sample specification chosen above
st_data <- STlist(
  rnacounts = opt$counts,
  spotcoords = opt$spots,
  samples = samples_input
)

message("STlist has been created")
79
# Unfiltered distribution plot, produced only when --distplot is set
if (opt$distplot) {

  # --samples arrives as a single comma-separated string; when it is absent
  # or empty, NULL is handed to distribution_plots() instead
  if (is.null(opt$samples) || opt$samples == "") {
    sample_names <- NULL
  } else {
    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
  }

  dist_plot <- distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = sample_names,
    ptsize = 1
  )

  # Name the PNG after the counts file, minus its directory and extension
  unfiltered_dir <- "./unfiltered_distribution_plots"
  filename <- paste0("unfiltered_", file_path_sans_ext(basename(opt$counts)), ".png")

  # Make sure the output directory for the distribution plots exists
  dir.create(unfiltered_dir, showWarnings = FALSE, recursive = TRUE)

  # No plot argument is passed, so ggsave() writes its default plot rather
  # than dist_plot explicitly.
  # NOTE(review): confirm the default plot is the one intended here
  ggsave(path = unfiltered_dir, filename = filename, bg = "white", width = 12)

  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
}
115
# Spot/cell and gene filtering, applied only when --filter is set
if (opt$filter) {

  # Forward every threshold exposed on the command line. Options that were
  # not supplied and have a NULL default are absent from `opt`, so they
  # evaluate to NULL here; the minimum thresholds fall back to their
  # declared default of 0.
  st_data <- filter_data(
    x = st_data,
    spot_minreads = opt$sminreads,
    spot_maxreads = opt$smaxreads,
    spot_mingenes = opt$smingenes,
    spot_maxgenes = opt$smaxgenes,
    gene_minreads = opt$gminreads,
    gene_maxreads = opt$gmaxreads,
    gene_minspots = opt$gminspots,
    gene_maxspots = opt$gmaxspots
  )

  message("Data filtering completed & saved to STlist")
}
127
# Filtered distribution plot, produced only when --filterplot is set.
# It plots whatever st_data currently holds, so it only reflects filtering
# if --filter was also given.
if (opt$filterplot) {

  # --samples arrives as a single comma-separated string; when it is absent
  # or empty, NULL is handed to distribution_plots() instead
  if (is.null(opt$samples) || opt$samples == "") {
    sample_names <- NULL
  } else {
    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
  }

  filter_dist_plot <- distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = sample_names,
    ptsize = 1
  )

  # Name the PNG after the counts file, minus its directory and extension
  filtered_dir <- "./filtered_distribution_plots"
  filename_2 <- paste0("filtered_", file_path_sans_ext(basename(opt$counts)), ".png")

  # Make sure the output directory for the distribution plots exists
  dir.create(filtered_dir, showWarnings = FALSE, recursive = TRUE)

  # No plot argument is passed, so ggsave() writes its default plot rather
  # than filter_dist_plot explicitly.
  # NOTE(review): confirm the default plot is the one intended here
  ggsave(path = filtered_dir, filename = filename_2, bg = "white", width = 12)

  message("Filtered distribution plot saved to ./filtered_distribution_plots")
}
163
# Transform the (optionally filtered) data with the method given by --type,
# whose declared default is "log"
STobj <- transform_data(x = st_data, method = opt$type)
message("Data has been log transformed, unless otherwise specified")

# Write the transformed STlist to the working directory as an .rds file
saveRDS(object = STobj, file = "STobj.rds")
message("STlist has been saved as .rds file")