comparison spatialGE_multiple_input.R @ 0:c84663d92248 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:05 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c84663d92248
1 # -------------
2 # Data Cleaning
3 # -------------
4
5 # MULTIPLE INPUT SCRIPT:
6 # Accepts multiple sample input for raw data and cosmx
7 # Accepts single and multiple sample input for Visium, due to spatial subdirectory
8
9 # Purpose:
10 # Transform data into STlist, perform QC, log transform
11
12 library(spatialGE)
13 library(optparse)
14 library(ggplot2)
15 library(tools)
16 library(fs)
17
18
19 ### Command Line Options
20
21 option_list <- list(
22 make_option(c("-c", "--counts"), action = "store", default = NA, type = "character",
23 help = "Path to count data file(s)"),
24 make_option(c("-s", "--spots"), action = "store", default = NULL, type = "character",
25 help = "Path to cell coordinates file(s), not required for Visium or Xenium"),
26 make_option(c("-m", "--meta"), action = "store", default = NA, type = "character",
27 help = "Path to metadata file"),
28 make_option(c("-n", "--names"), action = "store", default = NA, type = "character",
29 help = "Specific sample names"),
30 make_option(c("--plotmeta"), action = "store", default = NULL, type = "character",
31 help = "Plots counts per cell or genes per cell"),
32 make_option(c("--samples"), action = "store", default = NULL, type = "character",
33 help = "Samples to include in plots, defaults to all"),
34 make_option(c("--sminreads"), action = "store", default = 0, type = "integer",
35 help = "Minimum number of total reads for a spot to be retained"),
36 make_option(c("--smaxreads"), action = "store", default = NULL, type = "integer",
37 help = "Maximum number of total reads for a spot to be retained"),
38 make_option(c("--smingenes"), action = "store", default = 0, type = "integer",
39 help = "Minimum number of non-zero counts for a spot to be retained"),
40 make_option(c("--smaxgenes"), action = "store", default = NULL, type = "integer",
41 help = "Maximum number of non-zero counts for a spot to be retained"),
42 make_option(c("--gminreads"), action = "store", default = 0, type = "integer",
43 help = "Minimum number of total reads for a gene to be retained"),
44 make_option(c("--gmaxreads"), action = "store", default = NULL, type = "integer",
45 help = "Maximum number of total reads for a gene to be retained"),
46 make_option(c("--gminspots"), action = "store", default = 0, type = "integer",
47 help = "Minimum number of spots with non-zero counts for a gene to be retained"),
48 make_option(c("--gmaxspots"), action = "store", default = NULL, type = "integer",
49 help = "Maximum number of spots with non-zero counts for a gene to be retained"),
50 make_option(c("--distplot"), action = "store_true", type = "logical", default = FALSE,
51 help = "If set, generate unfiltered distribution plot"),
52 make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE,
53 help = "If set, apply filtering before transformation"),
54 make_option(c("--filterplot"), action = "store_true", type = "logical", default = FALSE,
55 help = "If set, generate filtered distribution plot"),
56 make_option(c("-t", "--type"), action = "store_true", default = "log", type = "character",
57 help = "Type of transformation to apply: log or sct")
58 )
59
60 ### Main
61
62 #parse args
63 opt <- parse_args(OptionParser(option_list = option_list))
64
65 #check if metadata or sample names were provided
66 #need metadata for raw and visium data, sample names for cosmx
67 if (!is.na(opt$meta) && is.na(opt$names)) {
68 samples_input <- opt$meta
69 } else if (is.na(opt$meta) && !is.na(opt$names)) {
70 samples_input <- unlist(strsplit(opt$names, split = ","))
71 } else {
72 stop("Please only specify either --metadata OR --names")
73 }
74
75 #create temporary directory to hold count data
76 count_dir <- tempdir()
77 unlink(count_dir, recursive = TRUE)
78 dir.create(count_dir)
79
80 #create temporary directory to hold coord data
81 coord_dir <- tempdir()
82 unlink(coord_dir, recursive = TRUE)
83 dir.create(coord_dir)
84
85 #if spotcoords were provided, load in count and coord data
86 #if spotcoords were not provided (visium input), only load the count file
87 if (!is.null(opt$spots)) {
88 coord_dir <- as.character(opt$spots)
89 coord_file <- fs::dir_ls(coord_dir)
90 count_dir <- as.character(opt$counts)
91 count_file <- fs::dir_ls(count_dir)
92 } else {
93 count_dir <- as.character(opt$counts)
94 count_file <- fs::dir_ls(count_dir)
95 }
96
97 #if spotcoords are present, include in stlist input
98 if (!is.null(opt$spots)) {
99 st_data <- STlist(rnacounts = count_file, spotcoords = coord_file, samples = samples_input)
100 } else {
101 st_data <- STlist(rnacounts = count_file, samples = samples_input)
102 }
103
104 message("STList has been created")
105
106 #distribution plot
107
108 #create distribution plot if flag is included
109 if (opt$distplot) {
110
111 #if sample names are provided, separate the character string
112 if (!is.null(opt$samples) && opt$samples != "") {
113 sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
114 } else {
115 sample_names <- NULL
116 }
117
118 #generate distribution plot
119 dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1)
120
121 #create unique plot file names based on sample name
122 base_input <- basename(opt$counts)
123 base_name <- file_path_sans_ext(base_input)
124
125 filename <- paste0("unfiltered_", base_name, ".png")
126
127 #create output directory for distribution plots
128 dir.create("./unfiltered_distribution_plots", showWarnings = FALSE, recursive = TRUE)
129
130 #save plot to subdir
131 ggsave(
132 path = "./unfiltered_distribution_plots",
133 filename = filename,
134 bg = "white",
135 width = 12
136 )
137
138 message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
139 }
140
141 #spot/cell filtering
142
143 #filter spots if flag is included
144 if (opt$filter) {
145
146 #filter out spots or genes based on minimum and maximum counts
147 st_data <- filter_data(x = st_data, spot_minreads = opt$sminreads, spot_maxreads = opt$smaxreads, spot_mingenes = opt$smingenes,
148 spot_maxgenes = opt$smaxgenes, gene_minreads = opt$gminreads)
149 message("Data filtering completed & saved to STlist")
150 }
151
152 #filtered data plot
153
154 #create filtered distribution plot if flag is included
155 if (opt$filterplot) {
156
157 #if sample names are provided, separate the character string
158 if (!is.null(opt$samples) && opt$samples != "") {
159 sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
160 } else {
161 sample_names <- NULL
162 }
163
164 #generate filtered distribution plot
165 filter_dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1)
166
167 #create unique plot file names based on sample name
168 base_input_2 <- basename(opt$counts)
169 base_name_2 <- file_path_sans_ext(base_input_2)
170
171 filename_2 <- paste0("filtered_", base_name_2, ".png")
172
173 #create output directory for cluster plots
174 dir.create("./filtered_distribution_plots", showWarnings = FALSE, recursive = TRUE)
175
176 #save plot to subdir
177 ggsave(
178 path = "./filtered_distribution_plots",
179 filename = filename_2,
180 bg = "white",
181 width = 12
182 )
183
184 message("Filtered distribution plot saved to ./filtered_distribution_plots")
185 }
186
187 #transform data, defaults to log transformation
188
189 STobj <- transform_data(x = st_data, method = opt$type)
190
191 message("Data has been log transformed, unless otherwise specified")
192
193 #save transformed data to .rds
194
195 saveRDS(STobj, file = "STobj.rds")
196
197 message("STlist has been saved as .rds file")