Mercurial > repos > goeckslab > cleaning_spatialge
comparison spatialGE_multiple_input.R @ 0:c84663d92248 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author | goeckslab |
---|---|
date | Wed, 13 Aug 2025 19:32:05 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c84663d92248 |
---|---|
1 # ------------- | |
2 # Data Cleaning | |
3 # ------------- | |
4 | |
5 # MULTIPLE INPUT SCRIPT: | |
6 # Accepts multiple sample input for raw data and cosmx | |
7 # Accepts single and multiple sample input for Visium, due to spatial subdirectory | |
8 | |
9 # Purpose: | |
10 # Transform data into STlist, perform QC, log transform | |
11 | |
12 library(spatialGE) | |
13 library(optparse) | |
14 library(ggplot2) | |
15 library(tools) | |
16 library(fs) | |
17 | |
18 | |
19 ### Command Line Options | |
20 #--meta and --names default to NA and are checked with is.na() after parsing; every value-taking option uses action = "store" | |
21 option_list <- list( | |
22 make_option(c("-c", "--counts"), action = "store", default = NA, type = "character", | |
23 help = "Path to count data file(s)"), | |
24 make_option(c("-s", "--spots"), action = "store", default = NULL, type = "character", | |
25 help = "Path to cell coordinates file(s), not required for Visium or Xenium"), | |
26 make_option(c("-m", "--meta"), action = "store", default = NA, type = "character", | |
27 help = "Path to metadata file"), | |
28 make_option(c("-n", "--names"), action = "store", default = NA, type = "character", | |
29 help = "Specific sample names"), | |
30 make_option(c("--plotmeta"), action = "store", default = NULL, type = "character", | |
31 help = "Plots counts per cell or genes per cell"), | |
32 make_option(c("--samples"), action = "store", default = NULL, type = "character", | |
33 help = "Samples to include in plots, defaults to all"), | |
34 make_option(c("--sminreads"), action = "store", default = 0, type = "integer", | |
35 help = "Minimum number of total reads for a spot to be retained"), | |
36 make_option(c("--smaxreads"), action = "store", default = NULL, type = "integer", | |
37 help = "Maximum number of total reads for a spot to be retained"), | |
38 make_option(c("--smingenes"), action = "store", default = 0, type = "integer", | |
39 help = "Minimum number of non-zero counts for a spot to be retained"), | |
40 make_option(c("--smaxgenes"), action = "store", default = NULL, type = "integer", | |
41 help = "Maximum number of non-zero counts for a spot to be retained"), | |
42 make_option(c("--gminreads"), action = "store", default = 0, type = "integer", | |
43 help = "Minimum number of total reads for a gene to be retained"), | |
44 make_option(c("--gmaxreads"), action = "store", default = NULL, type = "integer", | |
45 help = "Maximum number of total reads for a gene to be retained"), | |
46 make_option(c("--gminspots"), action = "store", default = 0, type = "integer", | |
47 help = "Minimum number of spots with non-zero counts for a gene to be retained"), | |
48 make_option(c("--gmaxspots"), action = "store", default = NULL, type = "integer", | |
49 help = "Maximum number of spots with non-zero counts for a gene to be retained"), | |
50 make_option(c("--distplot"), action = "store_true", type = "logical", default = FALSE, | |
51 help = "If set, generate unfiltered distribution plot"), | |
52 make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, | |
53 help = "If set, apply filtering before transformation"), | |
54 make_option(c("--filterplot"), action = "store_true", type = "logical", default = FALSE, | |
55 help = "If set, generate filtered distribution plot"), | |
56 make_option(c("-t", "--type"), action = "store", default = "log", type = "character", | |
57 help = "Type of transformation to apply: log or sct") | |
58 ) | |
59 | |
60 ### Main | |
61 | |
62 #parse the command line into the named list opt | |
63 opt <- parse_args(OptionParser(option_list = option_list)) | |
64 | |
65 #exactly one of --meta (metadata file) or --names (comma-separated sample names) must be given | |
66 #need metadata for raw and visium data, sample names for cosmx | |
67 if (!is.na(opt$meta) && is.na(opt$names)) { | |
68 samples_input <- opt$meta | |
69 } else if (is.na(opt$meta) && !is.na(opt$names)) { | |
70 samples_input <- unlist(strsplit(opt$names, split = ",")) | |
71 } else { | |
72 stop("Please specify exactly one of --meta or --names") | |
73 } | |
74 | |
75 #no scratch directories are created here: the count and coordinate files are | |
76 #listed directly from the --counts and --spots paths below. tempdir() returns | |
77 #the same per-session path on every call, so unlinking it only wiped R's own | |
78 #session temp directory, and the resulting paths were overwritten before use. | |
79 | |
80 #fail early with a clear message when the required count input is missing | |
81 if (is.na(opt$counts)) { | |
82 stop("Please provide the count data location with --counts") | |
83 } | |
84 | |
85 #collect the input file paths | |
86 #coordinate files are only listed when --spots was given (visium input omits it) | |
87 has_spots <- !is.null(opt$spots) | |
88 if (has_spots) { | |
89 coord_dir <- as.character(opt$spots) | |
90 coord_file <- fs::dir_ls(coord_dir) | |
91 } | |
92 | |
93 #count files are listed in every case | |
94 count_dir <- as.character(opt$counts) | |
95 count_file <- fs::dir_ls(count_dir) | |
96 | |
97 #build the STlist, passing spotcoords only when they were provided | |
98 if (has_spots) { | |
99 st_data <- STlist(rnacounts = count_file, spotcoords = coord_file, samples = samples_input) | |
100 } else { | |
101 st_data <- STlist(rnacounts = count_file, samples = samples_input) | |
102 } | |
103 | |
104 message("STList has been created") | |
105 | |
106 #distribution plot of the data as loaded (runs before the filtering step) | |
107 | |
108 #create distribution plot if the --distplot flag is included | |
109 if (opt$distplot) { | |
110 | |
111 #if --samples was provided, split the comma-separated string; otherwise pass NULL | |
112 if (!is.null(opt$samples) && opt$samples != "") { | |
113 sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]] | |
114 } else { | |
115 sample_names <- NULL | |
116 } | |
117 | |
118 #generate distribution plot of the --plotmeta variable for the selected samples | |
119 dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1) | |
120 | |
121 #build the plot file name from the basename of the --counts path, extension removed | |
122 base_input <- basename(opt$counts) | |
123 base_name <- file_path_sans_ext(base_input) | |
124 | |
125 filename <- paste0("unfiltered_", base_name, ".png") | |
126 | |
127 #create output directory for distribution plots | |
128 dir.create("./unfiltered_distribution_plots", showWarnings = FALSE, recursive = TRUE) | |
129 #NOTE(review): no plot= is passed, so ggsave() relies on its default plot (ggplot2's last_plot()) rather than dist_plot - confirm this picks the intended plot | |
130 #save plot to subdir | |
131 ggsave( | |
132 path = "./unfiltered_distribution_plots", | |
133 filename = filename, | |
134 bg = "white", | |
135 width = 12 | |
136 ) | |
137 | |
138 message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots") | |
139 } | |
140 | |
141 #spot/cell and gene filtering | |
142 | |
143 #filter the STlist if the --filter flag is included | |
144 if (opt$filter) { | |
145 #every spot-level and gene-level threshold parsed from the command line is forwarded, | |
146 #including --gmaxreads, --gminspots and --gmaxspots, which were previously parsed but never applied | |
147 st_data <- filter_data(x = st_data, spot_minreads = opt$sminreads, spot_maxreads = opt$smaxreads, spot_mingenes = opt$smingenes, | |
148 spot_maxgenes = opt$smaxgenes, gene_minreads = opt$gminreads, gene_maxreads = opt$gmaxreads, gene_minspots = opt$gminspots, gene_maxspots = opt$gmaxspots) | |
149 message("Data filtering completed & saved to STlist") | |
150 } | |
151 | |
152 #distribution plot of st_data after the filtering step | |
153 #NOTE(review): this runs whenever --filterplot is set, even without --filter, in which case st_data is still unfiltered | |
154 #create filtered distribution plot if the --filterplot flag is included | |
155 if (opt$filterplot) { | |
156 | |
157 #if --samples was provided, split the comma-separated string; otherwise pass NULL | |
158 if (!is.null(opt$samples) && opt$samples != "") { | |
159 sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]] | |
160 } else { | |
161 sample_names <- NULL | |
162 } | |
163 | |
164 #generate distribution plot of the --plotmeta variable for the selected samples | |
165 filter_dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1) | |
166 | |
167 #build the plot file name from the basename of the --counts path, extension removed | |
168 base_input_2 <- basename(opt$counts) | |
169 base_name_2 <- file_path_sans_ext(base_input_2) | |
170 | |
171 filename_2 <- paste0("filtered_", base_name_2, ".png") | |
172 | |
173 #create output directory for filtered distribution plots | |
174 dir.create("./filtered_distribution_plots", showWarnings = FALSE, recursive = TRUE) | |
175 #NOTE(review): no plot= is passed, so ggsave() relies on its default plot (ggplot2's last_plot()) rather than filter_dist_plot - confirm this picks the intended plot | |
176 #save plot to subdir | |
177 ggsave( | |
178 path = "./filtered_distribution_plots", | |
179 filename = filename_2, | |
180 bg = "white", | |
181 width = 12 | |
182 ) | |
183 | |
184 message("Filtered distribution plot saved to ./filtered_distribution_plots") | |
185 } | |
186 | |
187 #apply the transformation selected with --type (log unless overridden) | |
188 | |
189 transformed_stlist <- transform_data(x = st_data, method = opt$type) | |
190 | |
191 message("Data has been log transformed, unless otherwise specified") | |
192 | |
193 #write the transformed STlist to STobj.rds in the working directory | |
194 | |
195 saveRDS(object = transformed_stlist, file = "STobj.rds") | |
196 | |
197 message("STlist has been saved as .rds file") |