Mercurial > repos > goeckslab > clustering_spatialge
comparison spatialGE_single_input.R @ 0:555ca19d07e6 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author | goeckslab |
---|---|
date | Wed, 13 Aug 2025 19:32:19 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:555ca19d07e6 |
---|---|
1 # ------------- | |
2 # Data Cleaning | |
3 # ------------- | |
4 | |
5 # SINGLE INPUT SCRIPT: | |
6 # Accepts single raw data sample and single cosmx sample | |
7 # Does not accept single visium sample due to spatial subdirectory | |
8 | |
9 # Purpose: | |
10 # Transform data into STlist, perform QC, log transform | |
11 | |
12 library(spatialGE) | |
13 library(optparse) | |
14 library(ggplot2) | |
15 library(tools) | |
16 library(fs) | |
17 | |
18 | |
19 ### Command line options: value-taking options use action = "store"; on/off flags use action = "store_true" | |
20 | |
21 option_list <- list( | |
22 make_option(c("-c", "--counts"), action = "store", default = NA, type = "character", | |
23 help = "Path to count data file(s)"), | |
24 make_option(c("-s", "--spots"), action = "store", default = NULL, type = "character", | |
25 help = "Path to cell coordinates file(s), not required for Visium or Xenium"), | |
26 make_option(c("-m", "--meta"), action = "store", default = NA, type = "character", | |
27 help = "Path to metadata file"), | |
28 make_option(c("-n", "--names"), action = "store", default = NA, type = "character", | |
29 help = "Specific sample names"), | |
30 make_option(c("--plotmeta"), action = "store", default = NULL, type = "character", | |
31 help = "Plots counts per cell or genes per cell"), | |
32 make_option(c("--samples"), action = "store", default = NULL, type = "character", | |
33 help = "Samples to include in plots, defaults to all"), | |
34 make_option(c("--sminreads"), action = "store", default = 0, type = "integer", | |
35 help = "Minimum number of total reads for a spot to be retained"), | |
36 make_option(c("--smaxreads"), action = "store", default = NULL, type = "integer", | |
37 help = "Maximum number of total reads for a spot to be retained"), | |
38 make_option(c("--smingenes"), action = "store", default = 0, type = "integer", | |
39 help = "Minimum number of non-zero counts for a spot to be retained"), | |
40 make_option(c("--smaxgenes"), action = "store", default = NULL, type = "integer", | |
41 help = "Maximum number of non-zero counts for a spot to be retained"), | |
42 make_option(c("--gminreads"), action = "store", default = 0, type = "integer", | |
43 help = "Minimum number of total reads for a gene to be retained"), | |
44 make_option(c("--gmaxreads"), action = "store", default = NULL, type = "integer", | |
45 help = "Maximum number of total reads for a gene to be retained"), | |
46 make_option(c("--gminspots"), action = "store", default = 0, type = "integer", | |
47 help = "Minimum number of spots with non-zero counts for a gene to be retained"), | |
48 make_option(c("--gmaxspots"), action = "store", default = NULL, type = "integer", | |
49 help = "Maximum number of spots with non-zero counts for a gene to be retained"), | |
50 make_option(c("--distplot"), action = "store_true", type = "logical", default = FALSE, | |
51 help = "If set, generate unfiltered distribution plot"), | |
52 make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, | |
53 help = "If set, apply filtering before transformation"), | |
54 make_option(c("--filterplot"), action = "store_true", type = "logical", default = FALSE, | |
55 help = "If set, generate filtered distribution plot"), | |
56 make_option(c("-t", "--type"), action = "store", default = "log", type = "character", | |
57 help = "Type of transformation to apply: log or sct") | |
58 ) | |
59 | |
60 ### Main | |
61 | |
62 # parse the command line into the named list opt | |
63 opt <- parse_args(OptionParser(option_list = option_list)) | |
64 | |
65 # exactly one of --meta / --names must be supplied (both default to NA); | |
66 # need metadata for raw, sample names for cosmx. Giving both or neither stops the script | |
67 if (!is.na(opt$meta) && is.na(opt$names)) { | |
68 samples_input <- opt$meta | |
69 } else if (is.na(opt$meta) && !is.na(opt$names)) { | |
70 samples_input <- opt$names | |
71 } else { | |
72 stop("Please specify exactly one of --meta OR --names") | |
73 } | |
74 | |
75 # build the STlist from the counts path, the coordinates path (NULL when --spots is omitted) and the --meta or --names value held in samples_input | |
76 st_data <- STlist(rnacounts = opt$counts, spotcoords = opt$spots, samples = samples_input) | |
77 | |
78 message("STlist has been created") | |
79 | |
80 # unfiltered distribution plot (optional) | |
81 | |
82 # runs only when --distplot is set; st_data has not been passed through filter_data yet at this point | |
83 if (opt$distplot) { | |
84 | |
85 # when --samples is given and non-empty, split the comma-separated string into a character vector | |
86 # otherwise pass NULL as the samples argument | |
87 if (!is.null(opt$samples) && opt$samples != "") { | |
88 sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]] | |
89 } else { | |
90 sample_names <- NULL | |
91 } | |
92 | |
93 # generate the distribution plot for the variable named by --plotmeta | |
94 dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1) | |
95 | |
96 # build the plot file name from the counts file name with its directory and extension removed | |
97 base_input <- basename(opt$counts) | |
98 base_name <- file_path_sans_ext(base_input) | |
99 | |
100 filename <- paste0("unfiltered_", base_name, ".png") | |
101 | |
102 # create the output directory for unfiltered distribution plots; no warning if it already exists | |
103 dir.create("./unfiltered_distribution_plots", showWarnings = FALSE, recursive = TRUE) | |
104 | |
105 # save plot to subdir; NOTE(review): no plot argument is passed, so ggsave falls back to its default (last_plot) rather than dist_plot explicitly | |
106 ggsave( | |
107 path = "./unfiltered_distribution_plots", | |
108 filename = filename, | |
109 bg = "white", | |
110 width = 12 | |
111 ) | |
112 | |
113 message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots") | |
114 } | |
115 | |
116 # spot/cell and gene filtering (optional) | |
117 | |
118 # runs only when --filter is set; st_data is replaced by the filtered STlist | |
119 if (opt$filter) { | |
120 | |
121 # forward every spot-level and gene-level threshold option; max thresholds left unset are NULL | |
122 st_data <- filter_data(x = st_data, spot_minreads = opt$sminreads, spot_maxreads = opt$smaxreads, spot_mingenes = opt$smingenes, | |
123 spot_maxgenes = opt$smaxgenes, gene_minreads = opt$gminreads, gene_maxreads = opt$gmaxreads, gene_minspots = opt$gminspots, gene_maxspots = opt$gmaxspots) | |
124 | |
125 message("Data filtering completed & saved to STlist") | |
126 } | |
127 | |
128 # filtered distribution plot (optional) | |
129 | |
130 # runs only when --filterplot is set; st_data is the filtered STlist only if --filter was also set | |
131 if (opt$filterplot) { | |
132 | |
133 # when --samples is given and non-empty, split the comma-separated string into a character vector | |
134 # otherwise pass NULL as the samples argument | |
135 if (!is.null(opt$samples) && opt$samples != "") { | |
136 sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]] | |
137 } else { | |
138 sample_names <- NULL | |
139 } | |
140 | |
141 # generate the distribution plot for the variable named by --plotmeta from the current st_data | |
142 filter_dist_plot <- distribution_plots(x = st_data, plot_meta = opt$plotmeta, samples = sample_names, ptsize = 1) | |
143 | |
144 # build the plot file name from the counts file name with its directory and extension removed | |
145 base_input_2 <- basename(opt$counts) | |
146 base_name_2 <- file_path_sans_ext(base_input_2) | |
147 | |
148 filename_2 <- paste0("filtered_", base_name_2, ".png") | |
149 | |
150 # create the output directory for filtered distribution plots; no warning if it already exists | |
151 dir.create("./filtered_distribution_plots", showWarnings = FALSE, recursive = TRUE) | |
152 | |
153 # save plot to subdir; NOTE(review): no plot argument is passed, so ggsave falls back to its default (last_plot) rather than filter_dist_plot explicitly | |
154 ggsave( | |
155 path = "./filtered_distribution_plots", | |
156 filename = filename_2, | |
157 bg = "white", | |
158 width = 12 | |
159 ) | |
160 | |
161 message("Filtered distribution plot saved to ./filtered_distribution_plots") | |
162 } | |
163 | |
164 # transform st_data with the method given by --type (the option defaults to log) | |
165 | |
166 STobj <- transform_data(x = st_data, method = opt$type) | |
167 | |
168 message("Data has been log transformed, unless otherwise specified") | |
169 | |
170 # serialize the transformed object to STobj.rds, a path relative to the working directory | |
171 | |
172 saveRDS(STobj, file = "STobj.rds") | |
173 | |
174 message("STlist has been saved as .rds file") |