comparison celesta_assign_cells.R @ 0:8001319743c0 draft

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author goeckslab
date Wed, 28 Aug 2024 12:46:48 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8001319743c0
1 # ---------------------------------------------------------------------------------
2 # The main algorithim for CELESTA cell type assignment
3 # ---------------------------------------------------------------------------------
4
5 suppressWarnings(suppressMessages(library(janitor)))
6 suppressWarnings(suppressMessages(library(optparse)))
7 suppressWarnings(suppressMessages(library(dplyr)))
8 suppressWarnings(suppressMessages(library(anndataR)))
9 suppressWarnings(suppressMessages(library(Rmixmod)))
10 suppressWarnings(suppressMessages(library(spdep)))
11 suppressWarnings(suppressMessages(library(ggplot2)))
12 suppressWarnings(suppressMessages(library(reshape2)))
13 suppressWarnings(suppressMessages(library(zeallot)))
14 suppressWarnings(suppressMessages(library(CELESTA)))
15
16 ### Define command line arguments
17 option_list <- list(
18 make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character",
19 help = "Path to imaging data"),
20 make_option(c("-p", "--prior"), action = "store", default = NA, type = "character",
21 help = "Path to prior marker info file"),
22 make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character",
23 help = "Name of column in adata.obs containing X coordinate"),
24 make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character",
25 help = "Name of column in adata.obs containing Y coordinate"),
26 make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE,
27 help = "Boolean to filter cells or not (default: no filtering)"),
28 make_option(c("--highfilter"), action = "store", default = 0.9, type = "double",
29 help = "High marker threshold if filtering cells (default: 0.9)"),
30 make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double",
31 help = "Low marker threshold if filtering cells (default: 0.4)"),
32 make_option(c("--maxiteration"), action = "store", default = 10, type = "integer",
33 help = "Maximum iterations allowed in the EM algorithm per round"),
34 make_option(c("--changethresh"), action = "store", default = 0.01, type = "double",
35 help = "Ending condition for the EM algorithm"),
36 make_option(c("--highexpthresh"), action = "store", default = "default_high_thresholds", type = "character",
37 help = "Path to file specifying high expression thresholds for anchor and index cells"),
38 make_option(c("--lowexpthresh"), action = "store", default = "default_low_thresholds", type = "character",
39 help = "Path to file specifying low expression thresholds for anchor and index cells")
40 )
41
42 ### Functions
43 anndata_to_celesta <- function(input_adata, x_col, y_col) {
44
45 #' Function to convert anndata object to dataframe readable by CELESTA
46 #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe
47 #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N
48
49 # initialize output as dataframe from adata.obs
50 celesta_input_dataframe <- data.frame(input_adata$obs)
51
52 # subset to X and Y coordinates from obs only
53 celesta_input_dataframe <- celesta_input_dataframe %>%
54 dplyr::select({{x_col}}, {{y_col}})
55
56 # rename X,Y column names to what CELESTA wants
57 colnames(celesta_input_dataframe) <- c("X", "Y")
58
59 # merge X,Y coords with marker intensities from adata.X
60 x_df <- data.frame(input_adata$X)
61 colnames(x_df) <- input_adata$var_names
62 celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df)
63
64 return(celesta_input_dataframe)
65 }
66
67 ### Main
68 # parse args
69 opt <- parse_args(OptionParser(option_list = option_list))
70
71 # read anndata, convert to celesta format
72 adata <- read_h5ad(opt$imagingdata)
73 celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol)
74
75 # read prior marker info
76 prior <- read.csv(opt$prior, check.names = FALSE)
77
78 # clean prior names and input dataframe names
79 prior <- janitor::clean_names(prior, case = "all_caps")
80 celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps")
81
82 # instantiate celesta object
83 CelestaObj <- CreateCelestaObject(
84 project_title = "",
85 prior_marker_info = prior,
86 imaging_data_file = celesta_input_df
87 )
88
89 # if filtering is specified, filter out cells outside high and low thresholds
90 if (opt$filter) {
91 print("filtering cells based on expression")
92 CelestaObj <- FilterCells(CelestaObj,
93 high_marker_threshold = opt$highfilter,
94 low_marker_threshold = opt$lowfilter)
95 } else {
96 print("Proceeding to cell type assignment without cell filtering")
97 }
98
99 # check for non-default expression threshold files
100 if (opt$highexpthresh != "default_high_thresholds") {
101 # read high thresholds
102 print("Using custom high expression thresholds")
103 high_expression_thresholds <- read.csv(opt$highexpthresh)
104 hi_exp_thresh_anchor <- high_expression_thresholds$anchor
105 hi_exp_thresh_index <- high_expression_thresholds$index
106 } else {
107 print("Using default high expression thresholds -- this may need adjustment")
108 hi_exp_thresh_anchor <- rep(0.7, length = 50)
109 hi_exp_thresh_index <- rep(0.5, length = 50)
110 }
111
112 if (opt$lowexpthresh != "default_low_thresholds") {
113 # read low thresholds
114 print("Using custom low expression thresholds")
115 low_expression_thresholds <- read.csv(opt$highexpthresh)
116 low_exp_thresh_anchor <- low_expression_thresholds$anchor
117 low_exp_thresh_index <- low_expression_thresholds$index
118 } else {
119 print("Using default low expression thresholds")
120 low_exp_thresh_anchor <- rep(0.9, length = 50)
121 low_exp_thresh_index <- rep(1, length = 50)
122 }
123
124 # run cell type assignment
125 CelestaObj <- AssignCells(CelestaObj,
126 max_iteration = opt$maxiteration,
127 cell_change_threshold = opt$changethresh,
128 high_expression_threshold_anchor = hi_exp_thresh_anchor,
129 low_expression_threshold_anchor = low_exp_thresh_anchor,
130 high_expression_threshold_index = hi_exp_thresh_index,
131 low_expression_threshold_index = low_exp_thresh_index,
132 save_result = FALSE)
133
134 # save object as an RDS file for cell type plotting
135 # for the time being, this is not exposed to Galaxy
136 saveRDS(CelestaObj, file = "celestaobj.rds")
137
138 # rename celesta assignment columns so they are obvious in output anndata
139 celesta_assignments <- CelestaObj@final_cell_type_assignment
140 celesta_assignments <- janitor::clean_names(celesta_assignments)
141 colnames(celesta_assignments) <- paste0("celesta_", colnames(celesta_assignments))
142
143 # merge celesta assignments into anndata object
144 adata$obs <- cbind(adata$obs, celesta_assignments)
145
146 # print cell type value_counts to standard output
147 print("----------------------------------------")
148 print("Final cell type counts")
149 print(adata$obs %>% dplyr::count(celesta_final_cell_type, sort = TRUE))
150 print("----------------------------------------")
151
152 # write output anndata file
153 write_h5ad(adata, "result.h5ad")