Mercurial > repos > goeckslab > celesta
comparison celesta_assign_cells.R @ 0:8001319743c0 draft
planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author | goeckslab |
---|---|
date | Wed, 28 Aug 2024 12:46:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8001319743c0 |
---|---|
1 # --------------------------------------------------------------------------------- | |
2 # The main algorithim for CELESTA cell type assignment | |
3 # --------------------------------------------------------------------------------- | |
4 | |
5 suppressWarnings(suppressMessages(library(janitor))) | |
6 suppressWarnings(suppressMessages(library(optparse))) | |
7 suppressWarnings(suppressMessages(library(dplyr))) | |
8 suppressWarnings(suppressMessages(library(anndataR))) | |
9 suppressWarnings(suppressMessages(library(Rmixmod))) | |
10 suppressWarnings(suppressMessages(library(spdep))) | |
11 suppressWarnings(suppressMessages(library(ggplot2))) | |
12 suppressWarnings(suppressMessages(library(reshape2))) | |
13 suppressWarnings(suppressMessages(library(zeallot))) | |
14 suppressWarnings(suppressMessages(library(CELESTA))) | |
15 | |
16 ### Define command line arguments | |
17 option_list <- list( | |
18 make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character", | |
19 help = "Path to imaging data"), | |
20 make_option(c("-p", "--prior"), action = "store", default = NA, type = "character", | |
21 help = "Path to prior marker info file"), | |
22 make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character", | |
23 help = "Name of column in adata.obs containing X coordinate"), | |
24 make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character", | |
25 help = "Name of column in adata.obs containing Y coordinate"), | |
26 make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, | |
27 help = "Boolean to filter cells or not (default: no filtering)"), | |
28 make_option(c("--highfilter"), action = "store", default = 0.9, type = "double", | |
29 help = "High marker threshold if filtering cells (default: 0.9)"), | |
30 make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double", | |
31 help = "Low marker threshold if filtering cells (default: 0.4)"), | |
32 make_option(c("--maxiteration"), action = "store", default = 10, type = "integer", | |
33 help = "Maximum iterations allowed in the EM algorithm per round"), | |
34 make_option(c("--changethresh"), action = "store", default = 0.01, type = "double", | |
35 help = "Ending condition for the EM algorithm"), | |
36 make_option(c("--highexpthresh"), action = "store", default = "default_high_thresholds", type = "character", | |
37 help = "Path to file specifying high expression thresholds for anchor and index cells"), | |
38 make_option(c("--lowexpthresh"), action = "store", default = "default_low_thresholds", type = "character", | |
39 help = "Path to file specifying low expression thresholds for anchor and index cells") | |
40 ) | |
41 | |
42 ### Functions | |
43 anndata_to_celesta <- function(input_adata, x_col, y_col) { | |
44 | |
45 #' Function to convert anndata object to dataframe readable by CELESTA | |
46 #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe | |
47 #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N | |
48 | |
49 # initialize output as dataframe from adata.obs | |
50 celesta_input_dataframe <- data.frame(input_adata$obs) | |
51 | |
52 # subset to X and Y coordinates from obs only | |
53 celesta_input_dataframe <- celesta_input_dataframe %>% | |
54 dplyr::select({{x_col}}, {{y_col}}) | |
55 | |
56 # rename X,Y column names to what CELESTA wants | |
57 colnames(celesta_input_dataframe) <- c("X", "Y") | |
58 | |
59 # merge X,Y coords with marker intensities from adata.X | |
60 x_df <- data.frame(input_adata$X) | |
61 colnames(x_df) <- input_adata$var_names | |
62 celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df) | |
63 | |
64 return(celesta_input_dataframe) | |
65 } | |
66 | |
67 ### Main | |
68 # parse args | |
69 opt <- parse_args(OptionParser(option_list = option_list)) | |
70 | |
71 # read anndata, convert to celesta format | |
72 adata <- read_h5ad(opt$imagingdata) | |
73 celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol) | |
74 | |
75 # read prior marker info | |
76 prior <- read.csv(opt$prior, check.names = FALSE) | |
77 | |
78 # clean prior names and input dataframe names | |
79 prior <- janitor::clean_names(prior, case = "all_caps") | |
80 celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps") | |
81 | |
82 # instantiate celesta object | |
83 CelestaObj <- CreateCelestaObject( | |
84 project_title = "", | |
85 prior_marker_info = prior, | |
86 imaging_data_file = celesta_input_df | |
87 ) | |
88 | |
89 # if filtering is specified, filter out cells outside high and low thresholds | |
90 if (opt$filter) { | |
91 print("filtering cells based on expression") | |
92 CelestaObj <- FilterCells(CelestaObj, | |
93 high_marker_threshold = opt$highfilter, | |
94 low_marker_threshold = opt$lowfilter) | |
95 } else { | |
96 print("Proceeding to cell type assignment without cell filtering") | |
97 } | |
98 | |
99 # check for non-default expression threshold files | |
100 if (opt$highexpthresh != "default_high_thresholds") { | |
101 # read high thresholds | |
102 print("Using custom high expression thresholds") | |
103 high_expression_thresholds <- read.csv(opt$highexpthresh) | |
104 hi_exp_thresh_anchor <- high_expression_thresholds$anchor | |
105 hi_exp_thresh_index <- high_expression_thresholds$index | |
106 } else { | |
107 print("Using default high expression thresholds -- this may need adjustment") | |
108 hi_exp_thresh_anchor <- rep(0.7, length = 50) | |
109 hi_exp_thresh_index <- rep(0.5, length = 50) | |
110 } | |
111 | |
112 if (opt$lowexpthresh != "default_low_thresholds") { | |
113 # read low thresholds | |
114 print("Using custom low expression thresholds") | |
115 low_expression_thresholds <- read.csv(opt$highexpthresh) | |
116 low_exp_thresh_anchor <- low_expression_thresholds$anchor | |
117 low_exp_thresh_index <- low_expression_thresholds$index | |
118 } else { | |
119 print("Using default low expression thresholds") | |
120 low_exp_thresh_anchor <- rep(0.9, length = 50) | |
121 low_exp_thresh_index <- rep(1, length = 50) | |
122 } | |
123 | |
124 # run cell type assignment | |
125 CelestaObj <- AssignCells(CelestaObj, | |
126 max_iteration = opt$maxiteration, | |
127 cell_change_threshold = opt$changethresh, | |
128 high_expression_threshold_anchor = hi_exp_thresh_anchor, | |
129 low_expression_threshold_anchor = low_exp_thresh_anchor, | |
130 high_expression_threshold_index = hi_exp_thresh_index, | |
131 low_expression_threshold_index = low_exp_thresh_index, | |
132 save_result = FALSE) | |
133 | |
134 # save object as an RDS file for cell type plotting | |
135 # for the time being, this is not exposed to Galaxy | |
136 saveRDS(CelestaObj, file = "celestaobj.rds") | |
137 | |
138 # rename celesta assignment columns so they are obvious in output anndata | |
139 celesta_assignments <- CelestaObj@final_cell_type_assignment | |
140 celesta_assignments <- janitor::clean_names(celesta_assignments) | |
141 colnames(celesta_assignments) <- paste0("celesta_", colnames(celesta_assignments)) | |
142 | |
143 # merge celesta assignments into anndata object | |
144 adata$obs <- cbind(adata$obs, celesta_assignments) | |
145 | |
146 # print cell type value_counts to standard output | |
147 print("----------------------------------------") | |
148 print("Final cell type counts") | |
149 print(adata$obs %>% dplyr::count(celesta_final_cell_type, sort = TRUE)) | |
150 print("----------------------------------------") | |
151 | |
152 # write output anndata file | |
153 write_h5ad(adata, "result.h5ad") |