comparison citeseq_Seurat.R @ 15:fab6ff46e019 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit b437a46efb50e543b6d7c9988f954efe2caa9046
author iuc
date Fri, 07 Jul 2023 01:43:02 +0000
parents
children
comparison
equal deleted inserted replaced
14:c0fd285eb553 15:fab6ff46e019
1 #' ---
2 #' title: "Seurat Cite-seq Analysis"
3 #' author: "Performed using Galaxy"
4 #' params:
5 #' rna: ""
6 #' prot: ""
7 #' min_cells: ""
8 #' min_genes: ""
9 #' low_thresholds: ""
10 #' high_thresholds: ""
11 #' numPCs: ""
12 #' resolution: ""
13 #' perplexity: ""
14 #' min_pct: ""
15 #' logfc_threshold: ""
16 #' showcode: ""
17 #' warn: ""
18 #' varstate: ""
19 #' vlnfeat: ""
20 #' featplot: ""
21 #' PCplots: ""
22 #' nmds: ""
23 #' heatmaps: ""
24 #' norm_out: ""
25 #' variable_out: ""
26 #' pca_out : ""
27 #' clusters_out: ""
28 #' markers_out: ""
29 #' cite_markers: ""
30 #' comparison: ""
31 #' feat_comp: ""
32 #' marker_compare: ""
33 #' top_x: ""
34 #' ---
35
36 # nolint start
37 #+ echo=F, warning = F, message=F
38 options(show.error.messages = F, error = function() {
39 cat(geterrmessage(), file = stderr()); q("no", 1, F)
40 })
41
42 # we need that to not crash Galaxy with an UTF-8 error on German LC settings.
43 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
44
45 showcode <- as.logical(params$showcode)
46 warn <- as.logical(params$warn)
47 varstate <- as.logical(params$varstate)
48 vlnfeat <- as.logical(params$vlnfeat)
49 featplot <- as.logical(params$featplot)
50 pc_plots <- as.logical(params$PCplots)
51 nmds <- as.logical(params$nmds)
52 heatmaps <- as.logical(params$heatmaps)
53 end_step <- as.integer(params$end_step)
54 norm_out <- as.logical(params$norm_out)
55 comparison <- as.logical(params$comparison)
56 feature <- trimws(unlist(strsplit(as.character(params$feat_comp), ",")))
57 marker_compare <- as.logical(params$marker_compare)
58 top_x <- as.integer(params$top_x)
59 min_cells <- as.integer(params$min_cells)
60 min_genes <- as.integer(params$min_genes)
61 low_thresholds <- as.integer(params$low_thresholds)
62 high_thresholds <- as.integer(params$high_thresholds)
63 num_pcs <- as.integer(params$numPCs)
64 cells_use <- as.integer(params$cells_use)
65 resolution <- as.double(params$resolution)
66 min_pct <- as.double(params$min_pct)
67 logfc_threshold <- as.double(params$logfc_thresh)
68 variable_out <- as.logical(params$variable_out)
69 pca_out <- as.logical(params$pca_out)
70 clusters_out <- as.logical(params$clusters_out)
71 markers_out <- as.logical(params$markers_out)
72
73 print(paste0("Minimum cells: ", min_cells))
74 print(paste0("Minimum features: ", min_genes))
75 print(paste0("Umi low threshold: ", low_thresholds))
76 print(paste0("Umi high threshold: ", high_thresholds))
77 print(paste0("Number of principal components: ", num_pcs))
78 print(paste0("Resolution: ", resolution))
79 print(paste0("Minimum percent of cells", min_pct))
80 print(paste0("Logfold change threshold", logfc_threshold))
81 if (params$perplexity == "") {
82 perplexity <- -1
83 print(paste0("Perplexity: ", perplexity))
84 } else {
85 perplexity <- as.integer(params$perplexity)
86 print(paste0("Perplexity: ", perplexity))
87 }
88
89 #+ echo = FALSE
90 if (showcode == TRUE) print("Read in data, generate inital Seurat object")
91 #+ echo = `showcode`, warning = `warn`, message = F
92 rna <- read.delim(params$rna, row.names = 1)
93 rna <- Seurat::CollapseSpeciesExpressionMatrix(rna)
94 protein <- read.delim(params$prot, row.names = 1)
95 tryCatch(all.equal(colnames(rna), colnames(protein)), error = "Columns do not match in input files")
96 seuset <- Seurat::CreateSeuratObject(counts = rna, min.cells = min_cells, min.features = min_genes)
97
98 if (showcode == TRUE) print("asdf")
99 #+ echo = `showcode`, warning = `warn`, message = F
100 prot_obj <- Seurat::CreateAssayObject(counts = protein)
101
102 if (showcode == TRUE) print("qwer")
103 #+ echo = `showcode`, warning = `warn`, message = F
104 seuset[["ADT"]] <- prot_obj
105
106 if (showcode == TRUE) print("zxcv")
107 #+ echo = `showcode`, warning = `warn`, message = F
108 Seurat::DefaultAssay(seuset) <- "RNA"
109
110 if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization")
111 #+ echo = `showcode`, warning = `warn`, include=`vlnfeat`
112 if (vlnfeat == TRUE){
113 print(Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA")))
114 print(Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA"))
115 }
116
117 if (showcode == TRUE) print("Filter and normalize for UMI counts")
118 #+ echo = `showcode`, warning = `warn`
119 seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds)
120 seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000)
121 if (norm_out == TRUE) {
122 saveRDS(seuset, "norm_out.rds")
123 }
124
125
126 if (showcode == TRUE && featplot == TRUE) print("Variable Genes")
127 #+ echo = `showcode`, warning = `warn`, include = `featplot`
128 seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp")
129 if (featplot == TRUE) {
130 print(Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp"))
131 }
132 seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA")
133 if (variable_out == TRUE) {
134 saveRDS(seuset, "var_out.rds")
135 }
136
137
138
139 if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization")
140 #+ echo = `showcode`, warning = `warn`, include = `pc_plots`
141 seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
142 seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
143 seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
144 if (pc_plots == TRUE) {
145 print(Seurat::VizDimLoadings(seuset, dims = 1:2))
146 print(Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca"))
147 print(Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 10, reduction = "pca"))
148 print(Seurat::JackStrawPlot(seuset, dims = 1:num_pcs))
149 print(Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca"))
150 }
151 if (pca_out == TRUE) {
152 saveRDS(seuset, "pca_out.rds")
153 }
154
155
156
157 if (showcode == TRUE && nmds == TRUE) print("tSNE and UMAP")
158 #+ echo = `showcode`, warning = `warn`, include = `nmds`
159 seuset <- Seurat::FindNeighbors(object = seuset)
160 seuset <- Seurat::FindClusters(object = seuset)
161 if (perplexity == -1) {
162 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, check_duplicates = FALSE)
163 } else {
164 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity, check_duplicates = FALSE)
165 }
166 if (nmds == TRUE) {
167 print(Seurat::DimPlot(seuset, reduction = "tsne"))
168 }
169 seuset <- Seurat::RunUMAP(seuset, dims = 1:num_pcs)
170 if (nmds == TRUE) {
171 print(Seurat::DimPlot(seuset, reduction = "umap"))
172 }
173 if (clusters_out == TRUE) {
174 tsnedata <- Seurat::Embeddings(seuset, reduction="tsne")
175 saveRDS(seuset, "tsne_out.rds")
176 umapdata <- Seurat::Embeddings(seuset, reduction="umap")
177 saveRDS(seuset, "umap_out.rds")
178 }
179
180 if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
181 #+ echo = `showcode`, warning = `warn`, include = `heatmaps`
182 markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
183 top10 <- dplyr::group_by(markers, cluster)
184 top10 <- dplyr::top_n(top10, n = 10, wt = avg_log2FC)
185 print(top10)
186 if (heatmaps == TRUE) {
187 print(Seurat::DoHeatmap(seuset, features = top10$gene))
188 }
189 if (markers_out == TRUE) {
190 saveRDS(seuset, "markers_out.rds")
191 data.table::fwrite(x = markers, row.names=TRUE, sep="\t", file = "markers_out.tsv")
192 }
193
194 #+ echo = FALSE
195 if (showcode == TRUE && comparison == TRUE) print("Compare")
196 #+ echo = `showcode`, warning = `warn`, include = `comparison`
197 Seurat::DefaultAssay(seuset) <- "ADT"
198 seuset <- Seurat::NormalizeData(seuset, normalization.method = "CLR", margin = 2)
199 Seurat::DefaultAssay(seuset) <- "RNA"
200 seuset <- Seurat::NormalizeData(seuset, normalization.method = "CLR", margin = 2, assay = "ADT")
201 if (comparison == TRUE) {
202 for(x in feature) {
203 Seurat::DefaultAssay(seuset) <- "ADT"
204 p1 <- Seurat::FeaturePlot(seuset, x, cols = c("lightgrey", "red")) + ggplot2::ggtitle(paste0("Protein:", " ", x))
205 Seurat::DefaultAssay(seuset) <- "RNA"
206 p2 <- Seurat::FeaturePlot(seuset, x) + ggplot2::ggtitle(paste0("RNA:", " ", x))
207 print(p1 | p2)
208 label <- as.character(paste0(Seurat::Key(seuset[["ADT"]]), x))
209 print(Seurat::VlnPlot(seuset, paste0("rna_", x)))
210 print(Seurat::VlnPlot(seuset, paste0("adt_", x)))
211 }
212 }
213
214 #+ echo = FALSE
215 if (showcode == TRUE) print("Cite-seq")
216 #+ echo = `showcode`, warning = `warn`, include = `marker_compare`
217 rna_markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold, assay="RNA")
218 protein_markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold, assay="ADT")
219 if (marker_compare == TRUE) {
220 data.table::fwrite(x = rna_markers, sep="\t", file = "rna_out.tsv")
221 data.table::fwrite(x = protein_markers, sep="\t", file = "protein_out.tsv")
222 }
223 toprna <- dplyr::top_n(dplyr::group_by(rna_markers, cluster), n=5, avg_log2FC)
224 toprna <- head(as.list(unique(as.data.frame(toprna)$gene)), top_x)
225 topprot <- dplyr::top_n(dplyr::group_by(protein_markers, cluster), n=5, avg_log2FC)
226 topprot <- head(as.list(unique(as.data.frame(topprot)$gene)), top_x)
227 if(marker_compare == TRUE) {
228 pdf(file="citeseq_out.pdf")
229 rna_labels <- as.vector(toprna)
230 rna_labels <- rna_labels[!duplicated(rna_labels)]
231 prot_labels <- as.vector(topprot)
232 prot_labels <- prot_labels[!duplicated(prot_labels)]
233 for(rnamarker in rna_labels) {
234 rnamarker <- paste("rna_", rnamarker, sep = "")
235 for(protmarker in prot_labels) {
236 protmarker <- paste("adt_", protmarker, sep="")
237 plot <- Seurat::FeatureScatter(seuset, feature1 = rnamarker, feature2 = protmarker) + ggplot2::ggtitle(paste0(rnamarker, " vs ", protmarker))
238 print(plot)
239 }
240 }
241 for(rnamarker in rna_labels) {
242 rnamarker <- paste("rna_", rnamarker, sep = "")
243 for(rnamarker2 in rna_labels) {
244 rnamarker2 <- paste("rna_", rnamarker2, sep="")
245 plot <- Seurat::FeatureScatter(seuset, feature1 = rnamarker, feature2 = rnamarker2) + ggplot2::ggtitle(paste0(rnamarker, " vs ", rnamarker2))
246 print(plot)
247 }
248 }
249 for(protmarker in prot_labels) {
250 protmarker <- paste("adt_", protmarker, sep = "")
251 for(protmarker2 in prot_labels) {
252 protmarker2 <- paste("adt_", protmarker2, sep="")
253 plot <- Seurat::FeatureScatter(seuset, feature1 = protmarker, feature2 = protmarker2) + ggplot2::ggtitle(paste0(protmarker, " vs ", protmarker2))
254 print(plot)
255 }
256 }
257 dev.off()
258 }
259
260 # nolint end