diff citeseq_Seurat.R @ 15:fab6ff46e019 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit b437a46efb50e543b6d7c9988f954efe2caa9046
author iuc
date Fri, 07 Jul 2023 01:43:02 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/citeseq_Seurat.R	Fri Jul 07 01:43:02 2023 +0000
@@ -0,0 +1,260 @@
+#' ---
+#' title: "Seurat Cite-seq Analysis"
+#' author: "Performed using Galaxy"
+#' params:
+#'     rna: ""
+#'     prot: ""
+#'     min_cells: ""
+#'     min_genes: ""
+#'     low_thresholds: ""
+#'     high_thresholds: ""
+#'     numPCs: ""
+#'     resolution: ""
+#'     perplexity: ""
+#'     min_pct: ""
+#'     logfc_threshold: ""
+#'     showcode: ""
+#'     warn: ""
+#'     varstate: ""
+#'     vlnfeat: ""
+#'     featplot: ""
+#'     PCplots: ""
+#'     nmds: ""
+#'     heatmaps: ""
+#'     norm_out: ""
+#'     variable_out: ""
+#'     pca_out: ""
+#'     clusters_out: ""
+#'     markers_out: ""
+#'     cite_markers: ""
+#'     comparison: ""
+#'     feat_comp: ""
+#'     marker_compare: ""
+#'     top_x: ""
+#' ---
+
+# nolint start
+#+ echo = FALSE, warning = FALSE, message = FALSE
+# On any R error: write the message to stderr, then quit with status 1 (workspace not saved, .Last not run).
+options(show.error.messages = FALSE, error = function() {
+    cat(geterrmessage(), file = stderr()); q("no", 1, FALSE)
+})
+# we need that to not crash Galaxy with an UTF-8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# Report toggles and output switches, coerced to logical.
+showcode <- as.logical(params$showcode)
+warn <- as.logical(params$warn)
+varstate <- as.logical(params$varstate)
+vlnfeat <- as.logical(params$vlnfeat)
+featplot <- as.logical(params$featplot)
+pc_plots <- as.logical(params$PCplots)
+nmds <- as.logical(params$nmds)
+heatmaps <- as.logical(params$heatmaps)
+norm_out <- as.logical(params$norm_out)
+variable_out <- as.logical(params$variable_out)
+pca_out <- as.logical(params$pca_out)
+clusters_out <- as.logical(params$clusters_out)
+markers_out <- as.logical(params$markers_out)
+comparison <- as.logical(params$comparison)
+marker_compare <- as.logical(params$marker_compare)
+# Analysis settings; each parameter is read by its full declared name rather than relying on `$` partial matching.
+feature <- trimws(unlist(strsplit(as.character(params$feat_comp), ",")))  # comma-separated feature names
+top_x <- as.integer(params$top_x)
+min_cells <- as.integer(params$min_cells)
+min_genes <- as.integer(params$min_genes)
+low_thresholds <- as.integer(params$low_thresholds)
+high_thresholds <- as.integer(params$high_thresholds)
+num_pcs <- as.integer(params$numPCs)
+resolution <- as.double(params$resolution)
+min_pct <- as.double(params$min_pct)
+logfc_threshold <- as.double(params$logfc_threshold)
+
+# Echo the effective analysis parameters.
+print(paste0("Minimum cells: ", min_cells))
+print(paste0("Minimum features: ", min_genes))
+print(paste0("Umi low threshold: ", low_thresholds))
+print(paste0("Umi high threshold: ", high_thresholds))
+print(paste0("Number of principal components: ", num_pcs))
+print(paste0("Resolution: ", resolution))
+print(paste0("Minimum percent of cells: ", min_pct))
+print(paste0("Logfold change threshold: ", logfc_threshold))
+# An empty perplexity parameter is mapped to the sentinel -1, which the tSNE step checks for.
+perplexity <- -1
+if (params$perplexity != "") {
+    perplexity <- as.integer(params$perplexity)
+}
+print(paste0("Perplexity: ", perplexity))
+
+#+ echo = FALSE
+if (showcode == TRUE) print("Read in data, generate initial Seurat object")
+#+ echo = `showcode`, warning = `warn`, message = F
+# Read both count tables (first column becomes the row names) and build the Seurat object from the RNA counts.
+rna <- Seurat::CollapseSpeciesExpressionMatrix(read.delim(params$rna, row.names = 1))
+protein <- read.delim(params$prot, row.names = 1)
+if (!setequal(colnames(rna), colnames(protein))) stop("Columns do not match in input files")  # both tables must share the same column names
+seuset <- Seurat::CreateSeuratObject(counts = rna, min.cells = min_cells, min.features = min_genes)
+
+if (showcode == TRUE) print("Create the protein (ADT) assay")
+#+ echo = `showcode`, warning = `warn`, message = F
+prot_obj <- Seurat::CreateAssayObject(counts = protein)
+
+if (showcode == TRUE) print("Add the ADT assay to the Seurat object")
+#+ echo = `showcode`, warning = `warn`, message = F
+seuset[["ADT"]] <- prot_obj  # stored under the name "ADT", which later steps refer to
+
+if (showcode == TRUE) print("Set RNA as the default assay")
+#+ echo = `showcode`, warning = `warn`, message = F
+Seurat::DefaultAssay(seuset) <- "RNA"
+
+if (showcode && vlnfeat) print("Raw data vizualization")
+#+ echo = `showcode`, warning = `warn`, include=`vlnfeat`
+if (vlnfeat) {
+    print(Seurat::VlnPlot(seuset, features = c("nFeature_RNA", "nCount_RNA")))  # violin plots of both QC metrics
+    print(Seurat::FeatureScatter(seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA"))  # nCount_RNA vs nFeature_RNA
+}
+
+if (showcode) print("Filter and normalize for UMI counts")
+#+ echo = `showcode`, warning = `warn`
+# Keep cells whose nCount_RNA lies strictly between the two thresholds, then
+# log-normalize with a scale factor of 10000; optionally save the object.
+seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds)
+seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000)
+if (norm_out) saveRDS(seuset, "norm_out.rds")
+
+
+if (showcode && featplot) print("Variable Genes")
+#+ echo = `showcode`, warning = `warn`, include = `featplot`
+# Select variable features with the "mvp" method, optionally plot them, then
+# scale the data while regressing out nCount_RNA; optionally save the object.
+seuset <- Seurat::FindVariableFeatures(seuset, selection.method = "mvp")
+if (featplot) {
+    print(Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp"))
+}
+seuset <- Seurat::ScaleData(seuset, vars.to.regress = "nCount_RNA")
+if (variable_out) saveRDS(seuset, "var_out.rds")
+
+
+
+if (showcode && pc_plots) print("PCA Visualization")
+#+ echo = `showcode`, warning = `warn`, include = `pc_plots`
+# Run PCA with num_pcs components and score them with JackStraw; the
+# diagnostic plots are only printed when pc_plots is set.
+seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
+seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
+seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
+if (pc_plots) {
+    print(Seurat::VizDimLoadings(seuset, dims = 1:2))
+    print(Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca"))
+    print(Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 10, reduction = "pca"))
+    print(Seurat::JackStrawPlot(seuset, dims = 1:num_pcs))
+    print(Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca"))
+}
+if (pca_out) saveRDS(seuset, "pca_out.rds")
+
+
+
+if (showcode == TRUE && nmds == TRUE) print("tSNE and UMAP")
+#+ echo = `showcode`, warning = `warn`, include = `nmds`
+# Cluster the cells, then embed them with tSNE and UMAP. The user-supplied resolution goes to
+# FindClusters; it was previously passed to RunTSNE instead, so clustering always ran at the default.
+seuset <- Seurat::FindNeighbors(object = seuset)  # NOTE(review): uses FindNeighbors' default dims, not 1:num_pcs - confirm intended
+seuset <- Seurat::FindClusters(object = seuset, resolution = resolution)
+if (perplexity == -1) {
+    seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, check_duplicates = FALSE)  # -1 sentinel: no perplexity given
+} else {
+    seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, perplexity = perplexity, check_duplicates = FALSE)
+}
+if (nmds == TRUE) {
+    print(Seurat::DimPlot(seuset, reduction = "tsne"))
+}
+seuset <- Seurat::RunUMAP(seuset, dims = 1:num_pcs)
+if (nmds == TRUE) {
+    print(Seurat::DimPlot(seuset, reduction = "umap"))
+}
+if (clusters_out == TRUE) {  # both files receive the full Seurat object, as before
+    saveRDS(seuset, "tsne_out.rds")
+    saveRDS(seuset, "umap_out.rds")
+}
+
+if (showcode && heatmaps) print("Marker Genes")
+#+ echo = `showcode`, warning = `warn`, include = `heatmaps`
+# Find positive markers for all clusters and take the top 10 per cluster by avg_log2FC.
+markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
+top10 <- dplyr::top_n(dplyr::group_by(markers, cluster), n = 10, wt = avg_log2FC)
+print(top10)
+if (heatmaps) {
+    print(Seurat::DoHeatmap(seuset, features = top10$gene))
+}
+if (markers_out) {
+    saveRDS(seuset, "markers_out.rds")
+    data.table::fwrite(x = markers, file = "markers_out.tsv", sep = "\t", row.names = TRUE)
+}
+
+#+ echo = FALSE
+if (showcode == TRUE && comparison == TRUE) print("Compare")
+#+ echo = `showcode`, warning = `warn`, include = `comparison`
+# CLR-normalize the ADT assay (margin = 2). Naming the assay explicitly leaves
+# RNA as the default assay, so a single call replaces the former duplicate pair.
+seuset <- Seurat::NormalizeData(seuset, normalization.method = "CLR", margin = 2, assay = "ADT")
+if (comparison == TRUE) {
+  # For each requested feature: protein and RNA feature plots side by side,
+  # followed by one violin plot for the RNA key and one for the ADT key.
+  for (x in feature) {
+    Seurat::DefaultAssay(seuset) <- "ADT"
+    p1 <- Seurat::FeaturePlot(seuset, x, cols = c("lightgrey", "red")) + ggplot2::ggtitle(paste0("Protein:", " ", x))
+    Seurat::DefaultAssay(seuset) <- "RNA"
+    p2 <- Seurat::FeaturePlot(seuset, x) + ggplot2::ggtitle(paste0("RNA:", " ", x))
+    print(p1 | p2)
+    print(Seurat::VlnPlot(seuset, paste0("rna_", x)))
+    print(Seurat::VlnPlot(seuset, paste0("adt_", x)))
+  }
+}
+
+#+ echo = FALSE
+if (showcode == TRUE) print("Cite-seq")
+#+ echo = `showcode`, warning = `warn`, include = `marker_compare`
+# Print one FeatureScatter plot per (x, y) pair of feature names, titled
+# "<x> vs <y>"; `xs` drives the outer loop and `ys` the inner loop.
+scatter_pairs <- function(xs, ys) {
+  for (fx in xs) {
+    for (fy in ys) {
+      pair_plot <- Seurat::FeatureScatter(seuset, feature1 = fx, feature2 = fy) +
+        ggplot2::ggtitle(paste0(fx, " vs ", fy))
+      print(pair_plot)
+    }
+  }
+}
+
+# Pool the top 5 markers (by avg_log2FC) of every cluster, drop duplicate
+# names, and return at most `top_x` of them.
+top_marker_names <- function(marker_table) {
+  best <- dplyr::top_n(dplyr::group_by(marker_table, cluster), n = 5, avg_log2FC)
+  head(unique(as.data.frame(best)$gene), top_x)
+}
+
+# The marker tables feed only the outputs written below, so the marker
+# searches are skipped entirely when marker_compare is switched off.
+if (marker_compare == TRUE) {
+  rna_markers <- Seurat::FindAllMarkers(
+    seuset, only.pos = TRUE, min.pct = min_pct,
+    logfc.threshold = logfc_threshold, assay = "RNA"
+  )
+  protein_markers <- Seurat::FindAllMarkers(
+    seuset, only.pos = TRUE, min.pct = min_pct,
+    logfc.threshold = logfc_threshold, assay = "ADT"
+  )
+  data.table::fwrite(x = rna_markers, sep = "\t", file = "rna_out.tsv")
+  data.table::fwrite(x = protein_markers, sep = "\t", file = "protein_out.tsv")
+
+  # sprintf() returns an empty vector for an empty marker list (paste0() would return the bare prefix).
+  rna_labels <- sprintf("rna_%s", top_marker_names(rna_markers))
+  prot_labels <- sprintf("adt_%s", top_marker_names(protein_markers))
+  pdf(file = "citeseq_out.pdf")
+  scatter_pairs(rna_labels, prot_labels)   # RNA vs protein
+  scatter_pairs(rna_labels, rna_labels)    # RNA vs RNA
+  scatter_pairs(prot_labels, prot_labels)  # protein vs protein
+  dev.off()
+}
+
+# nolint end