diff Seurat.R @ 15:fab6ff46e019 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit b437a46efb50e543b6d7c9988f954efe2caa9046
author iuc
date Fri, 07 Jul 2023 01:43:02 +0000
parents c4db6ec33fec
children
line wrap: on
line diff
--- a/Seurat.R	Mon Nov 21 14:35:28 2022 +0000
+++ b/Seurat.R	Fri Jul 07 01:43:02 2023 +0000
@@ -8,19 +8,24 @@
 #'     low_thresholds: ""
 #'     high_thresholds: ""
 #'     numPCs: ""
-#'     cells_use: ""
 #'     resolution: ""
 #'     perplexity: ""
 #'     min_pct: ""
 #'     logfc_threshold: ""
+#'     end_step: ""
 #'     showcode: ""
 #'     warn: ""
 #'     varstate: ""
 #'     vlnfeat: ""
 #'     featplot: ""
 #'     PCplots: ""
-#'     tsne: ""
+#'     nmds: ""
 #'     heatmaps: ""
+#'     norm_out: ""
+#'     variable_out: ""
+#'     pca_out : ""
+#'     clusters_out: ""
+#'     markers_out: ""
 #' ---
 
 # nolint start
@@ -34,87 +39,148 @@
 vlnfeat <- as.logical(params$vlnfeat)
 featplot <- as.logical(params$featplot)
 pc_plots <- as.logical(params$PCplots)
-tsne <- as.logical(params$tsne)
+nmds <- as.logical(params$nmds)
 heatmaps <- as.logical(params$heatmaps)
-
+end_step <- as.integer(params$end_step)
+norm_out <- as.logical(params$norm_out)
 # we need that to not crash Galaxy with an UTF-8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 #+ echo = F, warning = `warn`, include =`varstate`
 min_cells <- as.integer(params$min_cells)
 min_genes <- as.integer(params$min_genes)
-low_thresholds <- as.integer(params$low_thresholds)
-high_thresholds <- as.integer(params$high_thresholds)
-num_pcs <- as.integer(params$numPCs)
-cells_use <- as.integer(params$cells_use)
-resolution <- as.double(params$resolution)
-perplexity <- as.integer(params$perplexity)
-min_pct <- as.double(params$min_pct)
-logfc_threshold <- as.double(params$logfc_thresh)
 print(paste0("Minimum cells: ", min_cells))
 print(paste0("Minimum features: ", min_genes))
+low_thresholds <- as.integer(params$low_thresholds)
+high_thresholds <- as.integer(params$high_thresholds)
 print(paste0("Umi low threshold: ", low_thresholds))
 print(paste0("Umi high threshold: ", high_thresholds))
-print(paste0("Number of principal components: ", num_pcs))
-print(paste0("Resolution: ", resolution))
-print(paste0("Perplexity: ", perplexity))
-print(paste0("Minimum percent of cells", min_pct))
-print(paste0("Logfold change threshold", logfc_threshold))
+
+if (end_step >= 2) {
+    variable_out <- as.logical(params$variable_out)
+}
+
 
-#+ echo = FALSE
+if (end_step >= 3) {
+    num_pcs <- as.integer(params$numPCs)
+    print(paste0("Number of principal components: ", num_pcs))
+    pca_out <- as.logical(params$pca_out)
+}
+if (end_step >= 4) {
+    if (params$perplexity == "") {
+       perplexity <- -1
+       print(paste0("Perplexity: ", perplexity))
+    } else { 
+        perplexity <- as.integer(params$perplexity)
+        print(paste0("Perplexity: ", perplexity))
+    }
+    resolution <- as.double(params$resolution)
+    print(paste0("Resolution: ", resolution))
+    clusters_out <- as.logical(params$clusters_out)
+}
+if (end_step >= 5) {
+    min_pct <- as.double(params$min_pct)
+    logfc_threshold <- as.double(params$logfc_thresh)
+    print(paste0("Minimum percent of cells", min_pct))
+    print(paste0("Logfold change threshold", logfc_threshold))
+    markers_out <- as.logical(params$markers_out)
+}
+
+
 if (showcode == TRUE) print("Read in data, generate inital Seurat object")
 #+ echo = `showcode`, warning = `warn`, message = F
 counts <- read.delim(params$counts, row.names = 1)
 seuset <- Seurat::CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_genes)
 
-#+ echo = FALSE
 if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization")
 #+ echo = `showcode`, warning = `warn`, include=`vlnfeat`
-Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA"))
-Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
+if (vlnfeat == TRUE){
+    print(Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA")))
+    print(Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA"))
+}
 
-#+ echo = FALSE
 if (showcode == TRUE) print("Filter and normalize for UMI counts")
 #+ echo = `showcode`, warning = `warn`
 seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds)
 seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000)
+if (norm_out == TRUE) {
+        saveRDS(seuset, "norm_out.rds")
+}
 
-#+ echo = FALSE
-if (showcode == TRUE && featplot == TRUE) print("Variable Genes")
-#+ echo = `showcode`, warning = `warn`, include = `featplot`
-seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp")
-Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp")
-seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA")
+if (end_step >= 2) {
+    #+ echo = FALSE
+    if (showcode == TRUE && featplot == TRUE) print("Variable Genes")
+    #+ echo = `showcode`, warning = `warn`, include = `featplot`
+    seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp")
+    if (featplot == TRUE) {
+        print(Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp"))
+    }
+    seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA")
+    if (variable_out == TRUE) {
+        saveRDS(seuset, "var_out.rds")
+    }
+}
 
-#+ echo = FALSE
-if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization")
-#+ echo = `showcode`, warning = `warn`, include = `pc_plots`
-seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
-Seurat::VizDimLoadings(seuset, dims = 1:2)
-Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca")
-Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca")
-seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
-seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
-Seurat::JackStrawPlot(seuset, dims = 1:num_pcs)
-Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca")
+if (end_step >= 3) {
+    #+ echo = FALSE
+    if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization")
+    #+ echo = `showcode`, warning = `warn`, include = `pc_plots`
+    seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
+    seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
+    seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
+    if (pc_plots == TRUE) {
+        print(Seurat::VizDimLoadings(seuset, dims = 1:2))
+        print(Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca"))
+        print(Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca"))
+        print(Seurat::JackStrawPlot(seuset, dims = 1:num_pcs))
+        print(Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca"))
+    }
+    if (pca_out == TRUE) {
+        saveRDS(seuset, "pca_out.rds")
+    }
+}
 
-#+ echo = FALSE
-if (showcode == TRUE && tsne == TRUE) print("tSNE")
-#+ echo = `showcode`, warning = `warn`, include = `tsne`
-seuset <- Seurat::FindNeighbors(object = seuset)
-seuset <- Seurat::FindClusters(object = seuset)
-if (perplexity == -1) {
-    seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution);
-} else {
-    seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity);
+if (end_step >= 4) {
+    #+ echo = FALSE
+    if (showcode == TRUE && nmds == TRUE) print("tSNE and UMAP")
+    #+ echo = `showcode`, warning = `warn`, include = `nmds`
+    seuset <- Seurat::FindNeighbors(object = seuset)
+    seuset <- Seurat::FindClusters(object = seuset)
+    if (perplexity == -1) {
+        seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution);
+    } else {
+        seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity);
+    }
+    if (nmds == TRUE) {
+        print(Seurat::DimPlot(seuset, reduction = "tsne"))
+    }
+    seuset <- Seurat::RunUMAP(seuset, dims = 1:num_pcs)
+    if (nmds == TRUE) {
+            print(Seurat::DimPlot(seuset, reduction = "umap"))
+    }
+    if (clusters_out == TRUE) {
+        tsnedata <- Seurat::Embeddings(seuset, reduction="tsne")
+        saveRDS(seuset, "tsne_out.rds")
+        umapdata <- Seurat::Embeddings(seuset, reduction="umap")
+        saveRDS(seuset, "umap_out.rds")
+    }
 }
-Seurat::DimPlot(seuset, reduction = "tsne")
+
 
-#+ echo = FALSE
-if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
-#+ echo = `showcode`, warning = `warn`, include = `heatmaps`
-markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
-top10 <- dplyr::group_by(markers, cluster)
-top10 <- dplyr::top_n(top10, 10, avg_log2FC)
-Seurat::DoHeatmap(seuset, features = top10$gene)
+if (end_step == 5) {
+    #+ echo = FALSE
+    if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
+    #+ echo = `showcode`, warning = `warn`, include = `heatmaps`
+    markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
+    top10 <- dplyr::group_by(markers, cluster)
+    top10 <- dplyr::top_n(top10, n = 10, wt = avg_log2FC)
+    print(top10)
+    if (heatmaps == TRUE) {
+        print(Seurat::DoHeatmap(seuset, features = top10$gene))
+    }
+    if (markers_out == TRUE) {
+        saveRDS(seuset, "markers_out.rds")
+        data.table::fwrite(x = markers, row.names=TRUE, sep="\t", file = "markers_out.tsv")
+    }
+}
 # nolint end