Mercurial > repos > iuc > seurat
comparison seurat.R @ 0:8d8412d35247 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit 24c0223b9baa6d59bba381ef94f7e77b1c204d80
author | iuc |
---|---|
date | Sun, 26 Aug 2018 16:24:02 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8d8412d35247 |
---|---|
1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
2 | |
3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
5 | |
6 suppressPackageStartupMessages({ | |
7 library(Seurat) | |
8 library(SingleCellExperiment) | |
9 library(dplyr) | |
10 library(optparse) | |
11 }) | |
12 | |
13 option_list <- list( | |
14 make_option(c("-counts","--counts"), type="character", help="Counts file"), | |
15 make_option(c("-numPCs","--numPCs"), type="integer", help="Number of PCs to use in plots"), | |
16 make_option(c("-min.cells","--min.cells"), type="integer", help="Minimum cells to include"), | |
17 make_option(c("-min.genes","--min.genes"), type="integer", help="Minimum genes to include"), | |
18 make_option(c("-low.thresholds","--low.thresholds"), type="double", help="Low threshold for filtering cells"), | |
19 make_option(c("-high.thresholds","--high.thresholds"), type="double", help="High threshold for filtering cells"), | |
20 make_option(c("-x.low.cutoff","--x.low.cutoff"), type="double", help="X-axis low cutoff for variable genes"), | |
21 make_option(c("-x.high.cutoff","--x.high.cutoff"), type="double", help="X-axis high cutoff for variable genes"), | |
22 make_option(c("-y.cutoff","--y.cutoff"), type="double", help="Y-axis cutoff for variable genes"), | |
23 make_option(c("-cells.use","--cells.use"), type="integer", help="Cells to use for PCHeatmap"), | |
24 make_option(c("-resolution","--resolution"), type="double", help="Resolution in FindClusters"), | |
25 make_option(c("-min.pct","--min.pct"), type="double", help="Minimum percent cells in FindClusters"), | |
26 make_option(c("-logfc.threshold","--logfc.threshold"), type="double", help="LogFC threshold in FindClusters"), | |
27 make_option(c("-rds","--rds"), type="logical", help="Output Seurat RDS object") | |
28 ) | |
29 | |
30 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | |
31 args = parse_args(parser) | |
32 | |
33 counts <- read.delim(args$counts, row.names=1) | |
34 seuset <- CreateSeuratObject(raw.data = counts, min.cells = args$min.cells, min.genes = args$min.cells) | |
35 | |
36 # Open PDF for plots | |
37 pdf("out.pdf") | |
38 | |
39 VlnPlot(object = seuset, features.plot = c("nGene", "nUMI"), nCol = 2) | |
40 GenePlot(object = seuset, gene1 = "nUMI", gene2 = "nGene") | |
41 | |
42 print("Filtering cells") | |
43 if (!is.null(args$low.thresholds)){ | |
44 lowthresh <- args$low.thresholds | |
45 } else { | |
46 lowthresh <- "-Inf" | |
47 } | |
48 if (!is.null(args$high.thresholds)){ | |
49 highthresh <- args$high.thresholds | |
50 } else { | |
51 highthresh <- "Inf" | |
52 } | |
53 seuset <- FilterCells(object = seuset, subset.names = c("nUMI"), | |
54 low.thresholds=c(lowthresh), high.thresholds = c(highthresh)) | |
55 | |
56 print("Normalizing the data") | |
57 seuset <- NormalizeData(object = seuset, normalization.method = "LogNormalize", | |
58 scale.factor = 10000) | |
59 | |
60 print("Finding variable genes") | |
61 seuset <- FindVariableGenes(object = seuset, mean.function = ExpMean, | |
62 dispersion.function = LogVMR, | |
63 x.low.cutoff = args$x.low.cutoff, | |
64 x.high.cutoff = args$x.high.cutoff,, | |
65 y.cutoff = args$y.cutoff | |
66 ) | |
67 | |
68 print("Scaling the data and removing unwanted sources of variation") | |
69 seuset <- ScaleData(object = seuset, vars.to.regress = c("nUMI")) | |
70 | |
71 print("Performing PCA analysis") | |
72 seuset <- RunPCA(object = seuset, pc.genes = seuset@var.genes) | |
73 VizPCA(object = seuset, pcs.use = 1:2) | |
74 PCAPlot(object = seuset, dim.1 = 1, dim.2 = 2) | |
75 PCHeatmap( | |
76 object = seuset, | |
77 pc.use = 1:args$numPCs, | |
78 cells.use = args$cell.use, | |
79 do.balanced = TRUE, | |
80 label.columns = FALSE, | |
81 use.full = FALSE | |
82 ) | |
83 | |
84 print("Determining statistically significant principal components") | |
85 seuset <- JackStraw(object = seuset, num.replicate = 100, display.progress= FALSE) | |
86 JackStrawPlot(object = seuset, PCs = 1:args$numPCs) | |
87 PCElbowPlot(object = seuset) | |
88 | |
89 print("Clustering the cells") | |
90 seuset <- FindClusters( | |
91 object = seuset, | |
92 reduction.type = "pca", | |
93 dims.use = 1:args$numPCs, | |
94 resolution = args$resolution, | |
95 print.output = 0, | |
96 save.SNN = TRUE | |
97 ) | |
98 | |
99 print("Running non-linear dimensional reduction (tSNE)") | |
100 seuset <- RunTSNE(object = seuset, dims.use = 1:args$numPCs, do.fast = TRUE) | |
101 TSNEPlot(object = seuset) | |
102 | |
103 print("Finding differentially expressed genes (cluster biomarkers)") | |
104 markers <- FindAllMarkers(object = seuset, only.pos = TRUE, min.pct = args$min.pct, | |
105 logfc.threshold = args$logfc.threshold) | |
106 top10 <- markers %>% group_by(cluster) %>% top_n(10, avg_logFC) | |
107 DoHeatmap(object = seuset, genes.use = top10$gene, slim.col.label = TRUE, remove.key = TRUE) | |
108 | |
109 # Close PDF for plots | |
110 dev.off() | |
111 | |
112 if (!is.null(args$rds) ) { | |
113 saveRDS(seuset, "Seurat.rds") | |
114 } | |
115 | |
116 sessionInfo() |