comparison scripts/seurat-scale-data.R @ 6:5e9ba303f9e1 draft default tip

planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
author ebi-gxa
date Sat, 02 Mar 2024 10:42:19 +0000
parents
children
comparison
equal deleted inserted replaced
5:d2f6eac0ad42 6:5e9ba303f9e1
1 #!/usr/bin/env Rscript
2
3 # Load optparse we need to check inputs
4
5 suppressPackageStartupMessages(require(optparse))
6
7 # Load common functions
8
9 suppressPackageStartupMessages(require(workflowscriptscommon))
10
11 # parse options
12
# Command-line interface for the scaling step. Each make_option() entry below
# is read back later in this script from the parsed options with hyphens
# replaced by underscores (e.g. --input-object-file -> opt$input_object_file).
#
# NOTE(review): --check-for-norm is accepted here but is not forwarded to the
# ScaleData() call further down in this script; confirm whether it is still
# needed.
option_list <- list(
  # Path of the object to read.
  make_option(
    c("-i", "--input-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which a serialized R matrix object may be found."
  ),
  # On-disk format of the input object.
  make_option(
    c("--input-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
  ),
  # On-disk format of the output object.
  make_option(
    c("--output-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
  ),
  # Optional file restricting which genes are scaled.
  make_option(
    c("-e", "--genes-use"),
    action = "store",
    default = NULL,
    type = "character",
    help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
  ),
  # Optional comma-separated variable names; split into a vector later on.
  make_option(
    c("-v", "--vars-to-regress"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
  ),
  make_option(
    c("-m", "--model-use"),
    action = "store",
    default = "linear",
    type = "character",
    help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
  ),
  make_option(
    c("-u", "--use-umi"),
    action = "store",
    default = FALSE,
    type = "logical",
    help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
  ),
  # Negated switches: present on the command line means "skip this step".
  make_option(
    c("-s", "--do-not-scale"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "Skip the data scale."
  ),
  make_option(
    c("-c", "--do-not-center"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "Skip data centering."
  ),
  make_option(
    c("-x", "--scale-max"),
    action = "store",
    default = 10,
    type = "double",
    help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
  ),
  make_option(
    c("-b", "--block-size"),
    action = "store",
    default = 1000,
    type = "integer",
    help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
  ),
  make_option(
    c("-d", "--min-cells-to-block"),
    action = "store",
    default = 1000,
    type = "integer",
    help = "If object contains fewer than this number of cells, don't block for scaling calculations."
  ),
  make_option(
    c("-n", "--check-for-norm"),
    action = "store",
    default = TRUE,
    type = "logical",
    help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
  ),
  # Path the scaled object is written to.
  make_option(
    c("-o", "--output-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which to store serialized R object of type 'Seurat'."
  )
)
113
# Parse the command line, declaring the input and output object paths as
# mandatory to wsc_parse_args.
opt <- wsc_parse_args(
  option_list,
  mandatory = c("input_object_file", "output_object_file")
)

# Check parameter values

# The input object must already exist on disk.
if (!file.exists(opt$input_object_file)) {
  stop(paste("File", opt$input_object_file, "does not exist"))
}

# Optional gene list: stays NULL unless a file was supplied, in which case the
# file must exist and is read as one gene name per line.
genes_use <- NULL
if (!is.null(opt$genes_use)) {
  if (!file.exists(opt$genes_use)) {
    stop(paste("Supplied genes file", opt$genes_use, "does not exist"))
  }
  genes_use <- readLines(opt$genes_use)
}
131
# Break opt$vars_to_regress up into a character vector, one entry per
# comma-separated name.
#
# The option defaults to NULL, and strsplit() raises "non-character argument"
# on NULL, so only split when a value was actually supplied. Whitespace around
# each name is trimmed (so "nUMI, percent.mito" works) and empty entries are
# dropped; if nothing is left the option is reset to NULL.
regress_vars <- opt$vars_to_regress
if (!is.null(regress_vars)) {
  regress_vars <- trimws(unlist(strsplit(regress_vars, ",")))
  regress_vars <- regress_vars[nzchar(regress_vars)]
  if (length(regress_vars) == 0) {
    regress_vars <- NULL
  }
}
# Assigning NULL removes the entry, so opt$vars_to_regress then reads as NULL.
opt$vars_to_regress <- regress_vars
134
# Now we're happy with the arguments, load Seurat and do the work

# Seurat is needed unconditionally (ScaleData is called below), so use
# library(): it stops right here with a clear error if the package is missing,
# whereas require() would only return FALSE and let the script fail later.
suppressPackageStartupMessages(library(Seurat))

# Format-specific helper packages. The two checks are independent so that a
# run mixing formats (e.g. loom in, singlecellexperiment out) loads both.
# require() is kept for these so a missing package still only warns here.
formats_in_use <- c(opt$input_format, opt$output_format)
if ("loom" %in% formats_in_use) {
  suppressPackageStartupMessages(require(SeuratDisk))
}
if ("singlecellexperiment" %in% formats_in_use) {
  suppressPackageStartupMessages(require(scater))
}
143
# Input from serialized R object

seurat_object <- read_seurat4_object(
  input_path = opt$input_object_file,
  format = opt$input_format
)

# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
# might be useful

# The command line exposes negated switches (--do-not-scale, --do-not-center);
# invert them to obtain the positive do.scale / do.center flags.
scale_requested <- !opt$do_not_scale
center_requested <- !opt$do_not_center

# Run the scaling with every tunable taken from the parsed options; genes_use
# is NULL unless a gene file was supplied.
scaled_seurat_object <- ScaleData(
  seurat_object,
  features = genes_use,
  vars.to.regress = opt$vars_to_regress,
  model.use = opt$model_use,
  use.umi = opt$use_umi,
  do.scale = scale_requested,
  do.center = center_requested,
  scale.max = opt$scale_max,
  block.size = opt$block_size,
  min.cells.to.block = opt$min_cells_to_block,
  verbose = FALSE
)
160
161
# Output to a serialized R object: write the scaled object to the requested
# path in the requested output format.
write_seurat4_object(
  format = opt$output_format,
  output_path = opt$output_object_file,
  seurat_object = scaled_seurat_object
)