Mercurial > repos > ebi-gxa > seurat_select_integration_features
comparison scripts/seurat-scale-data.R @ 0:699c0ca328f2 draft default tip
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
author | ebi-gxa |
---|---|
date | Sat, 02 Mar 2024 10:40:57 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:699c0ca328f2 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 # Load optparse we need to check inputs | |
4 | |
5 suppressPackageStartupMessages(require(optparse)) | |
6 | |
7 # Load common functions | |
8 | |
9 suppressPackageStartupMessages(require(workflowscriptscommon)) | |
10 | |
11 # parse options | |
12 | |
# Command-line interface of the ScaleData wrapper.
#
# Each entry declares one flag. Further down the parsed values are read from
# `opt` with dashes replaced by underscores (e.g. --vars-to-regress is read as
# opt$vars_to_regress). Options declared with `default = NULL` are NULL when
# the flag is not given.
option_list <- list(
  # --- input / output ------------------------------------------------------
  make_option(
    c("-i", "--input-object-file"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name in which a serialized R matrix object may be found."
  ),
  make_option(
    c("--input-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
  ),
  make_option(
    c("--output-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
  ),
  # --- what to scale and what to regress out -------------------------------
  make_option(
    c("-e", "--genes-use"),
    action = "store",
    default = NULL,
    type = "character",
    help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
  ),
  make_option(
    c("-v", "--vars-to-regress"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
  ),
  make_option(
    c("-m", "--model-use"),
    action = "store",
    default = "linear",
    type = "character",
    help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
  ),
  make_option(
    c("-u", "--use-umi"),
    action = "store",
    default = FALSE,
    type = "logical",
    help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
  ),
  # --- scaling behaviour ---------------------------------------------------
  # The two switches below are negated: passing the flag turns the step OFF.
  make_option(
    c("-s", "--do-not-scale"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "Skip the data scale."
  ),
  make_option(
    c("-c", "--do-not-center"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "Skip data centering."
  ),
  make_option(
    c("-x", "--scale-max"),
    action = "store",
    default = 10,
    type = "double",
    help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
  ),
  make_option(
    c("-b", "--block-size"),
    action = "store",
    default = 1000,
    type = "integer",
    help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
  ),
  make_option(
    c("-d", "--min-cells-to-block"),
    action = "store",
    default = 1000,
    type = "integer",
    help = "If object contains fewer than this number of cells, don't block for scaling calculations."
  ),
  # NOTE(review): --check-for-norm is accepted here but is not forwarded to
  # ScaleData() anywhere in the visible script. It is kept so that existing
  # command lines that pass it keep parsing.
  make_option(
    c("-n", "--check-for-norm"),
    action = "store",
    default = TRUE,
    type = "logical",
    help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
  ),
  make_option(
    c("-o", "--output-object-file"),
    action = "store",
    default = NA,
    type = "character",
    # Help text fix: the original string ended with a stray quote ('Seurat'.').
    help = "File name in which to store serialized R object of type 'Seurat'."
  )
)
113 | |
# Parse the command line; the input and output paths are declared mandatory.
opt <- wsc_parse_args(
  option_list,
  mandatory = c("input_object_file", "output_object_file")
)

# Validate the file arguments up front, before Seurat is loaded.

if (!file.exists(opt$input_object_file)) {
  stop(paste("File", opt$input_object_file, "does not exist"))
}

# Optional gene list: genes_use stays NULL unless --genes-use was supplied, in
# which case the file must exist and is read one entry per line.
genes_use <- NULL
if (!is.null(opt$genes_use)) {
  if (!file.exists(opt$genes_use)) {
    stop(paste("Supplied genes file", opt$genes_use, "does not exist"))
  }
  genes_use <- readLines(opt$genes_use)
}
131 | |
# Break the comma-separated --vars-to-regress value into a character vector.
#
# The option is declared with `default = NULL`, and strsplit() stops with
# "non-character argument" when handed NULL, so only split when a value was
# actually supplied. Left as NULL, it is passed on to ScaleData() unchanged.
if (!is.null(opt$vars_to_regress)) {
  opt$vars_to_regress <- unlist(strsplit(opt$vars_to_regress, ","))
}
134 | |
# Now we're happy with the arguments, load Seurat and do the work

suppressPackageStartupMessages(require(Seurat))

# Format-specific helper packages. The two checks are deliberately independent:
# chained as `if ... else if`, a run that mixes formats (e.g. loom in,
# singlecellexperiment out) would load SeuratDisk and silently skip scater.
# `%in%` also yields exactly one TRUE/FALSE, which is what `if` requires.
formats_in_use <- c(opt$input_format, opt$output_format)

if ("loom" %in% formats_in_use) {
  suppressPackageStartupMessages(require(SeuratDisk))
}
if ("singlecellexperiment" %in% formats_in_use) {
  suppressPackageStartupMessages(require(scater))
}
143 | |
# Read the input object in the format requested with --input-format

seurat_object <- read_seurat4_object(
  input_path = opt$input_object_file,
  format = opt$input_format
)

# Scale / centre the data, forwarding the parsed options one-to-one.
# The CLI exposes negated switches (--do-not-scale / --do-not-center), hence
# the `!` on do.scale and do.center.
# Passing a list of named parameters might be useful here, see
# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
scaled_seurat_object <- ScaleData(
  seurat_object,
  features = genes_use,
  vars.to.regress = opt$vars_to_regress,
  model.use = opt$model_use,
  use.umi = opt$use_umi,
  do.scale = !opt$do_not_scale,
  do.center = !opt$do_not_center,
  scale.max = opt$scale_max,
  block.size = opt$block_size,
  min.cells.to.block = opt$min_cells_to_block,
  verbose = FALSE
)

# Write the scaled object in the format requested with --output-format
write_seurat4_object(
  seurat_object = scaled_seurat_object,
  output_path = opt$output_object_file,
  format = opt$output_format
)