# HG changeset patch
# User ebi-gxa
# Date 1709376139 0
# Node ID 5e9ba303f9e14a502a9b771484ac3243f8407b93
# Parent d2f6eac0ad42486c8f8d31a47c1f5012aea66f05
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 extra/macro_mapper_seurat.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extra/macro_mapper_seurat.yaml Sat Mar 02 10:42:19 2024 +0000
@@ -0,0 +1,90 @@
+---
+- option_group:
+ - input-object-file
+ - input-format
+ pre_command_macros:
+ - INPUT_OBJ_PREAMBLE
+ post_command_macros:
+ - INPUT_OBJECT
+ input_declaration_macros:
+ - input_object_params
+- option_group:
+ - output-object-file
+ - output-format
+ post_command_macros:
+ - OUTPUT_OBJECT
+ input_declaration_macros:
+ - output_object_params
+ output_declaration_macros:
+ - output_files
+- option_group:
+ - input-object-files
+ - input-format
+ pre_command_macros:
+ - INPUT_OBJS_PREAMBLE
+ post_command_macros:
+ - INPUT_OBJECTS
+ input_declaration_macros:
+ - input_object_params:
+ multiple: true
+- option_group:
+ - reference-object-files
+ - reference-format
+ pre_command_macros:
+ - REFERENCE_OBJS_PREAMBLE
+ post_command_macros:
+ - REFERENCE_OBJECTS
+ input_declaration_macros:
+ - input_object_params:
+ varname: reference
+ multiple: true
+ optional: true
+- option_group:
+ - reference-object-file
+ - reference-format
+ pre_command_macros:
+ - REFERENCE_OBJ_PREAMBLE
+ post_command_macros:
+ - REFERENCE_OBJECT
+ input_declaration_macros:
+ - input_object_params:
+ varname: reference
+- option_group:
+ - anchors-object-file
+ - anchors-format
+ pre_command_macros:
+ - ANCHORS_OBJ_PREAMBLE
+ post_command_macros:
+ - ANCHORS_OBJECT
+ input_declaration_macros:
+ - input_object_params:
+ varname: anchors
+- option_group:
+ - query-object-file
+ - query-format
+ pre_command_macros:
+ - QUERY_OBJ_PREAMBLE
+ post_command_macros:
+ - QUERY_OBJECT
+ input_declaration_macros:
+ - input_object_params:
+ varname: query
+- option_group:
+ - plot-out
+ post_command_macros:
+ - OUTPUT_PLOT
+ output_declaration_macros:
+ - plot_output_files_format:
+ format: png
+ - plot_output_files_format:
+ format: pdf
+ - plot_output_files_format:
+ format: eps
+ - plot_output_files_format:
+ format: jpg
+ - plot_output_files_format:
+ format: ps
+ - plot_output_files_format:
+ format: tiff
+ - plot_output_files_format:
+ format: svg
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 get_test_data.sh
--- a/get_test_data.sh Fri Mar 04 07:27:57 2022 +0000
+++ b/get_test_data.sh Sat Mar 02 10:42:19 2024 +0000
@@ -15,8 +15,23 @@
LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'
H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'
+
+H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp'
SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'
+# Seurat v4 exclusives
+IFNB_BASE_FILE='ifnb_'
+
+IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL'
+IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54'
+
+## Classify and UMAP mapping
+CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download'
+CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i'
+# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download'
+UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download'
+
+
function get_data {
local link=$1
local fname=$2
@@ -28,6 +43,7 @@
}
# get matrix data
+mkdir -p test-data
pushd test-data
get_data $MTX_LINK mtx.zip
unzip mtx.zip
@@ -49,3 +65,13 @@
get_data $LOOM_LINK $BASENAME_FILE"_loom.h5"
get_data $SCE_LINK $BASENAME_FILE"_sce.rds"
get_data $H5AD_LINK $BASENAME_FILE".h5ad"
+# Fetch the additional test objects; arguments are quoted so URL characters such as '?' are never glob-expanded or word-split.
+get_data "$H5AD_SC182_LINK" "${BASENAME_FILE}_sc182.h5ad"
+
+get_data "$IFNB_CTRL_INT_LINK" "${IFNB_BASE_FILE}ctrl_norm_fvg.rds"
+get_data "$IFNB_STIM_INT_LINK" "${IFNB_BASE_FILE}stim_norm_fvg.rds"
+
+get_data "$CLASSIFY_QUERY_LINK" "Classify_query.rds"
+get_data "$CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK" "Classify_anchors.rds"
+get_data "$UMAP_RESULT_OBJECT_LINK" "UMAP_result_integrated.rds"
+
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 scripts/seurat-scale-data.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/seurat-scale-data.R Sat Mar 02 10:42:19 2024 +0000
@@ -0,0 +1,165 @@
+#!/usr/bin/env Rscript
+
+# Load optparse we need to check inputs
+
+suppressPackageStartupMessages(require(optparse))
+
+# Load common functions
+
+suppressPackageStartupMessages(require(workflowscriptscommon))
+
+# Command-line options for scaling/centering an object with Seurat's ScaleData().
+# Long flag names are read back below as opt$<name with hyphens replaced by underscores>.
+option_list = list(
+ make_option(
+ c("-i", "--input-object-file"),
+ action = "store",
+ default = NA,
+ type = 'character',
+ help = "File name of the input object, stored in the format given by --input-format."
+ ),
+ make_option(
+ c("--input-format"),
+ action = "store",
+ default = "seurat",
+ type = 'character',
+ help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
+ ),
+ make_option(
+ c("--output-format"),
+ action = "store",
+ default = "seurat",
+ type = 'character',
+ help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
+ ),
+ make_option(
+ c("-e", "--genes-use"),
+ action = "store",
+ default = NULL,
+ type = 'character',
+ help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
+ ),
+ make_option(
+ c("-v", "--vars-to-regress"),
+ action = "store",
+ default = NULL,
+ type = 'character',
+ help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
+ ),
+ make_option(
+ c("-m", "--model-use"),
+ action = "store",
+ default = 'linear',
+ type = 'character',
+ help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
+ ),
+ make_option(
+ c("-u", "--use-umi"),
+ action = "store",
+ default = FALSE,
+ type = 'logical',
+ help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
+ ),
+ make_option(
+ c("-s", "--do-not-scale"),
+ action = "store_true",
+ default = FALSE,
+ type = 'logical',
+ help = "Skip the data scale."
+ ),
+ make_option(
+ c("-c", "--do-not-center"),
+ action = "store_true",
+ default = FALSE,
+ type = 'logical',
+ help = "Skip data centering."
+ ),
+ make_option(
+ c("-x", "--scale-max"),
+ action = "store",
+ default = 10,
+ type = 'double',
+ help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
+ ),
+ make_option(
+ c("-b", "--block-size"),
+ action = "store",
+ default = 1000,
+ type = 'integer',
+ help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
+ ),
+ make_option(
+ c("-d", "--min-cells-to-block"),
+ action = "store",
+ default = 1000,
+ type = 'integer',
+ help = "If object contains fewer than this number of cells, don't block for scaling calculations."
+ ),
+ make_option( # NOTE(review): accepted on the command line but not forwarded to ScaleData() anywhere in this script
+ c("-n", "--check-for-norm"),
+ action = "store",
+ default = TRUE,
+ type = 'logical',
+ help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
+ ),
+ make_option(
+ c("-o", "--output-object-file"),
+ action = "store",
+ default = NA,
+ type = 'character',
+ help = "File name in which to store the output object, written in the format given by --output-format."
+ )
+)
+
+opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))
+
+# Check parameter values: the input object file must exist, and the optional
+# genes file, when supplied, must exist and is read one gene name per line.
+if ( ! file.exists(opt$input_object_file)){
+ stop((paste('File', opt$input_object_file, 'does not exist')))
+}
+
+if (! is.null(opt$genes_use)){
+ if (! file.exists(opt$genes_use)){
+ stop((paste('Supplied genes file', opt$genes_use, 'does not exist')))
+ }else{
+ genes_use <- readLines(opt$genes_use)
+ }
+}else{
+ genes_use <- NULL
+}
+# Break up opt$vars_to_regress into a vector if it has commas. Guarded because the
+# option defaults to NULL and strsplit(NULL, ",") stops with "non-character argument".
+if (! is.null(opt$vars_to_regress)) opt$vars_to_regress <- unlist(strsplit(opt$vars_to_regress, ","))
+
+# Now we're happy with the arguments, load Seurat plus whatever the chosen input/output formats need.
+suppressPackageStartupMessages(require(Seurat))
+if (opt$input_format == "loom" || opt$output_format == "loom") {
+ suppressPackageStartupMessages(require(SeuratDisk))
+}
+if (opt$input_format == "singlecellexperiment" || opt$output_format == "singlecellexperiment") { # separate check: loom and singlecellexperiment can both be in play
+ suppressPackageStartupMessages(require(scater))
+}
+
+# Read the input object in the requested format
+
+loaded_object <- read_seurat4_object(format = opt$input_format, input_path = opt$input_object_file)
+# Passing the parameters as a named list could simplify the call below, see
+# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
+scaled_object <- ScaleData(object = loaded_object,
+ features = genes_use,
+ vars.to.regress = opt$vars_to_regress,
+ model.use = opt$model_use,
+ use.umi = opt$use_umi,
+ scale.max = opt$scale_max,
+ do.scale = !opt$do_not_scale,
+ do.center = !opt$do_not_center,
+ min.cells.to.block = opt$min_cells_to_block,
+ block.size = opt$block_size,
+ verbose = FALSE)
+
+
+# Write the scaled object out in the requested format
+write_seurat4_object(format = opt$output_format,
+ seurat_object = scaled_object,
+ output_path = opt$output_object_file)
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 seurat_macros.xml
--- a/seurat_macros.xml Fri Mar 04 07:27:57 2022 +0000
+++ b/seurat_macros.xml Sat Mar 02 10:42:19 2024 +0000
@@ -1,7 +1,7 @@
- 0.3.0
- 3.2.3
+ 4.0.0
+ 4.0.4
seurat-scripts
@@ -13,44 +13,237 @@
]]>
-
-
-
+
+
+
+
-
+
-
+
-
+
-
+
+
+
+
+
+ #if $input.format == 'loom'
+ ln -s '$input.loom_file' input.loom;
+ #else if $input.format == 'h5seurat'
+ ln -s '$input.h5seurat_file' input.h5seurat;
+ #else if $input.format == 'anndata'
+ ## it complains when using links for AnnData...
+ cp '$input.anndata_file' input.h5ad;
+ #end if
+
+
#if $input.format == "anndata"
- --input-object-file '$input.anndata_file' --input-format anndata
+ --input-object-file input.h5ad --input-format anndata
#else if $input.format == "loom"
- --input-object-file '$input.loom_file' --input-format loom
+ --input-object-file input.loom --input-format loom
#else if $input.format == "rds_seurat"
--input-object-file '$input.rds_seurat_file' --input-format seurat
#else if $input.format == "rds_sce"
--input-object-file '$input.rds_sce_file' --input-format singlecellexperiment
+ #else if $input.format == "h5seurat"
+ --input-object-file input.h5seurat --input-format h5seurat
+ #end if
+
+
+
+ #if $query.format == 'loom'
+ ln -s '$query.loom_file' query.loom;
+ #else if $query.format == 'h5seurat'
+ ln -s '$query.h5seurat_file' query.h5seurat;
+ #else if $query.format == 'anndata'
+ ## it complains when using links for AnnData...
+ cp '$query.anndata_file' query.h5ad;
+ #end if
+
+
+
+ #if $query.format == "anndata"
+ --query-object-file query.h5ad --query-format anndata
+ #else if $query.format == "loom"
+ --query-object-file query.loom --query-format loom
+ #else if $query.format == "rds_seurat"
+ --query-object-file '$query.rds_seurat_file' --query-format seurat
+ #else if $query.format == "rds_sce"
+ --query-object-file '$query.rds_sce_file' --query-format singlecellexperiment
+ #else if $query.format == "h5seurat"
+ --query-object-file query.h5seurat --query-format h5seurat
+ #end if
+
+
+
+ #if $anchors.format == 'loom'
+ ln -s '$anchors.loom_file' anchors.loom;
+ #else if $anchors.format == 'h5seurat'
+ ln -s '$anchors.h5seurat_file' anchors.h5seurat;
+ #else if $anchors.format == 'anndata'
+ ## it complains when using links for AnnData...
+ cp '$anchors.anndata_file' anchors.h5ad;
+ #end if
+
+
+
+ #if $anchors.format == "anndata"
+ --anchors-object-file anchors.h5ad --anchors-format anndata
+ #else if $anchors.format == "loom"
+ --anchors-object-file anchors.loom --anchors-format loom
+ #else if $anchors.format == "rds_seurat"
+ --anchors-object-file '$anchors.rds_seurat_file' --anchors-format seurat
+ #else if $anchors.format == "rds_sce"
+ --anchors-object-file '$anchors.rds_sce_file' --anchors-format singlecellexperiment
+ #else if $anchors.format == "h5seurat"
+ --anchors-object-file anchors.h5seurat --anchors-format h5seurat
+ #end if
+
+
+
+ #if $reference.format == 'loom'
+ ln -s '$reference.loom_file' reference.loom;
+ #else if $reference.format == 'h5seurat'
+ ln -s '$reference.h5seurat_file' reference.h5seurat;
+ #else if $reference.format == 'anndata'
+ ## it complains when using links for AnnData...
+ cp '$reference.anndata_file' reference.h5ad;
+ #end if
+
+
+
+ #if $reference.format == "anndata"
+ --reference-object-file reference.h5ad --reference-format anndata
+ #else if $reference.format == "loom"
+ --reference-object-file reference.loom --reference-format loom
+ #else if $reference.format == "rds_seurat"
+ --reference-object-file '$reference.rds_seurat_file' --reference-format seurat
+ #else if $reference.format == "rds_sce"
+ --reference-object-file '$reference.rds_sce_file' --reference-format singlecellexperiment
+ #else if $reference.format == "h5seurat"
+ --reference-object-file reference.h5seurat --reference-format h5seurat
+ #end if
+
+
+
+ #if $input.format == 'loom'
+ #for $i, $fh in enumerate($input.loom_file):
+ ln -s '$fh' input.${i}.loom;
+ #end for
+ #else if $input.format == 'h5seurat'
+ #for $i, $fh in enumerate($input.h5seurat_file):
+ ln -s '$fh' input.${i}.h5seurat;
+ #end for
+ #else if $input.format == 'anndata'
+ ## it complains when using links for AnnData...
+ #for $i, $fh in enumerate($input.anndata_file):
+ cp '$fh' input.${i}.h5ad;
+ #end for
+ #end if
+
+
+
+ ## Pass all inputs as one comma-separated --input-object-files value; loom, anndata and h5seurat use the input.N.EXT names created by the matching preamble.
+ #if $input.format == "anndata"
+ --input-object-files
+ #set file_array = [ "input."+str($i)+".h5ad" for $i, $fh in enumerate($input.anndata_file)]
+ #set files = ",".join($file_array)
+ ${files}
+ --input-format anndata
+ #else if $input.format == "loom"
+ --input-object-files
+ #set file_array = [ "input."+str($i)+".loom" for $i, $fh in enumerate($input.loom_file)]
+ #set files = ",".join($file_array)
+ ${files}
+ --input-format loom
+ #else if $input.format == "rds_seurat"
+ --input-object-files
+ #set file_array = $input.rds_seurat_file
+ #set files = ",".join([ str($fh) for $fh in $file_array ])
+ ${files}
+ --input-format seurat
+ #else if $input.format == "rds_sce"
+ --input-object-files
+ #set file_array = $input.rds_sce_file
+ #set files = ",".join([ str($fh) for $fh in $file_array ])
+ ${files}
+ --input-format singlecellexperiment
+ #else if $input.format == "h5seurat"
+ --input-object-files
+ #set files = ",".join([ "input."+str($i)+".h5seurat" for $i, $fh in enumerate($input.h5seurat_file)])
+ ${files}
+ --input-format h5seurat
+ #end if
+
+
+
+ #if $reference.format == 'loom'
+ #for $i, $fh in enumerate($reference.loom_file):
+ ln -s '$fh' reference.${i}.loom;
+ #end for
+ #else if $reference.format == 'h5seurat'
+ #for $i, $fh in enumerate($reference.h5seurat_file):
+ ln -s '$fh' reference.${i}.h5seurat;
+ #end for
+ #else if $reference.format == 'anndata'
+ ## it complains when using links for AnnData...
+ #for $i, $fh in enumerate($reference.anndata_file):
+ cp '$fh' reference.${i}.h5ad;
+ #end for
+ #end if
+
+
+
+ ## Pass the reference objects as one comma-separated --reference-object-files value; each branch also requires a non-empty file selection, so nothing is emitted otherwise.
+ #if $reference.format == "anndata" and $reference.anndata_file:
+ --reference-object-files
+ #set file_array = [ "reference."+str($i)+".h5ad" for $i, $fh in enumerate($reference.anndata_file)]
+ #set files = ",".join($file_array)
+ ${files}
+ --reference-format anndata
+ #else if $reference.format == "loom" and $reference.loom_file:
+ --reference-object-files
+ #set file_array = [ "reference."+str($i)+".loom" for $i, $fh in enumerate($reference.loom_file)]
+ #set files = ",".join($file_array)
+ ${files}
+ --reference-format loom
+ #else if $reference.format == "rds_seurat" and $reference.rds_seurat_file:
+ --reference-object-files
+ #set files = ",".join([ str($fh) for $fh in $reference.rds_seurat_file ])
+ ${files}
+ --reference-format seurat
+ #else if $reference.format == "rds_sce" and $reference.rds_sce_file:
+ --reference-object-files
+ #set files = ",".join([ str($fh) for $fh in $reference.rds_sce_file ])
+ ${files}
+ --reference-format singlecellexperiment
+ #else if $reference.format == "h5seurat" and $reference.h5seurat_file:
+ --reference-object-files
+ #set files = ",".join([ "reference."+str($i)+".h5seurat" for $i, $fh in enumerate($reference.h5seurat_file)])
+ ${files}
+ --reference-format h5seurat
#end if
-
+
+
@@ -63,6 +256,9 @@
format == 'rds_seurat'
+
+ format == 'anndata'
+
format == 'rds_sce'
@@ -80,6 +276,30 @@
#end if
+
+
+ plot_format == '@FORMAT@'
+
+
+
+
+ #if $plot_format == "png"
+ --plot-out '$plot_out_png'
+ #else if $plot_format == "pdf"
+ --plot-out '$plot_out_pdf'
+ #else if $plot_format == "eps"
+ --plot-out '$plot_out_eps'
+ #else if $plot_format == "ps"
+ --plot-out '$plot_out_ps'
+ #else if $plot_format == "jpg"
+ --plot-out '$plot_out_jpg'
+ #else if $plot_format == "tiff"
+ --plot-out '$plot_out_tiff'
+ #else if $plot_format == "svg"
+ --plot-out '$plot_out_svg'
+ #end if
+
+
@@ -96,6 +316,7 @@
+ 10.1038/s41592-021-01102-w
10.1038/nbt.4096
@misc{r-seurat-scripts.git,
@@ -127,7 +349,6 @@
url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},
}
- 10.1038/s41592-021-01102-w
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 seurat_scale_data.xml
--- a/seurat_scale_data.xml Fri Mar 04 07:27:57 2022 +0000
+++ b/seurat_scale_data.xml Sat Mar 02 10:42:19 2024 +0000
@@ -6,7 +6,7 @@