Galaxy |

Changeset 6:5e9ba303f9e1 (2024-03-02)

Previous changeset 5:d2f6eac0ad42 (2022-03-04)

Commit message:
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6

modified:
get_test_data.sh
seurat_macros.xml
seurat_scale_data.xml

added:
extra/macro_mapper_seurat.yaml
scripts/seurat-scale-data.R

diff -r d2f6eac0ad42 -r 5e9ba303f9e1 extra/macro_mapper_seurat.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extra/macro_mapper_seurat.yaml Sat Mar 02 10:42:19 2024 +0000

@@ -0,0 +1,90 @@
+---
+- option_group:
+    - input-object-file
+    - input-format
+  pre_command_macros:
+    - INPUT_OBJ_PREAMBLE
+  post_command_macros:
+    - INPUT_OBJECT
+  input_declaration_macros:
+    - input_object_params
+- option_group:
+    - output-object-file
+    - output-format
+  post_command_macros:
+    - OUTPUT_OBJECT
+  input_declaration_macros:
+    - output_object_params
+  output_declaration_macros:
+    - output_files
+- option_group:
+    - input-object-files
+    - input-format
+  pre_command_macros:
+    - INPUT_OBJS_PREAMBLE
+  post_command_macros:
+    - INPUT_OBJECTS
+  input_declaration_macros:
+    - input_object_params:
+        multiple: true
+- option_group:
+    - reference-object-files
+    - reference-format
+  pre_command_macros:
+    - REFERENCE_OBJS_PREAMBLE
+  post_command_macros:
+    - REFERENCE_OBJECTS
+  input_declaration_macros:
+    - input_object_params:
+        varname: reference
+        multiple: true
+        optional: true
+- option_group:
+    - reference-object-file
+    - reference-format
+  pre_command_macros:
+    - REFERENCE_OBJ_PREAMBLE
+  post_command_macros:
+    - REFERENCE_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: reference
+- option_group:
+    - anchors-object-file
+    - anchors-format
+  pre_command_macros:
+    - ANCHORS_OBJ_PREAMBLE
+  post_command_macros:
+    - ANCHORS_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: anchors
+- option_group:
+    - query-object-file
+    - query-format
+  pre_command_macros:
+    - QUERY_OBJ_PREAMBLE
+  post_command_macros:
+    - QUERY_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: query
+- option_group:
+    - plot-out
+  post_command_macros:
+    - OUTPUT_PLOT
+  output_declaration_macros:
+    - plot_output_files_format:
+        format: png
+    - plot_output_files_format:
+        format: pdf
+    - plot_output_files_format:
+        format: eps
+    - plot_output_files_format:
+        format: jpg
+    - plot_output_files_format:
+        format: ps
+    - plot_output_files_format:
+        format: tiff
+    - plot_output_files_format:
+        format: svg

diff -r d2f6eac0ad42 -r 5e9ba303f9e1 get_test_data.sh
--- a/get_test_data.sh Fri Mar 04 07:27:57 2022 +0000
+++ b/get_test_data.sh Sat Mar 02 10:42:19 2024 +0000

@@ -15,8 +15,23 @@

LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'
H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'
+
+H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp'
SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'

+# Seurat v4 exclusives
+IFNB_BASE_FILE='ifnb_'
+
+IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL'
+IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54'
+
+## Classify and UMAP mapping
+CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download'
+CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i'
+# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download'
+UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download'
+
+
function get_data {
local link=$1
local fname=$2
@@ -28,6 +43,7 @@
}

# get matrix data
+mkdir -p test-data
pushd test-data
get_data $MTX_LINK mtx.zip
unzip mtx.zip
@@ -49,3 +65,13 @@
get_data $LOOM_LINK $BASENAME_FILE"_loom.h5"
get_data $SCE_LINK $BASENAME_FILE"_sce.rds"
get_data $H5AD_LINK $BASENAME_FILE".h5ad"
+
+get_data $H5AD_SC182_LINK $BASENAME_FILE"_sc182.h5ad"
+
+get_data $IFNB_CTRL_INT_LINK $IFNB_BASE_FILE"ctrl_norm_fvg.rds"
+get_data $IFNB_STIM_INT_LINK $IFNB_BASE_FILE"stim_norm_fvg.rds"
+
+get_data $CLASSIFY_QUERY_LINK "Classify_query.rds"
+get_data $CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK "Classify_anchors.rds"
+get_data $UMAP_RESULT_OBJECT_LINK "UMAP_result_integrated.rds"
+

diff -r d2f6eac0ad42 -r 5e9ba303f9e1 scripts/seurat-scale-data.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/seurat-scale-data.R Sat Mar 02 10:42:19 2024 +0000

@@ -0,0 +1,165 @@
+#!/usr/bin/env Rscript
+
+# Load optparse, which we need to check inputs
+
+suppressPackageStartupMessages(require(optparse))
+
+# Load common functions
+
+suppressPackageStartupMessages(require(workflowscriptscommon))
+
+# Command-line options. After parsing, the script reads each one back from `opt` with the
+# hyphens written as underscores (e.g. --input-object-file is used as opt$input_object_file).
+option_list <- list(
+  make_option(
+    c("-i", "--input-object-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "File name in which the input object, in the format given by --input-format, may be found."
+  ),
+  make_option(
+    c("--input-format"),
+    action = "store",
+    default = "seurat",
+    type = 'character',
+    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
+  ),
+  make_option(
+    c("--output-format"),
+    action = "store",
+    default = "seurat",
+    type = 'character',
+    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
+  ),
+  make_option(
+    c("-e", "--genes-use"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
+  ),
+  make_option(
+    c("-v", "--vars-to-regress"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
+  ),
+  make_option(
+    c("-m", "--model-use"),
+    action = "store",
+    default = 'linear',
+    type = 'character',
+    help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
+  ),
+  make_option(
+    c("-u", "--use-umi"),
+    action = "store",
+    default = FALSE,
+    type = 'logical',
+    help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
+  ),
+  make_option(
+    c("-s", "--do-not-scale"),
+    action = "store_true",
+    default = FALSE,
+    type = 'logical',
+    help = "Skip the data scale."
+  ),
+  make_option(
+    c("-c", "--do-not-center"),
+    action = "store_true",
+    default = FALSE,
+    type = 'logical',
+    help = "Skip data centering."
+  ),
+  make_option(
+    c("-x", "--scale-max"),
+    action = "store",
+    default = 10,
+    type = 'double',
+    help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
+  ),
+  make_option(
+    c("-b", "--block-size"),
+    action = "store",
+    default = 1000,
+    type = 'integer',
+    help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
+  ),
+  make_option(
+    c("-d", "--min-cells-to-block"),
+    action = "store",
+    default = 1000,
+    type = 'integer',
+    help = "If object contains fewer than this number of cells, don't block for scaling calculations."
+  ),
+  make_option(
+    c("-n", "--check-for-norm"),
+    action = "store",
+    default = TRUE,
+    type = 'logical',
+    help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
+  ),
+  make_option(
+    c("-o", "--output-object-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "File name in which to store serialized R object of type 'Seurat'."
+  )
+)
+
+opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))
+
+# Check parameter values
+if (!file.exists(opt$input_object_file)) {
+  stop(paste('File', opt$input_object_file, 'does not exist'))
+}
+
+# genes_use stays NULL unless a gene-list file (one gene per line) was supplied
+genes_use <- NULL
+if (!is.null(opt$genes_use)) {
+  if (!file.exists(opt$genes_use)) {
+    stop(paste('Supplied genes file', opt$genes_use, 'does not exist'))
+  }
+  genes_use <- readLines(opt$genes_use)
+}
+
+# Split --vars-to-regress on commas; guarded because strsplit() errors on the NULL default
+if (!is.null(opt$vars_to_regress)) {
+  opt$vars_to_regress <- unlist(strsplit(opt$vars_to_regress, ","))
+}
+
+# Arguments are fine: load Seurat, then each converter on its own test (loom + SCE together need both)
+suppressPackageStartupMessages(require(Seurat))
+if (opt$input_format == "loom" || opt$output_format == "loom") {
+  suppressPackageStartupMessages(require(SeuratDisk))
+}
+if (opt$input_format == "singlecellexperiment" || opt$output_format == "singlecellexperiment") {
+  suppressPackageStartupMessages(require(scater))
+}
+
+# Read the input file, using the format named by --input-format
+
+seurat_object <- read_seurat4_object(input_path = opt$input_object_file, format = opt$input_format)
+# Possible refactor (might be useful): pass the arguments as a named list, see
+# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
+scaled_seurat_object <- ScaleData(seurat_object,
+                                  features = genes_use,
+                                  vars.to.regress = opt$vars_to_regress,
+                                  model.use = opt$model_use,
+                                  use.umi = opt$use_umi,
+                                  do.scale = !opt$do_not_scale,  # CLI flag is negative (--do-not-scale), so invert it
+                                  do.center = !opt$do_not_center,  # likewise for --do-not-center
+                                  scale.max = opt$scale_max,
+                                  block.size = opt$block_size,
+                                  min.cells.to.block = opt$min_cells_to_block,
+                                  verbose = FALSE)
+
+# NOTE(review): opt$check_for_norm is parsed from the command line but is not passed to ScaleData().
+# Write the scaled object to --output-object-file in the format named by --output-format
+write_seurat4_object(seurat_object = scaled_seurat_object,
+                     output_path = opt$output_object_file,
+                     format = opt$output_format)

diff -r d2f6eac0ad42 -r 5e9ba303f9e1 seurat_macros.xml
--- a/seurat_macros.xml Fri Mar 04 07:27:57 2022 +0000
+++ b/seurat_macros.xml Sat Mar 02 10:42:19 2024 +0000

[

b'@@ -1,7 +1,7 @@\n <?xml version="1.0"?>\n <macros>\n- <token name="@VERSION@">0.3.0</token>\n- <token name="@SEURAT_VERSION@">3.2.3</token>\n+ <token name="@VERSION@">4.0.0</token>\n+ <token name="@SEURAT_VERSION@">4.0.4</token>\n <xml name="requirements">\n <requirements>\n <requirement type="package" version="@VERSION@">seurat-scripts</requirement>\n@@ -13,44 +13,237 @@\n ]]></version_command>\n </xml>\n \n- <xml name="input_object_params">\n- <conditional name="input" label="Input format">\n- <param type="select" name="format" label="Choose the format of the input" help="RData, Loom or AnnData">\n+ <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False">\n+ <conditional name="@VARNAME@" label="Input format">\n+ <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData">\n <option value="rds_seurat" selected="true">RDS with a Seurat object</option>\n <option value="loom">Loom</option>\n+ <option value="h5seurat">Seurat HDF5</option>\n <option value="anndata">AnnData</option>\n <option value="rds_sce">RDS with a Single Cell Experiment object</option>\n </param>\n <when value="anndata">\n- <param type="data" name="anndata_file" label="AnnData file" help="The AnnData format provided by Scanpy" format="h5,h5ad"/>\n+ <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/>\n </when>\n <when value="loom">\n- <param type="data" name="loom_file" label="Loom file" help="Input as Loom v? 
file" format="h5,h5loom"/>\n+ <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/>\n </when>\n <when value="rds_seurat">\n- <param type="data" name="rds_seurat_file" label="RDS file" help="Input as RDS file with Seurat 3 object" format="rdata"/>\n+ <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/>\n </when>\n <when value="rds_sce">\n- <param type="data" name="rds_sce_file" label="RDS file" help="Input as RDS file with Single Cell Experiment object" format="rdata"/>\n+ <param type="data" name="rds_sce_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/>\n+ </when>\n+ <when value="h5seurat">\n+ <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME" format="h5"/>\n </when>\n </conditional>\n </xml>\n \n+ <token name="@INPUT_OBJ_PREAMBLE@">\n+ #if $input.format == \'loom\'\n+ ln -s \'$input.loom_file\' input.loom;\n+ #else if $input.format == \'h5seurat\'\n+ ln -s \'$input.h5seurat_file\' input.h5seurat;\n+ #else if $input.format == \'anndata\'\n+ ## it complains when using links for AnnData...\n+ cp \'$input.anndata_file\' input.h5ad;\n+ #end if\n+ </token>\n+\n <token name="@INPUT_OBJECT@">\n #if $input.format == "anndata"\n- --input-object-file \'$input.anndata_file\' --input-format anndata\n+ --input-object-file input.h5ad --input-format anndata\n #else if $input.format == "loom"\n- --input-object-file \'$input.loom_file\' --input-format loom\n+ --input-object-file input.loom --input-format loom\n #else if $input.format == "rds_seurat"\n --input-object-file \'$input.rds_seurat_file\' --input-format seurat\n #else if $input.format == 
"rds_sce"\n --input-object-file \'$input.rds_sce_file\' --input-format single'..b'urat\n #end if\n </token>\n \n <xml name="output_object_params">\n- <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment or Loom">\n+ <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom">\n <option value="rds_seurat" selected="true">RDS with a Seurat object</option>\n+ <option value="anndata">AnnData written by Seurat</option>\n <option value="loom">Loom</option>\n <option value="rds_sce">RDS with a Single Cell Experiment object</option>\n </param>\n@@ -63,6 +256,9 @@\n <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">\n <filter>format == \'rds_seurat\'</filter>\n </data>\n+ <data name="anndata_file" format="h5ad" label="${tool.name} on ${on_string}: AnnData from Seurat">\n+ <filter>format == \'anndata\'</filter>\n+ </data>\n <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">\n <filter>format == \'rds_sce\'</filter>\n </data>\n@@ -80,6 +276,30 @@\n #end if\n </token>\n \n+ <xml name="plot_output_files_format" token_format="png">\n+ <data label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot" name="plot_out_@FORMAT@" format=\'@FORMAT@\' >\n+ <filter>plot_format == \'@FORMAT@\'</filter>\n+ </data>\n+ </xml>\n+\n+ <token name="@OUTPUT_PLOT@">\n+ #if $plot_format == "png"\n+ --plot-out \'$plot_out_png\'\n+ #else if $plot_format == "pdf"\n+ --plot-out \'$plot_out_pdf\'\n+ #else if $plot_format == "eps"\n+ --plot-out \'$plot_out_eps\'\n+ #else if $plot_format == "ps"\n+ --plot-out \'$plot_out_ps\'\n+ #else if $plot_format == "jpg"\n+ --plot-out \'$plot_out_jpg\'\n+ #else if $plot_format == "tiff"\n+ --plot-out \'$plot_out_tiff\'\n+ #else if $plot_format == "svg"\n+ --plot-out \'$plot_out_svg\'\n+ #end if\n+ </token>\n+\n <xml 
name="genes-use-input">\n <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>\n </xml>\n@@ -96,6 +316,7 @@\n \n <token name="@VERSION_HISTORY@"><![CDATA[\n **Version history**\n+4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca.\n \n 3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduce convert method, improving format interconversion support.\n \n@@ -108,14 +329,15 @@\n Find clusters: removed dims-use, k-param, prune-snn.\n \n 2.3.1+galaxy0: Improved documentation and further exposition of all script\'s options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at\n-EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski(github.com/drosofff) and Lea Bellenger(github.com/bellenger-l).\n+EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l).\n \n-0.0.1: Initial contribution. Maria Doyle (github.com/mblue9).\n+0.0.1: Initial contribution. Maria Doyle (GitHub mblue9).\n ]]></token>\n \n \n <xml name="citations">\n <citations>\n+ <citation type="doi">10.1038/s41592-021-01102-w</citation>\n <citation type="doi">10.1038/nbt.4096</citation>\n <citation type="bibtex">\n @misc{r-seurat-scripts.git,\n@@ -127,7 +349,6 @@\n url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},\n }\n </citation>\n- <citation type="doi">10.1038/s41592-021-01102-w</citation>\n </citations>\n </xml>\n </macros>\n'

diff -r d2f6eac0ad42 -r 5e9ba303f9e1 seurat_scale_data.xml
--- a/seurat_scale_data.xml Fri Mar 04 07:27:57 2022 +0000
+++ b/seurat_scale_data.xml Sat Mar 02 10:42:19 2024 +0000

[

@@ -6,7 +6,7 @@
     <expand macro="requirements" />
     <expand macro="version" />
     <command detect_errors="exit_code"><![CDATA[
-seurat-scale-data.R
+Rscript '$__tool_directory__/scripts/'seurat-scale-data.R

@INPUT_OBJECT@
#if $vars: