Previous changeset 5:d2f6eac0ad42 (2022-03-04) |
Commit message:
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6 |
modified:
get_test_data.sh seurat_macros.xml seurat_scale_data.xml |
added:
extra/macro_mapper_seurat.yaml scripts/seurat-scale-data.R |
b |
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 extra/macro_mapper_seurat.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/macro_mapper_seurat.yaml Sat Mar 02 10:42:19 2024 +0000 |
b |
@@ -0,0 +1,90 @@ +--- +- option_group: + - input-object-file + - input-format + pre_command_macros: + - INPUT_OBJ_PREAMBLE + post_command_macros: + - INPUT_OBJECT + input_declaration_macros: + - input_object_params +- option_group: + - output-object-file + - output-format + post_command_macros: + - OUTPUT_OBJECT + input_declaration_macros: + - output_object_params + output_declaration_macros: + - output_files +- option_group: + - input-object-files + - input-format + pre_command_macros: + - INPUT_OBJS_PREAMBLE + post_command_macros: + - INPUT_OBJECTS + input_declaration_macros: + - input_object_params: + multiple: true +- option_group: + - reference-object-files + - reference-format + pre_command_macros: + - REFERENCE_OBJS_PREAMBLE + post_command_macros: + - REFERENCE_OBJECTS + input_declaration_macros: + - input_object_params: + varname: reference + multiple: true + optional: true +- option_group: + - reference-object-file + - reference-format + pre_command_macros: + - REFERENCE_OBJ_PREAMBLE + post_command_macros: + - REFERENCE_OBJECT + input_declaration_macros: + - input_object_params: + varname: reference +- option_group: + - anchors-object-file + - anchors-format + pre_command_macros: + - ANCHORS_OBJ_PREAMBLE + post_command_macros: + - ANCHORS_OBJECT + input_declaration_macros: + - input_object_params: + varname: anchors +- option_group: + - query-object-file + - query-format + pre_command_macros: + - QUERY_OBJ_PREAMBLE + post_command_macros: + - QUERY_OBJECT + input_declaration_macros: + - input_object_params: + varname: query +- option_group: + - plot-out + post_command_macros: + - OUTPUT_PLOT + output_declaration_macros: + - plot_output_files_format: + format: png + - plot_output_files_format: + format: pdf + - plot_output_files_format: + format: eps + - plot_output_files_format: + format: jpg + - plot_output_files_format: + format: ps + - plot_output_files_format: + format: tiff + - plot_output_files_format: + format: svg |
b |
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 get_test_data.sh --- a/get_test_data.sh Fri Mar 04 07:27:57 2022 +0000 +++ b/get_test_data.sh Sat Mar 02 10:42:19 2024 +0000 |
b |
@@ -15,8 +15,23 @@ LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX' H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz' + +H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp' SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe' +# Seurat v4 exclusives +IFNB_BASE_FILE='ifnb_' + +IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL' +IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54' + +## Classify and UMAP mapping +CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download' +CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i' +# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download' +UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download' + + function get_data { local link=$1 local fname=$2 @@ -28,6 +43,7 @@ } # get matrix data +mkdir -p test-data pushd test-data get_data $MTX_LINK mtx.zip unzip mtx.zip @@ -49,3 +65,13 @@ get_data $LOOM_LINK $BASENAME_FILE"_loom.h5" get_data $SCE_LINK $BASENAME_FILE"_sce.rds" get_data $H5AD_LINK $BASENAME_FILE".h5ad" + +get_data $H5AD_SC182_LINK $BASENAME_FILE"_sc182.h5ad" + +get_data $IFNB_CTRL_INT_LINK $IFNB_BASE_FILE"ctrl_norm_fvg.rds" +get_data $IFNB_STIM_INT_LINK $IFNB_BASE_FILE"stim_norm_fvg.rds" + +get_data $CLASSIFY_QUERY_LINK "Classify_query.rds" +get_data $CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK "Classify_anchors.rds" +get_data $UMAP_RESULT_OBJECT_LINK "UMAP_result_integrated.rds" + |
b |
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 scripts/seurat-scale-data.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/seurat-scale-data.R Sat Mar 02 10:42:19 2024 +0000 |
b |
@@ -0,0 +1,165 @@ +#!/usr/bin/env Rscript + +# Load optparse we need to check inputs + +suppressPackageStartupMessages(require(optparse)) + +# Load common functions + +suppressPackageStartupMessages(require(workflowscriptscommon)) + +# parse options + +option_list = list( + make_option( + c("-i", "--input-object-file"), + action = "store", + default = NA, + type = 'character', + help = "File name in which a serialized R matrix object may be found." + ), + make_option( + c("--input-format"), + action = "store", + default = "seurat", + type = 'character', + help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read." + ), + make_option( + c("--output-format"), + action = "store", + default = "seurat", + type = 'character', + help = "Either loom, seurat, anndata or singlecellexperiment for the output format." + ), + make_option( + c("-e", "--genes-use"), + action = "store", + default = NULL, + type = 'character', + help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data." + ), + make_option( + c("-v", "--vars-to-regress"), + action = "store", + default = NULL, + type = 'character', + help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito." + ), + make_option( + c("-m", "--model-use"), + action = "store", + default = 'linear', + type = 'character', + help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'." + ), + make_option( + c("-u", "--use-umi"), + action = "store", + default = FALSE, + type = 'logical', + help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'." + ), + make_option( + c("-s", "--do-not-scale"), + action = "store_true", + default = FALSE, + type = 'logical', + help = "Skip the data scale." 
+ ), + make_option( + c("-c", "--do-not-center"), + action = "store_true", + default = FALSE, + type = 'logical', + help = "Skip data centering." + ), + make_option( + c("-x", "--scale-max"), + action = "store", + default = 10, + type = 'double', + help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50." + ), + make_option( + c("-b", "--block-size"), + action = "store", + default = 1000, + type = 'integer', + help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost." + ), + make_option( + c("-d", "--min-cells-to-block"), + action = "store", + default = 1000, + type = 'integer', + help = "If object contains fewer than this number of cells, don't block for scaling calculations." + ), + make_option( + c("-n", "--check-for-norm"), + action = "store", + default = TRUE, + type = 'logical', + help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)." + ), + make_option( + c("-o", "--output-object-file"), + action = "store", + default = NA, + type = 'character', + help = "File name in which to store serialized R object of type 'Seurat'.'" + ) +) + +opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file')) + +# Check parameter values + +if ( ! file.exists(opt$input_object_file)){ + stop((paste('File', opt$input_object_file, 'does not exist'))) +} + +if (! is.null(opt$genes_use)){ + if (! 
file.exists(opt$genes_use)){ + stop((paste('Supplied genes file', opt$genes_use, 'does not exist'))) + }else{ + genes_use <- readLines(opt$genes_use) + } +}else{ + genes_use <- NULL +} + +# break up opt$vars_to_regress into a list if it has commas +opt$vars_to_regress <- unlist(strsplit(opt$vars_to_regress, ",")) + +# Now we're happy with the arguments, load Seurat and do the work + +suppressPackageStartupMessages(require(Seurat)) +if(opt$input_format == "loom" | opt$output_format == "loom") { + suppressPackageStartupMessages(require(SeuratDisk)) +} else if(opt$input_format == "singlecellexperiment" | opt$output_format == "singlecellexperiment") { + suppressPackageStartupMessages(require(scater)) +} + +# Input from serialized R object + +seurat_object <- read_seurat4_object(input_path = opt$input_object_file, format = opt$input_format) +# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function +# might be useful +scaled_seurat_object <- ScaleData(seurat_object, + features = genes_use, + vars.to.regress = opt$vars_to_regress, + model.use = opt$model_use, + use.umi = opt$use_umi, + do.scale = !opt$do_not_scale, + do.center = !opt$do_not_center, + scale.max = opt$scale_max, + block.size = opt$block_size, + min.cells.to.block = opt$min_cells_to_block, + verbose = FALSE) + + +# Output to a serialized R object +write_seurat4_object(seurat_object = scaled_seurat_object, + output_path = opt$output_object_file, + format = opt$output_format) |
b |
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 seurat_macros.xml --- a/seurat_macros.xml Fri Mar 04 07:27:57 2022 +0000 +++ b/seurat_macros.xml Sat Mar 02 10:42:19 2024 +0000 |
[ |
b'@@ -1,7 +1,7 @@\n <?xml version="1.0"?>\n <macros>\n- <token name="@VERSION@">0.3.0</token>\n- <token name="@SEURAT_VERSION@">3.2.3</token>\n+ <token name="@VERSION@">4.0.0</token>\n+ <token name="@SEURAT_VERSION@">4.0.4</token>\n <xml name="requirements">\n <requirements>\n <requirement type="package" version="@VERSION@">seurat-scripts</requirement>\n@@ -13,44 +13,237 @@\n ]]></version_command>\n </xml>\n \n- <xml name="input_object_params">\n- <conditional name="input" label="Input format">\n- <param type="select" name="format" label="Choose the format of the input" help="RData, Loom or AnnData">\n+ <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False">\n+ <conditional name="@VARNAME@" label="Input format">\n+ <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData">\n <option value="rds_seurat" selected="true">RDS with a Seurat object</option>\n <option value="loom">Loom</option>\n+ <option value="h5seurat">Seurat HDF5</option>\n <option value="anndata">AnnData</option>\n <option value="rds_sce">RDS with a Single Cell Experiment object</option>\n </param>\n <when value="anndata">\n- <param type="data" name="anndata_file" label="AnnData file" help="The AnnData format provided by Scanpy" format="h5,h5ad"/>\n+ <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/>\n </when>\n <when value="loom">\n- <param type="data" name="loom_file" label="Loom file" help="Input as Loom v? 
file" format="h5,h5loom"/>\n+ <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/>\n </when>\n <when value="rds_seurat">\n- <param type="data" name="rds_seurat_file" label="RDS file" help="Input as RDS file with Seurat 3 object" format="rdata"/>\n+ <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/>\n </when>\n <when value="rds_sce">\n- <param type="data" name="rds_sce_file" label="RDS file" help="Input as RDS file with Single Cell Experiment object" format="rdata"/>\n+ <param type="data" name="rds_sce_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/>\n+ </when>\n+ <when value="h5seurat">\n+ <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME" format="h5"/>\n </when>\n </conditional>\n </xml>\n \n+ <token name="@INPUT_OBJ_PREAMBLE@">\n+ #if $input.format == \'loom\'\n+ ln -s \'$input.loom_file\' input.loom;\n+ #else if $input.format == \'h5seurat\'\n+ ln -s \'$input.h5seurat_file\' input.h5seurat;\n+ #else if $input.format == \'anndata\'\n+ ## it complains when using links for AnnData...\n+ cp \'$input.anndata_file\' input.h5ad;\n+ #end if\n+ </token>\n+\n <token name="@INPUT_OBJECT@">\n #if $input.format == "anndata"\n- --input-object-file \'$input.anndata_file\' --input-format anndata\n+ --input-object-file input.h5ad --input-format anndata\n #else if $input.format == "loom"\n- --input-object-file \'$input.loom_file\' --input-format loom\n+ --input-object-file input.loom --input-format loom\n #else if $input.format == "rds_seurat"\n --input-object-file \'$input.rds_seurat_file\' --input-format seurat\n #else if $input.format == 
"rds_sce"\n --input-object-file \'$input.rds_sce_file\' --input-format single'..b'urat\n #end if\n </token>\n \n <xml name="output_object_params">\n- <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment or Loom">\n+ <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom">\n <option value="rds_seurat" selected="true">RDS with a Seurat object</option>\n+ <option value="anndata">AnnData written by Seurat</option>\n <option value="loom">Loom</option>\n <option value="rds_sce">RDS with a Single Cell Experiment object</option>\n </param>\n@@ -63,6 +256,9 @@\n <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">\n <filter>format == \'rds_seurat\'</filter>\n </data>\n+ <data name="anndata_file" format="h5ad" label="${tool.name} on ${on_string}: AnnData from Seurat">\n+ <filter>format == \'anndata\'</filter>\n+ </data>\n <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">\n <filter>format == \'rds_sce\'</filter>\n </data>\n@@ -80,6 +276,30 @@\n #end if\n </token>\n \n+ <xml name="plot_output_files_format" token_format="png">\n+ <data label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot" name="plot_out_@FORMAT@" format=\'@FORMAT@\' >\n+ <filter>plot_format == \'@FORMAT@\'</filter>\n+ </data>\n+ </xml>\n+\n+ <token name="@OUTPUT_PLOT@">\n+ #if $plot_format == "png"\n+ --plot-out \'$plot_out_png\'\n+ #else if $plot_format == "pdf"\n+ --plot-out \'$plot_out_pdf\'\n+ #else if $plot_format == "eps"\n+ --plot-out \'$plot_out_eps\'\n+ #else if $plot_format == "ps"\n+ --plot-out \'$plot_out_ps\'\n+ #else if $plot_format == "jpg"\n+ --plot-out \'$plot_out_jpg\'\n+ #else if $plot_format == "tiff"\n+ --plot-out \'$plot_out_tiff\'\n+ #else if $plot_format == "svg"\n+ --plot-out \'$plot_out_svg\'\n+ #end if\n+ </token>\n+\n <xml 
name="genes-use-input">\n <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>\n </xml>\n@@ -96,6 +316,7 @@\n \n <token name="@VERSION_HISTORY@"><![CDATA[\n **Version history**\n+4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca.\n \n 3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduce convert method, improving format interconversion support.\n \n@@ -108,14 +329,15 @@\n Find clusters: removed dims-use, k-param, prune-snn.\n \n 2.3.1+galaxy0: Improved documentation and further exposition of all script\'s options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at\n-EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski(github.com/drosofff) and Lea Bellenger(github.com/bellenger-l).\n+EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l).\n \n-0.0.1: Initial contribution. Maria Doyle (github.com/mblue9).\n+0.0.1: Initial contribution. Maria Doyle (GitHub mblue9).\n ]]></token>\n \n \n <xml name="citations">\n <citations>\n+ <citation type="doi">10.1038/s41592-021-01102-w</citation>\n <citation type="doi">10.1038/nbt.4096</citation>\n <citation type="bibtex">\n @misc{r-seurat-scripts.git,\n@@ -127,7 +349,6 @@\n url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},\n }\n </citation>\n- <citation type="doi">10.1038/s41592-021-01102-w</citation>\n </citations>\n </xml>\n </macros>\n' |
b |
diff -r d2f6eac0ad42 -r 5e9ba303f9e1 seurat_scale_data.xml --- a/seurat_scale_data.xml Fri Mar 04 07:27:57 2022 +0000 +++ b/seurat_scale_data.xml Sat Mar 02 10:42:19 2024 +0000 |
[ |
@@ -6,7 +6,7 @@ <expand macro="requirements" /> <expand macro="version" /> <command detect_errors="exit_code"><![CDATA[ -seurat-scale-data.R +Rscript '$__tool_directory__/scripts/'seurat-scale-data.R @INPUT_OBJECT@ #if $vars: |