Mercurial > repos > ebi-gxa > seurat_run_umap
changeset 0:b9424c715a0d draft default tip
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
author | ebi-gxa |
---|---|
date | Sat, 02 Mar 2024 10:40:43 +0000 |
parents | |
children | |
files | extra/macro_mapper_seurat.yaml get_test_data.sh scripts/seurat-scale-data.R seurat_macros.xml seurat_run_umap.xml |
diffstat | 5 files changed, 888 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
---
# extra/macro_mapper_seurat.yaml
#
# Maps groups of command-line options to the macros/tokens declared in
# seurat_macros.xml.  Each entry lists:
#   option_group               - the CLI options handled together as one unit
#   pre_command_macros         - tokens emitted before the script invocation
#   post_command_macros        - tokens emitted as script arguments
#   input_declaration_macros   - <xml> macros expanded inside <inputs>
#   output_declaration_macros  - <xml> macros expanded inside <outputs>
# A macro given as a mapping carries the token values passed to that macro.

# --- single input object ----------------------------------------------------
- option_group: [input-object-file, input-format]
  pre_command_macros: [INPUT_OBJ_PREAMBLE]
  post_command_macros: [INPUT_OBJECT]
  input_declaration_macros:
    - input_object_params

# --- output object ----------------------------------------------------------
- option_group: [output-object-file, output-format]
  post_command_macros: [OUTPUT_OBJECT]
  input_declaration_macros:
    - output_object_params
  output_declaration_macros:
    - output_files

# --- multiple input objects -------------------------------------------------
- option_group: [input-object-files, input-format]
  pre_command_macros: [INPUT_OBJS_PREAMBLE]
  post_command_macros: [INPUT_OBJECTS]
  input_declaration_macros:
    - input_object_params:
        multiple: true

# --- multiple (optional) reference objects ----------------------------------
- option_group: [reference-object-files, reference-format]
  pre_command_macros: [REFERENCE_OBJS_PREAMBLE]
  post_command_macros: [REFERENCE_OBJECTS]
  input_declaration_macros:
    - input_object_params:
        varname: reference
        multiple: true
        optional: true

# --- single reference object ------------------------------------------------
- option_group: [reference-object-file, reference-format]
  pre_command_macros: [REFERENCE_OBJ_PREAMBLE]
  post_command_macros: [REFERENCE_OBJECT]
  input_declaration_macros:
    - input_object_params:
        varname: reference

# --- anchors object ---------------------------------------------------------
- option_group: [anchors-object-file, anchors-format]
  pre_command_macros: [ANCHORS_OBJ_PREAMBLE]
  post_command_macros: [ANCHORS_OBJECT]
  input_declaration_macros:
    - input_object_params:
        varname: anchors

# --- query object -----------------------------------------------------------
- option_group: [query-object-file, query-format]
  pre_command_macros: [QUERY_OBJ_PREAMBLE]
  post_command_macros: [QUERY_OBJECT]
  input_declaration_macros:
    - input_object_params:
        varname: query

# --- plot output: one <data> declaration per supported image format ---------
- option_group: [plot-out]
  post_command_macros: [OUTPUT_PLOT]
  output_declaration_macros:
    - plot_output_files_format:
        format: png
    - plot_output_files_format:
        format: pdf
    - plot_output_files_format:
        format: eps
    - plot_output_files_format:
        format: jpg
    - plot_output_files_format:
        format: ps
    - plot_output_files_format:
        format: tiff
    - plot_output_files_format:
        format: svg
#!/usr/bin/env bash
#
# get_test_data.sh
#
# Downloads the datasets used by the tool tests into ./test-data.
# Files that already exist locally are not downloaded again.
#
# Every download is attempted even if an earlier one fails; failures are
# reported on stderr and the script exits non-zero at the end if any
# download or extraction failed.

BASENAME_FILE='E-MTAB-6077-3k_features_90_cells'

MTX_LINK='https://drive.google.com/uc?export=download&id=1-1ejn7scP80xsbrG0FtWzsozjg0hhc23'
RDS_LINK='https://drive.google.com/uc?export=download&id=1KW_GX6xznSUpWRWUykpNaSbAhyClf7_n'
NORM_LINK='https://drive.google.com/uc?export=download&id=1mvo3ENkBvEAOyWG6ejApzQTPDLX5yBKU'
FVG_LINK='https://drive.google.com/uc?export=download&id=13Fhruuj-vEEo1WM138ahtAYqfHc7LsaZ'
SCALED_LINK='https://drive.google.com/uc?export=download&id=18TK8us235LWNajarWDBAtASUXMYAxvw0'
PCA_LINK='https://drive.google.com/uc?export=download&id=1gf3BTB4dygDsom1TzjsBfgZnZepcoG5c'
NEIGHBOURS_LINK='https://drive.google.com/uc?export=download&id=1N2lHoKRBZ7pmAYGfghLWB9KUrLA5WoNX'
CLUSTERS_LINK='https://drive.google.com/uc?export=download&id=1HWxZWHbNUNo4z__9PhhL_CJOLzec_ETa'
TSNE_LINK='https://drive.google.com/uc?export=download&id=1qsvMr_GkCSp1dyTJt1BZ6cElJwFFX2zO'
MARKERS_LINK='https://drive.google.com/uc?export=download&id=18OmWNc7mF-4pzH6DQkPp1eKunN4BfvxD'

LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'
H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'

H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp'
SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'

# Seurat v4 exclusives
IFNB_BASE_FILE='ifnb_'

IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL'
IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54'

## Classify and UMAP mapping
CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download'
CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i'
# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download'
UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download'

# Number of downloads/extractions that failed during this run.
FAILED_DOWNLOADS=0

# get_data LINK FNAME
#   Download LINK to FNAME unless FNAME already exists.
#   The payload is written to "FNAME.part" and renamed only after wget
#   succeeds: `wget -O` creates the target file even when the transfer fails,
#   and a leftover empty/partial file would otherwise satisfy the existence
#   check on the next run and never be re-downloaded.
#   Returns non-zero (and bumps FAILED_DOWNLOADS) when the download fails.
function get_data {
  local link=$1
  local fname=$2

  if [ ! -f "$fname" ]; then
    echo "$fname not available locally, downloading.."
    if wget -O "$fname.part" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link"; then
      mv "$fname.part" "$fname"
    else
      echo "ERROR: failed to download $fname from $link" >&2
      rm -f "$fname.part"
      FAILED_DOWNLOADS=$((FAILED_DOWNLOADS + 1))
      return 1
    fi
  fi
}

# extract_and_remove ARCHIVE
#   Unzip ARCHIVE into the current directory and delete it afterwards.
#   `-o` overwrites existing files without prompting so that re-running the
#   script does not block on unzip's interactive "replace?" question.
#   Does nothing when ARCHIVE is missing (its download failed).
function extract_and_remove {
  local archive=$1

  if [ -f "$archive" ]; then
    if ! unzip -o "$archive"; then
      echo "ERROR: failed to extract $archive" >&2
      FAILED_DOWNLOADS=$((FAILED_DOWNLOADS + 1))
    fi
    rm -f "$archive"
  fi
}

# get matrix data
mkdir -p test-data
# Abort if we cannot enter the target directory, rather than scattering
# downloads over the current working directory.
pushd test-data || exit 1
get_data "$MTX_LINK" mtx.zip
extract_and_remove mtx.zip

get_data "$RDS_LINK" "${BASENAME_FILE}.rds"
get_data "$NORM_LINK" "${BASENAME_FILE}-normalised.rds"
get_data "$FVG_LINK" "${BASENAME_FILE}-fvg.rds"
get_data "$SCALED_LINK" "${BASENAME_FILE}-scaled.rds"
get_data "$PCA_LINK" "${BASENAME_FILE}-pca.rds"
get_data "$NEIGHBOURS_LINK" "${BASENAME_FILE}-neighbours.rds"
get_data "$CLUSTERS_LINK" "${BASENAME_FILE}-clusters.rds"
get_data "$TSNE_LINK" "${BASENAME_FILE}-tsne.rds"
get_data "$MARKERS_LINK" "${BASENAME_FILE}-markers.csv.zip"

extract_and_remove "${BASENAME_FILE}-markers.csv.zip"

get_data "$LOOM_LINK" "${BASENAME_FILE}_loom.h5"
get_data "$SCE_LINK" "${BASENAME_FILE}_sce.rds"
get_data "$H5AD_LINK" "${BASENAME_FILE}.h5ad"

get_data "$H5AD_SC182_LINK" "${BASENAME_FILE}_sc182.h5ad"

get_data "$IFNB_CTRL_INT_LINK" "${IFNB_BASE_FILE}ctrl_norm_fvg.rds"
get_data "$IFNB_STIM_INT_LINK" "${IFNB_BASE_FILE}stim_norm_fvg.rds"

get_data "$CLASSIFY_QUERY_LINK" "Classify_query.rds"
get_data "$CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK" "Classify_anchors.rds"
get_data "$UMAP_RESULT_OBJECT_LINK" "UMAP_result_integrated.rds"

popd || exit 1

if [ "$FAILED_DOWNLOADS" -gt 0 ]; then
  echo "ERROR: $FAILED_DOWNLOADS download/extraction step(s) failed" >&2
  exit 1
fi
#!/usr/bin/env Rscript

# scripts/seurat-scale-data.R
#
# Command-line wrapper around Seurat::ScaleData(): reads a single-cell object
# from file, scales/centers it (optionally regressing out variables) and
# writes the result back in the requested format.

# Load optparse we need to check inputs

suppressPackageStartupMessages(require(optparse))

# Load common functions

suppressPackageStartupMessages(require(workflowscriptscommon))

# parse options

option_list = list(
    make_option(
        c("-i", "--input-object-file"),
        action = "store",
        default = NA,
        type = 'character',
        help = "File name in which a serialized R matrix object may be found."
    ),
    make_option(
        c("--input-format"),
        action = "store",
        default = "seurat",
        type = 'character',
        help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
    ),
    make_option(
        c("--output-format"),
        action = "store",
        default = "seurat",
        type = 'character',
        help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
    ),
    make_option(
        c("-e", "--genes-use"),
        action = "store",
        default = NULL,
        type = 'character',
        help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
    ),
    make_option(
        c("-v", "--vars-to-regress"),
        action = "store",
        default = NULL,
        type = 'character',
        help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
    ),
    make_option(
        c("-m", "--model-use"),
        action = "store",
        default = 'linear',
        type = 'character',
        help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
    ),
    make_option(
        c("-u", "--use-umi"),
        action = "store",
        default = FALSE,
        type = 'logical',
        help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
    ),
    make_option(
        c("-s", "--do-not-scale"),
        action = "store_true",
        default = FALSE,
        type = 'logical',
        help = "Skip the data scale."
    ),
    make_option(
        c("-c", "--do-not-center"),
        action = "store_true",
        default = FALSE,
        type = 'logical',
        help = "Skip data centering."
    ),
    make_option(
        c("-x", "--scale-max"),
        action = "store",
        default = 10,
        type = 'double',
        help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
    ),
    make_option(
        c("-b", "--block-size"),
        action = "store",
        default = 1000,
        type = 'integer',
        help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
    ),
    make_option(
        c("-d", "--min-cells-to-block"),
        action = "store",
        default = 1000,
        type = 'integer',
        help = "If object contains fewer than this number of cells, don't block for scaling calculations."
    ),
    # NOTE(review): --check-for-norm is accepted for CLI compatibility but is
    # not forwarded to ScaleData() below.
    make_option(
        c("-n", "--check-for-norm"),
        action = "store",
        default = TRUE,
        type = 'logical',
        help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
    ),
    make_option(
        c("-o", "--output-object-file"),
        action = "store",
        default = NA,
        type = 'character',
        help = "File name in which to store serialized R object of type 'Seurat'.'"
    )
)

opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))

# Check parameter values

if (! file.exists(opt$input_object_file)) {
    stop(paste('File', opt$input_object_file, 'does not exist'))
}

# Optional list of genes to restrict scaling to (one gene name per line).
genes_use <- NULL
if (! is.null(opt$genes_use)) {
    if (! file.exists(opt$genes_use)) {
        stop(paste('Supplied genes file', opt$genes_use, 'does not exist'))
    }
    genes_use <- readLines(opt$genes_use)
}

# Break up opt$vars_to_regress into a character vector if it has commas.
# strsplit() raises "non-character argument" on NULL, so only split when the
# option was actually supplied; otherwise it stays NULL (nothing to regress).
if (! is.null(opt$vars_to_regress)) {
    opt$vars_to_regress <- unlist(strsplit(opt$vars_to_regress, ","))
}

# Now we're happy with the arguments, load Seurat and do the work

suppressPackageStartupMessages(require(Seurat))

# Load the format-specific helper packages. These are independent checks (not
# if / else if): a loom input combined with a singlecellexperiment output, or
# vice versa, needs both packages.
formats_in_use <- c(opt$input_format, opt$output_format)
if ("loom" %in% formats_in_use) {
    suppressPackageStartupMessages(require(SeuratDisk))
}
if ("singlecellexperiment" %in% formats_in_use) {
    suppressPackageStartupMessages(require(scater))
}

# Input from serialized R object

seurat_object <- read_seurat4_object(input_path = opt$input_object_file, format = opt$input_format)
# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
# might be useful
scaled_seurat_object <- ScaleData(seurat_object,
                                  features = genes_use,
                                  vars.to.regress = opt$vars_to_regress,
                                  model.use = opt$model_use,
                                  use.umi = opt$use_umi,
                                  do.scale = !opt$do_not_scale,
                                  do.center = !opt$do_not_center,
                                  scale.max = opt$scale_max,
                                  block.size = opt$block_size,
                                  min.cells.to.block = opt$min_cells_to_block,
                                  verbose = FALSE)


# Output to a serialized R object
write_seurat4_object(seurat_object = scaled_seurat_object,
                     output_path = opt$output_object_file,
                     format = opt$output_format)
<?xml version="1.0"?>
<!--
  seurat_macros.xml

  Shared Galaxy macros and Cheetah tokens for the Seurat tool wrappers:
  requirements/version, input/output object selection for the supported
  formats, plot outputs, help text fragments and citations.
-->
<macros>
    <token name="@VERSION@">4.0.0</token>
    <token name="@SEURAT_VERSION@">4.0.4</token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@VERSION@">seurat-scripts</requirement>
        </requirements>
    </xml>
    <!-- R package names are case sensitive: the package (and therefore the
         sessionInfo()$otherPkgs entry) is "Seurat", not "seurat". -->
    <xml name="version">
        <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(Seurat); cat(sessionInfo()\$otherPkgs\$Seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
        ]]></version_command>
    </xml>

    <!-- Conditional input selector. Tokens:
           varname  - name of the conditional (input, reference, anchors, query)
           multiple - whether several datasets may be selected
           optional - whether the dataset may be left empty
         Each format exposes its dataset as "<format>_file". -->
    <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False">
        <conditional name="@VARNAME@" label="Input format">
            <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData">
                <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
                <option value="loom">Loom</option>
                <option value="h5seurat">Seurat HDF5</option>
                <option value="anndata">AnnData</option>
                <option value="rds_sce">RDS with a Single Cell Experiment object</option>
            </param>
            <when value="anndata">
                <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/>
            </when>
            <when value="loom">
                <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/>
            </when>
            <when value="rds_seurat">
                <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/>
            </when>
            <when value="rds_sce">
                <param type="data" name="rds_sce_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/>
            </when>
            <when value="h5seurat">
                <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME@" format="h5"/>
            </when>
        </conditional>
    </xml>

    <!-- *_OBJ_PREAMBLE tokens stage a single dataset in the working directory
         under a name with the expected extension; the matching *_OBJECT
         tokens emit the script arguments that point at the staged file (or at
         the dataset itself for the RDS formats). -->
    <token name="@INPUT_OBJ_PREAMBLE@">
        #if $input.format == 'loom'
            ln -s '$input.loom_file' input.loom;
        #else if $input.format == 'h5seurat'
            ln -s '$input.h5seurat_file' input.h5seurat;
        #else if $input.format == 'anndata'
            ## it complains when using links for AnnData...
            cp '$input.anndata_file' input.h5ad;
        #end if
    </token>

    <token name="@INPUT_OBJECT@">
        #if $input.format == "anndata"
            --input-object-file input.h5ad --input-format anndata
        #else if $input.format == "loom"
            --input-object-file input.loom --input-format loom
        #else if $input.format == "rds_seurat"
            --input-object-file '$input.rds_seurat_file' --input-format seurat
        #else if $input.format == "rds_sce"
            --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment
        #else if $input.format == "h5seurat"
            --input-object-file input.h5seurat --input-format h5seurat
        #end if
    </token>

    <token name="@QUERY_OBJ_PREAMBLE@">
        #if $query.format == 'loom'
            ln -s '$query.loom_file' query.loom;
        #else if $query.format == 'h5seurat'
            ln -s '$query.h5seurat_file' query.h5seurat;
        #else if $query.format == 'anndata'
            ## it complains when using links for AnnData...
            cp '$query.anndata_file' query.h5ad;
        #end if
    </token>

    <token name="@QUERY_OBJECT@">
        #if $query.format == "anndata"
            --query-object-file query.h5ad --query-format anndata
        #else if $query.format == "loom"
            --query-object-file query.loom --query-format loom
        #else if $query.format == "rds_seurat"
            --query-object-file '$query.rds_seurat_file' --query-format seurat
        #else if $query.format == "rds_sce"
            --query-object-file '$query.rds_sce_file' --query-format singlecellexperiment
        #else if $query.format == "h5seurat"
            --query-object-file query.h5seurat --query-format h5seurat
        #end if
    </token>

    <token name="@ANCHORS_OBJ_PREAMBLE@">
        #if $anchors.format == 'loom'
            ln -s '$anchors.loom_file' anchors.loom;
        #else if $anchors.format == 'h5seurat'
            ln -s '$anchors.h5seurat_file' anchors.h5seurat;
        #else if $anchors.format == 'anndata'
            ## it complains when using links for AnnData...
            cp '$anchors.anndata_file' anchors.h5ad;
        #end if
    </token>

    <token name="@ANCHORS_OBJECT@">
        #if $anchors.format == "anndata"
            --anchors-object-file anchors.h5ad --anchors-format anndata
        #else if $anchors.format == "loom"
            --anchors-object-file anchors.loom --anchors-format loom
        #else if $anchors.format == "rds_seurat"
            --anchors-object-file '$anchors.rds_seurat_file' --anchors-format seurat
        #else if $anchors.format == "rds_sce"
            --anchors-object-file '$anchors.rds_sce_file' --anchors-format singlecellexperiment
        #else if $anchors.format == "h5seurat"
            --anchors-object-file anchors.h5seurat --anchors-format h5seurat
        #end if
    </token>

    <token name="@REFERENCE_OBJ_PREAMBLE@">
        #if $reference.format == 'loom'
            ln -s '$reference.loom_file' reference.loom;
        #else if $reference.format == 'h5seurat'
            ln -s '$reference.h5seurat_file' reference.h5seurat;
        #else if $reference.format == 'anndata'
            ## it complains when using links for AnnData...
            cp '$reference.anndata_file' reference.h5ad;
        #end if
    </token>

    <token name="@REFERENCE_OBJECT@">
        #if $reference.format == "anndata"
            --reference-object-file reference.h5ad --reference-format anndata
        #else if $reference.format == "loom"
            --reference-object-file reference.loom --reference-format loom
        #else if $reference.format == "rds_seurat"
            --reference-object-file '$reference.rds_seurat_file' --reference-format seurat
        #else if $reference.format == "rds_sce"
            --reference-object-file '$reference.rds_sce_file' --reference-format singlecellexperiment
        #else if $reference.format == "h5seurat"
            --reference-object-file reference.h5seurat --reference-format h5seurat
        #end if
    </token>

    <!-- *_OBJS_PREAMBLE / *_OBJECTS are the multiple-dataset variants: staged
         files are numbered (<varname>.<i>.<ext>) and passed to the script as a
         comma-separated list. The h5seurat branches read the "h5seurat_file"
         parameter, which is the name declared in input_object_params. -->
    <token name="@INPUT_OBJS_PREAMBLE@">
        #if $input.format == 'loom'
            #for $i, $fh in enumerate($input.loom_file):
                ln -s '$fh' input.${i}.loom;
            #end for
        #else if $input.format == 'h5seurat'
            #for $i, $fh in enumerate($input.h5seurat_file):
                ln -s '$fh' input.${i}.h5seurat;
            #end for
        #else if $input.format == 'anndata'
            ## it complains when using links for AnnData...
            #for $i, $fh in enumerate($input.anndata_file):
                cp '$fh' input.${i}.h5ad;
            #end for
        #end if
    </token>

    <token name="@INPUT_OBJECTS@">
        #if $input.format == "anndata"
            --input-object-files
            #set file_array = [ "input."+str($i)+".h5ad" for $i, $fh in enumerate($input.anndata_file)]
            #set files = ",".join($file_array)
            ${files}
            --input-format anndata
        #else if $input.format == "loom"
            --input-object-files
            #set file_array = [ "input."+str($i)+".loom" for $i, $fh in enumerate($input.loom_file)]
            #set files = ",".join($file_array)
            ${files}
            --input-format loom
        #else if $input.format == "rds_seurat"
            --input-object-files
            #set file_array = $input.rds_seurat_file
            #set files = ",".join([ str($fh) for $fh in $file_array ])
            ${files}
            --input-format seurat
        #else if $input.format == "rds_sce"
            --input-object-files
            #set file_array = $input.rds_sce_file
            #set files = ",".join([ str($fh) for $fh in $file_array ])
            ${files}
            --input-format singlecellexperiment
        #else if $input.format == "h5seurat"
            --input-object-files
            #set file_array = [ "input."+str($i)+".h5seurat" for $i, $fh in enumerate($input.h5seurat_file)]
            #set files = ",".join($file_array)
            ${files}
            --input-format h5seurat
        #end if
    </token>

    <token name="@REFERENCE_OBJS_PREAMBLE@">
        #if $reference.format == 'loom'
            #for $i, $fh in enumerate($reference.loom_file):
                ln -s '$fh' reference.${i}.loom;
            #end for
        #else if $reference.format == 'h5seurat'
            #for $i, $fh in enumerate($reference.h5seurat_file):
                ln -s '$fh' reference.${i}.h5seurat;
            #end for
        #else if $reference.format == 'anndata'
            ## it complains when using links for AnnData...
            #for $i, $fh in enumerate($reference.anndata_file):
                cp '$fh' reference.${i}.h5ad;
            #end for
        #end if
    </token>

    <token name="@REFERENCE_OBJECTS@">
        #if $reference.format == "anndata" and $reference.anndata_file:
            --reference-object-files
            #set file_array = [ "reference."+str($i)+".h5ad" for $i, $fh in enumerate($reference.anndata_file)]
            #set files = ",".join($file_array)
            ${files}
            --reference-format anndata
        #else if $reference.format == "loom" and $reference.loom_file:
            --reference-object-files
            #set file_array = [ "reference."+str($i)+".loom" for $i, $fh in enumerate($reference.loom_file)]
            #set files = ",".join($file_array)
            ${files}
            --reference-format loom
        #else if $reference.format == "rds_seurat" and $reference.rds_seurat_file:
            --reference-object-files
            #set files = ",".join([ str($fh) for $fh in $reference.rds_seurat_file ])
            ${files}
            --reference-format seurat
        #else if $reference.format == "rds_sce" and $reference.rds_sce_file:
            --reference-object-files
            #set files = ",".join([ str($fh) for $fh in $reference.rds_sce_file ])
            ${files}
            --reference-format singlecellexperiment
        #else if $reference.format == "h5seurat" and $reference.h5seurat_file:
            --reference-object-files
            #set file_array = [ "reference."+str($i)+".h5seurat" for $i, $fh in enumerate($reference.h5seurat_file)]
            #set files = ",".join($file_array)
            ${files}
            --reference-format h5seurat
        #end if
    </token>

    <!-- Output format selector and the matching (filtered) output datasets. -->
    <xml name="output_object_params">
        <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom">
            <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
            <option value="anndata">AnnData written by Seurat</option>
            <option value="loom">Loom</option>
            <option value="rds_sce">RDS with a Single Cell Experiment object</option>
        </param>
    </xml>

    <xml name="output_files">
        <data name="loom_file" from_work_dir="seurat_obj.loom" format="h5" label="${tool.name} on ${on_string}: Seurat Loom">
            <filter>format == 'loom'</filter>
        </data>
        <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">
            <filter>format == 'rds_seurat'</filter>
        </data>
        <data name="anndata_file" format="h5ad" label="${tool.name} on ${on_string}: AnnData from Seurat">
            <filter>format == 'anndata'</filter>
        </data>
        <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">
            <filter>format == 'rds_sce'</filter>
        </data>
    </xml>

    <token name="@OUTPUT_OBJECT@">
        #if $format == "anndata"
            --output-object-file '$anndata_file' --output-format anndata
        #else if $format == "loom"
            --output-object-file seurat_obj.loom --output-format loom
        #else if $format == "rds_seurat"
            --output-object-file '$rds_seurat_file' --output-format seurat
        #else if $format == "rds_sce"
            --output-object-file '$rds_sce_file' --output-format singlecellexperiment
        #end if
    </token>

    <!-- One plot output dataset per image format, selected by $plot_format. -->
    <xml name="plot_output_files_format" token_format="png">
        <data label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot" name="plot_out_@FORMAT@" format='@FORMAT@' >
            <filter>plot_format == '@FORMAT@'</filter>
        </data>
    </xml>

    <token name="@OUTPUT_PLOT@">
        #if $plot_format == "png"
            --plot-out '$plot_out_png'
        #else if $plot_format == "pdf"
            --plot-out '$plot_out_pdf'
        #else if $plot_format == "eps"
            --plot-out '$plot_out_eps'
        #else if $plot_format == "ps"
            --plot-out '$plot_out_ps'
        #else if $plot_format == "jpg"
            --plot-out '$plot_out_jpg'
        #else if $plot_format == "tiff"
            --plot-out '$plot_out_tiff'
        #else if $plot_format == "svg"
            --plot-out '$plot_out_svg'
        #end if
    </token>

    <xml name="genes-use-input">
        <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>
    </xml>
    <xml name="dims-use-input">
        <param name="dims_use" argument="--dims-use" min="1" optional="true" type="integer" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/>
    </xml>

    <token name="@SEURAT_INTRO@"><![CDATA[
Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
types of single cell data.
    ]]></token>

    <token name="@VERSION_HISTORY@"><![CDATA[
**Version history**
4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca.

3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduce convert method, improving format interconversion support.

3.1.2_0.0.8: Update metadata parsing

3.1.1_0.0.7: Exposes perplexity and enables tab input.

3.1.1_0.0.6+galaxy0: Moved to Seurat 3.

  Find clusters: removed dims-use, k-param, prune-snn.

2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at
EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l).

0.0.1: Initial contribution. Maria Doyle (GitHub mblue9).
    ]]></token>


    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/s41592-021-01102-w</citation>
            <citation type="doi">10.1038/nbt.4096</citation>
            <citation type="bibtex">
                @misc{r-seurat-scripts.git,
                author = {Jonathan Manning, Pablo Moreno, EBI Gene Expression Team},
                year = {2018},
                title = {Seurat-scripts: command line interface for Seurat},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},
                }
            </citation>
        </citations>
    </xml>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seurat_run_umap.xml Sat Mar 02 10:40:43 2024 +0000 @@ -0,0 +1,202 @@ +<tool id="seurat_run_umap" name="Seurat UMAP" profile="18.01" version="@SEURAT_VERSION@+galaxy0"> + <description>dimensionality reduction</description> + <macros> + <import>seurat_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version" /> + + <command detect_errors="exit_code"><![CDATA[ + @INPUT_OBJ_PREAMBLE@ + + seurat-run-umap.R + @INPUT_OBJECT@ + @OUTPUT_OBJECT@ + +#if $dims +--dims '$dims' +#end if + +#if $reduction +--reduction '$reduction' +#end if + +#if $adv.graph +--graph '$adv.graph' +#end if + +#if $assay +--assay '$assay' +#end if + +#if $adv.nn_name +--nn.name '$adv.nn_name' +#end if + +#if $adv.slot +--slot '$adv.slot' +#end if + +#if $adv.umap_method +--umap.method '$adv.umap_method' +#end if + +#if $adv.reduction_model +--reduction.model '$adv.reduction_model' +#end if +$adv.return_model + +#if $n_neighbors +--n.neighbors '$n_neighbors' +#end if + +#if $n_components +--n.components '$n_components' +#end if + +#if $adv.metric +--metric '$adv.metric' +#end if + +#if $adv.n_epochs +--n.epochs '$adv.n_epochs' +#end if + +#if $adv.learning_rate +--learning.rate '$adv.learning_rate' +#end if + +#if $adv.min_dist +--min.dist '$adv.min_dist' +#end if + +#if $adv.spread +--spread '$adv.spread' +#end if + +#if $local_connectivity +--local.connectivity '$local_connectivity' +#end if + +#if $adv.repulsion_strength +--repulsion.strength '$adv.repulsion_strength' +#end if + +#if $adv.negative_sample_rate +--negative.sample.rate '$adv.negative_sample_rate' +#end if + +#if $adv.a +--a '$adv.a' +#end if + +#if $adv.b +--b '$adv.b' +#end if +$adv.uwot_sgd + +#if $seed_use +--seed.use '$seed_use' +#end if + +##if $adv.metric_kwds +## --metric.kwds '$adv.metric_kwds' +##end if +$adv.angular_rp_forest +$adv.verbose + +#if $reduction_name +--reduction.name '$reduction_name' +#end if + +#if $reduction_key 
+--reduction.key '$reduction_key' +#end if + + ]]></command> + + +<inputs> + <expand macro="input_object_params"/> + <expand macro="output_object_params"/> + <param label="Dims" optional='true' name="dims" argument="--dims" type="text" help="Which dimensions to use as input features, used only if list('features') is NULL"> + <validator type="regex" message="Value may only include numbers, colon, 'c', commas and parenthesis, to do things such as 1:10 or c(1,2,3).">^[c0-9:,)(]+$</validator> + </param> + <param label="Reduction" optional='true' value='pca' name="reduction" argument="--reduction" type="text" help="Which dimensional reduction (PCA or ICA) to use for the UMAP input. Default is PCA"/> + <param label="Assay" optional='true' name="assay" argument="--assay" type="text" help="Assay to pull data for when using list('features') , or assay used to construct Graph if running UMAP on a Graph"/> + <param label="N neighbors" optional='true' value='30' name="n_neighbors" argument="--n.neighbors" type="integer" help="This determines the number of neighboring points used in local approximations of manifold structure. Larger values will result in more global structure being preserved at the loss of detailed local structure. In general this parameter should often be in the range 5 to 50."/> + <param label="N components" optional='true' value='2' name="n_components" argument="--n.components" type="integer" help="The dimension of the space to embed into."/> + <param label="Local connectivity" optional='true' value='1' name="local_connectivity" argument="--local.connectivity" type="integer" help="The local connectivity required - i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally. 
In practice this should be not more than the local intrinsic dimension of the manifold."/> + <param label="Seed use" optional='true' value='42' name="seed_use" argument="--seed.use" type="integer" help="Set a random seed. By default, sets the seed to 42. Setting NULL will not set a seed"/> + <param label="Reduction name" optional='true' value='umap' name="reduction_name" argument="--reduction.name" type="text" help="Name to store dimensional reduction under in the Seurat object"/> + <param label="Reduction key" optional='true' value='UMAP' name="reduction_key" argument="--reduction.key" type="text" help="dimensional reduction key, specifies the string before the number for the dimension names. UMAP by default"/> + <section name="adv" title="Advanced options"> + <param label="Graph" optional='true' name="graph" argument="--graph" type="text" help="Name of graph on which to run UMAP"/> + <param label="Nn name" optional='true' name="nn_name" argument="--nn.name" type="text" help="Name of knn output on which to run UMAP"/> + <param label="Slot" optional='true' value='data' name="slot" argument="--slot" type="text" help="The slot used to pull data for when using list('features') . data slot is by default."/> + <param label="Umap method" optional='true' value='uwot' name="umap_method" argument="--umap.method" type="text" help="UMAP implementation to run. 
Can be list uwot, uwot-learn, umap-learn (rquires python umap-learn package)."/> + <param label="Reduction model" optional='true' name="reduction_model" argument="--reduction.model" type="text" help="list('DimReduc') object that contains the umap model"/> + <param label="Return model" optional='true' value='false' name="return_model" argument="--return.model" type="boolean" truevalue='--return.model' falsevalue='' checked='false' help="whether UMAP will return the uwot model"/> + <param label="Metric" optional='true' value='cosine' name="metric" argument="--metric" type="text" help="metric: This determines the choice of metric used to measure distance in the input space. A wide variety of metrics are already coded, and a user defined function can be passed as long as it has been JITd by numba."/> + <param label="N epochs" optional='true' name="n_epochs" argument="--n.epochs" type="integer" help="The number of training epochs to be used in optimizing the low dimensional embedding. Larger values result in more accurate embeddings. If NULL is specified, a value will be selected based on the size of the input dataset (200 for large datasets, 500 for small)."/> + <param label="Learning rate" optional='true' value='1' name="learning_rate" argument="--learning.rate" type="integer" help="The initial learning rate for the embedding optimization."/> + <param label="Min dist" optional='true' value='0' name="min_dist" argument="--min.dist" type="integer" help="This controls how tightly the embedding is allowed compress points together. Larger values ensure embedded points are moreevenly distributed, while smaller values allow the algorithm to optimise more accurately with regard to local structure. Sensible values are in the range 0.001 to 0.5."/> + <param label="Spread" optional='true' value='1' name="spread" argument="--spread" type="integer" help="The effective scale of embedded points. 
In combination with min.dist this determines how clustered/clumped the embedded points are."/> + <!-- Repulsion strength is a real-valued weight, so it is declared as float rather than integer. --> + <param label="Repulsion strength" optional='true' value='1' name="repulsion_strength" argument="--repulsion.strength" type="float" help="Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples."/> + <param label="Negative sample rate" optional='true' value='5' name="negative_sample_rate" argument="--negative.sample.rate" type="integer" help="The number of negative samples to select per positive sample in the optimization process. Increasing this value will result in greater repulsive force being applied, greater optimization cost, but slightly more accuracy."/> + <param label="A" optional='true' name="a" argument="--a" type="text" help="More specific parameters controlling the embedding. If NULL, these values are set automatically as determined by min.dist and spread. Parameter of differentiable approximation of right adjoint functor."/> + <param label="B" optional='true' name="b" argument="--b" type="text" help="More specific parameters controlling the embedding. If NULL, these values are set automatically as determined by min.dist and spread. Parameter of differentiable approximation of right adjoint functor."/> + <param label="Uwot sgd" optional='true' value='false' name="uwot_sgd" argument="--uwot.sgd" type="boolean" truevalue='--uwot.sgd' falsevalue='' checked='false' help="Set uwot::umap(fast_sgd = TRUE); see umap for more details"/> + <!-- Potential injection concern, needs to be handled better before being enabled. + <param label="Metric kwds" optional='true' name="metric_kwds" argument="metric.kwds" type="text" help="A dictionary of arguments to pass on to the metric, such as the p value for Minkowski distance. 
If NULL then no arguments are passed on."/> + --> + <param label="Angular rp forest" optional='true' value='false' name="angular_rp_forest" argument="--angular.rp.forest" type="boolean" truevalue='--angular.rp.forest' falsevalue='' checked='false' help="Whether to use an angular random projection forest to initialise the approximate nearest neighbor search. This can be faster, but is mostly only useful for metrics that use an angular style distance such as cosine, correlation etc. In the case of those metrics angular forests will be chosen automatically."/> + <!-- Verbose is inverted on the command line: checked emits nothing, unchecked emits the do-not-verbose flag. --> + <param label="Verbose" optional='true' value='true' name="verbose" argument="--do-not-verbose" type="boolean" truevalue='' falsevalue='--do-not-verbose' checked='true' help="Controls verbosity"/> + </section> +</inputs> + +<outputs> + <expand macro="output_files"/> +</outputs> + + + +<tests> + <!-- MANUAL TESTS --> + <!-- Both tests use the same input file and differ only in how dims is written (a range versus an explicit vector); each checks the output size within the given delta. --> + <test> + <param name="rds_seurat_file" ftype="rdata" value="E-MTAB-6077-3k_features_90_cells-clusters.rds"/> + <param name="dims" value="1:10"/> + <output name="rds_seurat_file" ftype="rdata" > + <assert_contents> + <has_size value="5067150" delta="200000"/> + </assert_contents> + </output> + </test> + <test> + <param name="rds_seurat_file" ftype="rdata" value="E-MTAB-6077-3k_features_90_cells-clusters.rds"/> + <param name="dims" value="c(1,2,3,4,5,6,7,8,9,10)"/> + <output name="rds_seurat_file" ftype="rdata" > + <assert_contents> + <has_size value="5067150" delta="200000"/> + </assert_contents> + </output> + </test> + <!-- END MANUAL TESTS --> +</tests> +<help> + <!-- MANUAL HELP --> + <![CDATA[ + .. class:: infomark + + **What it does** + + @SEURAT_INTRO@ + + For more details on this method, please see the individual in-line documentation or the same method's Seurat 4 documentation. + + @VERSION_HISTORY@ + ]]> + <!-- END MANUAL HELP --> +</help> +<expand macro="citations" /> +</tool>