Mercurial > repos > ebi-gxa > seurat_integration
changeset 0:dd6db951c33a draft default tip
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
| author | ebi-gxa | 
|---|---|
| date | Sat, 02 Mar 2024 10:42:39 +0000 | 
| parents | |
| children | |
| files | extra/macro_mapper_seurat.yaml get_test_data.sh scripts/seurat-scale-data.R seurat_integration.xml seurat_macros.xml | 
| diffstat | 5 files changed, 916 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/macro_mapper_seurat.yaml Sat Mar 02 10:42:39 2024 +0000 @@ -0,0 +1,90 @@ +--- +- option_group: + - input-object-file + - input-format + pre_command_macros: + - INPUT_OBJ_PREAMBLE + post_command_macros: + - INPUT_OBJECT + input_declaration_macros: + - input_object_params +- option_group: + - output-object-file + - output-format + post_command_macros: + - OUTPUT_OBJECT + input_declaration_macros: + - output_object_params + output_declaration_macros: + - output_files +- option_group: + - input-object-files + - input-format + pre_command_macros: + - INPUT_OBJS_PREAMBLE + post_command_macros: + - INPUT_OBJECTS + input_declaration_macros: + - input_object_params: + multiple: true +- option_group: + - reference-object-files + - reference-format + pre_command_macros: + - REFERENCE_OBJS_PREAMBLE + post_command_macros: + - REFERENCE_OBJECTS + input_declaration_macros: + - input_object_params: + varname: reference + multiple: true + optional: true +- option_group: + - reference-object-file + - reference-format + pre_command_macros: + - REFERENCE_OBJ_PREAMBLE + post_command_macros: + - REFERENCE_OBJECT + input_declaration_macros: + - input_object_params: + varname: reference +- option_group: + - anchors-object-file + - anchors-format + pre_command_macros: + - ANCHORS_OBJ_PREAMBLE + post_command_macros: + - ANCHORS_OBJECT + input_declaration_macros: + - input_object_params: + varname: anchors +- option_group: + - query-object-file + - query-format + pre_command_macros: + - QUERY_OBJ_PREAMBLE + post_command_macros: + - QUERY_OBJECT + input_declaration_macros: + - input_object_params: + varname: query +- option_group: + - plot-out + post_command_macros: + - OUTPUT_PLOT + output_declaration_macros: + - plot_output_files_format: + format: png + - plot_output_files_format: + format: pdf + - plot_output_files_format: + format: eps + - plot_output_files_format: + format: jpg + - plot_output_files_format: + format: ps + - 
plot_output_files_format: + format: tiff + - plot_output_files_format: + format: svg
#!/usr/bin/env bash
#
# get_test_data.sh - fetch the data sets used by the Seurat tool tests.
#
# Creates ./test-data (if needed) and downloads every file listed below into
# it, skipping files that are already present. Zip archives are unpacked and
# then removed. The script is best-effort: a failed download is reported and
# the remaining files are still attempted, but the exit status is non-zero if
# any download failed.

BASENAME_FILE='E-MTAB-6077-3k_features_90_cells'

MTX_LINK='https://drive.google.com/uc?export=download&id=1-1ejn7scP80xsbrG0FtWzsozjg0hhc23'
RDS_LINK='https://drive.google.com/uc?export=download&id=1KW_GX6xznSUpWRWUykpNaSbAhyClf7_n'
NORM_LINK='https://drive.google.com/uc?export=download&id=1mvo3ENkBvEAOyWG6ejApzQTPDLX5yBKU'
FVG_LINK='https://drive.google.com/uc?export=download&id=13Fhruuj-vEEo1WM138ahtAYqfHc7LsaZ'
SCALED_LINK='https://drive.google.com/uc?export=download&id=18TK8us235LWNajarWDBAtASUXMYAxvw0'
PCA_LINK='https://drive.google.com/uc?export=download&id=1gf3BTB4dygDsom1TzjsBfgZnZepcoG5c'
NEIGHBOURS_LINK='https://drive.google.com/uc?export=download&id=1N2lHoKRBZ7pmAYGfghLWB9KUrLA5WoNX'
CLUSTERS_LINK='https://drive.google.com/uc?export=download&id=1HWxZWHbNUNo4z__9PhhL_CJOLzec_ETa'
TSNE_LINK='https://drive.google.com/uc?export=download&id=1qsvMr_GkCSp1dyTJt1BZ6cElJwFFX2zO'
MARKERS_LINK='https://drive.google.com/uc?export=download&id=18OmWNc7mF-4pzH6DQkPp1eKunN4BfvxD'

LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'
H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'

H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp'
SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'

# Seurat v4 exclusives
IFNB_BASE_FILE='ifnb_'

IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL'
IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54'

## Classify and UMAP mapping
CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download'
CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i'
# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download'
UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download'

# Number of downloads that failed; drives the final exit status.
failures=0

# get_data LINK FNAME
#
# Download LINK to FNAME unless FNAME already exists.
# Returns 0 when FNAME is available afterwards, 1 when the download failed.
#
# `wget -O` creates the target file even when the transfer fails, which would
# make the "already present" check skip the file forever on later runs. The
# download therefore goes to a temporary name and is only moved into place on
# success.
function get_data {
  local link=$1
  local fname=$2
  local tmp="${fname}.part"

  if [ -f "$fname" ]; then
    return 0
  fi

  echo "$fname not available locally, downloading.."
  if wget -O "$tmp" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link"; then
    mv -f "$tmp" "$fname"
  else
    rm -f "$tmp"
    echo "ERROR: could not download $fname from $link" >&2
    failures=$((failures + 1))
    return 1
  fi
}

# get matrix data
mkdir -p test-data
# Abort rather than scatter downloads into the current directory.
pushd test-data || exit 1

if get_data "$MTX_LINK" mtx.zip; then
  # -o: overwrite without prompting so re-runs do not block on stdin.
  unzip -o mtx.zip
  rm -f mtx.zip
fi

get_data "$RDS_LINK" "${BASENAME_FILE}.rds"
get_data "$NORM_LINK" "${BASENAME_FILE}-normalised.rds"
get_data "$FVG_LINK" "${BASENAME_FILE}-fvg.rds"
get_data "$SCALED_LINK" "${BASENAME_FILE}-scaled.rds"
get_data "$PCA_LINK" "${BASENAME_FILE}-pca.rds"
get_data "$NEIGHBOURS_LINK" "${BASENAME_FILE}-neighbours.rds"
get_data "$CLUSTERS_LINK" "${BASENAME_FILE}-clusters.rds"
get_data "$TSNE_LINK" "${BASENAME_FILE}-tsne.rds"

if get_data "$MARKERS_LINK" "${BASENAME_FILE}-markers.csv.zip"; then
  unzip -o "${BASENAME_FILE}-markers.csv.zip"
  rm -f "${BASENAME_FILE}-markers.csv.zip"
fi

get_data "$LOOM_LINK" "${BASENAME_FILE}_loom.h5"
get_data "$SCE_LINK" "${BASENAME_FILE}_sce.rds"
get_data "$H5AD_LINK" "${BASENAME_FILE}.h5ad"

get_data "$H5AD_SC182_LINK" "${BASENAME_FILE}_sc182.h5ad"

get_data "$IFNB_CTRL_INT_LINK" "${IFNB_BASE_FILE}ctrl_norm_fvg.rds"
get_data "$IFNB_STIM_INT_LINK" "${IFNB_BASE_FILE}stim_norm_fvg.rds"

get_data "$CLASSIFY_QUERY_LINK" "Classify_query.rds"
get_data "$CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK" "Classify_anchors.rds"
get_data "$UMAP_RESULT_OBJECT_LINK" "UMAP_result_integrated.rds"

popd || exit 1

if [ "$failures" -gt 0 ]; then
  echo "ERROR: $failures download(s) failed" >&2
  exit 1
fi
#!/usr/bin/env Rscript
#
# scripts/seurat-scale-data.R
#
# Command-line wrapper around Seurat's ScaleData(): reads a single-cell object
# from --input-object-file, scales/centers it (optionally regressing out
# variables) and writes the result to --output-object-file.

# Load optparse we need to check inputs

suppressPackageStartupMessages(require(optparse))

# Load common functions

suppressPackageStartupMessages(require(workflowscriptscommon))

# parse options

option_list = list(
  make_option(
    c("-i", "--input-object-file"),
    action = "store",
    default = NA,
    type = 'character',
    help = "File name in which a serialized R matrix object may be found."
  ),
  make_option(
    c("--input-format"),
    action = "store",
    default = "seurat",
    type = 'character',
    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
  ),
  make_option(
    c("--output-format"),
    action = "store",
    default = "seurat",
    type = 'character',
    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
  ),
  make_option(
    c("-e", "--genes-use"),
    action = "store",
    default = NULL,
    type = 'character',
    help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
  ),
  make_option(
    c("-v", "--vars-to-regress"),
    action = "store",
    default = NULL,
    type = 'character',
    help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
  ),
  make_option(
    c("-m", "--model-use"),
    action = "store",
    default = 'linear',
    type = 'character',
    help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
  ),
  make_option(
    c("-u", "--use-umi"),
    action = "store",
    default = FALSE,
    type = 'logical',
    help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
  ),
  make_option(
    c("-s", "--do-not-scale"),
    action = "store_true",
    default = FALSE,
    type = 'logical',
    help = "Skip the data scale."
  ),
  make_option(
    c("-c", "--do-not-center"),
    action = "store_true",
    default = FALSE,
    type = 'logical',
    help = "Skip data centering."
  ),
  make_option(
    c("-x", "--scale-max"),
    action = "store",
    default = 10,
    type = 'double',
    help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
  ),
  make_option(
    c("-b", "--block-size"),
    action = "store",
    default = 1000,
    type = 'integer',
    help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
  ),
  make_option(
    c("-d", "--min-cells-to-block"),
    action = "store",
    default = 1000,
    type = 'integer',
    help = "If object contains fewer than this number of cells, don't block for scaling calculations."
  ),
  # NOTE(review): --check-for-norm is still accepted for command-line
  # compatibility but is not forwarded to ScaleData() below.
  make_option(
    c("-n", "--check-for-norm"),
    action = "store",
    default = TRUE,
    type = 'logical',
    help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
  ),
  make_option(
    c("-o", "--output-object-file"),
    action = "store",
    default = NA,
    type = 'character',
    help = "File name in which to store serialized R object of type 'Seurat'.'"
  )
)

opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))

# Check parameter values

if (!file.exists(opt$input_object_file)) {
  stop((paste('File', opt$input_object_file, 'does not exist')))
}

# Optional gene list: one gene name per line.
genes_use <- NULL
if (!is.null(opt$genes_use)) {
  if (!file.exists(opt$genes_use)) {
    stop((paste('Supplied genes file', opt$genes_use, 'does not exist')))
  }
  genes_use <- readLines(opt$genes_use)
}

# Break up the comma-separated --vars-to-regress value into a character vector.
# The option defaults to NULL and strsplit() raises "non-character argument"
# on NULL, so only split when a value was actually supplied. Surrounding
# whitespace is dropped so that "a, b" and "a,b" are equivalent.
vars_to_regress <- NULL
if (!is.null(opt$vars_to_regress)) {
  vars_to_regress <- trimws(unlist(strsplit(opt$vars_to_regress, ",")))
}

# Now we're happy with the arguments, load Seurat and do the work

suppressPackageStartupMessages(require(Seurat))

# Load format-specific helpers. The two checks are independent: converting
# between loom and singlecellexperiment needs both packages, which an
# if/else-if chain would not provide.
formats_in_use <- c(opt$input_format, opt$output_format)
if ("loom" %in% formats_in_use) {
  suppressPackageStartupMessages(require(SeuratDisk))
}
if ("singlecellexperiment" %in% formats_in_use) {
  suppressPackageStartupMessages(require(scater))
}

# Input from serialized R object

seurat_object <- read_seurat4_object(input_path = opt$input_object_file, format = opt$input_format)
# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
# might be useful
scaled_seurat_object <- ScaleData(seurat_object,
                                  features = genes_use,
                                  vars.to.regress = vars_to_regress,
                                  model.use = opt$model_use,
                                  use.umi = opt$use_umi,
                                  do.scale = !opt$do_not_scale,
                                  do.center = !opt$do_not_center,
                                  scale.max = opt$scale_max,
                                  block.size = opt$block_size,
                                  min.cells.to.block = opt$min_cells_to_block,
                                  verbose = FALSE)


# Output to a serialized R object
write_seurat4_object(seurat_object = scaled_seurat_object,
                     output_path = opt$output_object_file,
                     format = opt$output_format)
<!--
  seurat_integration.xml

  Galaxy wrapper for seurat-integration.R. Takes a set of input objects (and
  optionally a set of reference objects) and passes the anchor-finding and
  integration options declared below to the script. Input/output handling
  comes from the macros imported from seurat_macros.xml.
-->
<tool id="seurat_integration" name="Seurat integration" profile="18.01" version="@SEURAT_VERSION@+galaxy0">
    <description>merges datasets based on internal anchors or reference based</description>
    <macros>
        <import>seurat_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version" />
    <command detect_errors="exit_code"><![CDATA[
    @INPUT_OBJS_PREAMBLE@
    @REFERENCE_OBJS_PREAMBLE@
    seurat-integration.R
    @OUTPUT_OBJECT@
    @INPUT_OBJECTS@
    @REFERENCE_OBJECTS@

    #if $assay_list
    --assay-list '$assay_list'
    #end if


    #if $anchor_features_file
    --anchor-features '$anchor_features_file'
    #elif $anchor_features
    --anchor-features '$anchor_features'
    #end if
    $scale


    #if $normalization_method
    --normalization-method '$normalization_method'
    #end if


    #if $sct_clip_range
    --sct-clip-range '$sct_clip_range'
    #end if


    #if $reduction
    --reduction '$reduction'
    #end if

    $l2_norm


    #if $dims
    --dims '$dims'
    #end if


    #if $k_anchor
    --k-anchor '$k_anchor'
    #end if


    #if $k_filter
    --k-filter '$k_filter'
    #end if


    #if $k_score
    --k-score '$k_score'
    #end if


    #if $max_features
    --max-features '$max_features'
    #end if


    #if $nn_method
    --nn-method '$nn_method'
    #end if


    #if $n_trees
    --n-trees '$n_trees'
    #end if


    #if $eps
    --eps '$eps'
    #end if

    $verbose


    #if $new_assay_name
    --new-assay-name '$new_assay_name'
    #end if


    #if $integrate_features_pca
    --integrate-features-pca '$integrate_features_pca'
    #end if


    #if $features_to_integrate
    --features-to-integrate '$features_to_integrate'
    #end if


    #if $integrate_dims
    --integrate-dims '$integrate_dims'
    #end if


    #if $k_weight
    --k-weight '$k_weight'
    #end if


    #if $weight_reduction
    --weight-reduction '$weight_reduction'
    #end if


    #if $sd_weight
    --sd-weight '$sd_weight'
    #end if


    #if $sample_tree
    --sample-tree '$sample_tree'
    #end if

    $preserve_order


    #if $integrate_eps
    --integrate-eps '$integrate_eps'
    #end if


    ]]></command>
    <inputs>
        <expand macro="output_object_params" />
        <expand macro="input_object_params" multiple="True" />
        <expand macro="input_object_params" varname="reference" multiple="True" optional="True" />
        <param label="Assay-list" optional='true' name="assay_list" argument="--assay-list" type="text" help="A vector of assay names specifying which assay to use when constructing anchors. If NULL, the current default assay for each object is used."/>
        <!-- A data parameter has no scalar default, so no value attribute here.
             The command section passes this dataset as the anchor-features argument,
             taking precedence over the text parameter below. -->
        <param label="Anchor-features_file" optional='true' name="anchor_features_file" argument="--anchor-features" type="data" format='?' help="File option for anchor-features. This overrides the string option if set."/>
        <param label="Anchor-features" optional='true' value='2000' name="anchor_features" argument="--anchor-features" type="text" help="A numeric value (this will call 'SelectIntegrationFeatures' to select the provided number of features to be used in anchor finding) or a file with a vector of features to be used as input to the anchor finding process (comma separated)"/>
        <!-- Checked (default) passes nothing; unchecked passes the do-not-scale flag. -->
        <param label="Scale" optional='true' value='true' name="scale" argument="--do-not-scale" type="boolean" truevalue='' falsevalue='--do-not-scale' checked='true' help="Whether or not to scale the features provided. Only set to false (uncheck) if you have previously scaled the features you want to use for each object in the object.list"/>
        <param label="Normalization method" name="normalization_method" argument="--normalization-method" type="select" help="Name of normalization method used: LogNormalize or SCT">
            <option value="LogNormalize" selected="true">LogNormalize</option>
            <option value="SCT">SCT</option>
        </param>
        <param label="Sct clip range" optional='true' name="sct_clip_range" argument="--sct-clip-range" type="text" help="Numeric of length two specifying the min and max values the Pearson residual will be clipped to"/>
        <param label="Reduction" name="reduction" argument="--reduction" type="select" help="Dimensional reduction to perform when finding anchors. Can be cca (Canonical correlation analysis) or rpca (Reciprocal PCA)">
            <option value="cca" selected="true">Canonical correlation analysis</option>
            <option value="rpca">Reciprocal PCA</option>
        </param>
        <param label="L2 norm" optional='true' value='true' name="l2_norm" argument="--do-not-l2-norm" type="boolean" truevalue='' falsevalue='--do-not-l2-norm' checked='true' help="Perform L2 normalization on the CCA cell embeddings after dimensional reduction"/>
        <param label="Dims" optional='true' value='1:30' name="dims" argument="--dims" type="text" help="Which dimensions to use from the CCA to specify the neighbor search space"/>
        <param label="K anchor" optional='true' value='5' name="k_anchor" argument="--k-anchor" type="integer" help="How many neighbors (k) to use when picking anchors"/>
        <param label="K filter" optional='true' value='200' name="k_filter" argument="--k-filter" type="integer" help="How many neighbors (k) to use when filtering anchors"/>
        <param label="K score" optional='true' value='30' name="k_score" argument="--k-score" type="integer" help="How many neighbors (k) to use when scoring anchors"/>
        <param label="Max features" optional='true' value='200' name="max_features" argument="--max-features" type="integer" help="The maximum number of features to use when specifying the neighborhood search space in the anchor filtering"/>
        <param label="Nn method" name="nn_method" argument="--nn-method" type="select" help="Method for nearest neighbor finding. Options include: rann, annoy">
            <option value="rann">rann</option>
            <option value="annoy" selected="true">annoy</option>
        </param>
        <param label="N trees" optional='true' value='50' name="n_trees" argument="--n-trees" type="integer" help="More trees gives higher precision when using annoy approximate nearest neighbor search"/>
        <param label="Eps" optional='true' value='0' name="eps" argument="--eps" type="integer" help="Error bound on the neighbor finding algorithm (from RANN)"/>
        <param label="Verbose" optional='true' value='false' name="verbose" argument="--verbose" type="boolean" truevalue='--verbose' falsevalue='' checked='false' help="Print progress bars and output"/>
        <param label="New assay name" optional='true' value='integrated' name="new_assay_name" argument="--new-assay-name" type="text" help="Name for the new assay containing the integrated data"/>
        <param label="Integrate-features-pca" optional='true' name="integrate_features_pca" argument="--integrate-features-pca" type="text" help="Vector of features to use when computing the PCA to determine the weights. Only set if you want a different set from those used in the anchor finding process"/>
        <param label="Features to integrate" optional='true' name="features_to_integrate" argument="--features-to-integrate" type="text" help="Vector of features to integrate. By default, will use the features used in anchor finding."/>
        <param label="Integrate-dims" optional='true' value='1:30' name="integrate_dims" argument="--integrate-dims" type="text" help="Number of dimensions to use in the anchor weighting procedure"/>
        <param label="K weight" optional='true' value='100' name="k_weight" argument="--k-weight" type="integer" help="Number of neighbors to consider when weighting anchors"/>
        <param label="Weight reduction" optional='true' name="weight_reduction" argument="--weight-reduction" type="text" help="Dimension reduction to use when calculating anchor weights. This can be one of: A string, specifying the name of a dimension reduction present in all objects to be integrated; A vector of strings, specifying the name of a dimension reduction to use for each object to be integrated; A vector of DimReduc objects, specifying the object to use for each object in the integration; NULL, in which case a new PCA will be calculated and used to calculate anchor weights. Note that, if specified, the requested dimension reduction will only be used for calculating anchor weights in the first merge between reference and query, as the merged object will subsequently contain more cells than was in query, and weights will need to be calculated for all cells in the object."/>
        <param label="Sd weight" optional='true' value='1' name="sd_weight" argument="--sd-weight" type="integer" help="Controls the bandwidth of the Gaussian kernel for weighting"/>
        <param label="Sample tree" optional='true' name="sample_tree" argument="--sample-tree" type="text" help="Specify the order of integration. If NULL, will compute automatically."/>
        <param label="Preserve order" optional='true' value='false' name="preserve_order" argument="--preserve-order" type="boolean" truevalue='--preserve-order' falsevalue='' checked='false' help="Do not reorder objects based on size for each pairwise integration."/>
        <param label="Integrate-eps" optional='true' value='0' name="integrate_eps" argument="--integrate-eps" type="integer" help="Error bound on the neighbor finding algorithm (from 'RANN')"/>

    </inputs>
    <outputs>
        <expand macro="output_files"/>
    </outputs>
    <tests>
        <!-- MANUAL TESTS -->
        <test>
            <param name="rds_seurat_file" ftype="rdata" value="ifnb_ctrl_norm_fvg.rds,ifnb_stim_norm_fvg.rds"/>
            <output name="rds_seurat_file" ftype="rdata" >
                <assert_contents>
                    <has_size value="103786543" delta="10000000"/>
                </assert_contents>
            </output>
        </test>
        <!-- END MANUAL TESTS -->
    </tests>
<help>
<!-- MANUAL HELP -->
<![CDATA[
.. class:: infomark

**What it does**

@SEURAT_INTRO@

This tool aims to facilitate the first steps of the Seurat 4.0.4 https://satijalab.org/seurat/articles/integration_introduction.html
tutorial, up to the "Perform integration" section.

It will run the FindIntegrationAnchors method either using the inputs only
or the reference set if given, and then run the IntegrateData method. All
options are documented in-line.

-----

**Inputs**

* A set of Seurat objects (can be given in other formats as well) to integrate. These objects should be at least normalised and have the find variable genes/features method applied.
* All other inputs are optional (see above).

-----

**Outputs**

* A Seurat (or other format depending on selection) with the integrated object.

For more details on this method, please see the individual in-line documentation or the same method's Seurat 4 documentation.

@VERSION_HISTORY@

]]>
<!-- END MANUAL HELP -->
</help>
    <expand macro="citations" />
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seurat_macros.xml Sat Mar 02 10:42:39 2024 +0000 @@ -0,0 +1,354 @@ +<?xml version="1.0"?> +<macros> + <token name="@VERSION@">4.0.0</token> + <token name="@SEURAT_VERSION@">4.0.4</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">seurat-scripts</requirement> + </requirements> + </xml> + <xml name="version"> + <version_command><![CDATA[ +echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(seurat); cat(sessionInfo()\$otherPkgs\$seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]></version_command> + </xml> + + <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False"> + <conditional name="@VARNAME@" label="Input format"> + <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData"> + <option value="rds_seurat" selected="true">RDS with a Seurat object</option> + <option value="loom">Loom</option> + <option value="h5seurat">Seurat HDF5</option> + <option value="anndata">AnnData</option> + <option value="rds_sce">RDS with a Single Cell Experiment object</option> + </param> + <when value="anndata"> + <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/> + </when> + <when value="loom"> + <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/> + </when> + <when value="rds_seurat"> + <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/> + </when> + <when value="rds_sce"> + <param type="data" name="rds_sce_file" multiple="@MULTIPLE@" 
optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/> + </when> + <when value="h5seurat"> + <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME" format="h5"/> + </when> + </conditional> + </xml> + + <token name="@INPUT_OBJ_PREAMBLE@"> + #if $input.format == 'loom' + ln -s '$input.loom_file' input.loom; + #else if $input.format == 'h5seurat' + ln -s '$input.h5seurat_file' input.h5seurat; + #else if $input.format == 'anndata' + ## it complains when using links for AnnData... + cp '$input.anndata_file' input.h5ad; + #end if + </token> + + <token name="@INPUT_OBJECT@"> + #if $input.format == "anndata" + --input-object-file input.h5ad --input-format anndata + #else if $input.format == "loom" + --input-object-file input.loom --input-format loom + #else if $input.format == "rds_seurat" + --input-object-file '$input.rds_seurat_file' --input-format seurat + #else if $input.format == "rds_sce" + --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment + #else if $input.format == "h5seurat" + --input-object-file input.h5seurat --input-format h5seurat + #end if + </token> + + <token name="@QUERY_OBJ_PREAMBLE@"> + #if $query.format == 'loom' + ln -s '$query.loom_file' query.loom; + #else if $query.format == 'h5seurat' + ln -s '$query.h5seurat_file' query.h5seurat; + #else if $query.format == 'anndata' + ## it complains when using links for AnnData... 
+ cp '$query.anndata_file' query.h5ad; + #end if + </token> + + <token name="@QUERY_OBJECT@"> + #if $query.format == "anndata" + --query-object-file query.h5ad --query-format anndata + #else if $query.format == "loom" + --query-object-file query.loom --query-format loom + #else if $query.format == "rds_seurat" + --query-object-file '$query.rds_seurat_file' --query-format seurat + #else if $query.format == "rds_sce" + --query-object-file '$query.rds_sce_file' --query-format singlecellexperiment + #else if $query.format == "h5seurat" + --query-object-file query.h5seurat --query-format h5seurat + #end if + </token> + + <token name="@ANCHORS_OBJ_PREAMBLE@"> + #if $anchors.format == 'loom' + ln -s '$anchors.loom_file' anchors.loom; + #else if $anchors.format == 'h5seurat' + ln -s '$anchors.h5seurat_file' anchors.h5seurat; + #else if $anchors.format == 'anndata' + ## it complains when using links for AnnData... + cp '$anchors.anndata_file' anchors.h5ad; + #end if + </token> + + <token name="@ANCHORS_OBJECT@"> + #if $anchors.format == "anndata" + --anchors-object-file anchors.h5ad --anchors-format anndata + #else if $anchors.format == "loom" + --anchors-object-file anchors.loom --anchors-format loom + #else if $anchors.format == "rds_seurat" + --anchors-object-file '$anchors.rds_seurat_file' --anchors-format seurat + #else if $anchors.format == "rds_sce" + --anchors-object-file '$anchors.rds_sce_file' --anchors-format singlecellexperiment + #else if $anchors.format == "h5seurat" + --anchors-object-file anchors.h5seurat --anchors-format h5seurat + #end if + </token> + + <token name="@REFERENCE_OBJ_PREAMBLE@"> + #if $reference.format == 'loom' + ln -s '$reference.loom_file' reference.loom; + #else if $reference.format == 'h5seurat' + ln -s '$reference.h5seurat_file' reference.h5seurat; + #else if $reference.format == 'anndata' + ## it complains when using links for AnnData... 
+ cp '$reference.anndata_file' reference.h5ad; + #end if + </token> + + <token name="@REFERENCE_OBJECT@"> + #if $reference.format == "anndata" + --reference-object-file reference.h5ad --reference-format anndata + #else if $reference.format == "loom" + --reference-object-file reference.loom --reference-format loom + #else if $reference.format == "rds_seurat" + --reference-object-file '$reference.rds_seurat_file' --reference-format seurat + #else if $reference.format == "rds_sce" + --reference-object-file '$reference.rds_sce_file' --reference-format singlecellexperiment + #else if $reference.format == "h5seurat" + --reference-object-file reference.h5seurat --reference-format h5seurat + #end if + </token> + + <token name="@INPUT_OBJS_PREAMBLE@"> + #if $input.format == 'loom' + #for $i, $fh in enumerate($input.loom_file): + ln -s '$fh' input.${i}.loom; + #end for + #else if $input.format == 'h5seurat' + #for $i, $fh in enumerate($input.h5seurat_file): + ln -s '$fh' input.${i}.h5seurat; + #end for + #else if $input.format == 'anndata' + ## it complains when using links for AnnData... 
+ #for $i, $fh in enumerate($input.anndata_file): + cp '$fh' input.${i}.h5ad; + #end for + #end if + </token> + + <token name="@INPUT_OBJECTS@"> + #if $input.format == "anndata" + --input-object-files + #set file_array = [ "input."+str($i)+".h5ad" for $i, $fh in enumerate($input.anndata_file)] + #set files = ",".join($file_array) + ${files} + --input-format anndata + #else if $input.format == "loom" + --input-object-files + #set file_array = [ "input."+str($i)+".loom" for $i, $fh in enumerate($input.loom_file)] + #set files = ",".join($file_array) + ${files} + --input-format loom + #else if $input.format == "rds_seurat" + --input-object-files + #set file_array = $input.rds_seurat_file + #set files = ",".join([ str($fh) for $fh in $file_array ]) + ${files} + --input-format seurat + #else if $input.format == "rds_sce" + --input-object-files + #set file_array = $input.rds_sce_file + #set files = ",".join([ str($fh) for $fh in $file_array ]) + ${files} + --input-format singlecellexperiment + #else if $input.format == "h5seurat" + --input-object-files + #set file_array = [ "input."+str($i)+".h5seurat" for $i, $fh in enumerate($input.h5seurat)] + #set files = ",".join($file_array) + ${files} + --input-format h5seurat + #end if + </token> + + <token name="@REFERENCE_OBJS_PREAMBLE@"> + #if $reference.format == 'loom' + #for $i, $fh in enumerate($reference.loom_file): + ln -s '$fh' reference.${i}.loom; + #end for + #else if $reference.format == 'h5seurat' + #for $i, $fh in enumerate($reference.h5seurat_file): + ln -s '$fh' reference.${i}.h5seurat; + #end for + #else if $reference.format == 'anndata' + ## it complains when using links for AnnData... 
+ #for $i, $fh in enumerate($reference.anndata_file): + cp '$fh' reference.${i}.h5ad; + #end for + #end if + </token> + + <token name="@REFERENCE_OBJECTS@"> + #if $reference.format == "anndata" and $reference.anndata_file: + --reference-object-files + #set file_array = [ "reference."+str($i)+".h5ad" for $i, $fh in enumerate($reference.anndata_file)] + #set files = ",".join($file_array) + ${files} + --reference-format anndata + #else if $reference.format == "loom" and $reference.loom_file: + --reference-object-files + #set file_array = [ "reference."+str($i)+".loom" for $i, $fh in enumerate($reference.loom_file)] + #set files = ",".join($file_array) + ${files} + --reference-format loom + #else if $reference.format == "rds_seurat" and $reference.rds_seurat_file: + --reference-object-files + #set files = ",".join([ str($fh) for $fh in $reference.rds_seurat_file ]) + ${files} + --reference-format seurat + #else if $reference.format == "rds_sce" and $reference.rds_sce_file: + --reference-object-files + #set files = ",".join([ str($fh) for $fh in $reference.rds_sce_file ]) + ${files} + --reference-format singlecellexperiment + #else if $reference.format == "h5seurat" and $reference.h5seurat: + --reference-object-files + #set file_array = [ "reference."+str($i)+".h5seurat" for $i, $fh in enumerate($reference.h5seurat)] + #set files = ",".join($file_array) + ${files} + --reference-format h5seurat + #end if + </token> + + <xml name="output_object_params"> + <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom"> + <option value="rds_seurat" selected="true">RDS with a Seurat object</option> + <option value="anndata">AnnData written by Seurat</option> + <option value="loom">Loom</option> + <option value="rds_sce">RDS with a Single Cell Experiment object</option> + </param> + </xml> + + <xml name="output_files"> + <data name="loom_file" from_work_dir="seurat_obj.loom" format="h5" label="${tool.name} on 
${on_string}: Seurat Loom">
      <filter>format == 'loom'</filter>
    </data>
    <data
      name="rds_seurat_file"
      format="rdata"
      label="${tool.name} on ${on_string}: Seurat RDS">
      <filter>format == 'rds_seurat'</filter>
    </data>
    <data
      name="anndata_file"
      format="h5ad"
      label="${tool.name} on ${on_string}: AnnData from Seurat">
      <filter>format == 'anndata'</filter>
    </data>
    <data
      name="rds_sce_file"
      format="rdata"
      label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">
      <filter>format == 'rds_sce'</filter>
    </data>
  </xml>

  <!-- Command-line arguments selecting where, and in which format, the
       resulting object is written. -->
  <token name="@OUTPUT_OBJECT@">
    ## Loom is written to a fixed file name in the working directory; the other
    ## formats are written straight to the path of the matching output dataset.
    #if $format == "rds_seurat"
      --output-object-file '$rds_seurat_file'
      --output-format seurat
    #else if $format == "anndata"
      --output-object-file '$anndata_file'
      --output-format anndata
    #else if $format == "loom"
      --output-object-file seurat_obj.loom
      --output-format loom
    #else if $format == "rds_sce"
      --output-object-file '$rds_sce_file'
      --output-format singlecellexperiment
    #end if
  </token>

  <!-- One plot output dataset per image format. The FORMAT token (default png)
       sets the dataset name suffix, its datatype and the filter value that
       enables it. -->
  <xml name="plot_output_files_format" token_format="png">
    <data
      name="plot_out_@FORMAT@"
      format="@FORMAT@"
      label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot">
      <filter>plot_format == '@FORMAT@'</filter>
    </data>
  </xml>

  <!-- Command-line argument pointing the plot at the output dataset that
       matches the chosen plot format. -->
  <token name="@OUTPUT_PLOT@">
    ## Each branch targets the output named plot_out_ plus the format value.
    #if $plot_format == "png"
      --plot-out '$plot_out_png'
    #else if $plot_format == "jpg"
      --plot-out '$plot_out_jpg'
    #else if $plot_format == "tiff"
      --plot-out '$plot_out_tiff'
    #else if $plot_format == "pdf"
      --plot-out '$plot_out_pdf'
    #else if $plot_format == "eps"
      --plot-out '$plot_out_eps'
    #else if $plot_format == "ps"
      --plot-out '$plot_out_ps'
    #else if $plot_format == "svg"
      --plot-out '$plot_out_svg'
    #end if
  </token>

  <xml name="genes-use-input">
    <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data 
rather than a dimensionally reduced representation (i.e. PCs)."/>
  </xml>

  <!-- Optional integer input (minimum 1) giving the number of principal
       components to use. -->
  <xml name="dims-use-input">
    <param
      name="dims_use"
      argument="--dims-use"
      type="integer"
      min="1"
      optional="true"
      label="PCA Dimensions to use"
      help="Number of PCs (dimensions) to use in construction of the SNN graph."/>
  </xml>

  <!-- Shared introductory help paragraph (reStructuredText). The link targets
       for "Seurat_" and "Satija Lab_" are not defined inside this token;
       presumably each tool help supplies them. TODO confirm. -->
  <token name="@SEURAT_INTRO@"><![CDATA[
Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
types of single cell data.
  ]]></token>

  <!-- Shared change log shown in the tool help (reStructuredText). Entries are
       separated by blank lines so each renders as its own paragraph; the
       heading needs the same separation from the first entry. -->
  <token name="@VERSION_HISTORY@"><![CDATA[
**Version history**

4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca.

3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduces a convert method, improving format interconversion support.

3.1.2_0.0.8: Update metadata parsing

3.1.1_0.0.7: Exposes perplexity and enables tab input.

3.1.1_0.0.6+galaxy0: Moved to Seurat 3.

    Find clusters: removed dims-use, k-param, prune-snn.

2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at
EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l).

0.0.1: Initial contribution. Maria Doyle (GitHub mblue9).
]]></token>


  <!-- Citations shared by the Seurat tools: two DOI references plus a BibTeX
       entry for the r-seurat-scripts command line wrappers. In the BibTeX
       author field individual names are joined with "and", and the team name
       is wrapped in braces so it is treated as one corporate author instead
       of being split into first and last name parts. -->
  <xml name="citations">
    <citations>
      <citation type="doi">10.1038/s41592-021-01102-w</citation>
      <citation type="doi">10.1038/nbt.4096</citation>
      <citation type="bibtex">
        @misc{r-seurat-scripts.git,
          author = {Jonathan Manning and Pablo Moreno and {EBI Gene Expression Team}},
          year = {2018},
          title = {Seurat-scripts: command line interface for Seurat},
          publisher = {GitHub},
          journal = {GitHub repository},
          url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},
        }
      </citation>
    </citations>
  </xml>
</macros>
