changeset 0:699c0ca328f2 draft default tip

planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
author ebi-gxa
date Sat, 02 Mar 2024 10:40:57 +0000
parents
children
files extra/macro_mapper_seurat.yaml get_test_data.sh scripts/seurat-scale-data.R seurat_macros.xml seurat_select_integration_features.xml
diffstat 5 files changed, 773 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extra/macro_mapper_seurat.yaml	Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,90 @@
+---  # Maps groups of script options to macro/token names; the names match definitions in seurat_macros.xml.
+- option_group:
+    - input-object-file
+    - input-format
+  pre_command_macros:  # token names (seurat_macros.xml defines them wrapped in @...@)
+    - INPUT_OBJ_PREAMBLE
+  post_command_macros:
+    - INPUT_OBJECT
+  input_declaration_macros:  # names of <xml> macros
+    - input_object_params
+- option_group:
+    - output-object-file
+    - output-format
+  post_command_macros:
+    - OUTPUT_OBJECT
+  input_declaration_macros:
+    - output_object_params
+  output_declaration_macros:
+    - output_files
+- option_group:
+    - input-object-files
+    - input-format
+  pre_command_macros:
+    - INPUT_OBJS_PREAMBLE
+  post_command_macros:
+    - INPUT_OBJECTS
+  input_declaration_macros:
+    - input_object_params:
+        multiple: true  # value for the macro's "multiple" token
+- option_group:
+    - reference-object-files
+    - reference-format
+  pre_command_macros:
+    - REFERENCE_OBJS_PREAMBLE
+  post_command_macros:
+    - REFERENCE_OBJECTS
+  input_declaration_macros:
+    - input_object_params:
+        varname: reference  # value for the macro's "varname" token
+        multiple: true
+        optional: true
+- option_group:
+    - reference-object-file
+    - reference-format
+  pre_command_macros:
+    - REFERENCE_OBJ_PREAMBLE
+  post_command_macros:
+    - REFERENCE_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: reference
+- option_group:
+    - anchors-object-file
+    - anchors-format
+  pre_command_macros:
+    - ANCHORS_OBJ_PREAMBLE
+  post_command_macros:
+    - ANCHORS_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: anchors
+- option_group:
+    - query-object-file
+    - query-format
+  pre_command_macros:
+    - QUERY_OBJ_PREAMBLE
+  post_command_macros:
+    - QUERY_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: query
+- option_group:
+    - plot-out
+  post_command_macros:
+    - OUTPUT_PLOT
+  output_declaration_macros:  # one plot_output_files_format entry per plot format
+    - plot_output_files_format:
+        format: png
+    - plot_output_files_format:
+        format: pdf
+    - plot_output_files_format:
+        format: eps
+    - plot_output_files_format:
+        format: jpg
+    - plot_output_files_format:
+        format: ps
+    - plot_output_files_format:
+        format: tiff
+    - plot_output_files_format:
+        format: svg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh	Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+BASENAME_FILE='E-MTAB-6077-3k_features_90_cells'  # common prefix of the main test file names
+# Download links, one per test file fetched further down in this script.
+MTX_LINK='https://drive.google.com/uc?export=download&id=1-1ejn7scP80xsbrG0FtWzsozjg0hhc23'
+RDS_LINK='https://drive.google.com/uc?export=download&id=1KW_GX6xznSUpWRWUykpNaSbAhyClf7_n'
+NORM_LINK='https://drive.google.com/uc?export=download&id=1mvo3ENkBvEAOyWG6ejApzQTPDLX5yBKU'
+FVG_LINK='https://drive.google.com/uc?export=download&id=13Fhruuj-vEEo1WM138ahtAYqfHc7LsaZ'
+SCALED_LINK='https://drive.google.com/uc?export=download&id=18TK8us235LWNajarWDBAtASUXMYAxvw0'
+PCA_LINK='https://drive.google.com/uc?export=download&id=1gf3BTB4dygDsom1TzjsBfgZnZepcoG5c'
+NEIGHBOURS_LINK='https://drive.google.com/uc?export=download&id=1N2lHoKRBZ7pmAYGfghLWB9KUrLA5WoNX'
+CLUSTERS_LINK='https://drive.google.com/uc?export=download&id=1HWxZWHbNUNo4z__9PhhL_CJOLzec_ETa'
+TSNE_LINK='https://drive.google.com/uc?export=download&id=1qsvMr_GkCSp1dyTJt1BZ6cElJwFFX2zO'
+MARKERS_LINK='https://drive.google.com/uc?export=download&id=18OmWNc7mF-4pzH6DQkPp1eKunN4BfvxD'
+# Files saved below with the _loom.h5, .h5ad and _sce.rds suffixes.
+LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'
+H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'
+
+H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp'
+SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'
+
+# Test data exclusive to Seurat v4
+IFNB_BASE_FILE='ifnb_'  # prefix of the two IFNB file names
+
+IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL'
+IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54'
+
+## Objects for classification and UMAP mapping
+CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download'
+CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i'
+# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download'
+UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download'
+
+
+# get_data LINK FNAME: download LINK to FNAME unless FNAME already exists; returns 1 if the download fails.
+function get_data {
+  local link="$1" fname="$2"
+  if [ ! -f "$fname" ]; then
+    echo "$fname not available locally, downloading.."
+    # wget -O creates the target before any data arrives, so drop it on failure to let a re-run retry.
+    wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link" \
+      || { rm -f "$fname"; return 1; }
+  fi
+}
+
+# get matrix data
+mkdir -p test-data
+pushd test-data
+get_data $MTX_LINK mtx.zip
+unzip mtx.zip
+rm -f mtx.zip
+
+get_data $RDS_LINK $BASENAME_FILE".rds"
+get_data $NORM_LINK $BASENAME_FILE"-normalised.rds"
+get_data $FVG_LINK $BASENAME_FILE"-fvg.rds"
+get_data $SCALED_LINK $BASENAME_FILE"-scaled.rds"
+get_data $PCA_LINK $BASENAME_FILE"-pca.rds"
+get_data $NEIGHBOURS_LINK $BASENAME_FILE"-neighbours.rds"
+get_data $CLUSTERS_LINK $BASENAME_FILE"-clusters.rds"
+get_data $TSNE_LINK $BASENAME_FILE"-tsne.rds"
+get_data $MARKERS_LINK $BASENAME_FILE"-markers.csv.zip"
+
+unzip $BASENAME_FILE"-markers.csv.zip"
+rm -f $BASENAME_FILE"-markers.csv.zip"
+
+get_data $LOOM_LINK $BASENAME_FILE"_loom.h5"
+get_data $SCE_LINK $BASENAME_FILE"_sce.rds"
+get_data $H5AD_LINK $BASENAME_FILE".h5ad"
+
+get_data $H5AD_SC182_LINK $BASENAME_FILE"_sc182.h5ad"
+
+get_data $IFNB_CTRL_INT_LINK $IFNB_BASE_FILE"ctrl_norm_fvg.rds"
+get_data $IFNB_STIM_INT_LINK $IFNB_BASE_FILE"stim_norm_fvg.rds"
+
+get_data $CLASSIFY_QUERY_LINK "Classify_query.rds"
+get_data $CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK "Classify_anchors.rds"
+get_data $UMAP_RESULT_OBJECT_LINK "UMAP_result_integrated.rds"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/seurat-scale-data.R	Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,165 @@
+#!/usr/bin/env Rscript 
+
+# Load optparse; we need it to check the inputs
+
+suppressPackageStartupMessages(require(optparse))
+
+# Load common functions
+
+suppressPackageStartupMessages(require(workflowscriptscommon))
+
+# parse options
+
+option_list <- list(
+  make_option(
+    c("-i", "--input-object-file"),
+    type = "character",
+    action = "store",
+    default = NA,
+    help = "File name in which a serialized R matrix object may be found."
+  ),
+  make_option(
+    "--input-format",
+    type = "character",
+    action = "store",
+    default = "seurat",
+    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
+  ),
+  make_option(
+    "--output-format",
+    type = "character",
+    action = "store",
+    default = "seurat",
+    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
+  ),
+  make_option(
+    c("-e", "--genes-use"),
+    type = "character",
+    action = "store",
+    default = NULL,
+    help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
+  ),
+  make_option(
+    c("-v", "--vars-to-regress"),
+    type = "character",
+    action = "store",
+    default = NULL,
+    help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
+  ),
+  make_option(
+    c("-m", "--model-use"),
+    type = "character",
+    action = "store",
+    default = "linear",
+    help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
+  ),
+  make_option(
+    c("-u", "--use-umi"),
+    type = "logical",
+    action = "store",
+    default = FALSE,
+    help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
+  ),
+  make_option(
+    c("-s", "--do-not-scale"),
+    type = "logical",
+    action = "store_true",
+    default = FALSE,
+    help = "Skip the data scale."
+  ),
+  make_option(
+    c("-c", "--do-not-center"),
+    type = "logical",
+    action = "store_true",
+    default = FALSE,
+    help = "Skip data centering."
+  ),
+  make_option(
+    c("-x", "--scale-max"),
+    type = "double",
+    action = "store",
+    default = 10,
+    help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
+  ),
+  make_option(
+    c("-b", "--block-size"),
+    type = "integer",
+    action = "store",
+    default = 1000,
+    help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
+  ),
+  make_option(
+    c("-d", "--min-cells-to-block"),
+    type = "integer",
+    action = "store",
+    default = 1000,
+    help = "If object contains fewer than this number of cells, don't block for scaling calculations."
+  ),
+  make_option(
+    c("-n", "--check-for-norm"),
+    type = "logical",
+    action = "store",
+    default = TRUE,
+    help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
+  ),
+  make_option(
+    c("-o", "--output-object-file"),
+    type = "character",
+    action = "store",
+    default = NA,
+    help = "File name in which to store serialized R object of type 'Seurat'.'"
+  )
+)
+
+# Parse the command line; the input and output object files are mandatory
+opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))
+
+# Check parameter values
+if ( ! file.exists(opt$input_object_file)){
+  stop((paste('File', opt$input_object_file, 'does not exist')))
+}
+
+# Optional gene list, one name per line; stays NULL when no file was supplied
+if (! is.null(opt$genes_use)){
+  if (! file.exists(opt$genes_use)){
+    stop((paste('Supplied genes file', opt$genes_use, 'does not exist')))
+  }else{
+    genes_use <- readLines(opt$genes_use)
+  }
+}else{
+  genes_use <- NULL
+}
+# Split --vars-to-regress on commas (trimming blanks); skipped when unset because strsplit() rejects NULL
+if (! is.null(opt$vars_to_regress)) opt$vars_to_regress <- trimws(unlist(strsplit(opt$vars_to_regress, ",")))
+
+# Now we're happy with the arguments, load Seurat plus each converter needed (loom and SCE are tested independently)
+suppressPackageStartupMessages(require(Seurat))
+if (opt$input_format == "loom" || opt$output_format == "loom") {
+  suppressPackageStartupMessages(require(SeuratDisk))
+}
+if (opt$input_format == "singlecellexperiment" || opt$output_format == "singlecellexperiment") {
+  suppressPackageStartupMessages(require(scater))
+}
+
+# Read the input object in the requested format
+unscaled <- read_seurat4_object(
+  input_path = opt$input_object_file,
+  format = opt$input_format
+)
+# Scale/centre the data. Passing a named argument list might be a useful alternative, see
+# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
+scaled <- ScaleData(
+  unscaled,
+  features = genes_use,
+  vars.to.regress = opt$vars_to_regress,
+  model.use = opt$model_use,
+  use.umi = opt$use_umi,
+  do.scale = !opt$do_not_scale,
+  do.center = !opt$do_not_center,
+  scale.max = opt$scale_max,
+  block.size = opt$block_size,
+  min.cells.to.block = opt$min_cells_to_block,
+  verbose = FALSE
+)
+# Write the scaled object in the requested output format
+write_seurat4_object(seurat_object = scaled, output_path = opt$output_object_file, format = opt$output_format)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_macros.xml	Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,354 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@VERSION@">4.0.0</token> <!-- version used for the seurat-scripts package requirement -->
+    <token name="@SEURAT_VERSION@">4.0.4</token> <!-- prefix of the version attribute in the tool XML -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">seurat-scripts</requirement>
+        </requirements>
+    </xml>
+    <!-- Prints the R version line followed by the version of the attached Seurat package (R package names are case-sensitive). --> <xml name="version">
+        <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(Seurat); cat(sessionInfo()\$otherPkgs\$Seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]></version_command>
+    </xml>
+
+   <!-- Format selector plus one data parameter per format; the varname, multiple and optional tokens default to a single required dataset named input. --> <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False">
+     <conditional name="@VARNAME@" label="Input format">
+       <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData">
+         <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+         <option value="loom">Loom</option>
+         <option value="h5seurat">Seurat HDF5</option>
+         <option value="anndata">AnnData</option>
+         <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+       </param>
+       <when value="anndata">
+         <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/>
+       </when>
+       <when value="loom">
+         <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/>
+       </when>
+       <when value="rds_seurat">
+         <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/>
+       </when>
+       <when value="rds_sce">
+         <param type="data" name="rds_sce_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/>
+       </when>
+       <when value="h5seurat">
+         <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME@" format="h5"/>
+       </when>
+     </conditional>
+   </xml>
+
+   <!-- Stages a loom, h5seurat or AnnData input under a fixed relative name (input.EXT); other formats get no staging command. --> <token name="@INPUT_OBJ_PREAMBLE@">
+     #if $input.format == 'loom'
+     ln -s '$input.loom_file' input.loom;
+     #else if $input.format == 'h5seurat'
+     ln -s '$input.h5seurat_file' input.h5seurat;
+     #else if $input.format == 'anndata'
+     ## AnnData is copied instead of linked: using a link for it is reported to cause complaints.
+     cp '$input.anndata_file' input.h5ad;
+     #end if
+   </token>
+
+   <!-- Emits the input-object-file and input-format options: RDS datasets are passed by dataset path, the other formats by a fixed relative name (input.EXT). --> <token name="@INPUT_OBJECT@">
+    #if $input.format == "anndata"
+        --input-object-file input.h5ad --input-format anndata
+    #else if $input.format == "loom"
+        --input-object-file input.loom --input-format loom
+    #else if $input.format == "rds_seurat"
+        --input-object-file '$input.rds_seurat_file' --input-format seurat
+    #else if $input.format == "rds_sce"
+        --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment
+    #else if $input.format == "h5seurat"
+        --input-object-file input.h5seurat --input-format h5seurat
+    #end if
+   </token>
+
+   <!-- Stages a loom, h5seurat or AnnData query under a fixed relative name (query.EXT); other formats get no staging command. --> <token name="@QUERY_OBJ_PREAMBLE@">
+     #if $query.format == 'loom'
+     ln -s '$query.loom_file' query.loom;
+     #else if $query.format == 'h5seurat'
+     ln -s '$query.h5seurat_file' query.h5seurat;
+     #else if $query.format == 'anndata'
+     ## AnnData is copied instead of linked: using a link for it is reported to cause complaints.
+     cp '$query.anndata_file' query.h5ad;
+     #end if
+   </token>
+
+   <!-- Emits the query-object-file and query-format options: RDS datasets are passed by dataset path, the other formats by a fixed relative name (query.EXT). --> <token name="@QUERY_OBJECT@">
+    #if $query.format == "anndata"
+        --query-object-file query.h5ad --query-format anndata
+    #else if $query.format == "loom"
+        --query-object-file query.loom --query-format loom
+    #else if $query.format == "rds_seurat"
+        --query-object-file '$query.rds_seurat_file' --query-format seurat
+    #else if $query.format == "rds_sce"
+        --query-object-file '$query.rds_sce_file' --query-format singlecellexperiment
+    #else if $query.format == "h5seurat"
+        --query-object-file query.h5seurat --query-format h5seurat
+    #end if
+  </token>
+
+  <!-- Stages a loom, h5seurat or AnnData anchors object under a fixed relative name (anchors.EXT); other formats get no staging command. --> <token name="@ANCHORS_OBJ_PREAMBLE@">
+    #if $anchors.format == 'loom'
+    ln -s '$anchors.loom_file' anchors.loom;
+    #else if $anchors.format == 'h5seurat'
+    ln -s '$anchors.h5seurat_file' anchors.h5seurat;
+    #else if $anchors.format == 'anndata'
+    ## AnnData is copied instead of linked: using a link for it is reported to cause complaints.
+    cp '$anchors.anndata_file' anchors.h5ad;
+    #end if
+  </token>
+
+  <!-- Emits the anchors-object-file and anchors-format options: RDS datasets are passed by dataset path, the other formats by a fixed relative name (anchors.EXT). --> <token name="@ANCHORS_OBJECT@">
+   #if $anchors.format == "anndata"
+       --anchors-object-file anchors.h5ad --anchors-format anndata
+   #else if $anchors.format == "loom"
+       --anchors-object-file anchors.loom --anchors-format loom
+   #else if $anchors.format == "rds_seurat"
+       --anchors-object-file '$anchors.rds_seurat_file' --anchors-format seurat
+   #else if $anchors.format == "rds_sce"
+       --anchors-object-file '$anchors.rds_sce_file' --anchors-format singlecellexperiment
+   #else if $anchors.format == "h5seurat"
+       --anchors-object-file anchors.h5seurat --anchors-format h5seurat
+   #end if
+ </token>
+
+ <!-- Stages a loom, h5seurat or AnnData reference under a fixed relative name (reference.EXT); other formats get no staging command. --> <token name="@REFERENCE_OBJ_PREAMBLE@">
+   #if $reference.format == 'loom'
+   ln -s '$reference.loom_file' reference.loom;
+   #else if $reference.format == 'h5seurat'
+   ln -s '$reference.h5seurat_file' reference.h5seurat;
+   #else if $reference.format == 'anndata'
+   ## AnnData is copied instead of linked: using a link for it is reported to cause complaints.
+   cp '$reference.anndata_file' reference.h5ad;
+   #end if
+ </token>
+
+ <!-- Emits the reference-object-file and reference-format options: RDS datasets are passed by dataset path, the other formats by a fixed relative name (reference.EXT). --> <token name="@REFERENCE_OBJECT@">
+  #if $reference.format == "anndata"
+      --reference-object-file reference.h5ad --reference-format anndata
+  #else if $reference.format == "loom"
+      --reference-object-file reference.loom --reference-format loom
+  #else if $reference.format == "rds_seurat"
+      --reference-object-file '$reference.rds_seurat_file' --reference-format seurat
+  #else if $reference.format == "rds_sce"
+      --reference-object-file '$reference.rds_sce_file' --reference-format singlecellexperiment
+  #else if $reference.format == "h5seurat"
+      --reference-object-file reference.h5seurat --reference-format h5seurat
+  #end if
+ </token>
+
+   <!-- Multi-dataset staging: each selected loom, h5seurat or AnnData dataset gets an indexed relative name (input.INDEX.EXT). --> <token name="@INPUT_OBJS_PREAMBLE@">
+     #if $input.format == 'loom'
+     #for $i, $fh in enumerate($input.loom_file):
+        ln -s '$fh' input.${i}.loom;
+     #end for
+     #else if $input.format == 'h5seurat'
+     #for $i, $fh in enumerate($input.h5seurat_file):
+        ln -s '$fh' input.${i}.h5seurat;
+     #end for
+     #else if $input.format == 'anndata'
+     ## AnnData is copied instead of linked: using a link for it is reported to cause complaints.
+     #for $i, $fh in enumerate($input.anndata_file):
+        cp '$fh' input.${i}.h5ad;
+     #end for
+     #end if
+   </token>
+
+   <!-- Emits the input-object-files option as a comma-separated list, followed by input-format; the h5seurat branch reads the h5seurat_file parameter like every other user of that parameter. --> <token name="@INPUT_OBJECTS@">
+    #if $input.format == "anndata"
+        --input-object-files
+    #set file_array = [ "input."+str($i)+".h5ad" for $i, $fh in enumerate($input.anndata_file)]
+    #set files = ",".join($file_array)
+        ${files}
+         --input-format anndata
+    #else if $input.format == "loom"
+        --input-object-files
+    #set file_array = [ "input."+str($i)+".loom" for $i, $fh in enumerate($input.loom_file)]
+    #set files = ",".join($file_array)
+        ${files}
+         --input-format loom
+    #else if $input.format == "rds_seurat"
+        --input-object-files
+    #set file_array = $input.rds_seurat_file
+    #set files = ",".join([ str($fh) for $fh in $file_array ])
+        ${files}
+        --input-format seurat
+    #else if $input.format == "rds_sce"
+        --input-object-files
+    #set file_array = $input.rds_sce_file
+    #set files = ",".join([ str($fh) for $fh in $file_array ])
+        ${files}
+        --input-format singlecellexperiment
+    #else if $input.format == "h5seurat"
+        --input-object-files
+    #set file_array = [ "input."+str($i)+".h5seurat" for $i, $fh in enumerate($input.h5seurat_file)]
+    #set files = ",".join($file_array)
+        ${files}
+        --input-format h5seurat
+    #end if
+   </token>
+
+   <!-- Multi-dataset staging for the reference (reference.INDEX.EXT); each branch also requires a non-empty selection, matching the guards in the REFERENCE_OBJECTS token. --> <token name="@REFERENCE_OBJS_PREAMBLE@">
+     #if $reference.format == 'loom' and $reference.loom_file
+     #for $i, $fh in enumerate($reference.loom_file):
+        ln -s '$fh' reference.${i}.loom;
+     #end for
+     #else if $reference.format == 'h5seurat' and $reference.h5seurat_file
+     #for $i, $fh in enumerate($reference.h5seurat_file):
+        ln -s '$fh' reference.${i}.h5seurat;
+     #end for
+     #else if $reference.format == 'anndata' and $reference.anndata_file
+     ## AnnData is copied instead of linked: using a link for it is reported to cause complaints.
+     #for $i, $fh in enumerate($reference.anndata_file):
+        cp '$fh' reference.${i}.h5ad;
+     #end for
+     #end if
+   </token>
+
+   <!-- Emits the reference-object-files list and reference-format only when datasets were selected; the h5seurat branch reads the h5seurat_file parameter like every other user of that parameter. --> <token name="@REFERENCE_OBJECTS@">
+    #if $reference.format == "anndata" and $reference.anndata_file:
+        --reference-object-files
+    #set file_array = [ "reference."+str($i)+".h5ad" for $i, $fh in enumerate($reference.anndata_file)]
+    #set files = ",".join($file_array)
+        ${files}
+         --reference-format anndata
+    #else if $reference.format == "loom" and $reference.loom_file:
+        --reference-object-files
+    #set file_array = [ "reference."+str($i)+".loom" for $i, $fh in enumerate($reference.loom_file)]
+    #set files = ",".join($file_array)
+        ${files}
+         --reference-format loom
+    #else if $reference.format == "rds_seurat" and $reference.rds_seurat_file:
+        --reference-object-files
+    #set files = ",".join([ str($fh) for $fh in $reference.rds_seurat_file ])
+        ${files}
+        --reference-format seurat
+    #else if $reference.format == "rds_sce" and $reference.rds_sce_file:
+        --reference-object-files
+    #set files = ",".join([ str($fh) for $fh in $reference.rds_sce_file ])
+        ${files}
+        --reference-format singlecellexperiment
+    #else if $reference.format == "h5seurat" and $reference.h5seurat_file:
+        --reference-object-files
+    #set file_array = [ "reference."+str($i)+".h5seurat" for $i, $fh in enumerate($reference.h5seurat_file)]
+    #set files = ",".join($file_array)
+        ${files}
+        --reference-format h5seurat
+    #end if
+   </token>
+
+   <!-- Select parameter named format that chooses the output object format; Seurat RDS is preselected. --> <xml name="output_object_params">
+     <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom">
+       <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+       <option value="anndata">AnnData written by Seurat</option>
+       <option value="loom">Loom</option>
+       <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+     </param>
+   </xml>
+
+   <!-- One output dataset per object format, each filtered on the format parameter; the loom output is collected from seurat_obj.loom in the working directory. --> <xml name="output_files">
+    <data name="loom_file" from_work_dir="seurat_obj.loom" format="h5" label="${tool.name} on ${on_string}: Seurat Loom">
+      <filter>format == 'loom'</filter>
+    </data>
+    <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">
+      <filter>format == 'rds_seurat'</filter>
+    </data>
+    <data name="anndata_file" format="h5ad" label="${tool.name} on ${on_string}: AnnData from Seurat">
+        <filter>format == 'anndata'</filter>
+    </data>
+    <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">
+      <filter>format == 'rds_sce'</filter>
+    </data>
+   </xml>
+
+   <!-- Emits the output-object-file and output-format options; loom is written to the relative name seurat_obj.loom, the other formats to their output dataset path. --> <token name="@OUTPUT_OBJECT@">
+    #if $format == "anndata"
+        --output-object-file '$anndata_file' --output-format anndata
+    #else if $format == "loom"
+        --output-object-file seurat_obj.loom --output-format loom
+    #else if $format == "rds_seurat"
+        --output-object-file '$rds_seurat_file' --output-format seurat
+    #else if $format == "rds_sce"
+        --output-object-file '$rds_sce_file' --output-format singlecellexperiment
+    #end if
+   </token>
+
+   <!-- Declares one plot output dataset for the format token (png unless overridden), kept only when plot_format equals that format. --> <xml name="plot_output_files_format" token_format="png">
+     <data label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot" name="plot_out_@FORMAT@" format='@FORMAT@' >
+       <filter>plot_format == '@FORMAT@'</filter>
+     </data>
+   </xml>
+
+   <!-- Passes the plot-out option with the output dataset whose name suffix matches the selected plot_format. --> <token name="@OUTPUT_PLOT@">
+    #if $plot_format == "png"
+        --plot-out '$plot_out_png'
+    #else if $plot_format == "pdf"
+        --plot-out '$plot_out_pdf'
+    #else if $plot_format == "eps"
+        --plot-out '$plot_out_eps'
+    #else if $plot_format == "ps"
+        --plot-out '$plot_out_ps'
+    #else if $plot_format == "jpg"
+        --plot-out '$plot_out_jpg'
+    #else if $plot_format == "tiff"
+        --plot-out '$plot_out_tiff'
+    #else if $plot_format == "svg"
+        --plot-out '$plot_out_svg'
+    #end if
+   </token>
+
+    <!-- Optional dataset parameter (tsv, txt or tabular) holding gene names. --> <xml name="genes-use-input">
+      <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>
+    </xml>
+    <!-- Optional integer parameter (minimum 1) giving the number of PCA dimensions. --> <xml name="dims-use-input">
+      <param name="dims_use" argument="--dims-use" min="1" optional="true" type="integer" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/>
+    </xml>
+
+    <token name="@SEURAT_INTRO@"><![CDATA[
+Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
+It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
+interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
+types of single cell data.
+      ]]></token>
+
+    <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca.
+
+3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduces the convert method, improving format interconversion support.
+
+3.1.2_0.0.8: Update metadata parsing
+
+3.1.1_0.0.7: Exposes perplexity and enables tab input.
+
+3.1.1_0.0.6+galaxy0: Moved to Seurat 3.
+
+  Find clusters: removed dims-use, k-param, prune-snn.
+
+2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home  at
+EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l).
+
+0.0.1: Initial contribution. Maria Doyle (GitHub mblue9).
+      ]]></token>
+
+
+    <!-- Citations: two DOIs plus a BibTeX entry for the seurat-scripts repository; BibTeX separates authors with "and", and braces keep the team name as one author. --> <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-021-01102-w</citation>
+            <citation type="doi">10.1038/nbt.4096</citation>
+            <citation type="bibtex">
+              @misc{r-seurat-scripts.git,
+              author = {Jonathan Manning and Pablo Moreno and {EBI Gene Expression Team}},
+              year = {2018},
+              title = {Seurat-scripts: command line interface for Seurat},
+              publisher = {GitHub},
+              journal = {GitHub repository},
+              url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},
+            }
+            </citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_select_integration_features.xml	Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,87 @@
+<tool id="seurat_select_integration_features" name="Seurat select integration features" profile="18.01" version="@SEURAT_VERSION@+galaxy0">
+    <description>from a list of studies</description>
+    <macros>
+        <import>seurat_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version" />
+    <command detect_errors="exit_code"><![CDATA[
+    @INPUT_OBJS_PREAMBLE@
+    seurat-select-integration-features.R
+    @INPUT_OBJECTS@
+
+    ## nfeatures is optional: pass the flag only when a value is set, like the other optional parameters.
+    #if $nfeatures
+    --nfeatures '$nfeatures'
+    #end if
+
+    #if $assay_list
+    --assay-list '$assay_list'
+    #end if
+
+    $verbose
+
+    #if $fvf_nfeatures
+    --fvf-nfeatures '$fvf_nfeatures'
+    #end if
+
+    --file-out '$file_out'
+
+
+    ]]></command>
+    <inputs> <!-- the object selector is expanded with multiple="True" so several datasets can be chosen -->
+        <expand macro="input_object_params" multiple="True"  />
+        <param label="Number of features" optional='true' value='2000' name="nfeatures" argument="--nfeatures" type="integer"   help="Number of features to return"/>
+        <param label="Assay-list" optional='true' name="assay_list" argument="--assay-list" type="text"   help="Name or vector of assay names (one for each object) from which to pull the variable features."/>
+        <!-- checked renders an empty string; unchecked renders the do-not-verbose flag -->
+        <param label="Verbose" optional='true' value='true' name="verbose" argument="--do-not-verbose" type="boolean"  truevalue='' falsevalue='--do-not-verbose' checked='true' help="Print messages"/>
+        <param label="Fvf nfeatures" optional='true' value='2000' name="fvf_nfeatures" argument="--fvf-nfeatures" type="integer"   help="nfeatures for FindVariableFeatures. Used if VariableFeatures have not been set for any object in input."/>
+    </inputs>
+    <outputs> <!-- single rdata dataset; the command passes its path through the file-out option -->
+        <data label="${tool.name} on ${on_string}: Rdata file with features" name="file_out" format='rdata' />
+    </outputs>
+    <tests>
+      <!-- MANUAL TESTS -->
+      <test> <!-- runs on the two ifnb RDS files named in get_test_data.sh and checks only the output size -->
+        <param name="rds_seurat_file" ftype="rdata" value="ifnb_ctrl_norm_fvg.rds,ifnb_stim_norm_fvg.rds"/>
+        <output name="file_out" ftype="rdata" >
+          <assert_contents>
+            <has_size value="9506" delta="950"/> <!-- expected size 9506 with a tolerance of 950 -->
+          </assert_contents>
+        </output>
+      </test>
+      <!-- END MANUAL TESTS -->
+    </tests>
+<help>
+<!-- MANUAL HELP -->
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@SEURAT_INTRO@
+
+This tool aims to facilitate the first steps of the Seurat 4.0.4 https://satijalab.org/seurat/articles/integration_introduction.html
+tutorial, namely selecting features for the integration of different datasets. These features can then be used with the integration tool.
+
+All options are documented in-line.
+
+-----
+
+**Inputs**
+
+* A set of Seurat objects (can be given in other formats as well) to integrate. These objects should be at least normalised and have the find variable genes/features method applied.
+* All other inputs are optional (see above).
+
+-----
+
+**Outputs**
+
+* An Rdata file with the features selected for integration.
+
+@VERSION_HISTORY@
+]]>
+<!-- END MANUAL HELP -->
+</help>
+    <expand macro="citations" />
+</tool>