Mercurial > repos > iuc > seurat_preprocessing
changeset 0:c3170652bd98 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat_v5 commit a9214c07b0cc929a51fd92a369bb89c675b6c88d
author | iuc |
---|---|
date | Wed, 11 Sep 2024 10:20:38 +0000 |
parents | |
children | 6bccf5f85f92 |
files | macros.xml normalize_select_features_scale.xml |
diffstat | 2 files changed, 903 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Sep 11 10:20:38 2024 +0000 @@ -0,0 +1,437 @@ +<macros> + <token name="@TOOL_VERSION@">5.0</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">23.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">r-seurat</requirement> + <requirement type="package" version="1.2.1">fit-sne</requirement> + <requirement type="package" version="3.58.1">bioconductor-limma</requirement> + <requirement type="package" version="1.28.0">bioconductor-mast</requirement> + <requirement type="package" version="1.42.0">bioconductor-deseq2</requirement> + <requirement type="package" version="2.1.3">r-svglite</requirement> + <requirement type="package" version="1.1">r-metap</requirement> + <requirement type="package" version="1.14.0">bioconductor-glmGamPoi</requirement> + <requirement type="package" version="0.5.3">umap-learn</requirement> <!-- https://github.com/satijalab/seurat/issues/8283 --> + <requirement type="package" version="0.10.2">leidenalg</requirement> + <requirement type="package" version="1.2.0">r-harmony</requirement> + <requirement type="package" version="1.18.0">bioconductor-batchelor</requirement> + <requirement type="package" version="2.0.0">numpy</requirement> + <requirement type="package" version="2.2.2">pandas</requirement> + </requirements> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1038/s41587-023-01767-y</citation> + </citations> + </xml> + <xml name="sanitize_query" token_validinitial="string.printable"> + <sanitizer> + <valid initial="@VALIDINITIAL@"> + <remove value="'" /> + </valid> + </sanitizer> + </xml> + <xml name="sanitize_vectors" token_validinitial="string.digits"> + <sanitizer> + <valid initial="@VALIDINITIAL@"> + <add value=","/> + </valid> + </sanitizer> + </xml> + <xml name="version_command"> + <version_command><![CDATA[ +echo $(R --version | grep version | grep -v GNU)", Seurat version" $(R --vanilla --slave -e "library(Seurat); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]></version_command> + </xml> + + <token name="@CMD_imports@"><![CDATA[ +library(Seurat) + ]]> + </token> + <token name="@reticulate_hack@"><![CDATA[ +library(reticulate) +## HACK: CI biocontainers do not contain a useable conda binary, just the env. +## see: https://github.com/galaxyproject/tools-iuc/issues/5585#issuecomment-1803773923 +is_biocontainer = grepl("^# cmd: /opt/conda/bin/", + paste0(reticulate:::python_info_condaenv_find("/usr/local/"), + "-none")) +if (is_biocontainer) { + ## conda detection false positive + assignInNamespace("is_conda_python", function(x) FALSE, ns="reticulate") + use_python("/usr/local/bin/python") +} else { + conda_path = Sys.getenv("CONDA_PREFIX") + if (conda_path != "") { + ## Active conda env found + use_python(file.path(conda_path, "bin", "python3")) + } else { + ## Not biocontainer or conda, assume system python + use_python("/usr/bin/python3") + } +}]]> + </token> + <xml name="input_rds"> + <param name="seurat_rds" type="data" format="rds" label="Input file with the Seurat object"/> + </xml> + <token name="@CMD_read_inputs@"><![CDATA[ +seurat_obj = readRDS('seurat.rds') + ]]> + </token> + <token name="@CMD_read_expression_matrix@"><![CDATA[ +counts<-read.table("matrix.tab", header=TRUE, row.names=1, sep="\t") + ]]>] + </token> + <token name="@CMD@"><![CDATA[ +cp '$seurat_rds' seurat.rds && +cat '$script_file' > $hidden_output && +Rscript '$script_file' >> $hidden_output + ]]> + </token> + <xml name="inputs_common_advanced"> + <section name="advanced_common" title="Advanced Output" expanded="false"> + <param name="show_log" type="boolean" checked="false" label="Output Log?" /> + </section> + </xml> + <xml name="outputs_common_advanced"> + <data name="hidden_output" format="txt" label="Log file" > + <filter>advanced_common['show_log']</filter> + </data> + </xml> + <xml name="seurat_outputs"> + <data name="rds_out" format="rds" from_work_dir="seurat.rds" label="${tool.name} (${method.method}) on ${on_string}: RDS"> + <filter>method['method'] != 'Inspect'</filter> + </data> + <expand macro="outputs_common_advanced"/> + </xml> + <token name="@CMD_rds_write_outputs@"><![CDATA[ +saveRDS(seurat_obj, 'seurat.rds') + ]]> + </token> + <xml name="variable_out"> + <data name="variable_tabular" format="txt" from_work_dir="variable_out.txt" label="${tool.name} (${method.method}) on ${on_string}: Top variable features list"> + <filter>method['method'] == 'FindVariableFeatures' or method['method'] == 'SCTransform'</filter> + <filter>method['output_topN']['output_topN'] == 'true'</filter> + </data> + </xml> + <token name="@CMD_write_variable_tab@"><![CDATA[ +write.table(top_N, 'variable_out.txt', sep= "\t", col.names = FALSE, quote = FALSE) + ]]> + </token> + <xml name="markers_out"> + <data name="markers_tabular" format="csv" from_work_dir="markers_out.csv" label="${tool.name} (${method.method}) on ${on_string}: Markers list"> + <filter>method['method'] == 'FindAllMarkers' or method['method'] == 'FindMarkers' or method['method'] == 'FindConservedMarkers'</filter> + </data> + </xml> + <token name="@CMD_write_markers_tab@"><![CDATA[ +write.csv(seurat_obj, 'markers_out.csv', quote = FALSE) + ]]> + </token> + <xml name="print_top_pcs"> + <data name="top_pcs" format="txt" from_work_dir="print_pcs.txt" label="${tool.name} Print PCs on ${on_string}"> + <filter>method['method'] == 'RunPCA' and method['print_pcs']['print_pcs'] == 'true'</filter> + </data> + </xml> + <xml name="inspect_out"> + <data name="inspect_tabular" format="tabular" from_work_dir="inspect_out.tab" label="${tool.name} Inspect (${method.inspect.inspect}) on ${on_string}"> + <filter>method['method'] == 'Inspect' and method['inspect']['inspect'] != 'General'</filter> + </data> + <data name="inspect_general" format="txt" from_work_dir="inspect.txt" label="${tool.name} Inspect General on ${on_string}"> + <filter>method['method'] == 'Inspect' and method['inspect']['inspect'] == 'General'</filter> + </data> + </xml> + <token name="@CMD_inspect_rds_outputs@"><![CDATA[ +write.table(inspect, 'inspect_out.tab', sep="\t", col.names = col.names, row.names = row.names, quote = FALSE) + ]]> + </token> + <xml name="plot_out"> + <data name="plot_out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) on ${on_string}: png plot"> + <filter>plot_format == 'png'</filter> + </data> + <data name="plot_out_pdf" format="pdf" from_work_dir="plot.pdf" label="${tool.name} (${method.method}) on ${on_string}: pdf plot"> + <filter>plot_format == 'pdf'</filter> + </data> + <data name="plot_out_svg" format="svg" from_work_dir="plot.svg" label="${tool.name} (${method.method}) on ${on_string}: svg plot"> + <filter>plot_format == 'svg'</filter> + </data> + <data name="plot_out_jpeg" format="jpeg" from_work_dir="plot.jpeg" label="${tool.name} (${method.method}) on ${on_string}: jpeg plot"> + <filter>plot_format == 'jpeg'</filter> + </data> + <data name="plot_out_tex" format="tex" from_work_dir="plot.tex" label="${tool.name} (${method.method}) on ${on_string}: tex plot"> + <filter>plot_format == 'tex'</filter> + </data> + <data name="plot_out_tiff" format="tiff" from_work_dir="plot.tiff" label="${tool.name} (${method.method}) on ${on_string}: tiff plot"> + <filter>plot_format == 'tiff'</filter> + </data> + <data name="plot_out_eps" format="eps" from_work_dir="plot.eps" label="${tool.name} (${method.method}) on ${on_string}: eps plot"> + <filter>plot_format == 'eps'</filter> + </data> + </xml> + <xml name="param_eps" tokens="eps_value"> + <param argument="eps" type="float" value="@EPS_VALUE@" label="Small number to avoid numerical errors"/> + </xml> + <xml name="valid_name"> + <validator type="regex" message="Please only use letters, numbers, or _ - .">^[\w\-.]+$</validator> + </xml> + <xml name="valid_reduction_key"> + <validator type="regex" message="Please only use letters and _">^[A-Za-z_]+$</validator> + </xml> + <xml name="valid_list"> + <validator type="regex" message="Please only use letters, numbers, or _ - . ,">^[\w\-., ]+$</validator> + </xml> + <xml name="valid_cell_name"> + <validator type="regex" message="Please only use letters, numbers, or punctuation marks">^[\w[:punct:]]+$</validator> + </xml> + <xml name="valid_cell_list"> + <validator type="regex" message="Please only use letters, numbers, or punctuation marks">^[\w[:punct:]]+$</validator> + </xml> + <xml name="select_assay"> + <param argument="assay" type="text" optional="true" value="" label="Name of assay to use" help="leave blank to use default assay"> + <expand macro="valid_name"/> + </param> + </xml> + <xml name="select_assay_RNA"> + <param argument="assay" type="text" value="RNA" label="Name of assay to use"> + <expand macro="valid_name"/> + </param> + </xml> + <xml name="select_slot_data"> + <param argument="slot" type="select" label="Slot to pull data from"> + <option value="counts">counts</option> + <option value="data" selected="true">data</option> + <option value="scale.data">scale.data</option> + <option value="raw.data">raw.data</option> + </param> + </xml> + <xml name="select_slot_scale"> + <param argument="slot" type="select" label="Slot to pull data from"> + <option value="counts">counts</option> + <option value="data">data</option> + <option value="scale.data" selected="true">scale.data</option> + <option value="raw.data">raw.data</option> + </param> + </xml> + <xml name="select_slot_counts"> + <param argument="slot" type="select" label="Slot to pull data from"> + <option value="counts" selected="true">counts</option> + <option value="data">data</option> + <option value="scale.data">scale.data</option> + <option value="raw.data">raw.data</option> + </param> + </xml> + <xml name="select_layer"> + <param argument="layer" type="text" optional="true" value="" label="Layer to pull data from" help="leave blank to use default"> + <expand macro="valid_name"/> + </param> + </xml> + <xml name="select_reduction_pca"> + <param argument="reduction" type="text" value="pca" label="Name of reduction to use" help="default is pca"> + <expand macro="valid_name"/> + </param> + </xml> + <xml name="select_reduction_umap"> + <param argument="reduction" type="text" value="umap" label="Name of reduction to use" help="first defaults to umap, then tsne, then pca"> + <expand macro="valid_name"/> + </param> + </xml> + <xml name="set_topN"> + <param name="topN" type="integer" value="10" label="Number to show"/> + </xml> + <xml name="set_dims"> + <param argument="dims" type="integer" optional="true" value="10" label="Number of dimensions from reduction to use as input"/> + </xml> + <xml name="normalize"> + <conditional name="normalization_method"> + <param name="normalization_method" type="select" label="Method for normalization" help="(normalization.method)"> + <option value="LogNormalize" selected="true">LogNormalize</option> + <option value="CLR">CLR</option> + <option value="RC">RC</option> + </param> + <when value="LogNormalize"></when> + <when value="CLR"> + <param argument="margin" type="select" checked="true" label="Normalize across features (1) or cells (2)"> + <option value="1" selected="true">features</option> + <option value="2">cells</option> + </param> + </when> + <when value="RC"></when> + </conditional> + <param name="scale_factor" type="integer" value="10000" label="Set scale factor for normalization" help="(scale.factor)"/> + <param name="block_size" type="integer" optional="true" value="" label="Number of cells to run in each block" help="(block.size)"/> + </xml> + <xml name="integration_inputs"> + <param argument="dims" type="integer" value="30" label="Number of dimensions from reduction to use for integration"/> + <param name="dims_to_integrate" type="integer" optional="true" value="" label="Number of dimensions to return integrated values for" help="(dims.to.integrate)"/> + <param name="k_weight" type="integer" value="100" label="Number of neighbors to consider when weighting anchors" help="(k.weight)"/> + <param name="weight_reduction" type="text" optional="true" value="" label="Name of reduction(s) to use for calculating anchor weights" help="leave blank to use full corrected space (weight.reduction)"> + <expand macro="valid_list"/> + </param> + <param name="sd_weight" type="float" value="1" label="Controls bandwidth of Gaussian kernel for weighting"/> + <param name="preserve_order" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Preserve order" help="do not reorder objects based on size for each pairwise integration (preserve.order)"/> + </xml> + <xml name="markers_inputs"> + <param argument="features" type="data" format="txt,tabular" optional="true" value="" label="Features to test" help="text file with one feature on each line, leave empty to use all genes"/> + <param name="logfc_threshold" type="float" value="0.1" label="Minimum log-fold difference to test" help="(logfc.threshold)"/> + <conditional name="test_use"> + <param name="test_use" type="select" label="Select test to run" help="(test.use)"> + <option value="wilcox" selected="true">wilcox</option> + <option value="wilcox_limma">wilcox_limma</option> + <option value="bimod">bimod</option> + <option value="roc">roc</option> + <option value="t">t</option> + <option value="negbinom">negbinom</option> + <option value="poisson">poisson</option> + <option value="LR">LR</option> + <option value="MAST">MAST</option> + <option value="DESeq2">DESeq2</option> + </param> + <when value="wilcox"> + <expand macro="select_slot_data"/> + </when> + <when value="wilcox_limma"> + <expand macro="select_slot_data"/> + </when> + <when value="bimod"> + <expand macro="select_slot_data"/> + </when> + <when value="roc"> + <expand macro="select_slot_data"/> + <param name="return_thresh" type="float" value="0.01" min="0.0" max="1.0" label="Only return markers with a p-value below or power above this threshold" help="(return.thresh)"/> + </when> + <when value="t"> + <expand macro="select_slot_data"/> + </when> + <when value="negbinom"> + <expand macro="select_slot_counts"/> + <param name="latent_vars" type="text" optional="true" value="" label="Select variables to test" help="(latent.vars)"/> + <param name="min_cells_feature" type="integer" value="3" label="Minimum number of cells expressing the feature in at least one cluster" help="(min.cells.feature)"/> + </when> + <when value="poisson"> + <expand macro="select_slot_counts"/> + <param name="latent_vars" type="text" optional="true" value="" label="Select variables to test" help="(latent.vars)"/> + <param name="min_cells_feature" type="integer" value="3" label="Minimum number of cells expressing the feature in at least one cluster" help="(min.cells.feature)"/> + </when> + <when value="LR"> + <expand macro="select_slot_data"/> + <param name="latent_vars" type="text" optional="true" value="" label="Select variables to test" help="(latent.vars)"/> + </when> + <when value="MAST"> + <expand macro="select_slot_data"/> + <param name="latent_vars" type="text" optional="true" value="" label="Select variables to test" help="(latent.vars)"/> + </when> + <when value="DESeq2"> + <expand macro="select_slot_counts"/> + </when> + </conditional> + </xml> + <xml name="advanced_markers_inputs"> + <expand macro="select_assay"/> + <param name="fc_name" type="text" optional="true" value="" label="Choose a name for the fold change, average difference, or custom function column" help="(fc.name)"> + <expand macro="valid_name"/> + </param> + <param name="min_pct" type="float" value="0.01" min="0" max="100" label="Minimum percentage of cells genes must be present in to be tested" help="(min.pct)"/> + <param name="min_diff_pct" type="float" optional="true" value="" label="Minimum difference in percentage of expression between groups for genes to be tested" help="defaults to -Inf (min.diff.pct)"/> + <param name="only_pos" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Only return positive markers" help="(only.pos)"/> + <param name="max_cells_per_ident" type="integer" optional="true" value="" label="Downsample each identity class to a max number of cells" help="defaults to Inf for no downsampling (max.cells.per.ident)"/> + <param name="random_seed" type="integer" optional = "true" value="1" label="Set a random seed for downsampling" help="(random.seed)"/> + <param name="min_cells_group" type="integer" value="3" label="Minimum number of cells in one group" help="(min.cells.group)"/> + <param argument="densify" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Convert to dense matrix before running DE test"/> + </xml> + <xml name="plot_types"> + <param name="plot_format" type="select" label="Format of plot to produce"> + <option value="png">png</option> + <option value="pdf">pdf</option> + <option value="svg">svg</option> + <option value="jpeg">jpeg</option> + <option value="tex">tex</option> + <option value="tiff">tiff</option> + <option value="eps">eps</option> + </param> + </xml> + <xml name="plot_sizes"> + <conditional name="resize"> + <param name="resize" type="select" label="Change size of plot"> + <option value="false" selected="true">No</option> + <option value="true">Yes</option> + </param> + <when value="false"></when> + <when value="true"> + <param argument="width" type="integer" value="2100" label="Width of plot in pixels"/> + <param argument="height" type="integer" value="2100" label="Height of plot in pixels"/> + </when> + </conditional> + </xml> + <xml name="plot_cols"> + <param argument="cols" type="text" optional="true" value="" label="Colours to use for plotting" help="comma separated list"> + <expand macro="valid_list"/> + </param> + </xml> + <xml name="plot_log_scale"> + <param argument="log" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Plot on a log scale"/> + </xml> + <xml name="plot_2_dims"> + <param name="dims_1" type="integer" value="1" label="Dimension to plot on x axis"/> + <param name="dims_2" type="integer" value="2" label="Dimension to plot on y axis"/> + </xml> + <xml name="plot_projected_and_balanced"> + <param argument="projected" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Use reduction values for full dataset" help="i.e. projected dimensional reduction values"/> + <param argument="balanced" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Return an equal number of genes with + and - scores"/> + </xml> + <xml name="plot_disp_min_max"> + <param name="disp_min" type="float" optional="true" value="-2.5" label="Minimum display value" help="all values below are clipped (disp.min)"/> + <param name="disp_max" type="float" optional="true" value="" label="Maximum display value" help="all values above are clipped. Defaults to 2.5 if slot is scale.data, otherwise defaults to 6 (disp.max)"/> + </xml> + <xml name="plot_shuffle_and_seed"> + <conditional name="shuffle"> + <param argument="shuffle" type="select" label="Randomly shuffle order of points" help="can help with crowded plots if points of interest are hidden"> + <option value="TRUE">Yes</option> + <option value="FALSE" selected="true">No</option> + </param> + <when value="TRUE"> + <param argument="seed" type="integer" value="1" label="Set random seed for shuffling"/> + </when> + <when value="FALSE"></when> + </conditional> + </xml> + <xml name="plot_order"> + <param argument="order" type="text" optional="true" value="" label="Specify the order of plotting for the idents" help="a full comma-separated list or the ident to be plotted last on the top"> + <expand macro="valid_list"/> + </param> + </xml> + <xml name="plot_group_by"> + <param name="group_by" type="text" optional="true" value="" label="Factor to group cells by" help="(group.by)"/> + </xml> + <xml name="plot_split_by"> + <param name="split_by" type="text" optional="true" value="" label="Factor or identity to split the plot by" help="(split.by)"/> + </xml> + <xml name="plot_alpha"> + <param argument="alpha" type="integer" value="1" label="Alpha value for points"/> + </xml> + <xml name="plot_pt_size"> + <param name="pt_size" type="float" optional="true" value="" label="Point size for plot" help="(pt.size)"/> + </xml> + <xml name="plot_smooth"> + <param argument="smooth" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Smooth the graph"/> + </xml> + <xml name="plot_ncol"> + <param argument="ncol" type="integer" optional="true" value="" label="Number of columns to display"/> + </xml> + <xml name="raster_select"> + <conditional name="raster"> + <param argument="raster" type="select" label="Convert points to raster format" help="NULL will automatically use raster if more than 100,000 points plotted"> + <option value="NULL" selected="true">NULL</option> + <option value="TRUE">TRUE</option> + <option value="FALSE">FALSE</option> + </param> + <when value="NULL"></when> + <when value="TRUE"> + <param name="raster_x" type="integer" value="512" label="Horizontal length of raster plot (pixels)"/> + <param name="raster_y" type="integer" value="512" label="Vertical height of raster plot (pixels)"/> + </when> + <when value="FALSE"></when> + </conditional> + </xml> + <xml name="raster_boolean"> + <param argument="raster" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Convert to raster format"/> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/normalize_select_features_scale.xml Wed Sep 11 10:20:38 2024 +0000 @@ -0,0 +1,466 @@ +<tool id="seurat_preprocessing" name="Seurat Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>- Normalize, Find Variable Features, Scale and Regress</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +@CMD@ + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +@CMD_imports@ +@CMD_read_inputs@ + +#if $method.method == 'NormalizeData' +seurat_obj<-NormalizeData( + seurat_obj, + #if $method.assay != '' + assay = '$method.assay', + #end if + normalization.method = '$method.normalization_method.normalization_method', + #if $method.normalization_method.normalization_method == 'CLR' + margin = $method.normalization_method.margin, + #end if + scale.factor = $method.scale_factor, + #if $method.block_size + block.size = $method.block_size + #end if +) + +#else if $method.method == 'FindVariableFeatures' +seurat_obj<-FindVariableFeatures( + seurat_obj, + #if $method.assay != '' + assay = '$method.assay', + #end if + selection.method = '$method.selection_method.selection_method', + #if $method.selection_method.selection_method == 'vst' + loess.span = $method.selection_method.loess_span, + #if $method.selection_method.clip_max + clip.max = $method.selection_method.clip_max, + #end if + #if $method.selection_method.nfeatures + nfeatures = $method.selection_method.nfeatures, + #end if + #else if $method.selection_method.selection_method == 'dispersion' + #if $method.selection_method.nfeatures + nfeatures = $method.selection_method.nfeatures, + #end if + #end if + num.bin = $method.num_bin, + binning.method = '$method.binning_method' +) + + #if $method.output_topN.output_topN == 'true' + N = $method.output_topN.topN + top_N<-head(VariableFeatures(seurat_obj), N) + @CMD_write_variable_tab@ + #end if + +#else if $method.method == 'ScaleData' + + #if $method.scale_features.scale_features == 'list_genes' + features_list<-paste(readLines('$method.scale_features.features_to_scale'), collapse=",") + #else if $method.scale_features.scale_features == 'all_genes' + all.genes<-rownames(seurat_obj) + #end if + +seurat_obj<-ScaleData( + seurat_obj, + #if $method.scale_features.scale_features == 'all_genes' + features = all.genes, + #else if $method.scale_features.scale_features == 'list_genes' + features = c(unlist(strsplit(features_list, ","))), + #end if + #if $method.assay != '' + assay = '$method.assay', + #end if + #if $method.regress.regress == 'true' + vars.to.regress = c(unlist(strsplit(gsub(" ", "", '$method.regress.vars_to_regress'), ","))), + model.use = '$method.regress.model_use', + use.umi = $method.regress.use_umi, + #end if + #if $method.split_by != '' + split.by = '$method.split_by', + #end if + do.scale = $method.do_scale, + do.center = $method.do_center, + #if $method.do_scale == 'true' + scale.max = $method.scale_max, + block.size = $method.block_size, + min.cells.to.block = $method.min_cells_to_block + #end if +) + +#else if $method.method == 'SCTransform' + + #if $method.residual_features.residual_features_options == 'selected_features' + features_list<-paste(readLines('$method.residual_features.residual_features'), collapse=",") + #end if + +seurat_obj<-SCTransform( + seurat_obj, + #if $method.assay != '' + assay = '$method.assay', + #end if + new.assay.name = '$method.new_assay_name', + #if $method.residual_features.residual_features_options == 'NULL' + #if $method.residual_features.variable_features.variable_features == 'set_number' + variable.features.n = $method.residual_features.variable_features.variable_features_n, + #else if $method.residual_features.variable_features.variable_features == 'use_cutoff' + variable.features.rv.th = $method.residual_features.variable_features.variable_features_rv_th, + #end if + #else if $method.residual_features.residual_features_options == 'selected_features' + residual.features = c(unlist(strsplit(features_list, ","))), + #end if + #if $method.vars_to_regress != '' + vars.to.regress = c(unlist(strsplit(gsub(" ", "", '$method.vars_to_regress'), ","))), + #end if + do.scale = $method.do_scale, + do.center = $method.do_center, + #if $method.min_clip_range and $method.max_clip_range + clip.range = c($method.min_clip_range, $method.max_clip_range), + #end if + do.correct.umi = $method.adv.do_correct_umi, + ncells = $method.adv.ncells, + seed.use = $method.adv.seed_use, + vst.flavor = '$method.adv.vst_flavor', + conserve.memory = $method.adv.conserve_memory.conserve_memory, + #if $method.adv.conserve_memory.conserve_memory == 'FALSE' + return.only.var.genes = $method.adv.conserve_memory.return_only_var_genes + #end if +) + + #if $method.output_topN.output_topN == 'true' + N = $method.output_topN.topN + top_N<-head(VariableFeatures(seurat_obj), N) + @CMD_write_variable_tab@ + #end if + +#end if + +@CMD_rds_write_outputs@ + +]]></configfile> + </configfiles> + <inputs> + <expand macro="input_rds"/> + <conditional name="method"> + <param name="method" type="select" label="Method used"> + <option value="NormalizeData">Normalize with 'NormalizeData'</option> + <option value="FindVariableFeatures">Identify highly variable genes with 'FindVariableFeatures'</option> + <option value="ScaleData">Scale and regress with 'ScaleData'</option> + <option value="SCTransform">Complete all preprocessing with 'SCTransform'</option> + </param> + <when value="NormalizeData"> + <expand macro="select_assay"/> + <expand macro="normalize"/> + </when> + <when value="FindVariableFeatures"> + <expand macro="select_assay"/> + <conditional name="selection_method"> + <param name="selection_method" type="select" label="Method to select variable features" help="(selection.method)"> + <option value="vst" selected="true">vst</option> + <option value="mean.var.plot">mean.var.plot</option> + <option value="dispersion">dispersion</option> + </param> + <when value="vst"> + <param name="loess_span" type="float" value="0.3" label="Loess span parameter for fitting variance-mean relationship" help="(loess.span)"/> + <param name="clip_max" type="float" optional="true" value="" label="Maximum value after standardisation" help="leave blank to use default, the square root of number of cells (clip.max)"/> + <param argument="nfeatures" type="integer" optional="true" value="2000" label="Number of features to select as top variable features"/> + </when> + <when value="mean.var.plot"></when> + <when value="dispersion"> + <param argument="nfeatures" type="integer" optional="true" value="2000" label="Number of features to select as top variable features"/> + </when> + </conditional> + <param name="num_bin" type="integer" value="20" label="Number of bins to use" help="(num.bin)"/> + <param name="binning_method" type="select" label="Method to compute bins" help="(binning.method)"> + <option value="equal_width" selected="true">equal width</option> + <option value="equal_frequency">equal frequency</option> + </param> + <conditional name="output_topN"> + <param name="output_topN" type="select" label="Output list of most variable features"> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <expand macro="set_topN"/> + </when> + <when value="false"> + </when> + </conditional> + </when> + <when value="ScaleData"> + <expand macro="select_assay"/> + <conditional name="regress"> + <param name="regress" type="select" label="Regress out a variable"> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param name="vars_to_regress" type="text" optional="true" value="" label="Variable(s) to regress out" help="comma-separated list e.g. percent.mt, nCount_RNA (vars.to.regress)"> + <expand macro="valid_list"/> + </param> + <param name="model_use" type="select" optional="true" label="Model to use for regression" help="(model.use)"> + <option value="linear" selected="true">linear</option> + <option value="poisson">poisson</option> + <option value="negbinom">negbinom</option> + </param> + <param name="use_umi" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Regress on UMI count data" help="recommended if regression model is not linear (use.umi)"/> + </when> + <when value="false"> + </when> + </conditional> + <conditional name="scale_features"> + <param name="scale_features" type="select" label="Features to scale"> + <option value="variable_features" selected="true">Variable Features</option> + <option value="all_genes">All Features</option> + <option value="list_genes">Enter a list of features</option> + </param> + <when value="variable_features"> + </when> + <when value="all_genes"> + </when> + <when value="list_genes"> + <param name="features_to_scale" type="data" format="txt,tabular" label="List of features to scale" help="text file with one feature on each line"/> + </when> + </conditional> + <param name="split_by" type="text" optional="true" value="" label="Scale cells in groups by a variable, vector or factor" help="(split.by)"/> + <param name="do_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Scale the data" help="(do.scale)"/> + <param name="do_center" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Center the data" help="(do.center)"/> + <param name="scale_max" type="float" value="10" label="Set max value for scaled data" help="(scale.max)"/> + <param name="block_size" type="integer" value="1000" label="Number of features to scale in a single computation" help="(block.size)"/> + <param name="min_cells_to_block" type="integer" value="3000" label="Use blocks for scaling if object has more than this number of cells" help="(min.cells.to.block)"/> + </when> + <when value="SCTransform"> + <expand macro="select_assay_RNA"/> + <param name="new_assay_name" type="text" value="SCT" label="Name for new assay" help="to contain the normalized data (new.assay.name)"> + <expand macro="valid_name"/> + </param> + <conditional name="residual_features"> + <param name="residual_features_options" type="select" label="Genes to calculate residual features for" help="(residual.features)"> + <option value="NULL" selected="true">all genes</option> + <option value="selected_features">selected features</option> + </param> + <when value="NULL"> + <conditional name="variable_features"> + <param name="variable_features" type="select" label="How to set variable features"> + <option value="set_number" selected="true">set number of variable features</option> + <option value="use_cutoff">set cutoff for residual variance</option> + </param> + <when value="set_number"> + <param name="variable_features_n" type="integer" value="3000" label="Use this many features as variable features" help="after ranking residual variance for all genes (variable.features.n)"/> + </when> + <when value="use_cutoff"> + <param name="variable_features_rv_th" type="float" value="1.3" label="Use this residual variance cutoff" help="after calculating residual variance for all genes (variable.features.rv.th)"/> + </when> + </conditional> + </when> + <when value="selected_features"> + <param name="residual_features" type="data" format="txt,tabular" label="List of genes to use" help="text file with one feature on each line. These genes will be set as VariableFeatures in returned object (residual.features)"/> + </when> + </conditional> + <conditional name="output_topN"> + <param name="output_topN" type="select" label="Output list of most variable features"> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <expand macro="set_topN"/> + </when> + <when value="false"> + </when> + </conditional> + <param name="vars_to_regress" type="text" optional="true" value="" label="Variable(s) to regress out" help="comma-separated list e.g. percent.mt, nCount_RNA (vars.to.regress)"> + <expand macro="valid_list"/> + </param> + <param name="do_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Scale the residuals" help="(do.scale)"/> + <param name="do_center" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Center the residuals" help="(do.center)"/> + <param name="min_clip_range" type="float" optional="true" value="" label="Minimum for residual variances" help="clip values below this or leave empty to use default of -sqrt(n/30) (clip.range)"/> + <param name="max_clip_range" type="float" optional="true" value="" label="Maximum for residual variances" help="clip values above this or leave empty to use default of sqrt(n/30) (clip.range)"/> + <section name="adv" title="Advanced Options"> + <param name="do_correct_umi" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Place corrected UMI matrix in assay counts slot" help="(do.correct.umi)"/> + <param argument="ncells" type="integer" value="5000" label="Number of subsampling cells used to build NB regression"/> + <param name="seed_use" type="integer" value="1448145" label="Set random seed" help="--seed.use"/> + <param name="vst_flavor" type="select" label="Vst Flavor" help="version of sctransform to use(vst.flavor)"> + <option value="v1">vs1</option> + <option value="v2" selected="true">vs2</option> + </param> + <conditional name="conserve_memory"> + <param name="conserve_memory" type="select" label="Conserve memory" help="by not creating residual matrix for all genes (conserve.memory)"> + <option value="FALSE" selected="true">No</option> + <option value="TRUE">Yes</option> + </param> + <when value="FALSE"> + <param name="return_only_var_genes" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Only return variable genes in scale.data matrices" help="(return.only.var.genes)"/> + </when> + <when value="TRUE"></when> + </conditional> + </section> + </when> + </conditional> + <expand macro="inputs_common_advanced"/> + </inputs> + <outputs> + <expand macro="seurat_outputs"/> + <expand macro="variable_out"/> + </outputs> + <tests> + <test expect_num_outputs="2"> + <!-- test1: NormalizeData --> + <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/filtered.rds"/> + <conditional name="method"> + <param name="method" value="NormalizeData"/> + <conditional name="normalization_method"> + <param name="normalization_method" value="LogNormalize"/> + </conditional> + <param name="scale_factor" value="10000"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="NormalizeData"/> + </assert_contents> + </output> + <output name="rds_out" location="https://zenodo.org/records/13732784/files/normalized.rds" ftype="rds" compare="sim_size"/> + </test> + <test expect_num_outputs="3"> + <!-- test2: FindVariableFeatures --> + <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/normalized.rds"/> + <conditional name="method"> + <param name="method" value="FindVariableFeatures"/> + <conditional name="selection_method"> + <param name="selection_method" value="vst"/> + <param name="nfeatures" value="100"/> + </conditional> + <param name="num_bin" value="20"/> + <param name="binning_method" value="equal_width"/> + <conditional name="output_topN"> + <param name="output_topN" value="true"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="FindVariableFeatures"/> + </assert_contents> + </output> + <output name="rds_out" location="https://zenodo.org/records/13732784/files/variablefeatures.rds" ftype="rds" compare="sim_size"/> + <output name="variable_tabular" location="https://zenodo.org/records/13732784/files/variable_top10.txt" ftype="txt"> + <assert_contents> + <has_n_lines n="10"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <!-- test3: ScaleData --> + <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/variablefeatures.rds"/> + <conditional name="method"> + <param name="method" value="ScaleData"/> + <conditional name="scale_features"> + <param name="scale_features" value="all_genes"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="ScaleData"/> + </assert_contents> + </output> + <output name="rds_out" location="https://zenodo.org/records/13732784/files/scaled.rds" ftype="rds" compare="sim_size"/> + </test> + <test expect_num_outputs="2"> + <!-- test4: SCTransform --> + <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/filtered.rds"/> + <conditional name="method"> + <param name="method" value="SCTransform"/> + <param name="vars_to_regress" value="percent.mt"/> + <conditional name="residual_features"> + <param name="residual_features_options" value="selected_features"/> + <param name="residual_features" location="https://zenodo.org/records/13741333/files/residual_features.txt"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="SCTransform"/> + </assert_contents> + </output> + <output name="rds_out" location="https://zenodo.org/records/13732784/files/SCTransformed.rds" ftype="rds" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ +Seurat +====== + +Seurat is an R package designed for QC, analysis, and exploration of single-cell RNA-seq data. + +Seurat aims to enable users to identify and interpret sources of heterogeneity from single-cell transcriptomic measurements, and to integrate diverse types of single-cell data. + +NormalizeData +============= + +Normalize the count data present in a given assay. + +Methods: + +“LogNormalize”: Feature counts for each cell are divided by the total counts for that cell and multiplied by the scale.factor. This is then natural-log transformed using log1p + +“CLR”: Applies a centered log ratio transformation + +“RC”: Relative counts. Feature counts for each cell are divided by the total counts for that cell and multiplied by the scale.factor. No log-transformation is applied. For counts per million (CPM) set scale.factor = 1e6 + + +More details on the `seurat documentation +<https://satijalab.org/seurat/reference/normalizedata>`__ + +FindVariableFeatures +==================== + +Identify features that are outliers on a 'mean variability plot'. + +Methods: + +“vst”: First, fits a line to the relationship of log(variance) and log(mean) using local polynomial regression (loess). Then standardizes the feature values using the observed mean and expected variance (given by the fitted line). Feature variance is then calculated on the standardized values after clipping to a maximum (see clip.max parameter). + +“mean.var.plot” (mvp): First, uses a function to calculate average expression (mean.function, using FastExpMean) and dispersion (dispersion.function, using FastLogVMR) for each feature. Next, divides features into num.bin (deafult 20) bins based on their average expression, and calculates z-scores for dispersion within each bin. The purpose of this is to identify variable features while controlling for the strong relationship between variability and average expression + +“dispersion” (disp): selects the genes with the highest dispersion values + +More details on the `seurat documentation +<https://satijalab.org/seurat/reference/findvariablefeatures>`__ + +Scale and regress the data with ScaleData +========================================= + +Scale and center features in the dataset. + +If variables are provided in vars.to.regress, they are individually regressed against each feature, and the resulting residuals are then scaled and centered. + +More details on the `seurat documentation +<https://satijalab.org/seurat/reference/scaledata>`__ + +SCTransform +=========== + +Use this function as an alternative to the NormalizeData, FindVariableFeatures, ScaleData workflow. + +Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, data being log1p(counts), scale.data being pearson residuals; sctransform::vst intermediate results are saved in misc slot of new assay. + +More details on the `seurat documentation +<https://satijalab.org/seurat/reference/sctransform>`__ + + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file