Mercurial > repos > ebi-gxa > seurat_find_variable_genes
changeset 0:a56efad05337 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 9bf9a6e46a330890be932f60d1d996dd166426c4
author | ebi-gxa |
---|---|
date | Wed, 03 Apr 2019 11:17:53 -0400 |
parents | |
children | a6077346f869 |
files | README seurat_find_variable_genes.xml seurat_macros.xml |
diffstat | 3 files changed, 157 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Wed Apr 03 11:17:53 2019 -0400 @@ -0,0 +1,1 @@ +Seurat tools
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seurat_find_variable_genes.xml Wed Apr 03 11:17:53 2019 -0400 @@ -0,0 +1,106 @@ +<tool id="seurat_find_variable_genes" name="Seurat FindVariableGenes" version="2.3.1+galaxy1"> + <description>identify variable genes</description> + <macros> + <import>seurat_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version" /> + <command detect_errors="exit_code"><![CDATA[ +seurat-find-variable-genes.R + +--input-object-file '$input' +#if $mean: + --mean-function '$mean' +#end if +#if $disp: +--dispersion-function $disp +#end if +#if $xlow: +--x-low-cutoff $xlow +#end if +#if $xhigh: +--x-high-cutoff $xhigh +#end if +#if $ylow: +--y-low-cutoff $ylow +#end if +#if $yhigh: +--y-high-cutoff $yhigh +#end if +--output-object-file '$output' +--output-text-file '$output_tab' +]]></command> + + <inputs> + <param name="input" argument="--input-object-file" type="data" format="rdata" label="Seurat RDS object" help="R serialized object for Seurat, normally the one produced by Seurat FindVariableGenes" /> + <param name="mean" argument="--mean-function" type="text" optional="True" label="Mean function" help="Function to compute x-axis value (average expression). Default is to take the mean of the detected (i.e. non-zero) values."/> + <param name="disp" argument="--dispersion-function" type="text" optional="True" label="Dispersion function" help="Function to compute y-axis value (dispersion). Default is to take the standard deviation of all values." /> + <param name="xlow" argument="--x-low-cutoff" type="float" optional="True" label="X-axis low cutoff" help="Bottom cutoff on x-axis for identifying variable genes."/> + <param name="xhigh" argument="--x-high-cutoff" type="float" optional="True" label="X-axis high cutoff" help="Top cutoff on x-axis for identifying variable genes."/> + <param name="ylow" argument="--y-low-cutoff" type="float" optional="True" label="Y-axis low cutoff" help="Bottom cutoff on y-axis for identifying variable genes."/> + <param name="yhigh" argument="--y-high-cutoff" type="float" optional="True" label="Y-axis high cutoff" help="Top cutoff on y-axis for identifying variable genes."/> + </inputs> + + <outputs> + <data name="output" format="rdata" from_work_dir="*.rds" label="${tool.name} on ${on_string}: Seurat RDS"/> + <data name="output_tab" format="tabular" from_work_dir="*.tab" label="${tool.name} on ${on_string}: Variable genes tabular file"/> + </outputs> + + <tests> + <test> + <param name="input" ftype="rdata" value="out_norm.rds"/> + <output name="output" ftype="rdata" value="out_findvar.rds" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. +It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and +interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse +types of single cell data. + +This tool identifies genes that are outliers on a 'mean variability plot'. First, uses +a function to calculate average expression (mean.function) and dispersion (dispersion.function) +for each gene. Next, divides genes into num.bin (deafult 20) bins based on +their average expression, and calculates z-scores for dispersion within each +bin. The purpose of this is to identify variable genes while controlling for +the strong relationship between variability and average expression. + +Exact parameter settings may vary empirically from dataset to dataset, and +based on visual inspection of the plot. + +Setting the y.cutoff parameter to 2 identifies genes that are more than two standard +deviations away from the average dispersion within a bin. The default X-axis function +is the mean expression level, and for Y-axis it is the log(Variance/mean). All mean/variance +calculations are not performed in log-space, but the results are reported in log-space - +see relevant functions for exact details. + +----- + +**Inputs** + + * Seurat RDS object + * Mean function. Function to compute x-axis value (average expression). Default is to take the mean of the detected (i.e. non-zero) values. + * Dispersion function. Function to compute y-axis value (dispersion). Default is to take the standard deviation of all values. + * Bottom cutoff on x-axis for identifying variable genes. + * Top cutoff on x-axis for identifying variable genes. + * Bottom cutoff on y-axis for identifying variable genes. + * Top cutoff on y-axis for identifying variable genes. + +----- + +**Outputs** + + * Seurat RDS object. Places variable genes in object@var.genes. The result of all analysis is stored in object@hvg.info + * Tabular file of variable genes + +.. _Seurat: https://www.nature.com/articles/nbt.4096 +.. _Satija Lab: https://satijalab.org/seurat/ + +@VERSION_HISTORY@ +]]></help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seurat_macros.xml Wed Apr 03 11:17:53 2019 -0400 @@ -0,0 +1,50 @@ +<?xml version="1.0"?> +<macros> + + <token name="@VERSION@">0.0.5</token> + + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">seurat-scripts</requirement> + </requirements> + </xml> + + <xml name="version"> + <version_command><![CDATA[ +echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(seurat); cat(sessionInfo()\$otherPkgs\$seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]></version_command> + </xml> + + <xml name="genes-use-input"> + <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv, txt" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/> + </xml> + <xml name="dims-use-input"> + <param name="dims_use" argument="--dims-use" min="1" optional="true" type="integer" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/> + </xml> + + <token name="@VERSION_HISTORY@"><![CDATA[ +**Version history** + +0.0.1: Initial contribution. Maria Doyle, https://github.com/mblue9. + +2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at +EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski(https://github.com/drosofff) and Lea Bellenger(https://github.com/bellenger-l). + ]]></token> + + + <xml name="citations"> + <citations> + <citation type="doi">10.1038/nbt.4096</citation> + <citation type="bibtex"> + @misc{r-seurat-scripts.git, + author = {Jonathan Manning, Pablo Moreno, EBI Gene Expression Team}, + year = {2018}, + title = {Seurat-scripts: command line interface for Seurat}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git}, + } + </citation> + </citations> + </xml> +</macros>