Mercurial > repos > artbio > gsc_scran_normalize
diff scran_normalize.xml @ 0:252eded61848 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_scran_normalize commit ddcf915dd9b690d7f3876e08b939adde36cbb8dd"
author | artbio |
---|---|
date | Thu, 26 Sep 2019 10:50:55 -0400 |
parents | |
children | fb2f1b8b0013 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scran_normalize.xml Thu Sep 26 10:50:55 2019 -0400 @@ -0,0 +1,107 @@ +<tool id="scran_normalize" name="scran_normalize" version="0.2.0"> + <description>Normalize raw counts expression values using deconvolution size factors</description> + <requirements> + <requirement type="package" version="1.6.2">r-optparse</requirement> + <requirement type="package" version="1.12.1=r36he1b5a44_0">bioconductor-scran</requirement> + </requirements> + <stdio> + <exit_code range="1:" level="fatal" description="Tool exception" /> + </stdio> + <command detect_errors="exit_code"><![CDATA[ + Rscript $__tool_directory__/scran-normalize.R + --data '$input' + --sep '$input_sep' + #if $metacell.cluster == "Yes": + --cluster + --method '$metacell.method' + --size '$metacell.size' + #end if + -o ${output} +]]></command> + <inputs> + <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data" help = "Must have an header"/> + <param name="input_sep" type="select" label="Input column separator"> + <option value="tab" selected="true">Tabulation</option> + <option value=",">Comma</option> + </param> + <conditional name="metacell"> + <param name="cluster" type="select" label = "Do you want to cluster cells ?" help="Perform scaling method on metacell, see Details"> + <option value="Yes">Yes</option> + <option value="No" selected="true">No</option> + </param> + <when value="Yes"> + <param name="method" type="select" label="Clustering method"> + <option value="hclust" selected="true">hclust</option> + <option value="igraph">igprah</option> + </param> + <param name="size" type="integer" value="100" label="Minimum size of each cluster"/> + </when> + <when value="No"/> + </conditional> + </inputs> + <outputs> + <data name="output" format="tabular" label="Normalized Log counts of ${on_string}"> + </data> + </outputs> + <tests> + <test> + <param name="input" value="counts.tab" ftype="tabular"/> + <output name="output" file="logcounts.tab" ftype="tabular"/> + </test> + <test> + <param name="input" value="counts.tab" ftype="tabular"/> + <param name="cluster" value="Yes"/> + <param name="method" value="igraph"/> + <param name="size" value="25"/> + <output name="output" file="logcounts_igraph.tsv" ftype="tabular"/> + </test> + <test> + <param name="input" value="counts.tab" ftype="tabular"/> + <param name="cluster" value="Yes"/> + <param name="method" value="hclust"/> + <param name="size" value="25"/> + <output name="output" file="logcounts_hclust.tsv" ftype="tabular"/> + </test> + </tests> + <help> + +**What it does** + +Takes a raw count expression matrix and returns a table of log transformed scran-normalized expression values. + +This computes size factors that are used to scale the counts in each cell. The assumption is that +most genes are not differentially expressed (DE) between cells, such that any differences in +expression across the majority of genes represents some technical bias that should be removed. + +Cell-specific biases are normalized using the computeSumFactors method, which implements the +deconvolution strategy for scaling normalization (A. T. Lun, Bach, and Marioni 2016). It creates a reference : + - if no clustering step : the average count of all transcriptomes + - if you choose to cluster your cells : the average count of each cluster. +Then it pools cells and then sum their expression profiles. The size factor is described as the median ration +between the count sums and the average across all genes. Finally it constructs a linear distribution (deconvolution method) +of size factors by taking multiple pools of cells. + +You can apply this method on cell cluster instead of your all set of cells by using quickCluster. +It defines cluster using distances based on Spearman correlation on counts between cells, there is two available methods : + + - *hclust* : hierarchical clustering on the distance matrix and dynamic tree cut. + - *igraph* : constructs a Shared Nearest Neighbor graph (SNN) on the distance matrix and identifies highly connected communities. + + +Note: First header row must NOT start with a '#' comment character + + </help> + <citations> + <citation type="bibtex"> + @Article{, + author = {Aaron T. L. Lun and Davis J. McCarthy and John C. Marioni}, + title = {A step-by-step workflow for low-level analysis of single-cell RNA-seq data with Bioconductor}, + journal = {F1000Res.}, + year = {2016}, + volume = {5}, + pages = {2122}, + doi = {10.12688/f1000research.9501.2}, + } + </citation> + </citations> +</tool>