diff scran_normalize.xml @ 0:252eded61848 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_scran_normalize commit ddcf915dd9b690d7f3876e08b939adde36cbb8dd"
author artbio
date Thu, 26 Sep 2019 10:50:55 -0400
parents
children fb2f1b8b0013
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scran_normalize.xml	Thu Sep 26 10:50:55 2019 -0400
@@ -0,0 +1,107 @@
+<tool id="scran_normalize" name="scran_normalize" version="0.2.0">
+    <description>Normalize raw counts expression values using deconvolution size factors</description>
+    <requirements> <!-- Packages that must be available when the command below runs -->
+        <requirement type="package" version="1.6.2">r-optparse</requirement>
+        <requirement type="package" version="1.12.1=r36he1b5a44_0">bioconductor-scran</requirement> <!-- NOTE(review): the version carries a build-string suffix; presumably pins one specific build - confirm it is still required -->
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Tool exception" /> <!-- any exit code of 1 or higher is reported as a fatal "Tool exception" -->
+    </stdio>
+    <!-- Runs scran-normalize.R; every path is single-quoted so values containing spaces survive shell word-splitting. -->
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/scran-normalize.R'
+            --data '$input' --sep '$input_sep'
+            #if $metacell.cluster == "Yes":
+                --cluster
+                --method '$metacell.method'
+                --size '$metacell.size'
+            #end if
+            -o '$output'
+]]></command>
+    <inputs>
+        <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data" help="Must have a header"/>
+        <param name="input_sep" type="select" label="Input column separator">
+            <option value="tab" selected="true">Tabulation</option>
+            <option value=",">Comma</option>
+        </param>
+        <conditional name="metacell"> <!-- method and size are only offered when "cluster" is set to Yes -->
+            <param name="cluster" type="select" label="Do you want to cluster cells?" help="Perform scaling method on metacell, see the help section below">
+                <option value="Yes">Yes</option>
+                <option value="No" selected="true">No</option>
+            </param>
+            <when value="Yes">
+                <param name="method" type="select" label="Clustering method">
+                    <option value="hclust" selected="true">hclust</option>
+                    <option value="igraph">igraph</option>
+                </param>
+                <param name="size" type="integer" value="100" label="Minimum size of each cluster"/>
+            </when>
+            <when value="No"/> <!-- no extra parameters when clustering is disabled -->
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single tabular dataset; its path is handed to the script through the -o option -->
+        <data name="output" format="tabular" label="Normalized Log counts of ${on_string}"/>
+    </outputs>
+    <tests> <!-- NOTE(review): cluster/method/size are addressed by bare name rather than nested in a conditional element named "metacell" - confirm this is intended -->
+        <test> <!-- only the input is set, so the separator and clustering parameters keep their defaults -->
+            <param name="input" value="counts.tab" ftype="tabular"/>
+            <output name="output" file="logcounts.tab" ftype="tabular"/>
+        </test>
+        <test> <!-- clustering enabled with the igraph method and a minimum cluster size of 25 -->
+            <param name="input" value="counts.tab" ftype="tabular"/>
+            <param name="cluster" value="Yes"/>
+            <param name="method" value="igraph"/>
+            <param name="size" value="25"/>
+            <output name="output" file="logcounts_igraph.tsv" ftype="tabular"/>
+        </test>
+        <test> <!-- clustering enabled with the hclust method and a minimum cluster size of 25 -->
+            <param name="input" value="counts.tab" ftype="tabular"/>
+            <param name="cluster" value="Yes"/>
+            <param name="method" value="hclust"/>
+            <param name="size" value="25"/>
+            <output name="output" file="logcounts_hclust.tsv" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Takes a raw count expression matrix and returns a table of log transformed scran-normalized expression values.
+
+This computes size factors that are used to scale the counts in each cell. The assumption is that
+most genes are not differentially expressed (DE) between cells, such that any differences in
+expression across the majority of genes represent some technical bias that should be removed.
+
+Cell-specific biases are normalized using the computeSumFactors method, which implements the deconvolution strategy for scaling normalization (A. T. Lun, Bach, and Marioni 2016). It creates a reference:
+
+  - if no clustering step: the average count of all transcriptomes
+  - if you choose to cluster your cells: the average count of each cluster
+
+Then it pools cells and sums their expression profiles. The size factor is described as the median ratio between the count sums and the average across all genes.
+Finally it constructs a linear distribution (deconvolution method) of size factors by taking multiple pools of cells.
+
+You can apply this method to clusters of cells instead of the whole set of cells by using quickCluster.
+It defines clusters using distances based on Spearman correlation on counts between cells. There are two available methods:
+
+  - *hclust* : hierarchical clustering on the distance matrix and dynamic tree cut.
+  - *igraph* : constructs a Shared Nearest Neighbor graph (SNN) on the distance matrix and identifies highly connected communities.
+
+
+Note: First header row must NOT start with a '#' comment character
+
+    </help>
+    <citations> <!-- the BibTeX entry carries an explicit citation key, which BibTeX parsers expect -->
+        <citation type="bibtex">
+            @Article{lun2016workflow,
+                author = {Aaron T. L. Lun and Davis J. McCarthy and John C. Marioni},
+                title = {A step-by-step workflow for low-level analysis of single-cell RNA-seq data with Bioconductor},
+                journal = {F1000Res.},
+                year = {2016},
+                volume = {5},
+                pages = {2122},
+                doi = {10.12688/f1000research.9501.2},
+            }
+        </citation>
+    </citations>
+</tool>