diff filter_genes.xml @ 0:f689c4ea8c43 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_genes commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author artbio
date Mon, 24 Jun 2019 13:38:10 -0400
parents
children 5d2304b09f58
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_genes.xml	Mon Jun 24 13:38:10 2019 -0400
@@ -0,0 +1,101 @@
+<tool id="filter_genes" name="Filter genes in single cell data" version="0.9.0">
+    <description>which are detected in less that a given fraction of the libraries</description>
+    <requirements>
+        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Tool exception" />
+    </stdio>
+    <command detect_errors="exit_code"><![CDATA[ 
+        Rscript $__tool_directory__/filter_genes.R 
+            --input $input 
+            --sep 
+            #if $sep == 'tab':
+              'tab'
+            #elif $sep == 'comma':
+              'comma'
+            #end if
+            --colnames '$colnames'
+            --percentile_detection '$percentile_detection'
+            --absolute_detection '$absolute_detection'
+            --output $output
+]]></command>
+    <inputs>
+        <param name="input" type="data" format="txt,tabular" label="Expression data" help="a csv or tsv table file" />
+        <param name="sep" type="select" label="Indicate column separator">
+            <option value="tab" selected="true">Tabs</option>
+            <option value="comma">Comma</option>
+        </param>
+        <param name="colnames" type="select" label="Firt row contains column names">
+            <option value="TRUE" selected="true">True</option>
+            <option value="FALSE">False</option>
+        </param>
+        <param name="percentile_detection" value="0.0" type="float" label="remove genes that are expressed in less than this fraction of cells"
+               help="Fraction is expressed as a floatting number &lt; 1" />
+        <param name="absolute_detection" value="0" type="integer" label="remove genes that are expressed in less than this number of cells"
+               help="an absolute number of cells/libraries" />
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="Cell data filtered from ${on_string}" />
+    </outputs>
+    <tests>
+        <test> <!-- null case -->
+            <param name="input" value="input.tsv" ftype="txt"/>
+            <param name="sep" value='tab' />
+            <param name="colnames" value="TRUE"/>
+            <output name="output" file="filtered-null.tab" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="input" value="input.csv" ftype="txt"/>
+            <param name="sep" value='comma' />
+            <param name="colnames" value="TRUE"/>
+            <param name="percentile_detection" value="0.05"/>
+            <output name="output" file="filtered-0.05.tab" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="input" value="input.csv" ftype="txt"/>
+            <param name="sep" value='comma' />
+            <param name="colnames" value="TRUE"/>
+            <param name="percentile_detection" value="0.0"/>
+            <param name="absolute_detection" value="5"/>
+            <output name="output" file="filtered-5.tab" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+The tools takes a table of *normalized* gene expression values
+(i.e. log2(CPM+1), TPM, RPK, etc...) from single cell RNAseq sequencing libraries (columns)
+and filters out genes (rows) that are detected in less than the specified fraction of libraries,
+or than an absolute number of libraries.
+
+The criteria ("less than this fraction of cells" or "less than this number of cells") left at 0 is not used.
+If none criteria is set, no gene will be filtered out. If both criteria are set (which is logically impossible),
+the criteria "less than this fraction of cells" will be used by default.
+
+A TSV gene expression table for genes that passed the filter is returned.
+
+**Input**
+
+A table of comma (csv) or tabulation (tsv) separated values of _normalized_ gene expressions,
+i.e. log2(CPM+1), TPM, RPK, etc...
+Gene names should be in the first column and cell names should be in the first row.
+Note that in a number of a csv files, header of the gene column is omitted, resulting in
+a first row with one item less than in other rows. Although this is not recommended, the tool
+handles this type of table and will return a filtered table with the same structure.
+
+    </help>
+    <citations>
+        <citation type="bibtex">
+        @Manual{,
+             title = {R: A Language and Environment for Statistical Computing},
+             author = {{R Core Team}},
+             organization = {R Foundation for Statistical Computing},
+             address = {Vienna, Austria},
+             year = {2014},
+             url = {http://www.R-project.org/},
+        }
+        </citation>
+    </citations>
+</tool>