Mercurial > repos > artbio > gsc_filter_genes
diff filter_genes.xml @ 0:f689c4ea8c43 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_genes commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:38:10 -0400 |
parents | |
children | 5d2304b09f58 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_genes.xml Mon Jun 24 13:38:10 2019 -0400
@@ -0,0 +1,107 @@
+<tool id="filter_genes" name="Filter genes in single cell data" version="0.9.0">
+    <!-- Galaxy wrapper around filter_genes.R: reads an expression table and
+         writes the rows (genes) that pass the detection filter to "output". -->
+    <description>which are detected in less than a given fraction of the libraries</description>
+    <requirements>
+        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
+    </requirements>
+    <!-- Any non-zero exit code of the script is reported as a fatal tool error. -->
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Tool exception" />
+    </stdio>
+    <!-- Every substituted value is single-quoted so that paths containing spaces
+         or shell metacharacters are passed to Rscript as one argument. -->
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/filter_genes.R'
+            --input '$input'
+            --sep
+            #if $sep == 'tab':
+                'tab'
+            #elif $sep == 'comma':
+                'comma'
+            #end if
+            --colnames '$colnames'
+            --percentile_detection '$percentile_detection'
+            --absolute_detection '$absolute_detection'
+            --output '$output'
+]]></command>
+    <inputs>
+        <param name="input" type="data" format="txt,tabular" label="Expression data" help="a csv or tsv table file" />
+        <param name="sep" type="select" label="Indicate column separator">
+            <option value="tab" selected="true">Tabs</option>
+            <option value="comma">Comma</option>
+        </param>
+        <param name="colnames" type="select" label="First row contains column names">
+            <option value="TRUE" selected="true">True</option>
+            <option value="FALSE">False</option>
+        </param>
+        <!-- Both thresholds default to 0, which the help section below documents as "criterion not used". -->
+        <param name="percentile_detection" value="0.0" type="float" label="remove genes that are expressed in less than this fraction of cells"
+               help="Fraction is expressed as a floating number &lt; 1" />
+        <param name="absolute_detection" value="0" type="integer" label="remove genes that are expressed in less than this number of cells"
+               help="an absolute number of cells/libraries" />
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="Cell data filtered from ${on_string}" />
+    </outputs>
+    <tests>
+        <test> <!-- null case: both thresholds keep their default value -->
+            <param name="input" value="input.tsv" ftype="txt"/>
+            <param name="sep" value="tab" />
+            <param name="colnames" value="TRUE"/>
+            <output name="output" file="filtered-null.tab" ftype="tabular"/>
+        </test>
+        <test> <!-- comma-separated input, fraction threshold only -->
+            <param name="input" value="input.csv" ftype="txt"/>
+            <param name="sep" value="comma" />
+            <param name="colnames" value="TRUE"/>
+            <param name="percentile_detection" value="0.05"/>
+            <output name="output" file="filtered-0.05.tab" ftype="tabular"/>
+        </test>
+        <test> <!-- comma-separated input, absolute threshold only -->
+            <param name="input" value="input.csv" ftype="txt"/>
+            <param name="sep" value="comma" />
+            <param name="colnames" value="TRUE"/>
+            <param name="percentile_detection" value="0.0"/>
+            <param name="absolute_detection" value="5"/>
+            <output name="output" file="filtered-5.tab" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+The tool takes a table of *normalized* gene expression values
+(i.e. log2(CPM+1), TPM, RPK, etc...) from single cell RNAseq sequencing libraries (columns)
+and filters out genes (rows) that are detected in less than the specified fraction of libraries,
+or in less than an absolute number of libraries.
+
+A criterion ("less than this fraction of cells" or "less than this number of cells") left at 0 is not used.
+If neither criterion is set, no gene will be filtered out. If both criteria are set (which is logically impossible),
+the criterion "less than this fraction of cells" will be used by default.
+
+A TSV gene expression table for genes that passed the filter is returned.
+
+**Input**
+
+A table of comma (csv) or tabulation (tsv) separated values of *normalized* gene expressions,
+i.e. log2(CPM+1), TPM, RPK, etc...
+Gene names should be in the first column and cell names should be in the first row.
+Note that in a number of csv files, the header of the gene column is omitted, resulting in
+a first row with one item less than in other rows. Although this is not recommended, the tool
+handles this type of table and will return a filtered table with the same structure.
+
+    </help>
+    <citations>
+        <citation type="bibtex">
+        @Manual{,
+            title = {R: A Language and Environment for Statistical Computing},
+            author = {{R Core Team}},
+            organization = {R Foundation for Statistical Computing},
+            address = {Vienna, Austria},
+            year = {2014},
+            url = {http://www.R-project.org/},
+        }
+        </citation>
+    </citations>
+</tool>