gsc_filter_cells: filter_cells.xml comparison

comparison filter_cells.xml @ 0:e63bd8f13679 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_cells commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe

author	artbio
date	Mon, 24 Jun 2019 13:37:45 -0400
parents
children	6ffcbb980f07

comparison

equal deleted inserted replaced

--1:000000000000
+:e63bd8f13679
+<tool id="filter_cells" name="Filter cells data" version="0.9.0">
+<description>on total aligned reads and/or number of detected genes</description>
+<!-- R packages the wrapper depends on. Each version string carries a suffix
+     after '=' (presumably a conda build string; confirm before changing). -->
+<requirements>
+<requirement version="1.3.2=r3.3.2_0" type="package">r-optparse</requirement>
+<requirement version="2.2.1=r3.3.2_0" type="package">r-ggplot2</requirement>
+</requirements>
+<!-- Any exit code of 1 or greater is reported as a fatal "Tool exception". -->
+<stdio>
+<exit_code level="fatal" range="1:" description="Tool exception"/>
+</stdio>
+<!-- Builds the Rscript call for filter_cells.R.
+     Every substituted value is single-quoted so that dataset paths or a tool
+     directory containing spaces or shell metacharacters cannot split or alter
+     the command line. The sep and manage_cutoffs selects are passed through
+     directly: their option values (tab/comma, union/intersect) are exactly
+     the strings the previous if/elif chains emitted. -->
+<command detect_errors="exit_code"><![CDATA[
+Rscript '$__tool_directory__/filter_cells.R'
+-f '$input'
+--sep '$sep'
+--percentile_genes '$percentile_genes'
+--percentile_counts '$percentile_counts'
+--absolute_genes '$absolute_genes'
+--absolute_counts '$absolute_counts'
+--manage_cutoffs '$manage_cutoffs'
+--pdfplot '$pdfplot'
+--output '$output'
+--output_metada '$output_metada'
+]]></command>
+<!-- User-facing parameters.
+     All four thresholds default to 0, which the help texts define as
+     "no filtering". They are bounded below at 0 because a negative count or
+     percentile has no meaning; the two percentile thresholds are also capped
+     at 100. -->
+<inputs>
+<param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/>
+<param name="sep" type="select" label="Indicate column separator">
+<option value="tab" selected="true">Tabs</option>
+<option value="comma">Comma</option>
+</param>
+<param name="percentile_genes" value="0" type="integer" min="0" max="100" label="Detected Genes Percentile Threshold [integer] %"
+help="Cells with % of detected genes below this threshold will be filtered out. Leave at 0 for no filtering" />
+<param name="percentile_counts" value="0" type="integer" min="0" max="100" label="Aligned read count Percentile Threshold [integer] %"
+help="Cells with number of aligned reads below this % threshold will be filtered out. Leave at 0 for no filtering" />
+<param name="absolute_genes" value="0" type="integer" min="0" label="Absolute number of Detected Genes Threshold [integer]"
+help="Cells with number of detected genes below this threshold will be filtered out. Leave at 0 for no filtering" />
+<param name="absolute_counts" value="0" type="integer" min="0" label="Absolute number of aligned read Threshold [integer]"
+help="Cells with number of aligned reads below this absolute threshold will be filtered out. Leave at 0 for no filtering" />
+<!-- Decides how the gene cutoff and the read cutoff are combined when both are set. -->
+<param name="manage_cutoffs" type="select" label=" filter out intersection or union of cutoffs"
+help="If you use two cutoffs on number of detected genes and number of aligned reads, respectively, there are two options
+for using these cutoffs in filtering: either excluding items that are below one or the other threshold (union) or
+excluding items that are below both thresholds (intersection)" >
+<option value="union" selected="true">Union of cutoffs</option>
+<option value="intersect">Intersection of cutoffs</option>
+</param>
+</inputs>
+<!-- Three datasets are produced: a PDF of plots, the filtered expression
+     table, and the metadata table. The spelling "output_metada" is kept
+     as-is because the command line and the tests refer to that exact name. -->
+<outputs>
+<data format="pdf" name="pdfplot" label="Plots from ${on_string}"/>
+<data format="tabular" name="output" label="Cell data filtered with ${on_string}"/>
+<data format="tabular" name="output_metada" label="Cell metadata filtered with ${on_string}"/>
+</outputs>
+<!-- Functional tests. Parameters that a test does not set keep their
+     declared defaults (thresholds at 0, cutoffs combined by union). -->
+<tests>
+<!-- Comma-separated input, 20% percentile on both genes and counts. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="percentile_genes" value="20"/>
+<param name="percentile_counts" value="20"/>
+<output name="pdfplot" ftype="pdf" file="percentile_gene-and-counts.pdf"/>
+<output name="output" ftype="tabular" file="percentile_gene-and-counts.tab"/>
+<output name="output_metada" ftype="tabular" file="percentile_gene-and-counts.meta"/>
+</test>
+<!-- Same percentiles, cutoffs combined by intersection. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="percentile_genes" value="20"/>
+<param name="percentile_counts" value="20"/>
+<param name="manage_cutoffs" value="intersect"/>
+<output name="pdfplot" ftype="pdf" file="intersect_percentile_gene-and-counts.pdf"/>
+<output name="output" ftype="tabular" file="intersect_percentile_gene-and-counts.tab"/>
+<output name="output_metada" ftype="tabular" file="intersect_percentile_gene-and-counts.meta"/>
+</test>
+<!-- Tab-separated input; expected files are the same as for the first test. -->
+<test>
+<param name="input" ftype="txt" value="input.tsv"/>
+<param name="sep" value="tab"/>
+<param name="percentile_genes" value="20"/>
+<param name="percentile_counts" value="20"/>
+<output name="pdfplot" ftype="pdf" file="percentile_gene-and-counts.pdf"/>
+<output name="output" ftype="tabular" file="percentile_gene-and-counts.tab"/>
+<output name="output_metada" ftype="tabular" file="percentile_gene-and-counts.meta"/>
+</test>
+<!-- Percentile threshold on detected genes only. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="percentile_genes" value="20"/>
+<output name="pdfplot" ftype="pdf" file="percentile_gene-only.pdf"/>
+<output name="output" ftype="tabular" file="percentile_gene-only.tab"/>
+<output name="output_metada" ftype="tabular" file="percentile_gene-only.meta"/>
+</test>
+<!-- Percentile threshold on aligned read counts only. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="percentile_counts" value="20"/>
+<output name="pdfplot" ftype="pdf" file="percentile_counts-only.pdf"/>
+<output name="output" ftype="tabular" file="percentile_counts-only.tab"/>
+<output name="output_metada" ftype="tabular" file="percentile_counts-only.meta"/>
+</test>
+<!-- All thresholds left at their defaults. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<output name="pdfplot" ftype="pdf" file="no-filtering.pdf"/>
+<output name="output" ftype="tabular" file="no-filtering.tab"/>
+<output name="output_metada" ftype="tabular" file="no-filtering.meta"/>
+</test>
+<!-- Absolute thresholds on both genes and counts. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="absolute_genes" value="5"/>
+<param name="absolute_counts" value="1000"/>
+<output name="pdfplot" ftype="pdf" file="absolute_gene-and-counts.pdf"/>
+<output name="output" ftype="tabular" file="absolute_gene-and-counts.tab"/>
+<output name="output_metada" ftype="tabular" file="absolute_gene-and-counts.meta"/>
+</test>
+<!-- Absolute threshold on detected genes only. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="absolute_genes" value="5"/>
+<output name="pdfplot" ftype="pdf" file="absolute_gene-only.pdf"/>
+<output name="output" ftype="tabular" file="absolute_gene-only.tab"/>
+<output name="output_metada" ftype="tabular" file="absolute_gene-only.meta"/>
+</test>
+<!-- Absolute threshold on aligned read counts only. -->
+<test>
+<param name="input" ftype="txt" value="input.csv"/>
+<param name="sep" value="comma"/>
+<param name="absolute_counts" value="1000"/>
+<output name="pdfplot" ftype="pdf" file="absolute_counts-only.pdf"/>
+<output name="output" ftype="tabular" file="absolute_counts-only.tab"/>
+<output name="output_metada" ftype="tabular" file="absolute_counts-only.meta"/>
+</test>
+</tests>
+<help>
+**What it does**
+The tool takes a table of gene (rows) expression values (as number of reads aligned to genes)
+in single cell RNAseq sequencing libraries (columns) and filters out cells with low number
+of detected genes and/or cells with low number of aligned reads.
+Cutoffs can be applied to absolute numbers of aligned reads or of detected genes, or to
+percentile thresholds for these variables.
+For both absolute and percentile thresholds, only cells strictly below
+these thresholds are excluded (cell cutoffs do not include the threshold values).
+If you choose to combine cutoffs for both the number of detected genes
+and the total number of aligned reads, then you have 2 options: either exclude libraries that
+do not satisfy one OR the other threshold (Union) or exclude libraries that do not satisfy
+both thresholds (Intersection).
+Specifying a value both for an absolute and a percentile threshold of a variable
+(Number of detected genes or Number of aligned counts) is not consistent. In this
+situation, the tool *does not* filter cells with respect to the corresponding variable threshold.
+If a 0 is applied both for an absolute and a percentile threshold of a variable, then
+this variable is not used to filter the cells.
+The tool returns a gene expression table for cells that were retained, a metadata table
+that contains numbers of detected genes and aligned reads for each retained cell library and
+a pdf file with three plots illustrating the performed filtering operation.
+**Input**
+A table of comma (csv) or tabulation (tsv) separated expression values, in number (integers)
+of reads aligned to genes.
+Gene names should be in the first column and cell names should be in the first row.
+Note that in a number of csv files, the header of the gene column is omitted, resulting in
+a first row with one item less than in other rows. The tool recognises and handles
+this situation.
+</help>
+<!-- Single BibTeX entry citing the R language manual (R Core Team, 2014). -->
+<citations>
+<citation type="bibtex">
+@Manual{,
+title = {R: A Language and Environment for Statistical Computing},
+author = {{R Core Team}},
+organization = {R Foundation for Statistical Computing},
+address = {Vienna, Austria},
+year = {2014},
+url = {http://www.R-project.org/},
+}
+</citation>
+</citations>
+</tool>

Mercurial > repos > artbio > gsc_filter_cells

comparison filter_cells.xml @ 0:e63bd8f13679 draft