Mercurial > repos > artbio > gsc_filter_genes
comparison filter_genes.xml @ 0:f689c4ea8c43 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_genes commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:38:10 -0400 |
parents | |
children | 5d2304b09f58 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f689c4ea8c43 |
---|---|
1 <tool id="filter_genes" name="Filter genes in single cell data" version="0.9.0"> | |
2 <description>which are detected in less than a given fraction of the libraries</description> | |
3 <requirements> <!-- Runtime dependencies resolved for the tool --> | |
4 <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement> <!-- r-optparse package; the version string also carries a build tag (r3.3.2_0) --> | |
5 </requirements> | |
6 <stdio> <!-- Error detection rules for the job --> | |
7 <exit_code range="1:" level="fatal" description="Tool exception" /> <!-- any exit code of 1 or above is reported as a fatal "Tool exception" --> | |
8 </stdio> | |
9 <!-- Runs filter_genes.R from the tool directory; every interpolated value is single-quoted so paths with spaces or shell metacharacters stay one argument --> <command detect_errors="exit_code"><![CDATA[ | |
10 Rscript '$__tool_directory__/filter_genes.R' | |
11 --input '$input' | |
12 --sep | |
13 #if $sep == 'tab': | |
14 'tab' | |
15 #elif $sep == 'comma': | |
16 'comma' | |
17 #end if | |
18 --colnames '$colnames' | |
19 --percentile_detection '$percentile_detection' | |
20 --absolute_detection '$absolute_detection' | |
21 --output '$output' | |
22 ]]></command> | |
23 <inputs> <!-- Form parameters; each name matches a $variable used in the command template --> | |
24 <param name="input" type="data" format="txt,tabular" label="Expression data" help="a csv or tsv table file" /> | |
25 <param name="sep" type="select" label="Indicate column separator"> | |
26 <option value="tab" selected="true">Tabs</option> | |
27 <option value="comma">Comma</option> | |
28 </param> | |
29 <param name="colnames" type="select" label="First row contains column names"> | |
30 <option value="TRUE" selected="true">True</option> | |
31 <option value="FALSE">False</option> | |
32 </param> | |
33 <param name="percentile_detection" value="0.0" type="float" label="remove genes that are expressed in less than this fraction of cells" | |
34 help="Fraction is expressed as a floating-point number &lt; 1" /> | |
35 <param name="absolute_detection" value="0" type="integer" label="remove genes that are expressed in less than this number of cells" | |
36 help="an absolute number of cells/libraries" /> | |
37 </inputs> | |
38 <outputs> <!-- Single result dataset produced by the tool --> | |
39 <data name="output" format="tabular" label="Cell data filtered from ${on_string}" /> <!-- tabular dataset referenced as $output in the command template --> | |
40 </outputs> | |
41 <tests> <!-- Three cases: no filtering, fraction threshold, absolute threshold --> | |
42 <test> <!-- null case: tab-separated input, both thresholds left at their defaults --> | |
43 <param name="input" value="input.tsv" ftype="txt"/> | |
44 <param name="sep" value="tab"/> | |
45 <param name="colnames" value="TRUE"/> | |
46 <output name="output" file="filtered-null.tab" ftype="tabular"/> | |
47 </test> | |
48 <test> <!-- comma-separated input, fraction threshold set to 0.05 --> | |
49 <param name="input" value="input.csv" ftype="txt"/> | |
50 <param name="sep" value="comma"/> | |
51 <param name="colnames" value="TRUE"/> | |
52 <param name="percentile_detection" value="0.05"/> | |
53 <output name="output" file="filtered-0.05.tab" ftype="tabular"/> | |
54 </test> | |
55 <test> <!-- comma-separated input, fraction threshold at 0.0 and absolute threshold set to 5 --> | |
56 <param name="input" value="input.csv" ftype="txt"/> | |
57 <param name="sep" value="comma"/> | |
58 <param name="colnames" value="TRUE"/> | |
59 <param name="percentile_detection" value="0.0"/> | |
60 <param name="absolute_detection" value="5"/> | |
61 <output name="output" file="filtered-5.tab" ftype="tabular"/> | |
62 </test> | |
63 </tests> | |
64 <help> | |
65 | |
66 **What it does** | |
67 | |
68 The tool takes a table of *normalized* gene expression values | |
69 (e.g. log2(CPM+1), TPM, RPK, etc.) from single cell RNAseq sequencing libraries (columns) | |
70 and filters out genes (rows) that are detected in less than the specified fraction of libraries, | |
71 or in fewer than an absolute number of libraries. | |
72 | |
73 A criterion ("less than this fraction of cells" or "less than this number of cells") left at 0 is not used. | |
74 If neither criterion is set, no gene will be filtered out. If both criteria are set (which makes no logical sense), | |
75 the criterion "less than this fraction of cells" will be used by default. | |
76 | |
77 A TSV gene expression table for genes that passed the filter is returned. | |
78 | |
79 **Input** | |
80 | |
81 A table of comma (csv) or tabulation (tsv) separated values of *normalized* gene expressions, | |
82 e.g. log2(CPM+1), TPM, RPK, etc. | |
83 Gene names should be in the first column and cell names should be in the first row. | |
84 Note that in a number of csv files, the header of the gene column is omitted, resulting in | |
85 a first row with one item less than in other rows. Although this is not recommended, the tool | |
86 handles this type of table and will return a filtered table with the same structure. | |
87 | |
88 </help> | |
89 <citations> <!-- References shown to users of the tool --> | |
90 <citation type="bibtex"> <!-- BibTeX entry for the R language manual (note: the entry key after "@Manual{" is empty) --> | |
91 @Manual{, | |
92 title = {R: A Language and Environment for Statistical Computing}, | |
93 author = {{R Core Team}}, | |
94 organization = {R Foundation for Statistical Computing}, | |
95 address = {Vienna, Austria}, | |
96 year = {2014}, | |
97 url = {http://www.R-project.org/}, | |
98 } | |
99 </citation> | |
100 </citations> | |
101 </tool> |