comparison filter_cells.xml @ 0:e63bd8f13679 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_cells commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author artbio
date Mon, 24 Jun 2019 13:37:45 -0400
parents
children 6ffcbb980f07
comparison
equal deleted inserted replaced
-1:000000000000 0:e63bd8f13679
1 <tool id="filter_cells" name="Filter cells data" version="0.9.0">
2 <description>on total aligned reads and/or number of detected genes</description>
3 <requirements> <!-- R packages required by the tool. NOTE(review): the "=r3.3.2_0" suffix in each version looks like a conda build string tied to R 3.3.2; confirm before bumping versions. -->
4 <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
5 <requirement type="package" version="2.2.1=r3.3.2_0">r-ggplot2</requirement>
6 </requirements>
7 <stdio> <!-- Any exit code of 1 or higher is treated as a fatal error and reported as "Tool exception". -->
8 <exit_code range="1:" level="fatal" description="Tool exception" />
9 </stdio>
10 <command detect_errors="exit_code"><![CDATA[
11 Rscript '$__tool_directory__/filter_cells.R'
12 -f '$input'
13 --sep
14 #if $sep == 'tab':
15 'tab'
16 #elif $sep == 'comma':
17 'comma'
18 #end if
19 --percentile_genes '$percentile_genes'
20 --percentile_counts '$percentile_counts'
21 --absolute_genes '$absolute_genes'
22 --absolute_counts '$absolute_counts'
23 --manage_cutoffs
24 #if $manage_cutoffs == 'union':
25 'union'
26 #elif $manage_cutoffs == 'intersect':
27 'intersect'
28 #end if
29 --pdfplot '$pdfplot'
30 --output '$output'
31 --output_metada '$output_metada'
32 ]]></command> <!-- Runs filter_cells.R from the tool directory with the selected separator, the four thresholds, the cutoff combination mode and the three output paths. Every substituted value, including the script and dataset paths, is single-quoted so that paths containing spaces or shell metacharacters reach the script as one argument. -->
33 <inputs> <!-- Count table, its column separator, two percentile thresholds (bounded to 0-100), two absolute thresholds (non-negative) and the way gene and read cutoffs are combined. A value of 0 disables the corresponding threshold. -->
34 <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/>
35 <param name="sep" type="select" label="Indicate column separator">
36 <option value="tab" selected="true">Tabs</option>
37 <option value="comma">Comma</option>
38 </param>
39 <param name="percentile_genes" value="0" type="integer" min="0" max="100" label="Detected Genes Percentile Threshold [integer] %"
40 help="Cells with % of detected genes below this threshold will be filtered out. Leave at 0 for no filtering" />
41 <param name="percentile_counts" value="0" type="integer" min="0" max="100" label="Aligned read count Percentile Threshold [integer] %"
42 help="Cells with number of aligned reads below this % threshold will be filtered out. Leave at 0 for no filtering" />
43 <param name="absolute_genes" value="0" type="integer" min="0" label="Absolute number of Detected Genes Threshold [integer]"
44 help="Cells with number of detected genes below this threshold will be filtered out. Leave at 0 for no filtering" />
45 <param name="absolute_counts" value="0" type="integer" min="0" label="Absolute number of aligned read Threshold [integer]"
46 help="Cells with number of aligned reads below this absolute threshold will be filtered out. Leave at 0 for no filtering" />
47 <param name="manage_cutoffs" type="select" label="Filter out intersection or union of cutoffs"
48 help="If you use two cutoffs on number of detected genes and number of aligned reads, respectively, there are two options
49 for using these cutoffs in filtering: either excluding items that are below one or the other threshold (union) or
50 excluding items that are below both thresholds (intersection)" >
51 <option value="union" selected="true">Union of cutoffs</option>
52 <option value="intersect">Intersection of cutoffs</option>
53 </param>
54 </inputs>
55 <outputs> <!-- Three datasets whose names match the pdfplot, output and output_metada options passed on the command line. The spelling "output_metada" is used consistently in the command and in the tests; keep all of them in sync if it is ever renamed. -->
56 <data name="pdfplot" format="pdf" label="Plots from ${on_string}" />
57 <data name="output" format="tabular" label="Cell data filtered with ${on_string}" />
58 <data name="output_metada" format="tabular" label="Cell metadata filtered with ${on_string}" />
59 </outputs>
60 <tests> <!-- Nine scenarios; parameters that a test does not set keep their declared defaults (thresholds at 0, union of cutoffs). -->
61 <test> <!-- Comma-separated input, 20% percentile thresholds on both genes and counts, default union. -->
62 <param name="input" value="input.csv" ftype="txt"/>
63 <param name="sep" value='comma' />
64 <param name="percentile_genes" value="20"/>
65 <param name="percentile_counts" value="20"/>
66 <output name="pdfplot" file="percentile_gene-and-counts.pdf" ftype="pdf"/>
67 <output name="output" file="percentile_gene-and-counts.tab" ftype="tabular"/>
68 <output name="output_metada" file="percentile_gene-and-counts.meta" ftype="tabular"/>
69 </test>
70 <test> <!-- Same thresholds as the first test, combined with the intersect mode. -->
71 <param name="input" value="input.csv" ftype="txt"/>
72 <param name="sep" value='comma' />
73 <param name="percentile_genes" value="20"/>
74 <param name="percentile_counts" value="20"/>
75 <param name="manage_cutoffs" value="intersect"/>
76 <output name="pdfplot" file="intersect_percentile_gene-and-counts.pdf" ftype="pdf"/>
77 <output name="output" file="intersect_percentile_gene-and-counts.tab" ftype="tabular"/>
78 <output name="output_metada" file="intersect_percentile_gene-and-counts.meta" ftype="tabular"/>
79 </test>
80 <test> <!-- Tab-separated input (input.tsv); expected files are the same as for the first test. -->
81 <param name="input" value="input.tsv" ftype="txt"/>
82 <param name="sep" value='tab' />
83 <param name="percentile_genes" value="20"/>
84 <param name="percentile_counts" value="20"/>
85 <output name="pdfplot" file="percentile_gene-and-counts.pdf" ftype="pdf"/>
86 <output name="output" file="percentile_gene-and-counts.tab" ftype="tabular"/>
87 <output name="output_metada" file="percentile_gene-and-counts.meta" ftype="tabular"/>
88 </test>
89 <test> <!-- Percentile threshold on detected genes only. -->
90 <param name="input" value="input.csv" ftype="txt"/>
91 <param name="sep" value='comma' />
92 <param name="percentile_genes" value="20"/>
93 <output name="pdfplot" file="percentile_gene-only.pdf" ftype="pdf"/>
94 <output name="output" file="percentile_gene-only.tab" ftype="tabular"/>
95 <output name="output_metada" file="percentile_gene-only.meta" ftype="tabular"/>
96 </test>
97 <test> <!-- Percentile threshold on aligned read counts only. -->
98 <param name="input" value="input.csv" ftype="txt"/>
99 <param name="sep" value='comma' />
100 <param name="percentile_counts" value="20"/>
101 <output name="pdfplot" file="percentile_counts-only.pdf" ftype="pdf"/>
102 <output name="output" file="percentile_counts-only.tab" ftype="tabular"/>
103 <output name="output_metada" file="percentile_counts-only.meta" ftype="tabular"/>
104 </test>
105 <test> <!-- No threshold set: all four thresholds stay at 0. -->
106 <param name="input" value="input.csv" ftype="txt"/>
107 <param name="sep" value='comma' />
108 <output name="pdfplot" file="no-filtering.pdf" ftype="pdf"/>
109 <output name="output" file="no-filtering.tab" ftype="tabular"/>
110 <output name="output_metada" file="no-filtering.meta" ftype="tabular"/>
111 </test>
112 <test> <!-- Absolute thresholds on both detected genes (5) and aligned reads (1000). -->
113 <param name="input" value="input.csv" ftype="txt"/>
114 <param name="sep" value='comma' />
115 <param name="absolute_genes" value="5"/>
116 <param name="absolute_counts" value="1000"/>
117 <output name="pdfplot" file="absolute_gene-and-counts.pdf" ftype="pdf"/>
118 <output name="output" file="absolute_gene-and-counts.tab" ftype="tabular"/>
119 <output name="output_metada" file="absolute_gene-and-counts.meta" ftype="tabular"/>
120 </test>
121 <test> <!-- Absolute threshold on detected genes only. -->
122 <param name="input" value="input.csv" ftype="txt"/>
123 <param name="sep" value='comma' />
124 <param name="absolute_genes" value="5"/>
125 <output name="pdfplot" file="absolute_gene-only.pdf" ftype="pdf"/>
126 <output name="output" file="absolute_gene-only.tab" ftype="tabular"/>
127 <output name="output_metada" file="absolute_gene-only.meta" ftype="tabular"/>
128 </test>
129 <test> <!-- Absolute threshold on aligned reads only. -->
130 <param name="input" value="input.csv" ftype="txt"/>
131 <param name="sep" value='comma' />
132 <param name="absolute_counts" value="1000"/>
133 <output name="pdfplot" file="absolute_counts-only.pdf" ftype="pdf"/>
134 <output name="output" file="absolute_counts-only.tab" ftype="tabular"/>
135 <output name="output_metada" file="absolute_counts-only.meta" ftype="tabular"/>
136 </test>
137 </tests>
138 <help>
139
140 **What it does**
141
142 The tool takes a table of gene (rows) expression values (as number of reads aligned to genes)
143 in single cell RNAseq sequencing libraries (columns) and filters out cells with low number
144 of detected genes and/or cells with low number of aligned reads.
145
146 Cutoffs can be applied to absolute numbers of aligned reads or of detected genes, or to
147 percentile thresholds for these variables.
148
149 For both absolute and percentile thresholds, only cells strictly below
150 these thresholds are excluded (cells whose value equals the threshold are retained).
151
152 If you choose to combine cutoffs for both the number of detected genes
153 and the total number of aligned reads, then you have 2 options: either exclude libraries that
154 do not satisfy one OR the other threshold (Union) or exclude libraries that do not satisfy
155 both thresholds (Intersection).
156
157 Specifying a value both for an absolute and a percentile threshold of a variable
158 (Number of detected genes or Number of aligned counts) is not consistent. In this
159 situation, the tool *does not* filter cells with respect to the corresponding variable threshold.
160 If a 0 is applied both for an absolute and a percentile threshold of a variable, then
161 this variable is not used to filter the cells.
162
163 The tool returns a gene expression table for cells that were retained, a metadata table
164 that contains numbers of detected genes and aligned reads for each retained cell library and
165 a pdf file with three plots illustrating the performed filtering operation.
166
167 **Input**
168
169 A table of comma (csv) or tabulation (tsv) separated expression values, in number (integers)
170 of reads aligned to genes.
171 Gene names should be in the first column and cell names should be in the first row.
172 Note that in a number of csv files, the header of the gene column is omitted, resulting in
173 a first row with one item less than in other rows. This is handled by the tool, which
174 recognises this situation.
175
176 </help>
177 <citations> <!-- Single BibTeX entry citing R itself (R Core Team, 2014). -->
178 <citation type="bibtex">
179 @Manual{,
180 title = {R: A Language and Environment for Statistical Computing},
181 author = {{R Core Team}},
182 organization = {R Foundation for Statistical Computing},
183 address = {Vienna, Austria},
184 year = {2014},
185 url = {http://www.R-project.org/},
186 }
187 </citation>
188 </citations>
189 </tool>