Mercurial > repos > artbio > gsc_filter_cells
comparison filter_cells.xml @ 0:e63bd8f13679 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_cells commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:37:45 -0400 |
parents | |
children | 6ffcbb980f07 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e63bd8f13679 |
---|---|
1 <tool id="filter_cells" name="Filter cells data" version="0.9.0"> | |
2 <description>on total aligned reads and/or number of detected genes</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement> | |
5 <requirement type="package" version="2.2.1=r3.3.2_0">r-ggplot2</requirement> | |
6 </requirements> | |
7 <stdio> | |
8 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
9 </stdio> | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 Rscript $__tool_directory__/filter_cells.R | |
12 -f $input | |
13 --sep | |
14 #if $sep == 'tab': | |
15 'tab' | |
16 #elif $sep == 'comma': | |
17 'comma' | |
18 #end if | |
19 --percentile_genes '$percentile_genes' | |
20 --percentile_counts '$percentile_counts' | |
21 --absolute_genes '$absolute_genes' | |
22 --absolute_counts '$absolute_counts' | |
23 --manage_cutoffs | |
24 #if $manage_cutoffs == 'union': | |
25 'union' | |
26 #elif $manage_cutoffs == 'intersect': | |
27 'intersect' | |
28 #end if | |
29 --pdfplot $pdfplot | |
30 --output $output | |
31 --output_metada $output_metada | |
32 ]]></command> | |
33 <inputs> | |
34 <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/> | |
35 <param name="sep" type="select" label="Indicate column separator"> | |
36 <option value="tab" selected="true">Tabs</option> | |
37 <option value="comma">Comma</option> | |
38 </param> | |
39 <param name="percentile_genes" value="0" type="integer" label="Detected Genes Percentile Threshold [integer] %" | |
40 help="Cells with % of detected genes below this threshold will be filtered out. Leave at 0 for no filtering" /> | |
41 <param name="percentile_counts" value="0" type="integer" label="Aligned read count Percentile Threshold [integer] %" | |
42 help="Cells with number of aligned reads below this % threshold will be filtered out. Leave at 0 for no filtering" /> | |
43 <param name="absolute_genes" value="0" type="integer" label="Absolute number of Detected Genes Threshold [integer]" | |
44 help="Cells with number of detected genes below this threshold will be filtered out. Leave at 0 for no filtering" /> | |
45 <param name="absolute_counts" value="0" type="integer" label="Absolute number of aligned read Threshold [integer]" | |
46 help="Cells with number of aligned reads below this absolute threshold will be filtered out. Leave at 0 for no filtering" /> | |
47 <param name="manage_cutoffs" type="select" label=" filter out intersection or union of cutoffs" | |
48 help="If you use two cutoffs on number of detected genes and number of aligned reads, respectively, there are two options | |
49 for using these cutoffs in filtering: either excluding items that are below one or the other threshold (union) or | |
50 excluding items that are below both thresholds (intersection)" > | |
51 <option value="union" selected="true">Union of cutoffs</option> | |
52 <option value="intersect">Intersection of cutoffs</option> | |
53 </param> | |
54 </inputs> | |
55 <outputs> | |
56 <data name="pdfplot" format="pdf" label="Plots from ${on_string}" /> | |
57 <data name="output" format="tabular" label="Cell data filtered with ${on_string}" /> | |
58 <data name="output_metada" format="tabular" label="Cell metadata filtered with ${on_string}" /> | |
59 </outputs> | |
60 <tests> | |
61 <test> | |
62 <param name="input" value="input.csv" ftype="txt"/> | |
63 <param name="sep" value='comma' /> | |
64 <param name="percentile_genes" value="20"/> | |
65 <param name="percentile_counts" value="20"/> | |
66 <output name="pdfplot" file="percentile_gene-and-counts.pdf" ftype="pdf"/> | |
67 <output name="output" file="percentile_gene-and-counts.tab" ftype="tabular"/> | |
68 <output name="output_metada" file="percentile_gene-and-counts.meta" ftype="tabular"/> | |
69 </test> | |
70 <test> | |
71 <param name="input" value="input.csv" ftype="txt"/> | |
72 <param name="sep" value='comma' /> | |
73 <param name="percentile_genes" value="20"/> | |
74 <param name="percentile_counts" value="20"/> | |
75 <param name="manage_cutoffs" value="intersect"/> | |
76 <output name="pdfplot" file="intersect_percentile_gene-and-counts.pdf" ftype="pdf"/> | |
77 <output name="output" file="intersect_percentile_gene-and-counts.tab" ftype="tabular"/> | |
78 <output name="output_metada" file="intersect_percentile_gene-and-counts.meta" ftype="tabular"/> | |
79 </test> | |
80 <test> | |
81 <param name="input" value="input.tsv" ftype="txt"/> | |
82 <param name="sep" value='tab' /> | |
83 <param name="percentile_genes" value="20"/> | |
84 <param name="percentile_counts" value="20"/> | |
85 <output name="pdfplot" file="percentile_gene-and-counts.pdf" ftype="pdf"/> | |
86 <output name="output" file="percentile_gene-and-counts.tab" ftype="tabular"/> | |
87 <output name="output_metada" file="percentile_gene-and-counts.meta" ftype="tabular"/> | |
88 </test> | |
89 <test> | |
90 <param name="input" value="input.csv" ftype="txt"/> | |
91 <param name="sep" value='comma' /> | |
92 <param name="percentile_genes" value="20"/> | |
93 <output name="pdfplot" file="percentile_gene-only.pdf" ftype="pdf"/> | |
94 <output name="output" file="percentile_gene-only.tab" ftype="tabular"/> | |
95 <output name="output_metada" file="percentile_gene-only.meta" ftype="tabular"/> | |
96 </test> | |
97 <test> | |
98 <param name="input" value="input.csv" ftype="txt"/> | |
99 <param name="sep" value='comma' /> | |
100 <param name="percentile_counts" value="20"/> | |
101 <output name="pdfplot" file="percentile_counts-only.pdf" ftype="pdf"/> | |
102 <output name="output" file="percentile_counts-only.tab" ftype="tabular"/> | |
103 <output name="output_metada" file="percentile_counts-only.meta" ftype="tabular"/> | |
104 </test> | |
105 <test> | |
106 <param name="input" value="input.csv" ftype="txt"/> | |
107 <param name="sep" value='comma' /> | |
108 <output name="pdfplot" file="no-filtering.pdf" ftype="pdf"/> | |
109 <output name="output" file="no-filtering.tab" ftype="tabular"/> | |
110 <output name="output_metada" file="no-filtering.meta" ftype="tabular"/> | |
111 </test> | |
112 <test> | |
113 <param name="input" value="input.csv" ftype="txt"/> | |
114 <param name="sep" value='comma' /> | |
115 <param name="absolute_genes" value="5"/> | |
116 <param name="absolute_counts" value="1000"/> | |
117 <output name="pdfplot" file="absolute_gene-and-counts.pdf" ftype="pdf"/> | |
118 <output name="output" file="absolute_gene-and-counts.tab" ftype="tabular"/> | |
119 <output name="output_metada" file="absolute_gene-and-counts.meta" ftype="tabular"/> | |
120 </test> | |
121 <test> | |
122 <param name="input" value="input.csv" ftype="txt"/> | |
123 <param name="sep" value='comma' /> | |
124 <param name="absolute_genes" value="5"/> | |
125 <output name="pdfplot" file="absolute_gene-only.pdf" ftype="pdf"/> | |
126 <output name="output" file="absolute_gene-only.tab" ftype="tabular"/> | |
127 <output name="output_metada" file="absolute_gene-only.meta" ftype="tabular"/> | |
128 </test> | |
129 <test> | |
130 <param name="input" value="input.csv" ftype="txt"/> | |
131 <param name="sep" value='comma' /> | |
132 <param name="absolute_counts" value="1000"/> | |
133 <output name="pdfplot" file="absolute_counts-only.pdf" ftype="pdf"/> | |
134 <output name="output" file="absolute_counts-only.tab" ftype="tabular"/> | |
135 <output name="output_metada" file="absolute_counts-only.meta" ftype="tabular"/> | |
136 </test> | |
137 </tests> | |
138 <help> | |
139 | |
140 **What it does** | |
141 | |
142 The tool takes a table of gene (rows) expression values (as number of reads aligned to genes) | |
143 in single cell RNAseq sequencing libraries (columns) and filters out cells with a low number | |
144 of detected genes and/or cells with a low number of aligned reads. | |
145 | |
146 Cutoffs can be applied to absolute numbers of aligned reads or of detected genes, or to | |
147 percentile thresholds for these variables. | |
148 | |
149 For both absolute and percentile thresholds, only cells strictly below | |
150 these thresholds are excluded (cell cutoffs do not include the threshold values). | |
151 | |
152 If you choose to combine cutoffs for both the number of detected genes | |
153 and the total number of aligned reads, then you have 2 options: either exclude libraries that | |
154 fall below one OR the other threshold (Union) or exclude only libraries that fall below | |
155 both thresholds (Intersection). | |
156 | |
157 Specifying a value for both an absolute and a percentile threshold of a variable | |
158 (Number of detected genes or Number of aligned counts) is not consistent. In this | |
159 situation, the tool *does not* filter cells with respect to the corresponding variable threshold. | |
160 If a 0 is applied for both the absolute and the percentile threshold of a variable, then | |
161 this variable is not used to filter the cells. | |
162 | |
163 The tool returns a gene expression table for cells that were retained, a metadata table | |
164 that contains numbers of detected genes and aligned reads for each retained cell library and | |
165 a pdf file with three plots illustrating the performed filtering operation. | |
166 | |
167 **Input** | |
168 | |
169 A table of comma (csv) or tabulation (tsv) separated expression values, in number (integers) | |
170 of reads aligned to genes. | |
171 Gene names should be in the first column and cell names should be in the first row. | |
172 Note that in a number of csv files, the header of the gene column is omitted, resulting in | |
173 a first row with one item less than in other rows. This is handled by the tool, which | |
174 recognises this situation. | |
175 | |
176 </help> | |
177 <citations> | |
178 <citation type="bibtex"> | |
179 @Manual{, | |
180 title = {R: A Language and Environment for Statistical Computing}, | |
181 author = {{R Core Team}}, | |
182 organization = {R Foundation for Statistical Computing}, | |
183 address = {Vienna, Austria}, | |
184 year = {2014}, | |
185 url = {http://www.R-project.org/}, | |
186 } | |
187 </citation> | |
188 </citations> | |
189 </tool> |