annotate tools/stats/filtering.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="Filter1" name="Filter" version="1.1.0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>data on any column using simple expressions</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 filtering.py $input $out_file1 "$cond" ${input.metadata.columns} "${input.metadata.column_types}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 <param format="tabular" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 <param name="cond" size="40" type="text" value="c1=='chr22'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <data format="input" name="out_file1" metadata_source="input"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <param name="input" value="1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <param name="cond" value="c1=='chr22'"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <output name="out_file1" file="filter1_test1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <param name="input" value="7.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <param name="cond" value="c1=='chr1' and c3-c2>=2000 and c6=='+'"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <output name="out_file1" file="filter1_test2.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <!-- Test filtering of file with a variable number of columns. -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 <param name="input" value="filter1_in3.sam"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 <param name="cond" value="c3=='chr1' and c5>5"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <output name="out_file1" file="filter1_test3.sam"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <param name="input" value="filter1_inbad.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <param name="cond" value="c1=='chr22'"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <output name="out_file1" file="filter1_test4.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 **TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the columns being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings). If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition. The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue".
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 **TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 **Syntax**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 The filter tool allows you to restrict the dataset using simple conditional statements.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 - Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 - Make sure that multi-character operators contain no white space ( e.g., **&lt;=** is valid while **&lt; =** is not valid )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 - When using the 'equal-to' operator, a **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 - Non-numerical values must be included in single or double quotes ( e.g., **c6=='+'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 - The filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or not c6=='+'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 - **c1=='chr1'** selects lines in which the first column is chr1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 - **c3-c2&lt;100*c4** selects lines where subtracting column 2 from column 3 gives a value less than the value of column 4 times 100
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 - **len(c2.split(',')) &lt; 4** will select lines where the second column has fewer than four comma-separated elements
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 - **c2>=1** selects lines in which the value of column 2 is greater than or equal to 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 - Numbers should not contain commas - **c2&lt;=44,554,350** will not work, but **c2&lt;=44554350** will
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 - Some words in the data can be used, but must be single or double quoted ( e.g., **c3=='exon'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 </tool>