annotate tools/stats/dna_filtering.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="dna_filter" name="Filter on ambiguities" version="1.0.0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>in polymorphism datasets</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 dna_filtering.py
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 --input=$input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 --output=$out_file1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 --cond="$cond"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 --n_handling=$n_handling
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 --columns=${input.metadata.columns}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 --col_types="${input.metadata.column_types}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <param format="tabular" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 <param name="cond" size="40" type="text" value="c4 == 'G'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <param name="n_handling" type="select" label="What is the meaning of N" help="Everything matches everything, Unknown matches nothing">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <option value="all">Everything (A, T, C, G)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <option value="none">Unknown</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <data format="input" name="out_file1" metadata_source="input"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <param name="input" ftype="tabular" value="dna_filter_in1.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 <param name="cond" value="c8=='G'" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 <param name="n_handling" value="all" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <output name="out_file1" ftype="tabular" file="dna_filter_out1.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <param name="input" value="dna_filter_in1.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <param name="cond" value="(c10 == c11 or c17 == c18) and c6 != 'C' and c23 == 'R'" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <param name="n_handling" value="all" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 <output name="out_file1" file="dna_filter_out2.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <param name="input" value="dna_filter_in1.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <param name="cond" value="c4=='B' or c9==c10" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <param name="n_handling" value="none" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <output name="out_file1" file="dna_filter_out3.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <param name="input" value="dna_filter_in1.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 <param name="cond" value="c1!='chr1' and c7!='Y' and c25!='+'" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <param name="n_handling" value="none" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 <output name="out_file1" file="dna_filter_out4.tabular" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 **TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 **TIP:** This tool is intended primarily for comparing column values (such as "c5==c12"), although it is also possible to filter on specific values (like "c6!='G'"). Be aware that when searching for specific values, any possible match is considered. So if you search on "c6!='G'", rows will be excluded when c6 is G, K, R, S, B, V, or D (plus N or X if you set that to equal "Everything"), because it is possible those values could indicate G.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 This tool is written for a very specific case related to an analysis of polymorphism data. Suppose you have a table of SNP data that looks like this::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 chromosome start end patient1 patient2 patient3 patient4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 --------------------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 chr1 100 101 A M C R
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 chr1 200 201 T K C C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 and you want to select all rows where patient1 has the same base as patient2. Unfortunately you cannot do this with the *Filter and Sort -> Filter* tool because it does not understand DNA ambiguity codes (see below). For example, at position 100 patient1 is the same as patient2 because M is a mix of As and Cs. This tool is designed to make filtering on ambiguities possible.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 **Syntax**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 The filter tool allows you to restrict the dataset using simple conditional statements:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 - Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file (e.g., **c4 == c5**)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 - When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 - Non-numerical values must be included in single or double quotes ( e.g., **c6=='C'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 - Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or c6=='+'** )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 **Allowed types of filtering**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 The following types of filtering are allowed:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 - Testing columns for equality (e.g., c2 == c4 or c2 != c4)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 - Testing that a column contains a particular base (e.g., c4 == 'C'). Only bases listed in *DNA Codes* below are allowed.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 - Testing that a column represents a plus or a minus strand (e.g., c3 == '+' or c3 != '-')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 - Testing that a column is a chromosome (c1 == 'chrX') or a scaffold (c1 == 'scaffold87976')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 All other types of filtering should be done with the *Filter and Sort -> Filter* tool.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 **DNA Codes**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 The following are the DNA codes used for filtering::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 Code Meaning
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 ---- --------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 A A
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 T T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 U T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 G G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 C C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 K G or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 M A or C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 R A or G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 Y C or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 S C or G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 W A or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 B C, G or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 V A, C or G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 H A, C or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 D A, G or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 X A, C, G or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 N A, C, G or T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 . not (A, C, G or T)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 - gap of indeterminate length
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 - **c8=='A'** selects lines in which the eighth column is A, M, R, W, V, H, or D, or N or X if appropriate
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 - **c12==c15** selects lines where the value in the twelfth column could be the same as the fifteenth and the fifteenth column could be the same as the twelfth column (based on appropriate codes)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 - **c9!=c19** selects lines where column nine could not be the same as column nineteen or column nineteen could not be the same as column nine (using appropriate codes)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 - **c4 == 'A' and c4 == c5** selects lines where columns 4 and 5 are both A, M, R, W, V, H, or D, or N or X if appropriate
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 </tool>