annotate filter_gd_snp.xml @ 22:95a05c1ef5d5

update to devshed revision aaece207bd01
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 11 Mar 2013 11:28:06 -0400
parents f04f40a36cc8
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
1 <tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.1.0">
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
2 <description>: Discard some SNPs based on coverage or quality</description>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
3
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
4 <command interpreter="python">
22
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
5 filter_gd_snp.py "$input" "$p1_input" "$output" "$lo_coverage" "$hi_coverage" "$low_ind_cov" "$lo_quality"
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
6 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
7 #set $arg = '%s:%s' % ($individual_col, $individual)
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
8 "$arg"
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
9 #end for
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
10 </command>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
11
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
12 <inputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
13 <param name="input" type="data" format="gd_snp" label="SNP dataset" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
14 <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
22
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
15 <param name="lo_coverage" type="text" value="0" label="Lower bound on total coverage">
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
16 <sanitizer>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
17 <valid initial="string.digits">
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
18 <!-- &#37; is the percent (%) character -->
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
19 <add value="&#37;" />
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
20 </valid>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
21 </sanitizer>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
22 </param>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
23 <param name="hi_coverage" type="text" value="1000" label="Upper bound on total coverage">
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
24 <sanitizer>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
25 <valid initial="string.digits">
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
26 <!-- &#37; is the percent (%) character -->
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
27 <add value="&#37;" />
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
28 </valid>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
29 </sanitizer>
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
30 </param>
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
31 <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
32 <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
33 </inputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
34
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
35 <outputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
36 <data name="output" format="gd_snp" metadata_source="input" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
37 </outputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
38
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
39 <tests>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
40 <test>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
41 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
42 <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
43 <param name="lo_coverage" value="0" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
44 <param name="hi_coverage" value="1000" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
45 <param name="low_ind_cov" value="3" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
46 <param name="lo_quality" value="30" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
47 <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
48 </test>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
49 </tests>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
50
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
51 <help>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
52
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
53 **Dataset formats**
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
54
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
55 The input datasets are in gd_snp_ and gd_indivs_ formats.
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
56 The output dataset is in gd_snp_ format. (`Dataset missing?`_)
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
57
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
58 .. _gd_snp: ./static/formatHelp.html#gd_snp
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
59 .. _gd_indivs: ./static/formatHelp.html#gd_indivs
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
60 .. _Dataset missing?: ./static/formatHelp.html
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
61
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
62 -----
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
63
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
64 **What it does**
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
65
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
66 The user specifies that some of the individuals in a gd_snp dataset form a
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
67 "population", by supplying a list that has been previously created using the
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
68 Specify Individuals tool. SNPs are then discarded if their total coverage
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
69 for the population is too low or too high, or if their coverage or quality
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
70 score for any individual in the population is too low.
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
71
22
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
72 The upper and lower bounds on total population coverage can be specified
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
73 either as read counts or as percentiles (e.g. "5%", with no decimal places).
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
74 For percentile bounds, the SNPs are ranked by read count, so, for example, a
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
75 lower bound of "10%" means that the least-covered 10% of the SNPs will be
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
76 discarded, while an upper bound of, say, "80%" will discard all SNPs above
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
77 the 80% mark, i.e. the top 20%. The threshold for the lower bound on
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
78 individual coverage can only be specified as a plain read count.
95a05c1ef5d5 update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents: 18
diff changeset
79
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
80 -----
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
81
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
82 **Example**
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
83
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
84 - input gd_snp::
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
85
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
86 Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
87 Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
88 Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
89 etc.
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
90
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
91 - input individuals::
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
92
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
93 9 PB1
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
94 13 PB2
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
95 17 PB3
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
96
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
97 - output when the lower bound on individual coverage is "3"::
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
98
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
99 Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
100 Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
101 etc.
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
102
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
103 </help>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
104 </tool>