Mercurial > repos > miller-lab > genome_diversity
comparison filter_gd_snp.xml @ 22:95a05c1ef5d5
update to devshed revision aaece207bd01
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 11 Mar 2013 11:28:06 -0400 |
parents | f04f40a36cc8 |
children | 8997f2ca8c7a |
comparison
equal
deleted
inserted
replaced
21:d6b961721037 | 22:95a05c1ef5d5 |
---|---|
1 <tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.0.0"> | 1 <tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.1.0"> |
2 <description>: Discard some SNPs based on coverage or quality</description> | 2 <description>: Discard some SNPs based on coverage or quality</description> |
3 | 3 |
4 <command interpreter="python"> | 4 <command interpreter="python"> |
5 modify_snp_table.py "$input" "$p1_input" "$output" "$lo_coverage" "$hi_coverage" "$low_ind_cov" "$lo_quality" | 5 filter_gd_snp.py "$input" "$p1_input" "$output" "$lo_coverage" "$hi_coverage" "$low_ind_cov" "$lo_quality" |
6 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) | 6 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) |
7 #set $arg = '%s:%s' % ($individual_col, $individual) | 7 #set $arg = '%s:%s' % ($individual_col, $individual) |
8 "$arg" | 8 "$arg" |
9 #end for | 9 #end for |
10 </command> | 10 </command> |
11 | 11 |
12 <inputs> | 12 <inputs> |
13 <param name="input" type="data" format="gd_snp" label="SNP dataset" /> | 13 <param name="input" type="data" format="gd_snp" label="SNP dataset" /> |
14 <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> | 14 <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> |
15 <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" /> | 15 <param name="lo_coverage" type="text" value="0" label="Lower bound on total coverage"> |
16 <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" /> | 16 <sanitizer> |
17 <valid initial="string.digits"> | |
18 <!-- &#37; is the percent (%) character --> | |
19 <add value="&#37;" /> | |
20 </valid> | |
21 </sanitizer> | |
22 </param> | |
23 <param name="hi_coverage" type="text" value="1000" label="Upper bound on total coverage"> | |
24 <sanitizer> | |
25 <valid initial="string.digits"> | |
26 <!-- &#37; is the percent (%) character --> | |
27 <add value="&#37;" /> | |
28 </valid> | |
29 </sanitizer> | |
30 </param> | |
17 <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" /> | 31 <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" /> |
18 <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" /> | 32 <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" /> |
19 </inputs> | 33 </inputs> |
20 | 34 |
21 <outputs> | 35 <outputs> |
53 "population", by supplying a list that has been previously created using the | 67 "population", by supplying a list that has been previously created using the |
54 Specify Individuals tool. SNPs are then discarded if their total coverage | 68 Specify Individuals tool. SNPs are then discarded if their total coverage |
55 for the population is too low or too high, or if their coverage or quality | 69 for the population is too low or too high, or if their coverage or quality |
56 score for any individual in the population is too low. | 70 score for any individual in the population is too low. |
57 | 71 |
72 The upper and lower bounds on total population coverage can be specified | |
73 either as read counts or as percentiles (e.g. "5%", with no decimal places). | |
74 For percentile bounds the SNPs are ranked by read count, so for example, a | |
75 lower bound of "10%" means that the least-covered 10% of the SNPs will be | |
76 discarded, while an upper bound of, say, "80%" will discard all SNPs above | |
77 the 80% mark, i.e. the top 20%. The threshold for the lower bound on | |
78 individual coverage can only be specified as a plain read count. | |
79 | |
58 ----- | 80 ----- |
59 | 81 |
60 **Example** | 82 **Example** |
61 | 83 |
62 - input gd_snp:: | 84 - input gd_snp:: |