Mercurial > repos > miller-lab > genome_diversity
annotate filter_gd_snp.py @ 39:e56023008e36 default tip
Changed revision of package_fisher_0_1_4 to be2fc454d121
Changed revision of package_matplotlib_1_2 to a03ee94316b5
author | miller-lab |
---|---|
date | Mon, 06 Jul 2015 10:32:24 -0400 |
parents | 8997f2ca8c7a |
children |
rev | line source |
---|---|
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
2 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
3 import gd_util |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
4 import sys |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
5 from Population import Population |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
6 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
7 ################################################################################ |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
8 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
9 def convert_percent(string_value): |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
10 if string_value.endswith('%'): |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
11 val = convert_non_negative_int(string_value[:-1]) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
12 if val > 100: |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
13 print >> sys.stderr, 'percentage: "%d" > 100' % val |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
14 sys.exit(1) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
15 val = val * -1 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
16 else: |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
17 val = convert_non_negative_int(string_value) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
18 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
19 return str(val) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
20 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
21 def convert_non_negative_int(string_value): |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
22 try: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
23 val = int(string_value) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
24 except: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
25 print >> sys.stderr, '"%s" is not an integer' % string_value |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
26 sys.exit(1) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
27 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
28 if val < 0: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
29 print >> sys.stderr, '"%d" is negative' % val |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
30 sys.exit(1) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
31 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
32 return val |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
33 |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
34 ################################################################################ |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
35 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
36 if len(sys.argv) != 13: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
37 gd_util.die('Usage') |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
38 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
39 input, output, ref_chrom_col, min_spacing, lo_genotypes, p1_input, input_type, lo_coverage, hi_coverage, low_ind_cov, low_quality, ind_arg = sys.argv[1:] |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
40 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
41 p_total = Population() |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
42 p_total.from_wrapped_dict(ind_arg) |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
43 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
44 p1 = Population() |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
45 p1.from_population_file(p1_input) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
46 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
47 if not p_total.is_superset(p1): |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
48 gd_util.die('There is an individual in the population that is not in the SNP table') |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
49 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
50 lo_coverage = convert_percent(lo_coverage) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
51 hi_coverage = convert_percent(hi_coverage) |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
52 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
53 if input_type == 'gd_snp': |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
54 type_arg = 1 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
55 elif input_type == 'gd_genotype': |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
56 type_arg = 0 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
57 else: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
58 gd_util.die('unknown input_type: {0}'.format(input_type)) |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
59 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
60 ################################################################################ |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
61 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
62 prog = 'filter_snps' |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
63 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
64 args = [ prog ] |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
65 args.append(input) # file containing a Galaxy table |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
66 args.append(type_arg) # 1 for a gd_snp file, 0 for gd_genotype |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
67 args.append(lo_coverage) # lower bound on total coverage (< 0 means interpret as percentage) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
68 args.append(hi_coverage) # upper bound on total coveraae (< 0 means interpret as percentage) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
69 args.append(low_ind_cov) # lower bound on individual coverage |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
70 args.append(low_quality) # lower bound on individual quality value |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
71 args.append(lo_genotypes) # lower bound on the number of defined genotypes |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
72 args.append(min_spacing) # lower bound on the spacing between SNPs |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
73 args.append(ref_chrom_col) # reference-chromosome column (base-1); ref position in next column |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
74 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
75 columns = p1.column_list() |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
76 for column in sorted(columns): |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
77 args.append(column) # the starting columns (base-1) for the chosen individuals |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
78 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
79 with open(output, 'w') as fh: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
80 gd_util.run_program(prog, args, stdout=fh) |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
81 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
82 sys.exit(0) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
83 |