Mercurial > repos > miller-lab > genome_diversity
comparison filter_gd_snp.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 95a05c1ef5d5 |
children |
comparison
equal
deleted
inserted
replaced
26:91e835060ad2 | 27:8997f2ca8c7a |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import gd_util | |
3 import sys | 4 import sys |
4 import subprocess | |
5 from Population import Population | 5 from Population import Population |
6 | 6 |
7 ################################################################################ | 7 ################################################################################ |
8 | |
9 def convert_percent(string_value): | |
10 if string_value.endswith('%'): | |
11 val = convert_non_negative_int(string_value[:-1]) | |
12 if val > 100: | |
13 print >> sys.stderr, 'percentage: "%d" > 100' % val | |
14 sys.exit(1) | |
15 val = val * -1 | |
16 else: | |
17 val = convert_non_negative_int(string_value) | |
18 | |
19 return str(val) | |
8 | 20 |
9 def convert_non_negative_int(string_value): | 21 def convert_non_negative_int(string_value): |
10 try: | 22 try: |
11 val = int(string_value) | 23 val = int(string_value) |
12 except: | 24 except: |
16 if val < 0: | 28 if val < 0: |
17 print >> sys.stderr, '"%d" is negative' % val | 29 print >> sys.stderr, '"%d" is negative' % val |
18 sys.exit(1) | 30 sys.exit(1) |
19 | 31 |
20 return val | 32 return val |
21 | |
22 | |
23 def convert_percent(string_value): | |
24 if string_value.endswith('%'): | |
25 val = convert_non_negative_int(string_value[:-1]) | |
26 if val > 100: | |
27 print >> sys.stderr, 'percentage: "%d" > 100' % val | |
28 sys.exit(1) | |
29 val = val * -1 | |
30 else: | |
31 val = convert_non_negative_int(string_value) | |
32 | |
33 return str(val) | |
34 | 33 |
35 ################################################################################ | 34 ################################################################################ |
36 | 35 |
37 if len(sys.argv) < 9: | 36 if len(sys.argv) != 13: |
38 print >> sys.stderr, "Usage" | 37 gd_util.die('Usage') |
39 sys.exit(1) | |
40 | 38 |
41 input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8] | 39 input, output, ref_chrom_col, min_spacing, lo_genotypes, p1_input, input_type, lo_coverage, hi_coverage, low_ind_cov, low_quality, ind_arg = sys.argv[1:] |
42 individual_metadata = sys.argv[8:] | |
43 | 40 |
44 p_total = Population() | 41 p_total = Population() |
45 p_total.from_tag_list(individual_metadata) | 42 p_total.from_wrapped_dict(ind_arg) |
46 | 43 |
47 p1 = Population() | 44 p1 = Population() |
48 p1.from_population_file(p1_input) | 45 p1.from_population_file(p1_input) |
49 | 46 |
50 if not p_total.is_superset(p1): | 47 if not p_total.is_superset(p1): |
51 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | 48 gd_util.die('There is an individual in the population that is not in the SNP table') |
52 sys.exit(1) | |
53 | 49 |
54 lo = convert_percent(lo) | 50 lo_coverage = convert_percent(lo_coverage) |
55 hi = convert_percent(hi) | 51 hi_coverage = convert_percent(hi_coverage) |
52 | |
53 if input_type == 'gd_snp': | |
54 type_arg = 1 | |
55 elif input_type == 'gd_genotype': | |
56 type_arg = 0 | |
57 else: | |
58 gd_util.die('unknown input_type: {0}'.format(input_type)) | |
56 | 59 |
57 ################################################################################ | 60 ################################################################################ |
58 | 61 |
59 prog = 'filter_snps' | 62 prog = 'filter_snps' |
60 | 63 |
61 args = [] | 64 args = [ prog ] |
62 args.append(prog) | 65 args.append(input) # file containing a Galaxy table |
63 args.append(input) | 66 args.append(type_arg) # 1 for a gd_snp file, 0 for gd_genotype |
64 args.append(lo) | 67 args.append(lo_coverage) # lower bound on total coverage (< 0 means interpret as percentage) |
65 args.append(hi) | 68 args.append(hi_coverage) # upper bound on total coveraae (< 0 means interpret as percentage) |
66 args.append(lo_ind) | 69 args.append(low_ind_cov) # lower bound on individual coverage |
67 args.append(lo_ind_qual) | 70 args.append(low_quality) # lower bound on individual quality value |
71 args.append(lo_genotypes) # lower bound on the number of defined genotypes | |
72 args.append(min_spacing) # lower bound on the spacing between SNPs | |
73 args.append(ref_chrom_col) # reference-chromosome column (base-1); ref position in next column | |
68 | 74 |
69 columns = p1.column_list() | 75 columns = p1.column_list() |
76 for column in sorted(columns): | |
77 args.append(column) # the starting columns (base-1) for the chosen individuals | |
70 | 78 |
71 for column in sorted(columns): | 79 with open(output, 'w') as fh: |
72 args.append(column) | 80 gd_util.run_program(prog, args, stdout=fh) |
73 | |
74 fh = open(output, 'w') | |
75 | |
76 #print "args:", ' '.join(args) | |
77 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) | |
78 rc = p.wait() | |
79 fh.close() | |
80 | 81 |
81 sys.exit(0) | 82 sys.exit(0) |
82 | 83 |