comparison filter_gd_snp.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 95a05c1ef5d5
children
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import gd_util
3 import sys 4 import sys
4 import subprocess
5 from Population import Population 5 from Population import Population
6 6
7 ################################################################################ 7 ################################################################################
8
9 def convert_percent(string_value):
10 if string_value.endswith('%'):
11 val = convert_non_negative_int(string_value[:-1])
12 if val > 100:
13 print >> sys.stderr, 'percentage: "%d" > 100' % val
14 sys.exit(1)
15 val = val * -1
16 else:
17 val = convert_non_negative_int(string_value)
18
19 return str(val)
8 20
9 def convert_non_negative_int(string_value): 21 def convert_non_negative_int(string_value):
10 try: 22 try:
11 val = int(string_value) 23 val = int(string_value)
12 except: 24 except:
16 if val < 0: 28 if val < 0:
17 print >> sys.stderr, '"%d" is negative' % val 29 print >> sys.stderr, '"%d" is negative' % val
18 sys.exit(1) 30 sys.exit(1)
19 31
20 return val 32 return val
21
22
23 def convert_percent(string_value):
24 if string_value.endswith('%'):
25 val = convert_non_negative_int(string_value[:-1])
26 if val > 100:
27 print >> sys.stderr, 'percentage: "%d" > 100' % val
28 sys.exit(1)
29 val = val * -1
30 else:
31 val = convert_non_negative_int(string_value)
32
33 return str(val)
34 33
35 ################################################################################ 34 ################################################################################
36 35
37 if len(sys.argv) < 9: 36 if len(sys.argv) != 13:
38 print >> sys.stderr, "Usage" 37 gd_util.die('Usage')
39 sys.exit(1)
40 38
41 input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8] 39 input, output, ref_chrom_col, min_spacing, lo_genotypes, p1_input, input_type, lo_coverage, hi_coverage, low_ind_cov, low_quality, ind_arg = sys.argv[1:]
42 individual_metadata = sys.argv[8:]
43 40
44 p_total = Population() 41 p_total = Population()
45 p_total.from_tag_list(individual_metadata) 42 p_total.from_wrapped_dict(ind_arg)
46 43
47 p1 = Population() 44 p1 = Population()
48 p1.from_population_file(p1_input) 45 p1.from_population_file(p1_input)
49 46
50 if not p_total.is_superset(p1): 47 if not p_total.is_superset(p1):
51 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' 48 gd_util.die('There is an individual in the population that is not in the SNP table')
52 sys.exit(1)
53 49
54 lo = convert_percent(lo) 50 lo_coverage = convert_percent(lo_coverage)
55 hi = convert_percent(hi) 51 hi_coverage = convert_percent(hi_coverage)
52
53 if input_type == 'gd_snp':
54 type_arg = 1
55 elif input_type == 'gd_genotype':
56 type_arg = 0
57 else:
58 gd_util.die('unknown input_type: {0}'.format(input_type))
56 59
57 ################################################################################ 60 ################################################################################
58 61
59 prog = 'filter_snps' 62 prog = 'filter_snps'
60 63
61 args = [] 64 args = [ prog ]
62 args.append(prog) 65 args.append(input) # file containing a Galaxy table
63 args.append(input) 66 args.append(type_arg) # 1 for a gd_snp file, 0 for gd_genotype
64 args.append(lo) 67 args.append(lo_coverage) # lower bound on total coverage (< 0 means interpret as percentage)
65 args.append(hi) 68 args.append(hi_coverage) # upper bound on total coveraae (< 0 means interpret as percentage)
66 args.append(lo_ind) 69 args.append(low_ind_cov) # lower bound on individual coverage
67 args.append(lo_ind_qual) 70 args.append(low_quality) # lower bound on individual quality value
71 args.append(lo_genotypes) # lower bound on the number of defined genotypes
72 args.append(min_spacing) # lower bound on the spacing between SNPs
73 args.append(ref_chrom_col) # reference-chromosome column (base-1); ref position in next column
68 74
69 columns = p1.column_list() 75 columns = p1.column_list()
76 for column in sorted(columns):
77 args.append(column) # the starting columns (base-1) for the chosen individuals
70 78
71 for column in sorted(columns): 79 with open(output, 'w') as fh:
72 args.append(column) 80 gd_util.run_program(prog, args, stdout=fh)
73
74 fh = open(output, 'w')
75
76 #print "args:", ' '.join(args)
77 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
78 rc = p.wait()
79 fh.close()
80 81
81 sys.exit(0) 82 sys.exit(0)
82 83