annotate average_fst.py @ 24:248b06e86022

Added gd_genotype datatype. Modified tools to support new datatype.
author Richard Burhans <burhans@bx.psu.edu>
date Tue, 28 May 2013 16:24:19 -0400
parents f04f40a36cc8
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
#!/usr/bin/env python
"""Run the external ``Fst_ave`` program on populations drawn from a SNP table.

Usage:
    average_fst.py input p1_input p2_input input_type data_source \
        min_total_count discard_fixed output shuffles p0_input \
        individual_metadata [individual_metadata ...]

The script builds the ``Fst_ave`` command line from its own arguments plus
one ``<column>:<label>`` entry per column of each population (label ``1`` for
population 1, ``2`` for population 2 and ``0`` for the optional population 0),
runs the program with its standard output redirected to ``output``, and exits
with a non-zero status if the program fails.
"""

import subprocess
import sys

from Population import Population

################################################################################

# Name of the external executable; it is resolved through PATH by subprocess.
PROG = 'Fst_ave'


def _fail(message):
    """Write ``message`` and a newline to stderr, then exit with status 1."""
    sys.stderr.write(message + '\n')
    sys.exit(1)


def _load_population(path, p_total, label):
    """Load a population file and check it against the SNP table individuals.

    ``path`` is handed to ``Population.from_population_file``.  The script
    exits with status 1 when ``p_total`` is not a superset of the loaded
    population; ``label`` (0, 1 or 2) only selects the number shown in that
    error message.
    """
    population = Population()
    population.from_population_file(path)
    if not p_total.is_superset(population):
        _fail('There is an individual in population {0} that is not in the SNP table'.format(label))
    return population


def _column_args(population, label, input_type):
    """Return the ``<column>:<label>`` arguments for one population.

    For ``gd_genotype`` input every column number is reduced by 2 before it is
    formatted.  NOTE(review): presumably this compensates for a different
    column layout in that datatype -- confirm against Fst_ave's expectations.
    """
    specs = []
    for column in population.column_list():
        if input_type == 'gd_genotype':
            column = int(column) - 2
        specs.append('{0}:{1}'.format(column, label))
    return specs


def main(argv):
    """Validate ``argv``, run ``Fst_ave`` and return the process exit status.

    ``argv`` has the same layout as ``sys.argv``: ten fixed positional
    arguments followed by at least one individual-metadata tag.
    Returns 0 on success and a positive status when ``Fst_ave`` fails.
    """
    if len(argv) < 12:
        _fail('Usage')

    (input_path, p1_input, p2_input, input_type, data_source,
     min_total_count, discard_fixed, output, shuffles, p0_input) = argv[1:11]
    individual_metadata = argv[11:]

    # A non-numeric shuffle argument disables the population 0 handling here;
    # the raw string is still forwarded to Fst_ave unchanged below.
    try:
        shuffle_count = int(shuffles)
    except ValueError:
        shuffle_count = 0

    p_total = Population()
    p_total.from_tag_list(individual_metadata)

    p1 = _load_population(p1_input, p_total, 1)
    p2 = _load_population(p2_input, p_total, 2)

    # Population 0 is only read when shuffling was requested.
    p0 = None
    if shuffle_count > 0:
        p0 = _load_population(p0_input, p_total, 0)

    ############################################################################

    args = [PROG, input_path, data_source, min_total_count, discard_fixed, shuffles]
    args.extend(_column_args(p1, 1, input_type))
    args.extend(_column_args(p2, 2, input_type))
    if p0 is not None:
        args.extend(_column_args(p0, 0, input_type))

    # The context manager closes the output file even if starting or waiting
    # for the child process raises.
    with open(output, 'w') as fh:
        proc = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
        rc = proc.wait()

    if rc != 0:
        # Previously the return code was discarded and the script always
        # exited 0, hiding Fst_ave failures from the caller.
        sys.stderr.write('{0} exited with status {1}\n'.format(PROG, rc))
        # A negative code means the child was killed by a signal; map it to a
        # plain failure status so the exit code stays in the valid range.
        return rc if rc > 0 else 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
83