Mercurial > repos > miller-lab > genome_diversity
annotate diversity_pi.py @ 26:91e835060ad2
Updates to Admixture, Aggregate Individuals, and Restore Attributes to support gd_genotype
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 03 Jun 2013 12:29:29 -0400 |
parents | 248b06e86022 |
children | 8997f2ca8c7a |
rev | line source |
---|---|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
2 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
3 import sys |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
4 import subprocess |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
5 from Population import Population |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
6 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
7 def run_program(prog, args, stdout_file=None, space_to_tab=False): |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
8 #print "args: ", ' '.join(args) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
9 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
10 (stdoutdata, stderrdata) = p.communicate() |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
11 rc = p.returncode |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
12 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
13 if stdout_file is not None: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
14 with open(stdout_file, 'w') as ofh: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
15 lines = stdoutdata.split('\n') |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
16 for line in lines: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
17 line = line.strip() |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
18 if line: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
19 if space_to_tab: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
20 line = line.replace(' ', '\t') |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
21 print >> ofh, line |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
22 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
23 if rc != 0: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
24 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
25 print >> sys.stderr, stderrdata |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
26 sys.exit(1) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
27 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
28 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
29 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
30 if len(sys.argv) < 7: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
31 print >> sys.stderr, "Usage" |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
32 sys.exit(1) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
33 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
34 snp_input, coverage_input, indiv_input, min_coverage, output = sys.argv[1:6] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
35 individual_metadata = sys.argv[6:] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
36 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
37 p_total = Population() |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
38 p_total.from_tag_list(individual_metadata) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
39 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
40 p1 = Population() |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
41 p1.from_population_file(indiv_input) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
42 if not p_total.is_superset(p1): |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
43 print >> sys.stderr, 'There is an individual in the population individuals that is not in the SNP table' |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
44 sys.exit(1) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
45 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
46 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
47 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
48 prog = 'mt_pi' |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
49 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
50 args = [ prog ] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
51 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
52 args.append(snp_input) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
53 args.append(coverage_input) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
54 args.append(min_coverage) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
55 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
56 for tag in p1.tag_list(): |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
57 args.append(tag) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
58 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
59 run_program(prog, args, stdout_file=output) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
60 sys.exit(0) |