annotate diversity_pi.py @ 32:03c22b722882

remove BeautifulSoup dependency
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:54:23 -0400
parents a631c2f6d913
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
1 #!/usr/bin/env python
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
2
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
3 import gd_util
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
4 import sys
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
5 from Population import Population
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
6
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
7 ################################################################################
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
8
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
9 def load_pop(file, wrapped_dict):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
10 if file == '/dev/null':
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
11 pop = None
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
12 else:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
13 pop = Population()
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
14 pop.from_wrapped_dict(wrapped_dict)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
15 return pop
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
16
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
17 def append_tags(the_list, p, p_type, val):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
18 if p is None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
19 return
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
20 for tag in p.tag_list():
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
21 column, name = tag.split(':')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
22 if p_type == 'gd_genotype':
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
23 column = int(column) - 2
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
24 the_list.append('{0}:{1}:{2}'.format(val, column, name))
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
25
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
26 ################################################################################
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
27
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
28 if len(sys.argv) != 11:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
29 gd_util.die('Usage')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
30
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
31 snp_input, snp_ext, snp_arg, cov_input, cov_ext, cov_arg, indiv_input, min_coverage, req_thresh, output = sys.argv[1:]
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
32
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
33 p_snp = load_pop(snp_input, snp_arg)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
34 p_cov = load_pop(cov_input, cov_arg)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
35
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
36 p_ind = Population()
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
37 p_ind.from_population_file(indiv_input)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
38
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
39 if not p_snp.is_superset(p_ind):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
40 gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype table')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
41
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
42 if p_cov is not None and (not p_cov.is_superset(p_ind)):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
43 gd_util.die('There is an individual in the population individuals that is not in the Coverage table')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
44
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
45 ################################################################################
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
46
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
47 prog = 'mito_pi'
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
48
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
49 args = [ prog ]
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
50 args.append(snp_input)
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
51 args.append(cov_input)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
52 args.append(min_coverage)
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
53 args.append(req_thresh)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
54
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
55 append_tags(args, p_ind, 'gd_indivs', 0)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
56 append_tags(args, p_snp, snp_ext, 1)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
57 append_tags(args, p_cov, cov_ext, 2)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
58
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
59 with open(output, 'w') as fh:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
60 gd_util.run_program(prog, args, stdout=fh)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
61
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
62 sys.exit(0)
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
63