annotate draw_variants.py @ 32:03c22b722882

remove BeautifulSoup dependency
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:54:23 -0400
parents a631c2f6d913
children ea52b23f1141
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
1 #!/usr/bin/env python
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
2
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
3 import gd_util
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
4 import sys
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
5 from Population import Population
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
6
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
7 ################################################################################
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
8
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
9 def load_pop(file, wrapped_dict):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
10 if file == '/dev/null':
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
11 pop = None
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
12 else:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
13 pop = Population()
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
14 pop.from_wrapped_dict(wrapped_dict)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
15 return pop
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
16
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
17 def append_tags(the_list, p, p_type, val):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
18 if p is None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
19 return
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
20 for tag in p.tag_list():
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
21 column, name = tag.split(':')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
22 if p_type == 'gd_genotype':
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
23 column = int(column) - 2
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
24 the_list.append('{0}:{1}:{2}'.format(val, column, name))
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
25
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
26 ################################################################################
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
27
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
28 if len(sys.argv) != 11:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
29 gd_util.die('Usage')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
30
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
32 snp_file, snp_ext, snp_arg, indiv_input, annotation_input, cov_file, cov_ext, cov_arg, min_coverage, output = sys.argv[1:]
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
33
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
34 p_snp = load_pop(snp_file, snp_arg)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
35 p_cov = load_pop(cov_file, cov_arg)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
36
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
37 if indiv_input == '/dev/null':
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
38 if p_snp is not None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
39 p_ind = p_snp
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
40 elif p_cov is not None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
41 p_ind = p_cov
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
42 else:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
43 p_ind = None
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
44 order_p_ind = True
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
45 else:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
46 p_ind = Population()
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
47 p_ind.from_population_file(indiv_input)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
48 order_p_ind = False
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
49
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
50 ## p ind must be from either p_snp or p_cov
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
51 if p_snp is not None and p_cov is not None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
52 if not (p_snp.is_superset(p_ind) or p_cov.is_superset(p_ind)):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
53 gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype or Coverage table')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
54 elif p_snp is not None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
55 if not p_snp.is_superset(p_ind):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
56 gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype table')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
57 elif p_cov is not None:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
58 if not p_cov.is_superset(p_ind):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
59 gd_util.die('There is an individual in the population individuals that is not in the Coverage table')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
60
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
61
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
62 ################################################################################
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
63
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
64 prog = 'mito_draw'
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
65
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
66 args = [ prog ]
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
67 args.append(snp_file)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
68 args.append(cov_file)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
69 args.append(annotation_input)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
70 args.append(min_coverage)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
71
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
72 if order_p_ind:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
73 for column in sorted(p_ind.column_list()):
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
74 individual = p_ind.individual_with_column(column)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
75 name = individual.name.split()[0]
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
76 args.append('{0}:{1}:{2}'.format(0, column, name))
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
77 else:
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
78 append_tags(args, p_ind, 'gd_indivs', 0)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
79
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
80 append_tags(args, p_snp, snp_ext, 1)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
81 append_tags(args, p_cov, cov_ext, 2)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
82
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
83 with open('Ji.spec', 'w') as fh:
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
84 gd_util.run_program(prog, args, stdout=fh)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
85
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
86 ################################################################################
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
87
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
88 prog = 'varplot'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
89
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
90 args = [ prog ]
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
91 args.append('-w')
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
92 args.append(3)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
93 args.append('-s')
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
94 args.append(0.3)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
95 args.append('-g')
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
96 args.append(0.2)
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
97 args.append('Ji.spec')
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
98
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
99 with open('Ji.svg', 'w') as fh:
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
100 gd_util.run_program(prog, args, stdout=fh)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
101
31
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
102 ################################################################################
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
103
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
104 prog = 'convert'
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
105
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
106 args = [ prog ]
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
107 args.append('-density')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
108 args.append(100)
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
109 args.append('Ji.svg')
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
110 args.append('tiff:{0}'.format(output))
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
111
a631c2f6d913 Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents: 27
diff changeset
112 gd_util.run_program(prog, args)
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
113 sys.exit(0)