Mercurial > repos > miller-lab > genome_diversity
comparison assignment_of_optimal_breeding_pairs.py @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:4188853b940b | 31:a631c2f6d913 |
---|---|
1 #!/usr/bin/env python2.6 | |
2 | |
3 import sys | |
4 import munkres | |
5 import random | |
6 | |
7 class Vertex(object): | |
8 def __init__(self, name): | |
9 self.name = name | |
10 self.neighbors = {} | |
11 self.color = 0 | |
12 self.explored = False | |
13 | |
14 def add_neighbor(self, neighbor, weight=0.0): | |
15 if neighbor in self.neighbors: | |
16 if self.neighbors[neighbor] != weight: | |
17 die('multiple edges not supported') | |
18 else: | |
19 self.neighbors[neighbor] = weight | |
20 | |
21 class Graph(object): | |
22 def __init__(self): | |
23 self.vertex_list = {} | |
24 self.vertices = 0 | |
25 self.max_weight = 0.0 | |
26 | |
27 def add_vertex(self, name): | |
28 if name not in self.vertex_list: | |
29 self.vertex_list[name] = Vertex(name) | |
30 self.vertices += 1 | |
31 return self.vertex_list[name] | |
32 | |
33 def add_edge(self, name1, name2, weight): | |
34 vertex1 = self.add_vertex(name1) | |
35 vertex2 = self.add_vertex(name2) | |
36 vertex1.add_neighbor(vertex2, weight) | |
37 vertex2.add_neighbor(vertex1, weight) | |
38 self.max_weight = max(self.max_weight, weight) | |
39 | |
40 def from_edge_file(self, filename): | |
41 fh = try_open(filename) | |
42 line_number = 0 | |
43 for line in fh: | |
44 line_number += 1 | |
45 line = line.rstrip('\r\n') | |
46 elems = line.split() | |
47 if len(elems) < 3: | |
48 die('too few columns on line {0} of {1}:\n{2}'.format(line_number, filename, line)) | |
49 name1 = elems[0] | |
50 name2 = elems[1] | |
51 weight = float_value(elems[2]) | |
52 if weight is None: | |
53 die('invalid weight on line {0} of {1}:\n{2}'.format(line_number, filename, line)) | |
54 self.add_edge(name1, name2, weight) | |
55 fh.close() | |
56 | |
57 def bipartite_partition(self): | |
58 vertices_left = self.vertex_list.values() | |
59 | |
60 while vertices_left: | |
61 fifo = [vertices_left[0]] | |
62 while fifo: | |
63 vertex = fifo.pop(0) | |
64 if not vertex.explored: | |
65 vertex.explored = True | |
66 vertices_left.remove(vertex) | |
67 | |
68 if vertex.color == 0: | |
69 vertex.color = 1 | |
70 neighbor_color = 2 | |
71 elif vertex.color == 1: | |
72 neighbor_color = 2 | |
73 elif vertex.color == 2: | |
74 neighbor_color = 1 | |
75 | |
76 for neighbor in vertex.neighbors: | |
77 if neighbor.color == 0: | |
78 neighbor.color = neighbor_color | |
79 elif neighbor.color != neighbor_color: | |
80 return None, None | |
81 fifo.append(neighbor) | |
82 | |
83 c1 = [] | |
84 c2 = [] | |
85 | |
86 for vertex in self.vertex_list.values(): | |
87 if vertex.color == 1: | |
88 c1.append(vertex) | |
89 elif vertex.color == 2: | |
90 c2.append(vertex) | |
91 | |
92 return c1, c2 | |
93 | |
94 def try_open(*args): | |
95 try: | |
96 return open(*args) | |
97 except IOError: | |
98 die('Failed opening file: {0}'.format(args[0])) | |
99 | |
100 def float_value(token): | |
101 try: | |
102 return float(token) | |
103 except ValueError: | |
104 return None | |
105 | |
106 def die(message): | |
107 print >> sys.stderr, message | |
108 sys.exit(1) | |
109 | |
110 def main(input, randomizations, output): | |
111 graph = Graph() | |
112 graph.from_edge_file(input) | |
113 c1, c2 = graph.bipartite_partition() | |
114 | |
115 if c1 is None: | |
116 die('Graph is not bipartite') | |
117 | |
118 if len(c1) + len(c2) != graph.vertices: | |
119 die('Bipartite partition failed: {0} + {1} != {2}'.format(len(c1), len(c2), graph.vertices)) | |
120 | |
121 with open(output, 'w') as ofh: | |
122 a1 = optimal_assignment(c1, c2, graph.max_weight) | |
123 optimal_total_weight = 0.0 | |
124 for a in a1: | |
125 optimal_total_weight += a[0].neighbors[a[1]] | |
126 | |
127 print >> ofh, 'optimal average {0:.3f}'.format(optimal_total_weight / len(a1)) | |
128 | |
129 if randomizations > 0: | |
130 random_total_count = 0 | |
131 random_total_weight = 0.0 | |
132 for i in range(randomizations): | |
133 a2 = random_assignment(c1, c2) | |
134 random_total_count += len(a2) | |
135 for a in a2: | |
136 random_total_weight += a[0].neighbors[a[1]] | |
137 print >> ofh, 'random average {0:.3f}'.format(random_total_weight / random_total_count) | |
138 | |
139 | |
140 for a in a1: | |
141 print >> ofh, '\t'.join([a[0].name, a[1].name]) | |
142 | |
143 def optimal_assignment(c1, c2, max_weight): | |
144 matrix = [] | |
145 assignment = [] | |
146 | |
147 for v1 in c1: | |
148 row = [] | |
149 for v2 in c2: | |
150 row.append(max_weight + 1.0 - v1.neighbors[v2]) | |
151 matrix.append(row) | |
152 | |
153 m = munkres.Munkres() | |
154 indexes = m.compute(matrix) | |
155 for row, column in indexes: | |
156 assignment.append([c1[row], c2[column]]) | |
157 | |
158 return assignment | |
159 | |
160 def random_assignment(c1, c2): | |
161 assignment = [] | |
162 | |
163 ## note, this assumes that graph is complete bipartite | |
164 ## this needs to be fixed | |
165 c1_len = len(c1) | |
166 c2_len = len(c2) | |
167 idx_list = list(range(max(c1_len, c2_len))) | |
168 random.shuffle(idx_list) | |
169 | |
170 if c1_len <= c2_len: | |
171 for i, v1 in enumerate(c1): | |
172 assignment.append([v1, c2[idx_list[i]]]) | |
173 else: | |
174 for i, v1 in enumerate(c2): | |
175 assignment.append([v1, c1[idx_list[i]]]) | |
176 | |
177 return assignment | |
178 | |
179 ################################################################################ | |
180 | |
181 if len(sys.argv) != 4: | |
182 die('Usage') | |
183 | |
184 input, randomizations, output = sys.argv[1:] | |
185 main(input, int(randomizations), output) |