comparison assignment_of_optimal_breeding_pairs.py @ 31:a631c2f6d913

Update to Miller Lab devshed revision 3c4110ffacc3
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:25:27 -0400
parents
children
comparison
equal deleted inserted replaced
30:4188853b940b 31:a631c2f6d913
1 #!/usr/bin/env python2.6
2
3 import sys
4 import munkres
5 import random
6
7 class Vertex(object):
8 def __init__(self, name):
9 self.name = name
10 self.neighbors = {}
11 self.color = 0
12 self.explored = False
13
14 def add_neighbor(self, neighbor, weight=0.0):
15 if neighbor in self.neighbors:
16 if self.neighbors[neighbor] != weight:
17 die('multiple edges not supported')
18 else:
19 self.neighbors[neighbor] = weight
20
21 class Graph(object):
22 def __init__(self):
23 self.vertex_list = {}
24 self.vertices = 0
25 self.max_weight = 0.0
26
27 def add_vertex(self, name):
28 if name not in self.vertex_list:
29 self.vertex_list[name] = Vertex(name)
30 self.vertices += 1
31 return self.vertex_list[name]
32
33 def add_edge(self, name1, name2, weight):
34 vertex1 = self.add_vertex(name1)
35 vertex2 = self.add_vertex(name2)
36 vertex1.add_neighbor(vertex2, weight)
37 vertex2.add_neighbor(vertex1, weight)
38 self.max_weight = max(self.max_weight, weight)
39
40 def from_edge_file(self, filename):
41 fh = try_open(filename)
42 line_number = 0
43 for line in fh:
44 line_number += 1
45 line = line.rstrip('\r\n')
46 elems = line.split()
47 if len(elems) < 3:
48 die('too few columns on line {0} of {1}:\n{2}'.format(line_number, filename, line))
49 name1 = elems[0]
50 name2 = elems[1]
51 weight = float_value(elems[2])
52 if weight is None:
53 die('invalid weight on line {0} of {1}:\n{2}'.format(line_number, filename, line))
54 self.add_edge(name1, name2, weight)
55 fh.close()
56
57 def bipartite_partition(self):
58 vertices_left = self.vertex_list.values()
59
60 while vertices_left:
61 fifo = [vertices_left[0]]
62 while fifo:
63 vertex = fifo.pop(0)
64 if not vertex.explored:
65 vertex.explored = True
66 vertices_left.remove(vertex)
67
68 if vertex.color == 0:
69 vertex.color = 1
70 neighbor_color = 2
71 elif vertex.color == 1:
72 neighbor_color = 2
73 elif vertex.color == 2:
74 neighbor_color = 1
75
76 for neighbor in vertex.neighbors:
77 if neighbor.color == 0:
78 neighbor.color = neighbor_color
79 elif neighbor.color != neighbor_color:
80 return None, None
81 fifo.append(neighbor)
82
83 c1 = []
84 c2 = []
85
86 for vertex in self.vertex_list.values():
87 if vertex.color == 1:
88 c1.append(vertex)
89 elif vertex.color == 2:
90 c2.append(vertex)
91
92 return c1, c2
93
94 def try_open(*args):
95 try:
96 return open(*args)
97 except IOError:
98 die('Failed opening file: {0}'.format(args[0]))
99
100 def float_value(token):
101 try:
102 return float(token)
103 except ValueError:
104 return None
105
106 def die(message):
107 print >> sys.stderr, message
108 sys.exit(1)
109
110 def main(input, randomizations, output):
111 graph = Graph()
112 graph.from_edge_file(input)
113 c1, c2 = graph.bipartite_partition()
114
115 if c1 is None:
116 die('Graph is not bipartite')
117
118 if len(c1) + len(c2) != graph.vertices:
119 die('Bipartite partition failed: {0} + {1} != {2}'.format(len(c1), len(c2), graph.vertices))
120
121 with open(output, 'w') as ofh:
122 a1 = optimal_assignment(c1, c2, graph.max_weight)
123 optimal_total_weight = 0.0
124 for a in a1:
125 optimal_total_weight += a[0].neighbors[a[1]]
126
127 print >> ofh, 'optimal average {0:.3f}'.format(optimal_total_weight / len(a1))
128
129 if randomizations > 0:
130 random_total_count = 0
131 random_total_weight = 0.0
132 for i in range(randomizations):
133 a2 = random_assignment(c1, c2)
134 random_total_count += len(a2)
135 for a in a2:
136 random_total_weight += a[0].neighbors[a[1]]
137 print >> ofh, 'random average {0:.3f}'.format(random_total_weight / random_total_count)
138
139
140 for a in a1:
141 print >> ofh, '\t'.join([a[0].name, a[1].name])
142
143 def optimal_assignment(c1, c2, max_weight):
144 matrix = []
145 assignment = []
146
147 for v1 in c1:
148 row = []
149 for v2 in c2:
150 row.append(max_weight + 1.0 - v1.neighbors[v2])
151 matrix.append(row)
152
153 m = munkres.Munkres()
154 indexes = m.compute(matrix)
155 for row, column in indexes:
156 assignment.append([c1[row], c2[column]])
157
158 return assignment
159
160 def random_assignment(c1, c2):
161 assignment = []
162
163 ## note, this assumes that graph is complete bipartite
164 ## this needs to be fixed
165 c1_len = len(c1)
166 c2_len = len(c2)
167 idx_list = list(range(max(c1_len, c2_len)))
168 random.shuffle(idx_list)
169
170 if c1_len <= c2_len:
171 for i, v1 in enumerate(c1):
172 assignment.append([v1, c2[idx_list[i]]])
173 else:
174 for i, v1 in enumerate(c2):
175 assignment.append([v1, c1[idx_list[i]]])
176
177 return assignment
178
179 ################################################################################
180
181 if len(sys.argv) != 4:
182 die('Usage')
183
184 input, randomizations, output = sys.argv[1:]
185 main(input, int(randomizations), output)