Mercurial > repos > miller-lab > genome_diversity
diff assignment_of_optimal_breeding_pairs.py @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/assignment_of_optimal_breeding_pairs.py Fri Sep 20 13:25:27 2013 -0400 @@ -0,0 +1,185 @@ +#!/usr/bin/env python2.6 + +import sys +import munkres +import random + +class Vertex(object): + def __init__(self, name): + self.name = name + self.neighbors = {} + self.color = 0 + self.explored = False + + def add_neighbor(self, neighbor, weight=0.0): + if neighbor in self.neighbors: + if self.neighbors[neighbor] != weight: + die('multiple edges not supported') + else: + self.neighbors[neighbor] = weight + +class Graph(object): + def __init__(self): + self.vertex_list = {} + self.vertices = 0 + self.max_weight = 0.0 + + def add_vertex(self, name): + if name not in self.vertex_list: + self.vertex_list[name] = Vertex(name) + self.vertices += 1 + return self.vertex_list[name] + + def add_edge(self, name1, name2, weight): + vertex1 = self.add_vertex(name1) + vertex2 = self.add_vertex(name2) + vertex1.add_neighbor(vertex2, weight) + vertex2.add_neighbor(vertex1, weight) + self.max_weight = max(self.max_weight, weight) + + def from_edge_file(self, filename): + fh = try_open(filename) + line_number = 0 + for line in fh: + line_number += 1 + line = line.rstrip('\r\n') + elems = line.split() + if len(elems) < 3: + die('too few columns on line {0} of {1}:\n{2}'.format(line_number, filename, line)) + name1 = elems[0] + name2 = elems[1] + weight = float_value(elems[2]) + if weight is None: + die('invalid weight on line {0} of {1}:\n{2}'.format(line_number, filename, line)) + self.add_edge(name1, name2, weight) + fh.close() + + def bipartite_partition(self): + vertices_left = self.vertex_list.values() + + while vertices_left: + fifo = [vertices_left[0]] + while fifo: + vertex = fifo.pop(0) + if not vertex.explored: + vertex.explored = True + vertices_left.remove(vertex) + + if vertex.color == 0: + vertex.color = 1 + neighbor_color = 2 + elif vertex.color == 1: + neighbor_color = 2 + elif vertex.color == 2: + neighbor_color = 1 + + for neighbor in vertex.neighbors: + if neighbor.color == 0: + neighbor.color = neighbor_color + elif neighbor.color != neighbor_color: + return None, None + fifo.append(neighbor) + + c1 = [] + c2 = [] + + for vertex in self.vertex_list.values(): + if vertex.color == 1: + c1.append(vertex) + elif vertex.color == 2: + c2.append(vertex) + + return c1, c2 + +def try_open(*args): + try: + return open(*args) + except IOError: + die('Failed opening file: {0}'.format(args[0])) + +def float_value(token): + try: + return float(token) + except ValueError: + return None + +def die(message): + print >> sys.stderr, message + sys.exit(1) + +def main(input, randomizations, output): + graph = Graph() + graph.from_edge_file(input) + c1, c2 = graph.bipartite_partition() + + if c1 is None: + die('Graph is not bipartite') + + if len(c1) + len(c2) != graph.vertices: + die('Bipartite partition failed: {0} + {1} != {2}'.format(len(c1), len(c2), graph.vertices)) + + with open(output, 'w') as ofh: + a1 = optimal_assignment(c1, c2, graph.max_weight) + optimal_total_weight = 0.0 + for a in a1: + optimal_total_weight += a[0].neighbors[a[1]] + + print >> ofh, 'optimal average {0:.3f}'.format(optimal_total_weight / len(a1)) + + if randomizations > 0: + random_total_count = 0 + random_total_weight = 0.0 + for i in range(randomizations): + a2 = random_assignment(c1, c2) + random_total_count += len(a2) + for a in a2: + random_total_weight += a[0].neighbors[a[1]] + print >> ofh, 'random average {0:.3f}'.format(random_total_weight / random_total_count) + + + for a in a1: + print >> ofh, '\t'.join([a[0].name, a[1].name]) + +def optimal_assignment(c1, c2, max_weight): + matrix = [] + assignment = [] + + for v1 in c1: + row = [] + for v2 in c2: + row.append(max_weight + 1.0 - v1.neighbors[v2]) + matrix.append(row) + + m = munkres.Munkres() + indexes = m.compute(matrix) + for row, column in indexes: + assignment.append([c1[row], c2[column]]) + + return assignment + +def random_assignment(c1, c2): + assignment = [] + + ## note, this assumes that graph is complete bipartite + ## this needs to be fixed + c1_len = len(c1) + c2_len = len(c2) + idx_list = list(range(max(c1_len, c2_len))) + random.shuffle(idx_list) + + if c1_len <= c2_len: + for i, v1 in enumerate(c1): + assignment.append([v1, c2[idx_list[i]]]) + else: + for i, v1 in enumerate(c2): + assignment.append([v1, c1[idx_list[i]]]) + + return assignment + +################################################################################ + +if len(sys.argv) != 4: + die('Usage') + +input, randomizations, output = sys.argv[1:] +main(input, int(randomizations), output)