Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/community/centrality.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 """Functions for computing communities based on centrality notions.""" | |
| 2 | |
| 3 import networkx as nx | |
| 4 | |
| 5 __all__ = ["girvan_newman"] | |
| 6 | |
| 7 | |
| 8 def girvan_newman(G, most_valuable_edge=None): | |
| 9 """Finds communities in a graph using the Girvan–Newman method. | |
| 10 | |
| 11 Parameters | |
| 12 ---------- | |
| 13 G : NetworkX graph | |
| 14 | |
| 15 most_valuable_edge : function | |
| 16 Function that takes a graph as input and outputs an edge. The | |
| 17 edge returned by this function will be recomputed and removed at | |
| 18 each iteration of the algorithm. | |
| 19 | |
| 20 If not specified, the edge with the highest | |
| 21 :func:`networkx.edge_betweenness_centrality` will be used. | |
| 22 | |
| 23 Returns | |
| 24 ------- | |
| 25 iterator | |
| 26 Iterator over tuples of sets of nodes in `G`. Each set of node | |
| 27 is a community, each tuple is a sequence of communities at a | |
| 28 particular level of the algorithm. | |
| 29 | |
| 30 Examples | |
| 31 -------- | |
| 32 To get the first pair of communities:: | |
| 33 | |
| 34 >>> G = nx.path_graph(10) | |
| 35 >>> comp = girvan_newman(G) | |
| 36 >>> tuple(sorted(c) for c in next(comp)) | |
| 37 ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) | |
| 38 | |
| 39 To get only the first *k* tuples of communities, use | |
| 40 :func:`itertools.islice`:: | |
| 41 | |
| 42 >>> import itertools | |
| 43 >>> G = nx.path_graph(8) | |
| 44 >>> k = 2 | |
| 45 >>> comp = girvan_newman(G) | |
| 46 >>> for communities in itertools.islice(comp, k): | |
| 47 ... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP | |
| 48 ... | |
| 49 ([0, 1, 2, 3], [4, 5, 6, 7]) | |
| 50 ([0, 1], [2, 3], [4, 5, 6, 7]) | |
| 51 | |
| 52 To stop getting tuples of communities once the number of communities | |
| 53 is greater than *k*, use :func:`itertools.takewhile`:: | |
| 54 | |
| 55 >>> import itertools | |
| 56 >>> G = nx.path_graph(8) | |
| 57 >>> k = 4 | |
| 58 >>> comp = girvan_newman(G) | |
| 59 >>> limited = itertools.takewhile(lambda c: len(c) <= k, comp) | |
| 60 >>> for communities in limited: | |
| 61 ... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP | |
| 62 ... | |
| 63 ([0, 1, 2, 3], [4, 5, 6, 7]) | |
| 64 ([0, 1], [2, 3], [4, 5, 6, 7]) | |
| 65 ([0, 1], [2, 3], [4, 5], [6, 7]) | |
| 66 | |
| 67 To just choose an edge to remove based on the weight:: | |
| 68 | |
| 69 >>> from operator import itemgetter | |
| 70 >>> G = nx.path_graph(10) | |
| 71 >>> edges = G.edges() | |
| 72 >>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, "weight") | |
| 73 >>> def heaviest(G): | |
| 74 ... u, v, w = max(G.edges(data="weight"), key=itemgetter(2)) | |
| 75 ... return (u, v) | |
| 76 ... | |
| 77 >>> comp = girvan_newman(G, most_valuable_edge=heaviest) | |
| 78 >>> tuple(sorted(c) for c in next(comp)) | |
| 79 ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9]) | |
| 80 | |
| 81 To utilize edge weights when choosing an edge with, for example, the | |
| 82 highest betweenness centrality:: | |
| 83 | |
| 84 >>> from networkx import edge_betweenness_centrality as betweenness | |
| 85 >>> def most_central_edge(G): | |
| 86 ... centrality = betweenness(G, weight="weight") | |
| 87 ... return max(centrality, key=centrality.get) | |
| 88 ... | |
| 89 >>> G = nx.path_graph(10) | |
| 90 >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge) | |
| 91 >>> tuple(sorted(c) for c in next(comp)) | |
| 92 ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) | |
| 93 | |
| 94 To specify a different ranking algorithm for edges, use the | |
| 95 `most_valuable_edge` keyword argument:: | |
| 96 | |
| 97 >>> from networkx import edge_betweenness_centrality | |
| 98 >>> from random import random | |
| 99 >>> def most_central_edge(G): | |
| 100 ... centrality = edge_betweenness_centrality(G) | |
| 101 ... max_cent = max(centrality.values()) | |
| 102 ... # Scale the centrality values so they are between 0 and 1, | |
| 103 ... # and add some random noise. | |
| 104 ... centrality = {e: c / max_cent for e, c in centrality.items()} | |
| 105 ... # Add some random noise. | |
| 106 ... centrality = {e: c + random() for e, c in centrality.items()} | |
| 107 ... return max(centrality, key=centrality.get) | |
| 108 ... | |
| 109 >>> G = nx.path_graph(10) | |
| 110 >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge) | |
| 111 | |
| 112 Notes | |
| 113 ----- | |
| 114 The Girvan–Newman algorithm detects communities by progressively | |
| 115 removing edges from the original graph. The algorithm removes the | |
| 116 "most valuable" edge, traditionally the edge with the highest | |
| 117 betweenness centrality, at each step. As the graph breaks down into | |
| 118 pieces, the tightly knit community structure is exposed and the | |
| 119 result can be depicted as a dendrogram. | |
| 120 | |
| 121 """ | |
| 122 # If the graph is already empty, simply return its connected | |
| 123 # components. | |
| 124 if G.number_of_edges() == 0: | |
| 125 yield tuple(nx.connected_components(G)) | |
| 126 return | |
| 127 # If no function is provided for computing the most valuable edge, | |
| 128 # use the edge betweenness centrality. | |
| 129 if most_valuable_edge is None: | |
| 130 | |
| 131 def most_valuable_edge(G): | |
| 132 """Returns the edge with the highest betweenness centrality | |
| 133 in the graph `G`. | |
| 134 | |
| 135 """ | |
| 136 # We have guaranteed that the graph is non-empty, so this | |
| 137 # dictionary will never be empty. | |
| 138 betweenness = nx.edge_betweenness_centrality(G) | |
| 139 return max(betweenness, key=betweenness.get) | |
| 140 | |
| 141 # The copy of G here must include the edge weight data. | |
| 142 g = G.copy().to_undirected() | |
| 143 # Self-loops must be removed because their removal has no effect on | |
| 144 # the connected components of the graph. | |
| 145 g.remove_edges_from(nx.selfloop_edges(g)) | |
| 146 while g.number_of_edges() > 0: | |
| 147 yield _without_most_central_edges(g, most_valuable_edge) | |
| 148 | |
| 149 | |
| 150 def _without_most_central_edges(G, most_valuable_edge): | |
| 151 """Returns the connected components of the graph that results from | |
| 152 repeatedly removing the most "valuable" edge in the graph. | |
| 153 | |
| 154 `G` must be a non-empty graph. This function modifies the graph `G` | |
| 155 in-place; that is, it removes edges on the graph `G`. | |
| 156 | |
| 157 `most_valuable_edge` is a function that takes the graph `G` as input | |
| 158 (or a subgraph with one or more edges of `G` removed) and returns an | |
| 159 edge. That edge will be removed and this process will be repeated | |
| 160 until the number of connected components in the graph increases. | |
| 161 | |
| 162 """ | |
| 163 original_num_components = nx.number_connected_components(G) | |
| 164 num_new_components = original_num_components | |
| 165 while num_new_components <= original_num_components: | |
| 166 edge = most_valuable_edge(G) | |
| 167 G.remove_edge(*edge) | |
| 168 new_components = tuple(nx.connected_components(G)) | |
| 169 num_new_components = len(new_components) | |
| 170 return new_components |
