comparison env/lib/python3.9/site-packages/networkx/algorithms/community/centrality.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Functions for computing communities based on centrality notions."""
2
3 import networkx as nx
4
5 __all__ = ["girvan_newman"]
6
7
8 def girvan_newman(G, most_valuable_edge=None):
9 """Finds communities in a graph using the Girvan–Newman method.
10
11 Parameters
12 ----------
13 G : NetworkX graph
14
15 most_valuable_edge : function
16 Function that takes a graph as input and outputs an edge. The
17 edge returned by this function will be recomputed and removed at
18 each iteration of the algorithm.
19
20 If not specified, the edge with the highest
21 :func:`networkx.edge_betweenness_centrality` will be used.
22
23 Returns
24 -------
25 iterator
26 Iterator over tuples of sets of nodes in `G`. Each set of node
27 is a community, each tuple is a sequence of communities at a
28 particular level of the algorithm.
29
30 Examples
31 --------
32 To get the first pair of communities::
33
34 >>> G = nx.path_graph(10)
35 >>> comp = girvan_newman(G)
36 >>> tuple(sorted(c) for c in next(comp))
37 ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
38
39 To get only the first *k* tuples of communities, use
40 :func:`itertools.islice`::
41
42 >>> import itertools
43 >>> G = nx.path_graph(8)
44 >>> k = 2
45 >>> comp = girvan_newman(G)
46 >>> for communities in itertools.islice(comp, k):
47 ... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
48 ...
49 ([0, 1, 2, 3], [4, 5, 6, 7])
50 ([0, 1], [2, 3], [4, 5, 6, 7])
51
52 To stop getting tuples of communities once the number of communities
53 is greater than *k*, use :func:`itertools.takewhile`::
54
55 >>> import itertools
56 >>> G = nx.path_graph(8)
57 >>> k = 4
58 >>> comp = girvan_newman(G)
59 >>> limited = itertools.takewhile(lambda c: len(c) <= k, comp)
60 >>> for communities in limited:
61 ... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
62 ...
63 ([0, 1, 2, 3], [4, 5, 6, 7])
64 ([0, 1], [2, 3], [4, 5, 6, 7])
65 ([0, 1], [2, 3], [4, 5], [6, 7])
66
67 To just choose an edge to remove based on the weight::
68
69 >>> from operator import itemgetter
70 >>> G = nx.path_graph(10)
71 >>> edges = G.edges()
72 >>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, "weight")
73 >>> def heaviest(G):
74 ... u, v, w = max(G.edges(data="weight"), key=itemgetter(2))
75 ... return (u, v)
76 ...
77 >>> comp = girvan_newman(G, most_valuable_edge=heaviest)
78 >>> tuple(sorted(c) for c in next(comp))
79 ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9])
80
81 To utilize edge weights when choosing an edge with, for example, the
82 highest betweenness centrality::
83
84 >>> from networkx import edge_betweenness_centrality as betweenness
85 >>> def most_central_edge(G):
86 ... centrality = betweenness(G, weight="weight")
87 ... return max(centrality, key=centrality.get)
88 ...
89 >>> G = nx.path_graph(10)
90 >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
91 >>> tuple(sorted(c) for c in next(comp))
92 ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
93
94 To specify a different ranking algorithm for edges, use the
95 `most_valuable_edge` keyword argument::
96
97 >>> from networkx import edge_betweenness_centrality
98 >>> from random import random
99 >>> def most_central_edge(G):
100 ... centrality = edge_betweenness_centrality(G)
101 ... max_cent = max(centrality.values())
102 ... # Scale the centrality values so they are between 0 and 1,
103 ... # and add some random noise.
104 ... centrality = {e: c / max_cent for e, c in centrality.items()}
105 ... # Add some random noise.
106 ... centrality = {e: c + random() for e, c in centrality.items()}
107 ... return max(centrality, key=centrality.get)
108 ...
109 >>> G = nx.path_graph(10)
110 >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
111
112 Notes
113 -----
114 The Girvan–Newman algorithm detects communities by progressively
115 removing edges from the original graph. The algorithm removes the
116 "most valuable" edge, traditionally the edge with the highest
117 betweenness centrality, at each step. As the graph breaks down into
118 pieces, the tightly knit community structure is exposed and the
119 result can be depicted as a dendrogram.
120
121 """
122 # If the graph is already empty, simply return its connected
123 # components.
124 if G.number_of_edges() == 0:
125 yield tuple(nx.connected_components(G))
126 return
127 # If no function is provided for computing the most valuable edge,
128 # use the edge betweenness centrality.
129 if most_valuable_edge is None:
130
131 def most_valuable_edge(G):
132 """Returns the edge with the highest betweenness centrality
133 in the graph `G`.
134
135 """
136 # We have guaranteed that the graph is non-empty, so this
137 # dictionary will never be empty.
138 betweenness = nx.edge_betweenness_centrality(G)
139 return max(betweenness, key=betweenness.get)
140
141 # The copy of G here must include the edge weight data.
142 g = G.copy().to_undirected()
143 # Self-loops must be removed because their removal has no effect on
144 # the connected components of the graph.
145 g.remove_edges_from(nx.selfloop_edges(g))
146 while g.number_of_edges() > 0:
147 yield _without_most_central_edges(g, most_valuable_edge)
148
149
150 def _without_most_central_edges(G, most_valuable_edge):
151 """Returns the connected components of the graph that results from
152 repeatedly removing the most "valuable" edge in the graph.
153
154 `G` must be a non-empty graph. This function modifies the graph `G`
155 in-place; that is, it removes edges on the graph `G`.
156
157 `most_valuable_edge` is a function that takes the graph `G` as input
158 (or a subgraph with one or more edges of `G` removed) and returns an
159 edge. That edge will be removed and this process will be repeated
160 until the number of connected components in the graph increases.
161
162 """
163 original_num_components = nx.number_connected_components(G)
164 num_new_components = original_num_components
165 while num_new_components <= original_num_components:
166 edge = most_valuable_edge(G)
167 G.remove_edge(*edge)
168 new_components = tuple(nx.connected_components(G))
169 num_new_components = len(new_components)
170 return new_components