comparison env/lib/python3.9/site-packages/networkx/algorithms/clique.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Functions for finding and manipulating cliques.
2
3 Finding the largest clique in a graph is NP-complete problem, so most of
4 these algorithms have an exponential running time; for more information,
5 see the Wikipedia article on the clique problem [1]_.
6
7 .. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem
8
9 """
10 from collections import deque
11 from itertools import chain
12 from itertools import combinations
13 from itertools import islice
14 import networkx as nx
15 from networkx.utils import not_implemented_for
16
17
18 __all__ = [
19 "find_cliques",
20 "find_cliques_recursive",
21 "make_max_clique_graph",
22 "make_clique_bipartite",
23 "graph_clique_number",
24 "graph_number_of_cliques",
25 "node_clique_number",
26 "number_of_cliques",
27 "cliques_containing_node",
28 "enumerate_all_cliques",
29 "max_weight_clique",
30 ]
31
32
33 @not_implemented_for("directed")
34 def enumerate_all_cliques(G):
35 """Returns all cliques in an undirected graph.
36
37 This function returns an iterator over cliques, each of which is a
38 list of nodes. The iteration is ordered by cardinality of the
39 cliques: first all cliques of size one, then all cliques of size
40 two, etc.
41
42 Parameters
43 ----------
44 G : NetworkX graph
45 An undirected graph.
46
47 Returns
48 -------
49 iterator
50 An iterator over cliques, each of which is a list of nodes in
51 `G`. The cliques are ordered according to size.
52
53 Notes
54 -----
55 To obtain a list of all cliques, use
56 `list(enumerate_all_cliques(G))`. However, be aware that in the
57 worst-case, the length of this list can be exponential in the number
58 of nodes in the graph (for example, when the graph is the complete
59 graph). This function avoids storing all cliques in memory by only
60 keeping current candidate node lists in memory during its search.
61
62 The implementation is adapted from the algorithm by Zhang, et
63 al. (2005) [1]_ to output all cliques discovered.
64
65 This algorithm ignores self-loops and parallel edges, since cliques
66 are not conventionally defined with such edges.
67
68 References
69 ----------
70 .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J.,
71 Langston, M.A., Samatova, N.F.,
72 "Genome-Scale Computational Approaches to Memory-Intensive
73 Applications in Systems Biology".
74 *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005
75 Conference, pp. 12, 12--18 Nov. 2005.
76 <https://doi.org/10.1109/SC.2005.29>.
77
78 """
79 index = {}
80 nbrs = {}
81 for u in G:
82 index[u] = len(index)
83 # Neighbors of u that appear after u in the iteration order of G.
84 nbrs[u] = {v for v in G[u] if v not in index}
85
86 queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G)
87 # Loop invariants:
88 # 1. len(base) is nondecreasing.
89 # 2. (base + cnbrs) is sorted with respect to the iteration order of G.
90 # 3. cnbrs is a set of common neighbors of nodes in base.
91 while queue:
92 base, cnbrs = map(list, queue.popleft())
93 yield base
94 for i, u in enumerate(cnbrs):
95 # Use generators to reduce memory consumption.
96 queue.append(
97 (
98 chain(base, [u]),
99 filter(nbrs[u].__contains__, islice(cnbrs, i + 1, None)),
100 )
101 )
102
103
104 @not_implemented_for("directed")
105 def find_cliques(G):
106 """Returns all maximal cliques in an undirected graph.
107
108 For each node *v*, a *maximal clique for v* is a largest complete
109 subgraph containing *v*. The largest maximal clique is sometimes
110 called the *maximum clique*.
111
112 This function returns an iterator over cliques, each of which is a
113 list of nodes. It is an iterative implementation, so should not
114 suffer from recursion depth issues.
115
116 Parameters
117 ----------
118 G : NetworkX graph
119 An undirected graph.
120
121 Returns
122 -------
123 iterator
124 An iterator over maximal cliques, each of which is a list of
125 nodes in `G`. The order of cliques is arbitrary.
126
127 See Also
128 --------
129 find_cliques_recursive
130 A recursive version of the same algorithm.
131
132 Notes
133 -----
134 To obtain a list of all maximal cliques, use
135 `list(find_cliques(G))`. However, be aware that in the worst-case,
136 the length of this list can be exponential in the number of nodes in
137 the graph. This function avoids storing all cliques in memory by
138 only keeping current candidate node lists in memory during its search.
139
140 This implementation is based on the algorithm published by Bron and
141 Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
142 (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It
143 essentially unrolls the recursion used in the references to avoid
144 issues of recursion stack depth (for a recursive implementation, see
145 :func:`find_cliques_recursive`).
146
147 This algorithm ignores self-loops and parallel edges, since cliques
148 are not conventionally defined with such edges.
149
150 References
151 ----------
152 .. [1] Bron, C. and Kerbosch, J.
153 "Algorithm 457: finding all cliques of an undirected graph".
154 *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
155 <http://portal.acm.org/citation.cfm?doid=362342.362367>
156
157 .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
158 "The worst-case time complexity for generating all maximal
159 cliques and computational experiments",
160 *Theoretical Computer Science*, Volume 363, Issue 1,
161 Computing and Combinatorics,
162 10th Annual International Conference on
163 Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
164 <https://doi.org/10.1016/j.tcs.2006.06.015>
165
166 .. [3] F. Cazals, C. Karande,
167 "A note on the problem of reporting maximal cliques",
168 *Theoretical Computer Science*,
169 Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
170 <https://doi.org/10.1016/j.tcs.2008.05.010>
171
172 """
173 if len(G) == 0:
174 return
175
176 adj = {u: {v for v in G[u] if v != u} for u in G}
177 Q = [None]
178
179 subg = set(G)
180 cand = set(G)
181 u = max(subg, key=lambda u: len(cand & adj[u]))
182 ext_u = cand - adj[u]
183 stack = []
184
185 try:
186 while True:
187 if ext_u:
188 q = ext_u.pop()
189 cand.remove(q)
190 Q[-1] = q
191 adj_q = adj[q]
192 subg_q = subg & adj_q
193 if not subg_q:
194 yield Q[:]
195 else:
196 cand_q = cand & adj_q
197 if cand_q:
198 stack.append((subg, cand, ext_u))
199 Q.append(None)
200 subg = subg_q
201 cand = cand_q
202 u = max(subg, key=lambda u: len(cand & adj[u]))
203 ext_u = cand - adj[u]
204 else:
205 Q.pop()
206 subg, cand, ext_u = stack.pop()
207 except IndexError:
208 pass
209
210
211 # TODO Should this also be not implemented for directed graphs?
212 def find_cliques_recursive(G):
213 """Returns all maximal cliques in a graph.
214
215 For each node *v*, a *maximal clique for v* is a largest complete
216 subgraph containing *v*. The largest maximal clique is sometimes
217 called the *maximum clique*.
218
219 This function returns an iterator over cliques, each of which is a
220 list of nodes. It is a recursive implementation, so may suffer from
221 recursion depth issues.
222
223 Parameters
224 ----------
225 G : NetworkX graph
226
227 Returns
228 -------
229 iterator
230 An iterator over maximal cliques, each of which is a list of
231 nodes in `G`. The order of cliques is arbitrary.
232
233 See Also
234 --------
235 find_cliques
236 An iterative version of the same algorithm.
237
238 Notes
239 -----
240 To obtain a list of all maximal cliques, use
241 `list(find_cliques_recursive(G))`. However, be aware that in the
242 worst-case, the length of this list can be exponential in the number
243 of nodes in the graph. This function avoids storing all cliques in memory
244 by only keeping current candidate node lists in memory during its search.
245
246 This implementation is based on the algorithm published by Bron and
247 Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
248 (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a
249 non-recursive implementation, see :func:`find_cliques`.
250
251 This algorithm ignores self-loops and parallel edges, since cliques
252 are not conventionally defined with such edges.
253
254 References
255 ----------
256 .. [1] Bron, C. and Kerbosch, J.
257 "Algorithm 457: finding all cliques of an undirected graph".
258 *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
259 <http://portal.acm.org/citation.cfm?doid=362342.362367>
260
261 .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
262 "The worst-case time complexity for generating all maximal
263 cliques and computational experiments",
264 *Theoretical Computer Science*, Volume 363, Issue 1,
265 Computing and Combinatorics,
266 10th Annual International Conference on
267 Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
268 <https://doi.org/10.1016/j.tcs.2006.06.015>
269
270 .. [3] F. Cazals, C. Karande,
271 "A note on the problem of reporting maximal cliques",
272 *Theoretical Computer Science*,
273 Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
274 <https://doi.org/10.1016/j.tcs.2008.05.010>
275
276 """
277 if len(G) == 0:
278 return iter([])
279
280 adj = {u: {v for v in G[u] if v != u} for u in G}
281 Q = []
282
283 def expand(subg, cand):
284 u = max(subg, key=lambda u: len(cand & adj[u]))
285 for q in cand - adj[u]:
286 cand.remove(q)
287 Q.append(q)
288 adj_q = adj[q]
289 subg_q = subg & adj_q
290 if not subg_q:
291 yield Q[:]
292 else:
293 cand_q = cand & adj_q
294 if cand_q:
295 yield from expand(subg_q, cand_q)
296 Q.pop()
297
298 return expand(set(G), set(G))
299
300
301 def make_max_clique_graph(G, create_using=None):
302 """Returns the maximal clique graph of the given graph.
303
304 The nodes of the maximal clique graph of `G` are the cliques of
305 `G` and an edge joins two cliques if the cliques are not disjoint.
306
307 Parameters
308 ----------
309 G : NetworkX graph
310
311 create_using : NetworkX graph constructor, optional (default=nx.Graph)
312 Graph type to create. If graph instance, then cleared before populated.
313
314 Returns
315 -------
316 NetworkX graph
317 A graph whose nodes are the cliques of `G` and whose edges
318 join two cliques if they are not disjoint.
319
320 Notes
321 -----
322 This function behaves like the following code::
323
324 import networkx as nx
325 G = nx.make_clique_bipartite(G)
326 cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0]
327 G = nx.bipartite.project(G, cliques)
328 G = nx.relabel_nodes(G, {-v: v - 1 for v in G})
329
330 It should be faster, though, since it skips all the intermediate
331 steps.
332
333 """
334 if create_using is None:
335 B = G.__class__()
336 else:
337 B = nx.empty_graph(0, create_using)
338 cliques = list(enumerate(set(c) for c in find_cliques(G)))
339 # Add a numbered node for each clique.
340 B.add_nodes_from(i for i, c in cliques)
341 # Join cliques by an edge if they share a node.
342 clique_pairs = combinations(cliques, 2)
343 B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2)
344 return B
345
346
347 def make_clique_bipartite(G, fpos=None, create_using=None, name=None):
348 """Returns the bipartite clique graph corresponding to `G`.
349
350 In the returned bipartite graph, the "bottom" nodes are the nodes of
351 `G` and the "top" nodes represent the maximal cliques of `G`.
352 There is an edge from node *v* to clique *C* in the returned graph
353 if and only if *v* is an element of *C*.
354
355 Parameters
356 ----------
357 G : NetworkX graph
358 An undirected graph.
359
360 fpos : bool
361 If True or not None, the returned graph will have an
362 additional attribute, `pos`, a dictionary mapping node to
363 position in the Euclidean plane.
364
365 create_using : NetworkX graph constructor, optional (default=nx.Graph)
366 Graph type to create. If graph instance, then cleared before populated.
367
368 Returns
369 -------
370 NetworkX graph
371 A bipartite graph whose "bottom" set is the nodes of the graph
372 `G`, whose "top" set is the cliques of `G`, and whose edges
373 join nodes of `G` to the cliques that contain them.
374
375 The nodes of the graph `G` have the node attribute
376 'bipartite' set to 1 and the nodes representing cliques
377 have the node attribute 'bipartite' set to 0, as is the
378 convention for bipartite graphs in NetworkX.
379
380 """
381 B = nx.empty_graph(0, create_using)
382 B.clear()
383 # The "bottom" nodes in the bipartite graph are the nodes of the
384 # original graph, G.
385 B.add_nodes_from(G, bipartite=1)
386 for i, cl in enumerate(find_cliques(G)):
387 # The "top" nodes in the bipartite graph are the cliques. These
388 # nodes get negative numbers as labels.
389 name = -i - 1
390 B.add_node(name, bipartite=0)
391 B.add_edges_from((v, name) for v in cl)
392 return B
393
394
395 def graph_clique_number(G, cliques=None):
396 """Returns the clique number of the graph.
397
398 The *clique number* of a graph is the size of the largest clique in
399 the graph.
400
401 Parameters
402 ----------
403 G : NetworkX graph
404 An undirected graph.
405
406 cliques : list
407 A list of cliques, each of which is itself a list of nodes. If
408 not specified, the list of all cliques will be computed, as by
409 :func:`find_cliques`.
410
411 Returns
412 -------
413 int
414 The size of the largest clique in `G`.
415
416 Notes
417 -----
418 You should provide `cliques` if you have already computed the list
419 of maximal cliques, in order to avoid an exponential time search for
420 maximal cliques.
421
422 """
423 if cliques is None:
424 cliques = find_cliques(G)
425 if len(G.nodes) < 1:
426 return 0
427 return max([len(c) for c in cliques] or [1])
428
429
430 def graph_number_of_cliques(G, cliques=None):
431 """Returns the number of maximal cliques in the graph.
432
433 Parameters
434 ----------
435 G : NetworkX graph
436 An undirected graph.
437
438 cliques : list
439 A list of cliques, each of which is itself a list of nodes. If
440 not specified, the list of all cliques will be computed, as by
441 :func:`find_cliques`.
442
443 Returns
444 -------
445 int
446 The number of maximal cliques in `G`.
447
448 Notes
449 -----
450 You should provide `cliques` if you have already computed the list
451 of maximal cliques, in order to avoid an exponential time search for
452 maximal cliques.
453
454 """
455 if cliques is None:
456 cliques = list(find_cliques(G))
457 return len(cliques)
458
459
460 def node_clique_number(G, nodes=None, cliques=None):
461 """ Returns the size of the largest maximal clique containing
462 each given node.
463
464 Returns a single or list depending on input nodes.
465 Optional list of cliques can be input if already computed.
466 """
467 if cliques is None:
468 if nodes is not None:
469 # Use ego_graph to decrease size of graph
470 if isinstance(nodes, list):
471 d = {}
472 for n in nodes:
473 H = nx.ego_graph(G, n)
474 d[n] = max(len(c) for c in find_cliques(H))
475 else:
476 H = nx.ego_graph(G, nodes)
477 d = max(len(c) for c in find_cliques(H))
478 return d
479 # nodes is None--find all cliques
480 cliques = list(find_cliques(G))
481
482 if nodes is None:
483 nodes = list(G.nodes()) # none, get entire graph
484
485 if not isinstance(nodes, list): # check for a list
486 v = nodes
487 # assume it is a single value
488 d = max([len(c) for c in cliques if v in c])
489 else:
490 d = {}
491 for v in nodes:
492 d[v] = max([len(c) for c in cliques if v in c])
493 return d
494
495 # if nodes is None: # none, use entire graph
496 # nodes=G.nodes()
497 # elif not isinstance(nodes, list): # check for a list
498 # nodes=[nodes] # assume it is a single value
499
500 # if cliques is None:
501 # cliques=list(find_cliques(G))
502 # d={}
503 # for v in nodes:
504 # d[v]=max([len(c) for c in cliques if v in c])
505
506 # if nodes in G:
507 # return d[v] #return single value
508 # return d
509
510
511 def number_of_cliques(G, nodes=None, cliques=None):
512 """Returns the number of maximal cliques for each node.
513
514 Returns a single or list depending on input nodes.
515 Optional list of cliques can be input if already computed.
516 """
517 if cliques is None:
518 cliques = list(find_cliques(G))
519
520 if nodes is None:
521 nodes = list(G.nodes()) # none, get entire graph
522
523 if not isinstance(nodes, list): # check for a list
524 v = nodes
525 # assume it is a single value
526 numcliq = len([1 for c in cliques if v in c])
527 else:
528 numcliq = {}
529 for v in nodes:
530 numcliq[v] = len([1 for c in cliques if v in c])
531 return numcliq
532
533
534 def cliques_containing_node(G, nodes=None, cliques=None):
535 """Returns a list of cliques containing the given node.
536
537 Returns a single list or list of lists depending on input nodes.
538 Optional list of cliques can be input if already computed.
539 """
540 if cliques is None:
541 cliques = list(find_cliques(G))
542
543 if nodes is None:
544 nodes = list(G.nodes()) # none, get entire graph
545
546 if not isinstance(nodes, list): # check for a list
547 v = nodes
548 # assume it is a single value
549 vcliques = [c for c in cliques if v in c]
550 else:
551 vcliques = {}
552 for v in nodes:
553 vcliques[v] = [c for c in cliques if v in c]
554 return vcliques
555
556
557 class MaxWeightClique(object):
558 """A class for the maximum weight clique algorithm.
559
560 This class is a helper for the `max_weight_clique` function. The class
561 should not normally be used directly.
562
563 Parameters
564 ----------
565 G : NetworkX graph
566 The undirected graph for which a maximum weight clique is sought
567 weight : string or None, optional (default='weight')
568 The node attribute that holds the integer value used as a weight.
569 If None, then each node has weight 1.
570
571 Attributes
572 ----------
573 G : NetworkX graph
574 The undirected graph for which a maximum weight clique is sought
575 node_weights: dict
576 The weight of each node
577 incumbent_nodes : list
578 The nodes of the incumbent clique (the best clique found so far)
579 incumbent_weight: int
580 The weight of the incumbent clique
581 """
582
583 def __init__(self, G, weight):
584 self.G = G
585 self.incumbent_nodes = []
586 self.incumbent_weight = 0
587
588 if weight is None:
589 self.node_weights = {v: 1 for v in G.nodes()}
590 else:
591 for v in G.nodes():
592 if weight not in G.nodes[v]:
593 err = "Node {} does not have the requested weight field."
594 raise KeyError(err.format(v))
595 if not isinstance(G.nodes[v][weight], int):
596 err = "The '{}' field of node {} is not an integer."
597 raise ValueError(err.format(weight, v))
598 self.node_weights = {v: G.nodes[v][weight] for v in G.nodes()}
599
600 def update_incumbent_if_improved(self, C, C_weight):
601 """Update the incumbent if the node set C has greater weight.
602
603 C is assumed to be a clique.
604 """
605 if C_weight > self.incumbent_weight:
606 self.incumbent_nodes = C[:]
607 self.incumbent_weight = C_weight
608
609 def greedily_find_independent_set(self, P):
610 """Greedily find an independent set of nodes from a set of
611 nodes P."""
612 independent_set = []
613 P = P[:]
614 while P:
615 v = P[0]
616 independent_set.append(v)
617 P = [w for w in P if v != w and not self.G.has_edge(v, w)]
618 return independent_set
619
620 def find_branching_nodes(self, P, target):
621 """Find a set of nodes to branch on."""
622 residual_wt = {v: self.node_weights[v] for v in P}
623 total_wt = 0
624 P = P[:]
625 while P:
626 independent_set = self.greedily_find_independent_set(P)
627 min_wt_in_class = min(residual_wt[v] for v in independent_set)
628 total_wt += min_wt_in_class
629 if total_wt > target:
630 break
631 for v in independent_set:
632 residual_wt[v] -= min_wt_in_class
633 P = [v for v in P if residual_wt[v] != 0]
634 return P
635
636 def expand(self, C, C_weight, P):
637 """Look for the best clique that contains all the nodes in C and zero or
638 more of the nodes in P, backtracking if it can be shown that no such
639 clique has greater weight than the incumbent.
640 """
641 self.update_incumbent_if_improved(C, C_weight)
642 branching_nodes = self.find_branching_nodes(P, self.incumbent_weight - C_weight)
643 while branching_nodes:
644 v = branching_nodes.pop()
645 P.remove(v)
646 new_C = C + [v]
647 new_C_weight = C_weight + self.node_weights[v]
648 new_P = [w for w in P if self.G.has_edge(v, w)]
649 self.expand(new_C, new_C_weight, new_P)
650
651 def find_max_weight_clique(self):
652 """Find a maximum weight clique."""
653 # Sort nodes in reverse order of degree for speed
654 nodes = sorted(self.G.nodes(), key=lambda v: self.G.degree(v), reverse=True)
655 nodes = [v for v in nodes if self.node_weights[v] > 0]
656 self.expand([], 0, nodes)
657
658
659 @not_implemented_for("directed")
660 def max_weight_clique(G, weight="weight"):
661 """Find a maximum weight clique in G.
662
663 A *clique* in a graph is a set of nodes such that every two distinct nodes
664 are adjacent. The *weight* of a clique is the sum of the weights of its
665 nodes. A *maximum weight clique* of graph G is a clique C in G such that
666 no clique in G has weight greater than the weight of C.
667
668 Parameters
669 ----------
670 G : NetworkX graph
671 Undirected graph
672 weight : string or None, optional (default='weight')
673 The node attribute that holds the integer value used as a weight.
674 If None, then each node has weight 1.
675
676 Returns
677 -------
678 clique : list
679 the nodes of a maximum weight clique
680 weight : int
681 the weight of a maximum weight clique
682
683 Notes
684 -----
685 The implementation is recursive, and therefore it may run into recursion
686 depth issues if G contains a clique whose number of nodes is close to the
687 recursion depth limit.
688
689 At each search node, the algorithm greedily constructs a weighted
690 independent set cover of part of the graph in order to find a small set of
691 nodes on which to branch. The algorithm is very similar to the algorithm
692 of Tavares et al. [1]_, other than the fact that the NetworkX version does
693 not use bitsets. This style of algorithm for maximum weight clique (and
694 maximum weight independent set, which is the same problem but on the
695 complement graph) has a decades-long history. See Algorithm B of Warren
696 and Hicks [2]_ and the references in that paper.
697
698 References
699 ----------
700 .. [1] Tavares, W.A., Neto, M.B.C., Rodrigues, C.D., Michelon, P.: Um
701 algoritmo de branch and bound para o problema da clique máxima
702 ponderada. Proceedings of XLVII SBPO 1 (2015).
703
704 .. [2] Warrent, Jeffrey S, Hicks, Illya V.: Combinatorial Branch-and-Bound
705 for the Maximum Weight Independent Set Problem. Technical Report,
706 Texas A&M University (2016).
707 """
708
709 mwc = MaxWeightClique(G, weight)
710 mwc.find_max_weight_clique()
711 return mwc.incumbent_nodes, mwc.incumbent_weight