Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/networkx/algorithms/clique.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/clique.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,711 @@ +"""Functions for finding and manipulating cliques. + +Finding the largest clique in a graph is NP-complete problem, so most of +these algorithms have an exponential running time; for more information, +see the Wikipedia article on the clique problem [1]_. + +.. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem + +""" +from collections import deque +from itertools import chain +from itertools import combinations +from itertools import islice +import networkx as nx +from networkx.utils import not_implemented_for + + +__all__ = [ + "find_cliques", + "find_cliques_recursive", + "make_max_clique_graph", + "make_clique_bipartite", + "graph_clique_number", + "graph_number_of_cliques", + "node_clique_number", + "number_of_cliques", + "cliques_containing_node", + "enumerate_all_cliques", + "max_weight_clique", +] + + +@not_implemented_for("directed") +def enumerate_all_cliques(G): + """Returns all cliques in an undirected graph. + + This function returns an iterator over cliques, each of which is a + list of nodes. The iteration is ordered by cardinality of the + cliques: first all cliques of size one, then all cliques of size + two, etc. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + Returns + ------- + iterator + An iterator over cliques, each of which is a list of nodes in + `G`. The cliques are ordered according to size. + + Notes + ----- + To obtain a list of all cliques, use + `list(enumerate_all_cliques(G))`. However, be aware that in the + worst-case, the length of this list can be exponential in the number + of nodes in the graph (for example, when the graph is the complete + graph). This function avoids storing all cliques in memory by only + keeping current candidate node lists in memory during its search. + + The implementation is adapted from the algorithm by Zhang, et + al. (2005) [1]_ to output all cliques discovered. + + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + + References + ---------- + .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J., + Langston, M.A., Samatova, N.F., + "Genome-Scale Computational Approaches to Memory-Intensive + Applications in Systems Biology". + *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005 + Conference, pp. 12, 12--18 Nov. 2005. + <https://doi.org/10.1109/SC.2005.29>. + + """ + index = {} + nbrs = {} + for u in G: + index[u] = len(index) + # Neighbors of u that appear after u in the iteration order of G. + nbrs[u] = {v for v in G[u] if v not in index} + + queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G) + # Loop invariants: + # 1. len(base) is nondecreasing. + # 2. (base + cnbrs) is sorted with respect to the iteration order of G. + # 3. cnbrs is a set of common neighbors of nodes in base. + while queue: + base, cnbrs = map(list, queue.popleft()) + yield base + for i, u in enumerate(cnbrs): + # Use generators to reduce memory consumption. + queue.append( + ( + chain(base, [u]), + filter(nbrs[u].__contains__, islice(cnbrs, i + 1, None)), + ) + ) + + +@not_implemented_for("directed") +def find_cliques(G): + """Returns all maximal cliques in an undirected graph. + + For each node *v*, a *maximal clique for v* is a largest complete + subgraph containing *v*. The largest maximal clique is sometimes + called the *maximum clique*. + + This function returns an iterator over cliques, each of which is a + list of nodes. It is an iterative implementation, so should not + suffer from recursion depth issues. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + Returns + ------- + iterator + An iterator over maximal cliques, each of which is a list of + nodes in `G`. The order of cliques is arbitrary. + + See Also + -------- + find_cliques_recursive + A recursive version of the same algorithm. + + Notes + ----- + To obtain a list of all maximal cliques, use + `list(find_cliques(G))`. However, be aware that in the worst-case, + the length of this list can be exponential in the number of nodes in + the graph. This function avoids storing all cliques in memory by + only keeping current candidate node lists in memory during its search. + + This implementation is based on the algorithm published by Bron and + Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi + (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It + essentially unrolls the recursion used in the references to avoid + issues of recursion stack depth (for a recursive implementation, see + :func:`find_cliques_recursive`). + + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + + References + ---------- + .. [1] Bron, C. and Kerbosch, J. + "Algorithm 457: finding all cliques of an undirected graph". + *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. + <http://portal.acm.org/citation.cfm?doid=362342.362367> + + .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, + "The worst-case time complexity for generating all maximal + cliques and computational experiments", + *Theoretical Computer Science*, Volume 363, Issue 1, + Computing and Combinatorics, + 10th Annual International Conference on + Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 + <https://doi.org/10.1016/j.tcs.2006.06.015> + + .. [3] F. Cazals, C. Karande, + "A note on the problem of reporting maximal cliques", + *Theoretical Computer Science*, + Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, + <https://doi.org/10.1016/j.tcs.2008.05.010> + + """ + if len(G) == 0: + return + + adj = {u: {v for v in G[u] if v != u} for u in G} + Q = [None] + + subg = set(G) + cand = set(G) + u = max(subg, key=lambda u: len(cand & adj[u])) + ext_u = cand - adj[u] + stack = [] + + try: + while True: + if ext_u: + q = ext_u.pop() + cand.remove(q) + Q[-1] = q + adj_q = adj[q] + subg_q = subg & adj_q + if not subg_q: + yield Q[:] + else: + cand_q = cand & adj_q + if cand_q: + stack.append((subg, cand, ext_u)) + Q.append(None) + subg = subg_q + cand = cand_q + u = max(subg, key=lambda u: len(cand & adj[u])) + ext_u = cand - adj[u] + else: + Q.pop() + subg, cand, ext_u = stack.pop() + except IndexError: + pass + + +# TODO Should this also be not implemented for directed graphs? +def find_cliques_recursive(G): + """Returns all maximal cliques in a graph. + + For each node *v*, a *maximal clique for v* is a largest complete + subgraph containing *v*. The largest maximal clique is sometimes + called the *maximum clique*. + + This function returns an iterator over cliques, each of which is a + list of nodes. It is a recursive implementation, so may suffer from + recursion depth issues. + + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + iterator + An iterator over maximal cliques, each of which is a list of + nodes in `G`. The order of cliques is arbitrary. + + See Also + -------- + find_cliques + An iterative version of the same algorithm. + + Notes + ----- + To obtain a list of all maximal cliques, use + `list(find_cliques_recursive(G))`. However, be aware that in the + worst-case, the length of this list can be exponential in the number + of nodes in the graph. This function avoids storing all cliques in memory + by only keeping current candidate node lists in memory during its search. + + This implementation is based on the algorithm published by Bron and + Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi + (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a + non-recursive implementation, see :func:`find_cliques`. + + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + + References + ---------- + .. [1] Bron, C. and Kerbosch, J. + "Algorithm 457: finding all cliques of an undirected graph". + *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. + <http://portal.acm.org/citation.cfm?doid=362342.362367> + + .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, + "The worst-case time complexity for generating all maximal + cliques and computational experiments", + *Theoretical Computer Science*, Volume 363, Issue 1, + Computing and Combinatorics, + 10th Annual International Conference on + Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 + <https://doi.org/10.1016/j.tcs.2006.06.015> + + .. [3] F. Cazals, C. Karande, + "A note on the problem of reporting maximal cliques", + *Theoretical Computer Science*, + Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, + <https://doi.org/10.1016/j.tcs.2008.05.010> + + """ + if len(G) == 0: + return iter([]) + + adj = {u: {v for v in G[u] if v != u} for u in G} + Q = [] + + def expand(subg, cand): + u = max(subg, key=lambda u: len(cand & adj[u])) + for q in cand - adj[u]: + cand.remove(q) + Q.append(q) + adj_q = adj[q] + subg_q = subg & adj_q + if not subg_q: + yield Q[:] + else: + cand_q = cand & adj_q + if cand_q: + yield from expand(subg_q, cand_q) + Q.pop() + + return expand(set(G), set(G)) + + +def make_max_clique_graph(G, create_using=None): + """Returns the maximal clique graph of the given graph. + + The nodes of the maximal clique graph of `G` are the cliques of + `G` and an edge joins two cliques if the cliques are not disjoint. + + Parameters + ---------- + G : NetworkX graph + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Returns + ------- + NetworkX graph + A graph whose nodes are the cliques of `G` and whose edges + join two cliques if they are not disjoint. + + Notes + ----- + This function behaves like the following code:: + + import networkx as nx + G = nx.make_clique_bipartite(G) + cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0] + G = nx.bipartite.project(G, cliques) + G = nx.relabel_nodes(G, {-v: v - 1 for v in G}) + + It should be faster, though, since it skips all the intermediate + steps. + + """ + if create_using is None: + B = G.__class__() + else: + B = nx.empty_graph(0, create_using) + cliques = list(enumerate(set(c) for c in find_cliques(G))) + # Add a numbered node for each clique. + B.add_nodes_from(i for i, c in cliques) + # Join cliques by an edge if they share a node. + clique_pairs = combinations(cliques, 2) + B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2) + return B + + +def make_clique_bipartite(G, fpos=None, create_using=None, name=None): + """Returns the bipartite clique graph corresponding to `G`. + + In the returned bipartite graph, the "bottom" nodes are the nodes of + `G` and the "top" nodes represent the maximal cliques of `G`. + There is an edge from node *v* to clique *C* in the returned graph + if and only if *v* is an element of *C*. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + fpos : bool + If True or not None, the returned graph will have an + additional attribute, `pos`, a dictionary mapping node to + position in the Euclidean plane. + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Returns + ------- + NetworkX graph + A bipartite graph whose "bottom" set is the nodes of the graph + `G`, whose "top" set is the cliques of `G`, and whose edges + join nodes of `G` to the cliques that contain them. + + The nodes of the graph `G` have the node attribute + 'bipartite' set to 1 and the nodes representing cliques + have the node attribute 'bipartite' set to 0, as is the + convention for bipartite graphs in NetworkX. + + """ + B = nx.empty_graph(0, create_using) + B.clear() + # The "bottom" nodes in the bipartite graph are the nodes of the + # original graph, G. + B.add_nodes_from(G, bipartite=1) + for i, cl in enumerate(find_cliques(G)): + # The "top" nodes in the bipartite graph are the cliques. These + # nodes get negative numbers as labels. + name = -i - 1 + B.add_node(name, bipartite=0) + B.add_edges_from((v, name) for v in cl) + return B + + +def graph_clique_number(G, cliques=None): + """Returns the clique number of the graph. + + The *clique number* of a graph is the size of the largest clique in + the graph. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + cliques : list + A list of cliques, each of which is itself a list of nodes. If + not specified, the list of all cliques will be computed, as by + :func:`find_cliques`. + + Returns + ------- + int + The size of the largest clique in `G`. + + Notes + ----- + You should provide `cliques` if you have already computed the list + of maximal cliques, in order to avoid an exponential time search for + maximal cliques. + + """ + if cliques is None: + cliques = find_cliques(G) + if len(G.nodes) < 1: + return 0 + return max([len(c) for c in cliques] or [1]) + + +def graph_number_of_cliques(G, cliques=None): + """Returns the number of maximal cliques in the graph. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + cliques : list + A list of cliques, each of which is itself a list of nodes. If + not specified, the list of all cliques will be computed, as by + :func:`find_cliques`. + + Returns + ------- + int + The number of maximal cliques in `G`. + + Notes + ----- + You should provide `cliques` if you have already computed the list + of maximal cliques, in order to avoid an exponential time search for + maximal cliques. + + """ + if cliques is None: + cliques = list(find_cliques(G)) + return len(cliques) + + +def node_clique_number(G, nodes=None, cliques=None): + """ Returns the size of the largest maximal clique containing + each given node. + + Returns a single or list depending on input nodes. + Optional list of cliques can be input if already computed. + """ + if cliques is None: + if nodes is not None: + # Use ego_graph to decrease size of graph + if isinstance(nodes, list): + d = {} + for n in nodes: + H = nx.ego_graph(G, n) + d[n] = max(len(c) for c in find_cliques(H)) + else: + H = nx.ego_graph(G, nodes) + d = max(len(c) for c in find_cliques(H)) + return d + # nodes is None--find all cliques + cliques = list(find_cliques(G)) + + if nodes is None: + nodes = list(G.nodes()) # none, get entire graph + + if not isinstance(nodes, list): # check for a list + v = nodes + # assume it is a single value + d = max([len(c) for c in cliques if v in c]) + else: + d = {} + for v in nodes: + d[v] = max([len(c) for c in cliques if v in c]) + return d + + # if nodes is None: # none, use entire graph + # nodes=G.nodes() + # elif not isinstance(nodes, list): # check for a list + # nodes=[nodes] # assume it is a single value + + # if cliques is None: + # cliques=list(find_cliques(G)) + # d={} + # for v in nodes: + # d[v]=max([len(c) for c in cliques if v in c]) + + # if nodes in G: + # return d[v] #return single value + # return d + + +def number_of_cliques(G, nodes=None, cliques=None): + """Returns the number of maximal cliques for each node. + + Returns a single or list depending on input nodes. + Optional list of cliques can be input if already computed. + """ + if cliques is None: + cliques = list(find_cliques(G)) + + if nodes is None: + nodes = list(G.nodes()) # none, get entire graph + + if not isinstance(nodes, list): # check for a list + v = nodes + # assume it is a single value + numcliq = len([1 for c in cliques if v in c]) + else: + numcliq = {} + for v in nodes: + numcliq[v] = len([1 for c in cliques if v in c]) + return numcliq + + +def cliques_containing_node(G, nodes=None, cliques=None): + """Returns a list of cliques containing the given node. + + Returns a single list or list of lists depending on input nodes. + Optional list of cliques can be input if already computed. + """ + if cliques is None: + cliques = list(find_cliques(G)) + + if nodes is None: + nodes = list(G.nodes()) # none, get entire graph + + if not isinstance(nodes, list): # check for a list + v = nodes + # assume it is a single value + vcliques = [c for c in cliques if v in c] + else: + vcliques = {} + for v in nodes: + vcliques[v] = [c for c in cliques if v in c] + return vcliques + + +class MaxWeightClique(object): + """A class for the maximum weight clique algorithm. + + This class is a helper for the `max_weight_clique` function. The class + should not normally be used directly. + + Parameters + ---------- + G : NetworkX graph + The undirected graph for which a maximum weight clique is sought + weight : string or None, optional (default='weight') + The node attribute that holds the integer value used as a weight. + If None, then each node has weight 1. + + Attributes + ---------- + G : NetworkX graph + The undirected graph for which a maximum weight clique is sought + node_weights: dict + The weight of each node + incumbent_nodes : list + The nodes of the incumbent clique (the best clique found so far) + incumbent_weight: int + The weight of the incumbent clique + """ + + def __init__(self, G, weight): + self.G = G + self.incumbent_nodes = [] + self.incumbent_weight = 0 + + if weight is None: + self.node_weights = {v: 1 for v in G.nodes()} + else: + for v in G.nodes(): + if weight not in G.nodes[v]: + err = "Node {} does not have the requested weight field." + raise KeyError(err.format(v)) + if not isinstance(G.nodes[v][weight], int): + err = "The '{}' field of node {} is not an integer." + raise ValueError(err.format(weight, v)) + self.node_weights = {v: G.nodes[v][weight] for v in G.nodes()} + + def update_incumbent_if_improved(self, C, C_weight): + """Update the incumbent if the node set C has greater weight. + + C is assumed to be a clique. + """ + if C_weight > self.incumbent_weight: + self.incumbent_nodes = C[:] + self.incumbent_weight = C_weight + + def greedily_find_independent_set(self, P): + """Greedily find an independent set of nodes from a set of + nodes P.""" + independent_set = [] + P = P[:] + while P: + v = P[0] + independent_set.append(v) + P = [w for w in P if v != w and not self.G.has_edge(v, w)] + return independent_set + + def find_branching_nodes(self, P, target): + """Find a set of nodes to branch on.""" + residual_wt = {v: self.node_weights[v] for v in P} + total_wt = 0 + P = P[:] + while P: + independent_set = self.greedily_find_independent_set(P) + min_wt_in_class = min(residual_wt[v] for v in independent_set) + total_wt += min_wt_in_class + if total_wt > target: + break + for v in independent_set: + residual_wt[v] -= min_wt_in_class + P = [v for v in P if residual_wt[v] != 0] + return P + + def expand(self, C, C_weight, P): + """Look for the best clique that contains all the nodes in C and zero or + more of the nodes in P, backtracking if it can be shown that no such + clique has greater weight than the incumbent. + """ + self.update_incumbent_if_improved(C, C_weight) + branching_nodes = self.find_branching_nodes(P, self.incumbent_weight - C_weight) + while branching_nodes: + v = branching_nodes.pop() + P.remove(v) + new_C = C + [v] + new_C_weight = C_weight + self.node_weights[v] + new_P = [w for w in P if self.G.has_edge(v, w)] + self.expand(new_C, new_C_weight, new_P) + + def find_max_weight_clique(self): + """Find a maximum weight clique.""" + # Sort nodes in reverse order of degree for speed + nodes = sorted(self.G.nodes(), key=lambda v: self.G.degree(v), reverse=True) + nodes = [v for v in nodes if self.node_weights[v] > 0] + self.expand([], 0, nodes) + + +@not_implemented_for("directed") +def max_weight_clique(G, weight="weight"): + """Find a maximum weight clique in G. + + A *clique* in a graph is a set of nodes such that every two distinct nodes + are adjacent. The *weight* of a clique is the sum of the weights of its + nodes. A *maximum weight clique* of graph G is a clique C in G such that + no clique in G has weight greater than the weight of C. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + weight : string or None, optional (default='weight') + The node attribute that holds the integer value used as a weight. + If None, then each node has weight 1. + + Returns + ------- + clique : list + the nodes of a maximum weight clique + weight : int + the weight of a maximum weight clique + + Notes + ----- + The implementation is recursive, and therefore it may run into recursion + depth issues if G contains a clique whose number of nodes is close to the + recursion depth limit. + + At each search node, the algorithm greedily constructs a weighted + independent set cover of part of the graph in order to find a small set of + nodes on which to branch. The algorithm is very similar to the algorithm + of Tavares et al. [1]_, other than the fact that the NetworkX version does + not use bitsets. This style of algorithm for maximum weight clique (and + maximum weight independent set, which is the same problem but on the + complement graph) has a decades-long history. See Algorithm B of Warren + and Hicks [2]_ and the references in that paper. + + References + ---------- + .. [1] Tavares, W.A., Neto, M.B.C., Rodrigues, C.D., Michelon, P.: Um + algoritmo de branch and bound para o problema da clique máxima + ponderada. Proceedings of XLVII SBPO 1 (2015). + + .. [2] Warrent, Jeffrey S, Hicks, Illya V.: Combinatorial Branch-and-Bound + for the Maximum Weight Independent Set Problem. Technical Report, + Texas A&M University (2016). + """ + + mwc = MaxWeightClique(G, weight) + mwc.find_max_weight_clique() + return mwc.incumbent_nodes, mwc.incumbent_weight