## Mercurial > repos > shellac > sam_consensus_v3

### diff env/lib/python3.9/site-packages/networkx/algorithms/approximation/kcomponents.py @ 0:4f3585e2f14b draft default tip

Find changesets by keywords (author, files, the commit message), revision
number or hash, or revset expression.

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"

author | shellac |
---|---|

date | Mon, 22 Mar 2021 18:12:50 +0000 |

parents | |

children |

line wrap: on

line diff

--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/approximation/kcomponents.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,368 @@ +""" Fast approximation for k-component structure +""" +import itertools +from collections import defaultdict +from collections.abc import Mapping + +import networkx as nx +from networkx.exception import NetworkXError +from networkx.utils import not_implemented_for + +from networkx.algorithms.approximation import local_node_connectivity + + +__all__ = ["k_components"] + + +not_implemented_for("directed") + + +def k_components(G, min_density=0.95): + r"""Returns the approximate k-component structure of a graph G. + + A `k`-component is a maximal subgraph of a graph G that has, at least, + node connectivity `k`: we need to remove at least `k` nodes to break it + into more components. `k`-components have an inherent hierarchical + structure because they are nested in terms of connectivity: a connected + graph can contain several 2-components, each of which can contain + one or more 3-components, and so forth. + + This implementation is based on the fast heuristics to approximate + the `k`-component structure of a graph [1]_. Which, in turn, it is based on + a fast approximation algorithm for finding good lower bounds of the number + of node independent paths between two nodes [2]_. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + + min_density : Float + Density relaxation threshold. Default value 0.95 + + Returns + ------- + k_components : dict + Dictionary with connectivity level `k` as key and a list of + sets of nodes that form a k-component of level `k` as values. + + + Examples + -------- + >>> # Petersen graph has 10 nodes and it is triconnected, thus all + >>> # nodes are in a single component on all three connectivity levels + >>> from networkx.algorithms import approximation as apxa + >>> G = nx.petersen_graph() + >>> k_components = apxa.k_components(G) + + Notes + ----- + The logic of the approximation algorithm for computing the `k`-component + structure [1]_ is based on repeatedly applying simple and fast algorithms + for `k`-cores and biconnected components in order to narrow down the + number of pairs of nodes over which we have to compute White and Newman's + approximation algorithm for finding node independent paths [2]_. More + formally, this algorithm is based on Whitney's theorem, which states + an inclusion relation among node connectivity, edge connectivity, and + minimum degree for any graph G. This theorem implies that every + `k`-component is nested inside a `k`-edge-component, which in turn, + is contained in a `k`-core. Thus, this algorithm computes node independent + paths among pairs of nodes in each biconnected part of each `k`-core, + and repeats this procedure for each `k` from 3 to the maximal core number + of a node in the input graph. + + Because, in practice, many nodes of the core of level `k` inside a + bicomponent actually are part of a component of level k, the auxiliary + graph needed for the algorithm is likely to be very dense. Thus, we use + a complement graph data structure (see `AntiGraph`) to save memory. + AntiGraph only stores information of the edges that are *not* present + in the actual auxiliary graph. When applying algorithms to this + complement graph data structure, it behaves as if it were the dense + version. + + See also + -------- + k_components + + References + ---------- + .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion: + Visualization and Heuristics for Fast Computation. + https://arxiv.org/pdf/1503.04476v1 + + .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for + Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035 + http://eclectic.ss.uci.edu/~drwhite/working.pdf + + .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness: + A hierarchical conception of social groups. + American Sociological Review 68(1), 103--28. + http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf + + """ + # Dictionary with connectivity level (k) as keys and a list of + # sets of nodes that form a k-component as values + k_components = defaultdict(list) + # make a few functions local for speed + node_connectivity = local_node_connectivity + k_core = nx.k_core + core_number = nx.core_number + biconnected_components = nx.biconnected_components + density = nx.density + combinations = itertools.combinations + # Exact solution for k = {1,2} + # There is a linear time algorithm for triconnectivity, if we had an + # implementation available we could start from k = 4. + for component in nx.connected_components(G): + # isolated nodes have connectivity 0 + comp = set(component) + if len(comp) > 1: + k_components[1].append(comp) + for bicomponent in nx.biconnected_components(G): + # avoid considering dyads as bicomponents + bicomp = set(bicomponent) + if len(bicomp) > 2: + k_components[2].append(bicomp) + # There is no k-component of k > maximum core number + # \kappa(G) <= \lambda(G) <= \delta(G) + g_cnumber = core_number(G) + max_core = max(g_cnumber.values()) + for k in range(3, max_core + 1): + C = k_core(G, k, core_number=g_cnumber) + for nodes in biconnected_components(C): + # Build a subgraph SG induced by the nodes that are part of + # each biconnected component of the k-core subgraph C. + if len(nodes) < k: + continue + SG = G.subgraph(nodes) + # Build auxiliary graph + H = _AntiGraph() + H.add_nodes_from(SG.nodes()) + for u, v in combinations(SG, 2): + K = node_connectivity(SG, u, v, cutoff=k) + if k > K: + H.add_edge(u, v) + for h_nodes in biconnected_components(H): + if len(h_nodes) <= k: + continue + SH = H.subgraph(h_nodes) + for Gc in _cliques_heuristic(SG, SH, k, min_density): + for k_nodes in biconnected_components(Gc): + Gk = nx.k_core(SG.subgraph(k_nodes), k) + if len(Gk) <= k: + continue + k_components[k].append(set(Gk)) + return k_components + + +def _cliques_heuristic(G, H, k, min_density): + h_cnumber = nx.core_number(H) + for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)): + cands = {n for n, c in h_cnumber.items() if c == c_value} + # Skip checking for overlap for the highest core value + if i == 0: + overlap = False + else: + overlap = set.intersection( + *[{x for x in H[n] if x not in cands} for n in cands] + ) + if overlap and len(overlap) < k: + SH = H.subgraph(cands | overlap) + else: + SH = H.subgraph(cands) + sh_cnumber = nx.core_number(SH) + SG = nx.k_core(G.subgraph(SH), k) + while not (_same(sh_cnumber) and nx.density(SH) >= min_density): + # This subgraph must be writable => .copy() + SH = H.subgraph(SG).copy() + if len(SH) <= k: + break + sh_cnumber = nx.core_number(SH) + sh_deg = dict(SH.degree()) + min_deg = min(sh_deg.values()) + SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg) + SG = nx.k_core(G.subgraph(SH), k) + else: + yield SG + + +def _same(measure, tol=0): + vals = set(measure.values()) + if (max(vals) - min(vals)) <= tol: + return True + return False + + +class _AntiGraph(nx.Graph): + """ + Class for complement graphs. + + The main goal is to be able to work with big and dense graphs with + a low memory foodprint. + + In this class you add the edges that *do not exist* in the dense graph, + the report methods of the class return the neighbors, the edges and + the degree as if it was the dense graph. Thus it's possible to use + an instance of this class with some of NetworkX functions. In this + case we only use k-core, connected_components, and biconnected_components. + """ + + all_edge_dict = {"weight": 1} + + def single_edge_dict(self): + return self.all_edge_dict + + edge_attr_dict_factory = single_edge_dict + + def __getitem__(self, n): + """Returns a dict of neighbors of node n in the dense graph. + + Parameters + ---------- + n : node + A node in the graph. + + Returns + ------- + adj_dict : dictionary + The adjacency dictionary for nodes connected to n. + + """ + all_edge_dict = self.all_edge_dict + return { + node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n} + } + + def neighbors(self, n): + """Returns an iterator over all neighbors of node n in the + dense graph. + """ + try: + return iter(set(self._adj) - set(self._adj[n]) - {n}) + except KeyError as e: + raise NetworkXError(f"The node {n} is not in the graph.") from e + + class AntiAtlasView(Mapping): + """An adjacency inner dict for AntiGraph""" + + def __init__(self, graph, node): + self._graph = graph + self._atlas = graph._adj[node] + self._node = node + + def __len__(self): + return len(self._graph) - len(self._atlas) - 1 + + def __iter__(self): + return (n for n in self._graph if n not in self._atlas and n != self._node) + + def __getitem__(self, nbr): + nbrs = set(self._graph._adj) - set(self._atlas) - {self._node} + if nbr in nbrs: + return self._graph.all_edge_dict + raise KeyError(nbr) + + class AntiAdjacencyView(AntiAtlasView): + """An adjacency outer dict for AntiGraph""" + + def __init__(self, graph): + self._graph = graph + self._atlas = graph._adj + + def __len__(self): + return len(self._atlas) + + def __iter__(self): + return iter(self._graph) + + def __getitem__(self, node): + if node not in self._graph: + raise KeyError(node) + return self._graph.AntiAtlasView(self._graph, node) + + @property + def adj(self): + return self.AntiAdjacencyView(self) + + def subgraph(self, nodes): + """This subgraph method returns a full AntiGraph. Not a View""" + nodes = set(nodes) + G = _AntiGraph() + G.add_nodes_from(nodes) + for n in G: + Gnbrs = G.adjlist_inner_dict_factory() + G._adj[n] = Gnbrs + for nbr, d in self._adj[n].items(): + if nbr in G._adj: + Gnbrs[nbr] = d + G._adj[nbr][n] = d + G.graph = self.graph + return G + + class AntiDegreeView(nx.reportviews.DegreeView): + def __iter__(self): + all_nodes = set(self._succ) + for n in self._nodes: + nbrs = all_nodes - set(self._succ[n]) - {n} + yield (n, len(nbrs)) + + def __getitem__(self, n): + nbrs = set(self._succ) - set(self._succ[n]) - {n} + # AntiGraph is a ThinGraph so all edges have weight 1 + return len(nbrs) + (n in nbrs) + + @property + def degree(self): + """Returns an iterator for (node, degree) and degree for single node. + + The node degree is the number of edges adjacent to the node. + + Parameters + ---------- + nbunch : iterable container, optional (default=all nodes) + A container of nodes. The container will be iterated + through once. + + weight : string or None, optional (default=None) + The edge attribute that holds the numerical value used + as a weight. If None, then each edge has weight 1. + The degree is the sum of the edge weights adjacent to the node. + + Returns + ------- + deg: + Degree of the node, if a single node is passed as argument. + nd_iter : an iterator + The iterator returns two-tuples of (node, degree). + + See Also + -------- + degree + + Examples + -------- + >>> G = nx.path_graph(4) + >>> G.degree(0) # node 0 with degree 1 + 1 + >>> list(G.degree([0, 1])) + [(0, 1), (1, 2)] + + """ + return self.AntiDegreeView(self) + + def adjacency(self): + """Returns an iterator of (node, adjacency set) tuples for all nodes + in the dense graph. + + This is the fastest way to look at every edge. + For directed graphs, only outgoing adjacencies are included. + + Returns + ------- + adj_iter : iterator + An iterator of (node, adjacency set) for all nodes in + the graph. + + """ + for n in self._adj: + yield (n, set(self._adj) - set(self._adj[n]) - {n})