Mercurial > repos > laurenmarazzi > netisce_test
diff tools/myTools/bin/sfa/control/influence.py @ 1:7e5c71b2e71f draft default tip
Uploaded
author | laurenmarazzi |
---|---|
date | Wed, 22 Dec 2021 16:00:34 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/myTools/bin/sfa/control/influence.py Wed Dec 22 16:00:34 2021 +0000 @@ -0,0 +1,347 @@ +from collections import Counter + +import numpy as np +import scipy as sp +import pandas as pd + + +def compute_influence(W, + alpha=0.9, + beta=0.1, + S=None, + rtype='df', + outputs=None, + n2i=None, + max_iter=1000, + tol=1e-7, + get_iter=False, + device="cpu", + sparse=False): + r"""Compute the influence. + It estimates the effects of a node to the other nodes, + by calculating partial derivative with respect to source nodes, + based on a simple iterative method. + + Based on the below difference equation, + + x(t+1) = alpha*W.dot(x(t)) + (1-alpha)*b + + The influence matrix, S, is computed using chain rule of + partial derivative as follows. + + \begin{align} + S_{ij} &= \frac{\partial{x_i}}{\partial{x_j}} \\ + &= (I + \alpha W + \alpha^2 W^2 + ... + \alpha^{\infty}W^{\infty})_{ij} \\ + &\approx (I + \alpha W + \alpha^2 W^2 + ... + \alpha^{l}W^{l})_{ij} \\ + \end{align} + + This is the summation of the weight multiplications along all paths + including cycles. $S_{ij}$ denotes the influence of node (j) on node (i). + + An iterative method for an approximated solution is as follows. + + S(t+1) = \alpha WS(t) + I, + + where $S(0) = \beta I$ and $S(1) = \beta(I + \alpha W)$ $(t>1)$. + + The iteration continues until $||S(t+1) - S(t)|| \leq tol$. + + + Parameters + ---------- + W : numpy.ndarray + Weight matrix. + alpha : float, optional + Hyperparameter for adjusting the effect of signal flow. + beta : float, optional + Hyperparameter for adjusting the effect of basal activity. + S : numpy.ndarray, optional + Initial influence matrix. + rtype: str (optional) + Return object type: 'df' or 'array'. + outputs: list (or iterable) of str, optional + Names of output nodes, which is necessary for 'df' rtype. + n2i: dict, optional + Name to index dict, which is necessary for 'df' rtype. + max_iter : int, optional + The maximum iteration number for the estimation. + tol : float, optional + Tolerance for terminating the iteration. + get_iter : bool, optional + Determine whether the actual iteration number is returned. + device : str, optional, {'CPU', 'GPU:0', 'GPU:1', ...} + Select which device to use. 'CPU' is default. + sparse : bool, optional + Use sparse matrices for the computation. + + Returns + ------- + S : numpy.ndarray, optional + 2D array of influence. + df : pd.DataFrame, optional + Influences for each output in DataFrame. + num_iter : int, optional + The actual number of iteration. + """ + # TODO: Test rendering the above mathematical expressions in LaTeX form. + + if max_iter < 2: + raise ValueError("max_iter should be greater than 2.") + + device = device.lower() + + if 'cpu' in device: + if sparse: + ret = _compute_influence_cpu_sparse(W, alpha, beta, S, + max_iter, tol, get_iter) + else: + ret = _compute_influence_cpu(W, alpha, beta, S, + max_iter, tol, get_iter) + elif 'gpu'in device: + _, id_device = device.split(':') + ret = _compute_influence_gpu(W, alpha, beta, S, + max_iter, tol, get_iter, id_device) + + if rtype == 'df': + import cupy as cp + if isinstance(ret, cp.core.core.ndarray): + ret = cp.asnumpy(ret) + + if get_iter: + S_ret, num_iter = ret + else: + S_ret = ret + + if rtype == 'array': + return ret + elif rtype == 'df': + if not outputs: + err_msg = "outputs should be designated for 'df' return type." + raise ValueError(err_msg) + + df = pd.DataFrame(columns=outputs) + + for trg in outputs: + for src in n2i: + if src == trg: + df.loc[src, trg] = np.inf + + idx_src = n2i[src] + idx_trg = n2i[trg] + df.loc[src, trg] = S_ret[idx_trg, idx_src] + + if get_iter: + return df, num_iter + else: + return df + else: + raise ValueError("Unknown return type: %s"%(rtype)) + + +def _compute_influence_cpu(W, alpha=0.5, beta=0.5, S=None, + max_iter=1000, tol=1e-6, get_iter=False): + N = W.shape[0] + if S is not None: + S1 = S + else: + S1 = np.eye(N, dtype=np.float) + + I = np.eye(N, dtype=np.float) + S2 = np.zeros_like(W) + aW = alpha * W + for cnt in range(max_iter): + S2[:, :] = S1.dot(aW) + I + norm = np.linalg.norm(S2 - S1) + if norm < tol: + break + # end of if + S1[:, :] = S2 + # end of for + + S_fin = beta * S2 + if get_iter: + return S_fin, cnt + else: + return S_fin + + +def _compute_influence_cpu_sparse(W, alpha, beta, S, + max_iter, tol, get_iter): + N = W.shape[0] + if S is not None: + S1 = S + else: + S1 = sp.sparse.lil_matrix(sp.sparse.eye(N, dtype=np.float)) + + + I = sp.sparse.eye(N, dtype=np.float) + S2 = sp.sparse.lil_matrix((N,N), dtype=np.float) + aW = sp.sparse.csc_matrix(alpha * W) + for cnt in range(max_iter): + S2[:, :] = S1.dot(aW) + I + norm = sp.sparse.linalg.norm(S2 - S1) + if norm < tol: + break + # end of if + S1[:, :] = S2 + # end of for + + S_fin = beta * S2 + if get_iter: + return S_fin, cnt + else: + return S_fin + + +def _compute_influence_gpu(W, alpha=0.5, beta=0.5, S=None, + max_iter=1000, tol=1e-6, get_iter=False, + id_device=0): + import cupy as cp + cp.cuda.Device(id_device).use() + N = W.shape[0] + I = cp.eye(N, dtype=cp.float32) #np.eye(N, N, dtype=np.float) + if S is not None: + S1 = cp.array(S, dtype=cp.float32) + else: + S1 = cp.eye(N, dtype=cp.float32) + + S2 = cp.zeros((N,N), dtype=cp.float32) + aW = alpha * cp.array(W, dtype=cp.float32) + + tol_gpu = cp.array(tol) + + for cnt in range(max_iter): + S2[:, :] = cp.dot(S1, aW) + I + mat_norm = cp.linalg.norm(S2 - S1) + if mat_norm < tol_gpu: + break + # end of if + S1[:, :] = S2 + # end of for + + S_fin = beta*S2 + if get_iter: + return S_fin, cnt + else: + return S_fin + + + +def arrange_si( + df_splo, + df_inf, + output, + min_splo=None, + max_splo=None, + thr_inf=1e-10, + ascending=True): + + # SPLO-Influence data + if not min_splo: + min_splo = df_splo.min() + + if not max_splo: + max_splo = df_splo.max() + + mask_splo = (min_splo <= df_splo) & (df_splo <= max_splo) + df_splo = df_splo[mask_splo] + + df_splo = pd.DataFrame(df_splo) + df_splo.columns = ['SPLO'] + + if output in df_splo.index: + df_splo.drop(output, inplace=True) + + index_common = df_splo.index.intersection(df_inf.index) + df_inf = pd.DataFrame(df_inf.loc[index_common]) + + mark_drop = df_inf[output].abs() <= thr_inf + df_inf.drop(df_inf.loc[mark_drop, output].index, + inplace=True) + + + df_si = df_inf.join(df_splo.loc[index_common]) + df_si.index.name = 'Source' + df_si.reset_index(inplace=True) + + cnt_splo = Counter(df_si['SPLO']) + splos = sorted(cnt_splo.keys()) + + si = {} + for i, splo in enumerate(splos): + df_sub = df_si[df_si['SPLO'] == splo] + df_sub = df_sub.sort_values(by=output, + ascending=ascending) + #num_items = df_sub[output].count() + #influence = np.zeros((cnt_max,)) # Influence + #num_empty = cnt_max - num_items + #influence[num_empty:] = df_sub[output] + #names = df_sub['Source'].tolist() + si[splo] = df_sub #[output] + + return si + + +def prioritize(df_splo, + df_inf, + output, + dac, + thr_rank=3, + min_group_size=0, + min_splo=None, + max_splo=None, + thr_inf=1e-10, +): + """Prioritize target candiates. + + Parameters + ---------- + df_splo : pandas.DataFrame + Dataframe for SPLO information. + df_inf : pandas.DataFrame + Dataframe for influence information. + output : str + Names of output node, which is necessary for 'df_inf'. + dac : int + Direction of activity change (DAC) of the output. + thr_rank : int or float + Rank to filter out the entities. + The entities whose ranks are greater than thr_rank survive. + min_group_size : int + Minimum group size to be satisfied. + """ + ascending = True if dac < 0 else False + + df_inf_dac = df_inf[np.sign(df_inf[output]) == dac] + si = arrange_si(df_splo, + df_inf_dac, + output, + min_splo, + max_splo, + thr_inf, + ascending) + targets = [] + for splo in si: + # Get the group of this SPLO. + df_sub = si[splo] + + if df_sub.shape[0] < min_group_size: + continue + + # Get the entities that have the designated dac. + df_sub = df_sub[np.sign(df_sub[output]) == dac] + + # Get the enetities whose rank exceeds the threshods. + if 0 < thr_rank < 1: + ix_max_rank = int(thr_rank * df_sub.shape[0]) + if ix_max_rank == 0: + ix_max_rank = df_sub.shape[0] + else: + ix_max_rank = thr_rank + + #print(ix_max_rank) + df_top = df_sub.iloc[:ix_max_rank, :] + + targets.extend(df_top['Source'].tolist()) + # end of for + return targets \ No newline at end of file