Mercurial > repos > laurenmarazzi > netisce_test
comparison tools/myTools/bin/sfa/control/influence.py @ 1:7e5c71b2e71f draft default tip
Uploaded
author | laurenmarazzi |
---|---|
date | Wed, 22 Dec 2021 16:00:34 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:f24d4892aaed | 1:7e5c71b2e71f |
---|---|
1 from collections import Counter | |
2 | |
3 import numpy as np | |
4 import scipy as sp | |
5 import pandas as pd | |
6 | |
7 | |
8 def compute_influence(W, | |
9 alpha=0.9, | |
10 beta=0.1, | |
11 S=None, | |
12 rtype='df', | |
13 outputs=None, | |
14 n2i=None, | |
15 max_iter=1000, | |
16 tol=1e-7, | |
17 get_iter=False, | |
18 device="cpu", | |
19 sparse=False): | |
20 r"""Compute the influence. | |
21 It estimates the effects of a node to the other nodes, | |
22 by calculating partial derivative with respect to source nodes, | |
23 based on a simple iterative method. | |
24 | |
25 Based on the below difference equation, | |
26 | |
27 x(t+1) = alpha*W.dot(x(t)) + (1-alpha)*b | |
28 | |
29 The influence matrix, S, is computed using chain rule of | |
30 partial derivative as follows. | |
31 | |
32 \begin{align} | |
33 S_{ij} &= \frac{\partial{x_i}}{\partial{x_j}} \\ | |
34 &= (I + \alpha W + \alpha^2 W^2 + ... + \alpha^{\infty}W^{\infty})_{ij} \\ | |
35 &\approx (I + \alpha W + \alpha^2 W^2 + ... + \alpha^{l}W^{l})_{ij} \\ | |
36 \end{align} | |
37 | |
38 This is the summation of the weight multiplications along all paths | |
39 including cycles. $S_{ij}$ denotes the influence of node (j) on node (i). | |
40 | |
41 An iterative method for an approximated solution is as follows. | |
42 | |
43 S(t+1) = \alpha WS(t) + I, | |
44 | |
45 where $S(0) = \beta I$ and $S(1) = \beta(I + \alpha W)$ $(t>1)$. | |
46 | |
47 The iteration continues until $||S(t+1) - S(t)|| \leq tol$. | |
48 | |
49 | |
50 Parameters | |
51 ---------- | |
52 W : numpy.ndarray | |
53 Weight matrix. | |
54 alpha : float, optional | |
55 Hyperparameter for adjusting the effect of signal flow. | |
56 beta : float, optional | |
57 Hyperparameter for adjusting the effect of basal activity. | |
58 S : numpy.ndarray, optional | |
59 Initial influence matrix. | |
60 rtype: str (optional) | |
61 Return object type: 'df' or 'array'. | |
62 outputs: list (or iterable) of str, optional | |
63 Names of output nodes, which is necessary for 'df' rtype. | |
64 n2i: dict, optional | |
65 Name to index dict, which is necessary for 'df' rtype. | |
66 max_iter : int, optional | |
67 The maximum iteration number for the estimation. | |
68 tol : float, optional | |
69 Tolerance for terminating the iteration. | |
70 get_iter : bool, optional | |
71 Determine whether the actual iteration number is returned. | |
72 device : str, optional, {'CPU', 'GPU:0', 'GPU:1', ...} | |
73 Select which device to use. 'CPU' is default. | |
74 sparse : bool, optional | |
75 Use sparse matrices for the computation. | |
76 | |
77 Returns | |
78 ------- | |
79 S : numpy.ndarray, optional | |
80 2D array of influence. | |
81 df : pd.DataFrame, optional | |
82 Influences for each output in DataFrame. | |
83 num_iter : int, optional | |
84 The actual number of iteration. | |
85 """ | |
86 # TODO: Test rendering the above mathematical expressions in LaTeX form. | |
87 | |
88 if max_iter < 2: | |
89 raise ValueError("max_iter should be greater than 2.") | |
90 | |
91 device = device.lower() | |
92 | |
93 if 'cpu' in device: | |
94 if sparse: | |
95 ret = _compute_influence_cpu_sparse(W, alpha, beta, S, | |
96 max_iter, tol, get_iter) | |
97 else: | |
98 ret = _compute_influence_cpu(W, alpha, beta, S, | |
99 max_iter, tol, get_iter) | |
100 elif 'gpu'in device: | |
101 _, id_device = device.split(':') | |
102 ret = _compute_influence_gpu(W, alpha, beta, S, | |
103 max_iter, tol, get_iter, id_device) | |
104 | |
105 if rtype == 'df': | |
106 import cupy as cp | |
107 if isinstance(ret, cp.core.core.ndarray): | |
108 ret = cp.asnumpy(ret) | |
109 | |
110 if get_iter: | |
111 S_ret, num_iter = ret | |
112 else: | |
113 S_ret = ret | |
114 | |
115 if rtype == 'array': | |
116 return ret | |
117 elif rtype == 'df': | |
118 if not outputs: | |
119 err_msg = "outputs should be designated for 'df' return type." | |
120 raise ValueError(err_msg) | |
121 | |
122 df = pd.DataFrame(columns=outputs) | |
123 | |
124 for trg in outputs: | |
125 for src in n2i: | |
126 if src == trg: | |
127 df.loc[src, trg] = np.inf | |
128 | |
129 idx_src = n2i[src] | |
130 idx_trg = n2i[trg] | |
131 df.loc[src, trg] = S_ret[idx_trg, idx_src] | |
132 | |
133 if get_iter: | |
134 return df, num_iter | |
135 else: | |
136 return df | |
137 else: | |
138 raise ValueError("Unknown return type: %s"%(rtype)) | |
139 | |
140 | |
141 def _compute_influence_cpu(W, alpha=0.5, beta=0.5, S=None, | |
142 max_iter=1000, tol=1e-6, get_iter=False): | |
143 N = W.shape[0] | |
144 if S is not None: | |
145 S1 = S | |
146 else: | |
147 S1 = np.eye(N, dtype=np.float) | |
148 | |
149 I = np.eye(N, dtype=np.float) | |
150 S2 = np.zeros_like(W) | |
151 aW = alpha * W | |
152 for cnt in range(max_iter): | |
153 S2[:, :] = S1.dot(aW) + I | |
154 norm = np.linalg.norm(S2 - S1) | |
155 if norm < tol: | |
156 break | |
157 # end of if | |
158 S1[:, :] = S2 | |
159 # end of for | |
160 | |
161 S_fin = beta * S2 | |
162 if get_iter: | |
163 return S_fin, cnt | |
164 else: | |
165 return S_fin | |
166 | |
167 | |
168 def _compute_influence_cpu_sparse(W, alpha, beta, S, | |
169 max_iter, tol, get_iter): | |
170 N = W.shape[0] | |
171 if S is not None: | |
172 S1 = S | |
173 else: | |
174 S1 = sp.sparse.lil_matrix(sp.sparse.eye(N, dtype=np.float)) | |
175 | |
176 | |
177 I = sp.sparse.eye(N, dtype=np.float) | |
178 S2 = sp.sparse.lil_matrix((N,N), dtype=np.float) | |
179 aW = sp.sparse.csc_matrix(alpha * W) | |
180 for cnt in range(max_iter): | |
181 S2[:, :] = S1.dot(aW) + I | |
182 norm = sp.sparse.linalg.norm(S2 - S1) | |
183 if norm < tol: | |
184 break | |
185 # end of if | |
186 S1[:, :] = S2 | |
187 # end of for | |
188 | |
189 S_fin = beta * S2 | |
190 if get_iter: | |
191 return S_fin, cnt | |
192 else: | |
193 return S_fin | |
194 | |
195 | |
196 def _compute_influence_gpu(W, alpha=0.5, beta=0.5, S=None, | |
197 max_iter=1000, tol=1e-6, get_iter=False, | |
198 id_device=0): | |
199 import cupy as cp | |
200 cp.cuda.Device(id_device).use() | |
201 N = W.shape[0] | |
202 I = cp.eye(N, dtype=cp.float32) #np.eye(N, N, dtype=np.float) | |
203 if S is not None: | |
204 S1 = cp.array(S, dtype=cp.float32) | |
205 else: | |
206 S1 = cp.eye(N, dtype=cp.float32) | |
207 | |
208 S2 = cp.zeros((N,N), dtype=cp.float32) | |
209 aW = alpha * cp.array(W, dtype=cp.float32) | |
210 | |
211 tol_gpu = cp.array(tol) | |
212 | |
213 for cnt in range(max_iter): | |
214 S2[:, :] = cp.dot(S1, aW) + I | |
215 mat_norm = cp.linalg.norm(S2 - S1) | |
216 if mat_norm < tol_gpu: | |
217 break | |
218 # end of if | |
219 S1[:, :] = S2 | |
220 # end of for | |
221 | |
222 S_fin = beta*S2 | |
223 if get_iter: | |
224 return S_fin, cnt | |
225 else: | |
226 return S_fin | |
227 | |
228 | |
229 | |
230 def arrange_si( | |
231 df_splo, | |
232 df_inf, | |
233 output, | |
234 min_splo=None, | |
235 max_splo=None, | |
236 thr_inf=1e-10, | |
237 ascending=True): | |
238 | |
239 # SPLO-Influence data | |
240 if not min_splo: | |
241 min_splo = df_splo.min() | |
242 | |
243 if not max_splo: | |
244 max_splo = df_splo.max() | |
245 | |
246 mask_splo = (min_splo <= df_splo) & (df_splo <= max_splo) | |
247 df_splo = df_splo[mask_splo] | |
248 | |
249 df_splo = pd.DataFrame(df_splo) | |
250 df_splo.columns = ['SPLO'] | |
251 | |
252 if output in df_splo.index: | |
253 df_splo.drop(output, inplace=True) | |
254 | |
255 index_common = df_splo.index.intersection(df_inf.index) | |
256 df_inf = pd.DataFrame(df_inf.loc[index_common]) | |
257 | |
258 mark_drop = df_inf[output].abs() <= thr_inf | |
259 df_inf.drop(df_inf.loc[mark_drop, output].index, | |
260 inplace=True) | |
261 | |
262 | |
263 df_si = df_inf.join(df_splo.loc[index_common]) | |
264 df_si.index.name = 'Source' | |
265 df_si.reset_index(inplace=True) | |
266 | |
267 cnt_splo = Counter(df_si['SPLO']) | |
268 splos = sorted(cnt_splo.keys()) | |
269 | |
270 si = {} | |
271 for i, splo in enumerate(splos): | |
272 df_sub = df_si[df_si['SPLO'] == splo] | |
273 df_sub = df_sub.sort_values(by=output, | |
274 ascending=ascending) | |
275 #num_items = df_sub[output].count() | |
276 #influence = np.zeros((cnt_max,)) # Influence | |
277 #num_empty = cnt_max - num_items | |
278 #influence[num_empty:] = df_sub[output] | |
279 #names = df_sub['Source'].tolist() | |
280 si[splo] = df_sub #[output] | |
281 | |
282 return si | |
283 | |
284 | |
285 def prioritize(df_splo, | |
286 df_inf, | |
287 output, | |
288 dac, | |
289 thr_rank=3, | |
290 min_group_size=0, | |
291 min_splo=None, | |
292 max_splo=None, | |
293 thr_inf=1e-10, | |
294 ): | |
295 """Prioritize target candiates. | |
296 | |
297 Parameters | |
298 ---------- | |
299 df_splo : pandas.DataFrame | |
300 Dataframe for SPLO information. | |
301 df_inf : pandas.DataFrame | |
302 Dataframe for influence information. | |
303 output : str | |
304 Names of output node, which is necessary for 'df_inf'. | |
305 dac : int | |
306 Direction of activity change (DAC) of the output. | |
307 thr_rank : int or float | |
308 Rank to filter out the entities. | |
309 The entities whose ranks are greater than thr_rank survive. | |
310 min_group_size : int | |
311 Minimum group size to be satisfied. | |
312 """ | |
313 ascending = True if dac < 0 else False | |
314 | |
315 df_inf_dac = df_inf[np.sign(df_inf[output]) == dac] | |
316 si = arrange_si(df_splo, | |
317 df_inf_dac, | |
318 output, | |
319 min_splo, | |
320 max_splo, | |
321 thr_inf, | |
322 ascending) | |
323 targets = [] | |
324 for splo in si: | |
325 # Get the group of this SPLO. | |
326 df_sub = si[splo] | |
327 | |
328 if df_sub.shape[0] < min_group_size: | |
329 continue | |
330 | |
331 # Get the entities that have the designated dac. | |
332 df_sub = df_sub[np.sign(df_sub[output]) == dac] | |
333 | |
334 # Get the enetities whose rank exceeds the threshods. | |
335 if 0 < thr_rank < 1: | |
336 ix_max_rank = int(thr_rank * df_sub.shape[0]) | |
337 if ix_max_rank == 0: | |
338 ix_max_rank = df_sub.shape[0] | |
339 else: | |
340 ix_max_rank = thr_rank | |
341 | |
342 #print(ix_max_rank) | |
343 df_top = df_sub.iloc[:ix_max_rank, :] | |
344 | |
345 targets.extend(df_top['Source'].tolist()) | |
346 # end of for | |
347 return targets |