ubu*_*oob 10 python numpy graph
给定具有节点对(p,q)的有向图G,我们得到了
我想计算这个递归函数的值,其中L(p)表示节点p的无向链路邻居集.此函数用于第(k + 1)个值.
我知道如何计算L(p)和L(q).
这是我的尝试 -
from __future__ import division
import copy
import numpy as np
import scipy
import scipy.sparse as sp
import time
import warnings
class Algo():
def __init__(self, c=0.8, max_iter=5, is_verbose=False, eps=1e-3):
self.c = c
self.max_iter = max_iter
self.is_verbose = is_verbose
self.eps = eps
def compute_similarity(self, a):
'''
Note: The adjacency matrix is a (0,1)-matrix with zeros on its
diagonal.
'''
a_transpose=np.transpose(a)
a_undirected=np.add(a,a_transpose)
self.num_nodes = np.shape(a)[0]
current_sim = np.eye(self.num_nodes)
prev_sim = np.zeros(shape=(self.num_nodes, self.num_nodes))
#Determine the set of Neighbors for each node
nbs = []
for i in range(self.num_nodes):
nbs.append(np.nonzero(a_undirected[:, i])[0])
for itr in range(self.max_iter):
if scipy.allclose(current_sim, prev_sim, atol=self.eps):
break
prev_sim = copy.deepcopy(current_sim)
for i in range(self.num_nodes):
for j in range(self.num_nodes):
if i == j:
continue
if len(nbs[i]) == 0 or len(nbs[j]) == 0:
current_sim[i][j] = 0
else:
#commonSet=0.0
differenceSetofp_ij=0.0
differenceSetofq_ij=0.0
commonSet=len(np.intersect1d(nbs[i],nbs[j]))/len(np.union1d(nbs[i],nbs[j]))
for x in np.setdiff1d(nbs[i],nbs[j]):
for y in nbs[j]:
differenceSetofp_ij+=prev_sim[x][y]
differenceSetofp_ij*=1/(len(np.union1d(nbs[i],nbs[j]))*len(nbs[j]))
#print differenceSetofp
for x in np.setdiff1d(nbs[j],nbs[i]):
for y in nbs[i]:
differenceSetofq_ij+=prev_sim[x][y]
differenceSetofq_ij*=1/(len(np.union1d(nbs[j],nbs[i]))*len(nbs[i]))
current_sim[i][j] = self.c*(commonSet+differenceSetofp_ij+differenceSetofq_ij)
if self.is_verbose:
print('SimRank: converged after {0} iterations'.format(itr))
return current_sim
if __name__ == "__main__":
a = np.array([ [0, 1, 1, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0]])
obj = Algo(is_verbose=True, eps=1e-2)
start_time = time.time()
S = obj.compute_similarity(a)
print(S)
print('Runtime: ', time.time() - start_time, '\n')
Run Code Online (Sandbox Code Playgroud)
我得到的输出不正确.
预期的答案R_inf[1,2] = 0.8和R_inf[6,8] = 0.8OP的例子a是应用于有向图的算法的结果a.T,而不是无向的.
这是R_2使用OP的代码(带有一些小修复)和我的代码计算的.并且R_inf也用我的代码计算:
after 2 iterations with pp_loop:
0.8000 0 0 0 0 0 0 0.8000 0
0 0.8000 0.8000 0 0 0.4000 0.3200 0 0.3200
0 0.8000 0.8000 0 0 0.4000 0.3200 0 0.3200
0 0 0 0.8000 0.4800 0 0 0 0
0 0 0 0.4800 0.8000 0 0 0 0
0 0.4000 0.4000 0 0 0.8000 0.1600 0 0.1600
0 0.3200 0.3200 0 0 0.1600 0.8000 0 0.8000
0.8000 0 0 0 0 0 0 0.8000 0
0 0.3200 0.3200 0 0 0.1600 0.8000 0 0.8000
after 2 iterations with OP:
0.8000 0 0 0 0 0 0 0.8000 0
0 0.8000 0.8000 0 0 0.4000 0.3200 0 0.3200
0 0.8000 0.8000 0 0 0.4000 0.3200 0 0.3200
0 0 0 0.8000 0.4800 0 0 0 0
0 0 0 0.4800 0.8000 0 0 0 0
0 0.4000 0.4000 0 0 0.8000 0.1600 0 0.1600
0 0.3200 0.3200 0 0 0.1600 0.8000 0 0.8000
0.8000 0 0 0 0 0 0 0.8000 0
0 0.3200 0.3200 0 0 0.1600 0.8000 0 0.8000
in fixed point:
0.8000 0 0 0 0 0 0 0.8000 0
0 0.8000 0.8000 0 0 0.4000 0.3200 0 0.3200
0 0.8000 0.8000 0 0 0.4000 0.3200 0 0.3200
0 0 0 0.8000 0.4800 0.0960 0.0256 0 0.0256
0 0 0 0.4800 0.8000 0.1280 0 0 0
0 0.4000 0.4000 0.0960 0.1280 0.8000 0.1600 0 0.1600
0 0.3200 0.3200 0.0256 0 0.1600 0.8000 0 0.8000
0.8000 0 0 0 0 0 0 0.8000 0
0 0.3200 0.3200 0.0256 0 0.1600 0.8000 0 0.8000
Run Code Online (Sandbox Code Playgroud)
我的解决方案更快,可以计算网络到~1000节点的固定点.
一点解释:策略是用矩阵代数表达递归关系,因为那时我们可以利用blasnumpy/scipy带来的高度优化的动力线性代数.例如,可以通过将当前值的矩阵R_k与转置的邻接矩阵相乘来完成内部求和 - 所有这些求和.考虑到邻接矩阵中的所有零,这可能看起来很浪费,但是blas非常非常快.类似地,我们可以通过将邻接矩阵与其转置等相乘来计算交叉点的大小.
假设最终目标不是R的序列而是固定点,我们可以通过使用适当的线性求解器来节省更多时间,因为递归是线性的(+偏移).由于编写n^2xn^2此操作的矩阵效率不高,我们使用迭代求解器来LinearOperator代替矩阵.
如果公差设置得足够紧,则两种方法都会产生相同的固定点.
代码包括OP的修复:
import numpy as np
from scipy import sparse
from scipy.sparse import linalg
def mock_data(n, p):
data = (np.random.random((n, n)) < p).astype(int)
np.einsum('ii->i', data)[:] = 0
return data
import copy
import scipy
import scipy.sparse as sp
import time
import warnings
class Algo():
def __init__(self, c=0.8, max_iter=5, is_verbose=False, eps=1e-3):
self.c = c
self.max_iter = max_iter
self.is_verbose = is_verbose
self.eps = eps
def compute_similarity(self, a, R0=None, use_undirected=False,
no_neighbor_policy='10'):
'''
Note: The adjacency matrix is a (0,1)-matrix with zeros on its
diagonal.
'''
a_transpose=np.transpose(a)
a_undirected=np.add(a,a_transpose)
a_use = a_undirected if use_undirected else a
self.num_nodes = np.shape(a)[0]
current_sim = np.eye(self.num_nodes) if R0 is None else R0
prev_sim = np.zeros(shape=(self.num_nodes, self.num_nodes))
#Determine the set of Neighbors for each node
nbs = []
for i in range(self.num_nodes):
nbs.append(np.nonzero(a_use[i])[0])
R = []
for itr in range(self.max_iter):
if scipy.allclose(current_sim, prev_sim, atol=self.eps):
break
prev_sim = copy.deepcopy(current_sim)
R.append(prev_sim)
for i in range(self.num_nodes):
for j in range(self.num_nodes):
if len(nbs[i]) == 0 and len(nbs[j]) == 0:
current_sim[i][j] = (
0 if (no_neighbor_policy=='00' or
(no_neighbor_policy=='10' and i!=j))
else self.c)
elif i == j:
current_sim[i][j] = self.c
elif len(nbs[i]) == 0 or len(nbs[j]) == 0:
current_sim[i][j] = 0
else:
#commonSet=0.0
differenceSetofp_ij=0.0
differenceSetofq_ij=0.0
commonSet=len(np.intersect1d(nbs[i],nbs[j]))/np.maximum(len(np.union1d(nbs[i],nbs[j])), 1)
for x in np.setdiff1d(nbs[i],nbs[j]):
for y in nbs[j]:
differenceSetofp_ij+=prev_sim[x][y]
differenceSetofp_ij*=1/np.maximum(len(np.union1d(nbs[i],nbs[j]))*len(nbs[j]), 1)
#print differenceSetofp
for x in np.setdiff1d(nbs[j],nbs[i]):
for y in nbs[i]:
differenceSetofq_ij+=prev_sim[x][y]
differenceSetofq_ij*=1/np.maximum(len(np.union1d(nbs[j],nbs[i]))*len(nbs[i]), 1)
current_sim[i][j] = self.c*(commonSet+differenceSetofp_ij+differenceSetofq_ij)
if self.is_verbose:
print('SimRank: converged after {0} iterations'.format(itr))
R.append(current_sim)
return np.array(R)
def f_OP(adj, c, max_n, R0=None, use_undirected=False, no_neighbor_policy='10'):
return Algo(c, max_n).compute_similarity(adj, R0, use_undirected,
no_neighbor_policy)
def f_pp(adj, c, max_n, R0=None, use_undirected=False,
no_neighbor_policy='10', solver=linalg.gcrotmk):
n, n = adj.shape
if use_undirected:
adj = adj | adj.T
aind = np.where(adj.T)
n_neigh = adj.sum(axis=1)
n_inter = adj @ adj.T
n_union = np.add.outer(n_neigh, n_neigh) - n_inter
r_union = c / np.maximum(n_union, 1)
if no_neighbor_policy == '11':
n_inter[n_union==0] = 1
r_union[n_union==0] = c
elif no_neighbor_policy == '10':
np.einsum('ii->i', n_inter)[np.einsum('ii->i', n_union)==0] = 1
np.einsum('ii->i', r_union)[np.einsum('ii->i', n_union)==0] = c
elif no_neighbor_policy != '00':
raise ValueError
avg_ngh = adj.T / np.maximum(n_neigh, 1)
if solver is None:
R = np.empty((max_n+1, n, n))
R[0] = 0 if R0 is None else R0
if R0 is None:
np.einsum('ii->i', R[0])[:] = 1
for i in range(max_n):
rowsums = R[i] @ avg_ngh
rowsums[aind] = 0
rec_sum = adj @ rowsums
R[i+1] = r_union * (n_inter + rec_sum + rec_sum.T)
if np.allclose(R[i+1], R[i], rtol=1e-7, atol=1e-10):
return R[:i+2]
return R
else:
def LO(x):
R = x.reshape(n, n)
rowsums = R @ avg_ngh
rowsums[aind] = 0
rec_sum = adj @ rowsums
return (r_union * (rec_sum + rec_sum.T) - R).ravel()
R0 = np.identity(n) if R0 == None else R0
LO = linalg.LinearOperator((n*n, n*n), matvec=LO)#, rmatvec=LO)
res = solver(LO, (-r_union * n_inter).ravel(), R0.ravel(), tol=1e-9)
assert res[1]==0
return res[0].reshape(n, n)
def f_pp_loop(adj, c, max_n, R0=None, use_undirected=False,
no_neighbor_policy='10'):
return f_pp(adj, c, max_n, R0, use_undirected, no_neighbor_policy, None)
# test on OP's example:
a = np.array([ [0, 1, 1, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0]])
a = a.T
m, c, nnp = 2, 0.8, '11'
sol = {}
for f in f_pp_loop, f_OP:
name = f.__name__[2:]
sol[name] = f(a, c, m, None, False, nnp)
print(f"after {len(sol[name])-1} iterations with {name}:")
print("\n".join(" ".join("0 " if i == 0 else f"{i:6.4f}" for i in row) for row in sol[name][-1]))
print("in fixed point:")
print("\n".join(" ".join("0 " if i == 0 else f"{i:6.4f}" for i in row) for row in f_pp(a, c, 64, None, False, nnp)))
print()
# demo large network
n, m, p, c, nnp = 1000, 256, 0.02, 0.9, '11'
adj = mock_data(n, p)
print(f'size {n}, maxiter {m}, connection prob {p}, c {c}')
from time import time
t = time()
spp_loop = f_pp_loop(adj, c, m, None, False, nnp)
print('pp_loop: {:8.5f} sec'.format(time()-t))
print('squared rel err after {} iterations: {}'.format(len(spp_loop)-1, np.nanmax(((spp_loop[-1]-spp_loop[-2])/(spp_loop[-1]+spp_loop[-2]))**2)))
t = time()
spp = f_pp(adj, c, m, None, False, nnp)
print('pp: {:8.5f} sec'.format(time()-t))
def comp(a, b):
print(np.allclose(a, b))
print('abserr', np.nanmax(np.abs(a-b)), 'relerr', np.nanmax(2 * np.abs(a-b)/(a+b)))
print('fixed points equal:', end=' ')
comp(spp_loop[-1], spp)
Run Code Online (Sandbox Code Playgroud)