计算图形顶点对上函数的双重求和值

ubu*_*oob 10 python numpy graph

给定具有节点对(p,q)的有向图G,我们得到了

在此输入图像描述

我想计算这个递归函数的值,其中L(p)表示节点p的无向链路邻居集.此函数用于第(k + 1)个值.

我知道如何计算L(p)和L(q).

这是我的尝试 -

from __future__ import division
import copy

import numpy as np
import scipy
import scipy.sparse as sp
import time
import warnings




class Algo():
    def __init__(self, c=0.8, max_iter=5, is_verbose=False, eps=1e-3):
        self.c = c
        self.max_iter = max_iter
        self.is_verbose = is_verbose
        self.eps = eps

    def compute_similarity(self, a):
        '''

        Note: The adjacency matrix is a (0,1)-matrix with zeros on its 
        diagonal.
        '''                                 
        a_transpose=np.transpose(a)
        a_undirected=np.add(a,a_transpose)

        self.num_nodes = np.shape(a)[0]
        current_sim = np.eye(self.num_nodes)
        prev_sim = np.zeros(shape=(self.num_nodes, self.num_nodes))

        #Determine the set of Neighbors for each node
        nbs = []

        for i in range(self.num_nodes):

            nbs.append(np.nonzero(a_undirected[:, i])[0])


        for itr in range(self.max_iter):

            if scipy.allclose(current_sim, prev_sim, atol=self.eps):
                break

            prev_sim = copy.deepcopy(current_sim)

            for i in range(self.num_nodes):
                for j in range(self.num_nodes):
                    if i == j:
                        continue

                    if len(nbs[i]) == 0 or len(nbs[j]) == 0:
                        current_sim[i][j] = 0
                    else:
                        #commonSet=0.0
                        differenceSetofp_ij=0.0
                        differenceSetofq_ij=0.0
                        commonSet=len(np.intersect1d(nbs[i],nbs[j]))/len(np.union1d(nbs[i],nbs[j]))


                        for x in np.setdiff1d(nbs[i],nbs[j]):
                            for y in nbs[j]:

                                differenceSetofp_ij+=prev_sim[x][y]
                                differenceSetofp_ij*=1/(len(np.union1d(nbs[i],nbs[j]))*len(nbs[j]))
                                #print differenceSetofp



                        for x in np.setdiff1d(nbs[j],nbs[i]):
                            for y in nbs[i]:

                                differenceSetofq_ij+=prev_sim[x][y]
                                differenceSetofq_ij*=1/(len(np.union1d(nbs[j],nbs[i]))*len(nbs[i])) 

                        current_sim[i][j] = self.c*(commonSet+differenceSetofp_ij+differenceSetofq_ij)


        if self.is_verbose:

            print('SimRank: converged after {0} iterations'.format(itr))
        return current_sim                  



if __name__ == "__main__":

    a = np.array([ [0, 1, 1, 0, 0, 1, 0, 0, 0],
                   [0, 0, 0, 0, 1, 0, 0, 0, 0],
                   [0, 0, 0, 1, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 0, 1, 0, 1],
                   [0, 0, 0, 1, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 0, 1, 0, 0, 0],
                   [0, 0, 0, 0, 0, 0, 1, 0, 1],
                   [0, 0, 0, 0, 0, 0, 0, 0, 0]])                    



    obj = Algo(is_verbose=True, eps=1e-2)
    start_time = time.time()
    S = obj.compute_similarity(a)
    print(S)
    print('Runtime: ', time.time() - start_time, '\n')  
Run Code Online (Sandbox Code Playgroud)

我得到的输出不正确.

Pau*_*zer 6

预期的答案R_inf[1,2] = 0.8R_inf[6,8] = 0.8OP的例子a是应用于有向图的算法的结果a.T,而不是无向的.

这是R_2使用OP的代码(带有一些小修复)和我的代码计算的.并且R_inf也用我的代码计算:

after 2 iterations with pp_loop:
0.8000 0      0      0      0      0      0      0.8000 0     
0      0.8000 0.8000 0      0      0.4000 0.3200 0      0.3200
0      0.8000 0.8000 0      0      0.4000 0.3200 0      0.3200
0      0      0      0.8000 0.4800 0      0      0      0     
0      0      0      0.4800 0.8000 0      0      0      0     
0      0.4000 0.4000 0      0      0.8000 0.1600 0      0.1600
0      0.3200 0.3200 0      0      0.1600 0.8000 0      0.8000
0.8000 0      0      0      0      0      0      0.8000 0     
0      0.3200 0.3200 0      0      0.1600 0.8000 0      0.8000
after 2 iterations with OP:
0.8000 0      0      0      0      0      0      0.8000 0     
0      0.8000 0.8000 0      0      0.4000 0.3200 0      0.3200
0      0.8000 0.8000 0      0      0.4000 0.3200 0      0.3200
0      0      0      0.8000 0.4800 0      0      0      0     
0      0      0      0.4800 0.8000 0      0      0      0     
0      0.4000 0.4000 0      0      0.8000 0.1600 0      0.1600
0      0.3200 0.3200 0      0      0.1600 0.8000 0      0.8000
0.8000 0      0      0      0      0      0      0.8000 0     
0      0.3200 0.3200 0      0      0.1600 0.8000 0      0.8000
in fixed point:
0.8000 0      0      0      0      0      0      0.8000 0     
0      0.8000 0.8000 0      0      0.4000 0.3200 0      0.3200
0      0.8000 0.8000 0      0      0.4000 0.3200 0      0.3200
0      0      0      0.8000 0.4800 0.0960 0.0256 0      0.0256
0      0      0      0.4800 0.8000 0.1280 0      0      0     
0      0.4000 0.4000 0.0960 0.1280 0.8000 0.1600 0      0.1600
0      0.3200 0.3200 0.0256 0      0.1600 0.8000 0      0.8000
0.8000 0      0      0      0      0      0      0.8000 0     
0      0.3200 0.3200 0.0256 0      0.1600 0.8000 0      0.8000
Run Code Online (Sandbox Code Playgroud)

我的解决方案更快,可以计算网络到~1000节点的固定点.

一点解释:策略是用矩阵代数表达递归关系,因为那时我们可以利用blasnumpy/scipy带来的高度优化的动力线性代数.例如,可以通过将当前值的矩阵R_k与转置的邻接矩阵相乘来完成内部求和 - 所有这些求和.考虑到邻接矩阵中的所有零,这可能看起来很浪费,但是blas非常非常快.类似地,我们可以通过将邻接矩阵与其转置等相乘来计算交叉点的大小.

假设最终目标不是R的序列而是固定点,我们可以通过使用适当的线性求解器来节省更多时间,因为递归是线性的(+偏移).由于编写n^2xn^2此操作的矩阵效率不高,我们使用迭代求解器来LinearOperator代替矩阵.

如果公差设置得足够紧,则两种方法都会产生相同的固定点.

代码包括OP的修复:

import numpy as np
from scipy import sparse
from scipy.sparse import linalg

def mock_data(n, p):
    data = (np.random.random((n, n)) < p).astype(int)
    np.einsum('ii->i', data)[:] = 0
    return data

import copy

import scipy
import scipy.sparse as sp
import time
import warnings

class Algo():
    def __init__(self, c=0.8, max_iter=5, is_verbose=False, eps=1e-3):
        self.c = c
        self.max_iter = max_iter
        self.is_verbose = is_verbose
        self.eps = eps

    def compute_similarity(self, a, R0=None, use_undirected=False,
                           no_neighbor_policy='10'):
        '''

        Note: The adjacency matrix is a (0,1)-matrix with zeros on its 
        diagonal.
        '''                                 
        a_transpose=np.transpose(a)
        a_undirected=np.add(a,a_transpose)
        a_use = a_undirected if use_undirected else a

        self.num_nodes = np.shape(a)[0]
        current_sim = np.eye(self.num_nodes) if R0 is None else R0
        prev_sim = np.zeros(shape=(self.num_nodes, self.num_nodes))

        #Determine the set of Neighbors for each node
        nbs = []

        for i in range(self.num_nodes):

            nbs.append(np.nonzero(a_use[i])[0])
        R = []

        for itr in range(self.max_iter):

            if scipy.allclose(current_sim, prev_sim, atol=self.eps):
                break

            prev_sim = copy.deepcopy(current_sim)
            R.append(prev_sim)
            for i in range(self.num_nodes):
                for j in range(self.num_nodes):
                    if len(nbs[i]) == 0 and len(nbs[j]) == 0:
                        current_sim[i][j] = (
                            0 if (no_neighbor_policy=='00' or
                                  (no_neighbor_policy=='10' and i!=j))
                            else self.c)
                    elif i == j:
                        current_sim[i][j] = self.c
                    elif len(nbs[i]) == 0 or len(nbs[j]) == 0:
                        current_sim[i][j] = 0
                    else:
                        #commonSet=0.0
                        differenceSetofp_ij=0.0
                        differenceSetofq_ij=0.0
                        commonSet=len(np.intersect1d(nbs[i],nbs[j]))/np.maximum(len(np.union1d(nbs[i],nbs[j])), 1)


                        for x in np.setdiff1d(nbs[i],nbs[j]):
                            for y in nbs[j]:

                                differenceSetofp_ij+=prev_sim[x][y]
                        differenceSetofp_ij*=1/np.maximum(len(np.union1d(nbs[i],nbs[j]))*len(nbs[j]), 1)
                                #print differenceSetofp



                        for x in np.setdiff1d(nbs[j],nbs[i]):
                            for y in nbs[i]:

                                differenceSetofq_ij+=prev_sim[x][y]
                        differenceSetofq_ij*=1/np.maximum(len(np.union1d(nbs[j],nbs[i]))*len(nbs[i]), 1) 

                        current_sim[i][j] = self.c*(commonSet+differenceSetofp_ij+differenceSetofq_ij)


        if self.is_verbose:

            print('SimRank: converged after {0} iterations'.format(itr))
        R.append(current_sim)
        return np.array(R)

def f_OP(adj, c, max_n, R0=None, use_undirected=False, no_neighbor_policy='10'):
    return Algo(c, max_n).compute_similarity(adj, R0, use_undirected,
                                             no_neighbor_policy)

def f_pp(adj, c, max_n, R0=None, use_undirected=False,
         no_neighbor_policy='10', solver=linalg.gcrotmk):
    n, n = adj.shape
    if use_undirected:
         adj = adj | adj.T
    aind = np.where(adj.T)
    n_neigh = adj.sum(axis=1)
    n_inter = adj @ adj.T
    n_union = np.add.outer(n_neigh, n_neigh) - n_inter
    r_union = c / np.maximum(n_union, 1)
    if no_neighbor_policy == '11':
        n_inter[n_union==0] = 1
        r_union[n_union==0] = c
    elif no_neighbor_policy == '10':
        np.einsum('ii->i', n_inter)[np.einsum('ii->i', n_union)==0] = 1
        np.einsum('ii->i', r_union)[np.einsum('ii->i', n_union)==0] = c
    elif no_neighbor_policy != '00':
        raise ValueError
    avg_ngh = adj.T / np.maximum(n_neigh, 1)
    if solver is None:
        R = np.empty((max_n+1, n, n))
        R[0] = 0 if R0 is None else R0
        if R0 is None:
            np.einsum('ii->i', R[0])[:] = 1       
        for i in range(max_n):
            rowsums = R[i] @ avg_ngh
            rowsums[aind] = 0
            rec_sum = adj @ rowsums
            R[i+1] = r_union * (n_inter + rec_sum + rec_sum.T)
            if np.allclose(R[i+1], R[i], rtol=1e-7, atol=1e-10):
                return R[:i+2]
        return R
    else:
        def LO(x):
            R = x.reshape(n, n)
            rowsums = R @ avg_ngh
            rowsums[aind] = 0
            rec_sum = adj @ rowsums
            return (r_union * (rec_sum + rec_sum.T) - R).ravel()
        R0 = np.identity(n) if R0 == None else R0
        LO = linalg.LinearOperator((n*n, n*n), matvec=LO)#, rmatvec=LO)
        res = solver(LO, (-r_union * n_inter).ravel(), R0.ravel(), tol=1e-9)
        assert res[1]==0
        return res[0].reshape(n, n)

def f_pp_loop(adj, c, max_n, R0=None, use_undirected=False,
              no_neighbor_policy='10'):
    return f_pp(adj, c, max_n, R0, use_undirected, no_neighbor_policy, None)


# test on OP's example:
a = np.array([ [0, 1, 1, 0, 0, 1, 0, 0, 0],
               [0, 0, 0, 0, 1, 0, 0, 0, 0],
               [0, 0, 0, 1, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 1, 0, 1],
               [0, 0, 0, 1, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 1, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 1, 0, 1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0]])

a = a.T
m, c, nnp = 2, 0.8, '11'

sol = {}
for f in f_pp_loop, f_OP:
    name = f.__name__[2:]
    sol[name] = f(a, c, m, None, False, nnp)
    print(f"after {len(sol[name])-1} iterations with {name}:")
    print("\n".join(" ".join("0     " if i == 0 else f"{i:6.4f}" for i in row) for row in sol[name][-1]))
print("in fixed point:")
print("\n".join(" ".join("0     " if i == 0 else f"{i:6.4f}" for i in row) for row in f_pp(a, c, 64, None, False, nnp)))

print()

# demo large network
n, m, p, c, nnp = 1000, 256, 0.02, 0.9, '11'
adj = mock_data(n, p)
print(f'size {n}, maxiter {m}, connection prob {p}, c {c}')
from time import time
t = time()
spp_loop = f_pp_loop(adj, c, m, None, False, nnp)
print('pp_loop: {:8.5f} sec'.format(time()-t))
print('squared rel err after {} iterations: {}'.format(len(spp_loop)-1, np.nanmax(((spp_loop[-1]-spp_loop[-2])/(spp_loop[-1]+spp_loop[-2]))**2)))
t = time()
spp = f_pp(adj, c, m, None, False, nnp)
print('pp: {:8.5f} sec'.format(time()-t))
def comp(a, b):
    print(np.allclose(a, b))
    print('abserr', np.nanmax(np.abs(a-b)), 'relerr', np.nanmax(2 * np.abs(a-b)/(a+b)))
print('fixed points equal:', end=' ')
comp(spp_loop[-1], spp)
Run Code Online (Sandbox Code Playgroud)