Max*_*xim 7 python arrays numpy max vectorization
假设我有两个numpy数组,A
形状(d, f)
和I
形状(d,)
包含索引0..n
,例如
I = np.array([0, 0, 1, 0, 2, 1])
A = np.arange(12).reshape(6, 2)
Run Code Online (Sandbox Code Playgroud)
我要寻找一个快速的方式,使削减,特别是sum
,mean
和max
,在所有切片A[I == i, :]
; 一个慢版本
results = np.zeros((I.max() + 1, A.shape[1]))
for i in np.unique(I):
results[i, :] = np.mean(A[I == i, :], axis=0)
Run Code Online (Sandbox Code Playgroud)
在这种情况下给出
results = [[ 2.66666667, 3.66666667],
[ 7. , 8. ],
[ 8. , 9. ]])
Run Code Online (Sandbox Code Playgroud)
编辑:我根据Divakar的回答和之前的海报(已删除)pandas
回答做了一些时间安排.
时间码:
from __future__ import division, print_function
import numpy as np, pandas as pd
from time import time
np.random.seed(0)
d = 500000
f = 500
n = 500
I = np.hstack((np.arange(n), np.random.randint(n, size=(d - n,))))
np.random.shuffle(I)
A = np.random.rand(d, f)
def reduce_naive(A, I, op="avg"):
target_dtype = (np.float if op=="avg" else A.dtype)
results = np.zeros((I.max() + 1, A.shape[1]), dtype=target_dtype)
npop = {"avg": np.mean, "sum": np.sum, "max": np.max}.get(op)
for i in np.unique(I):
results[i, :] = npop(A[I == i, :], axis=0)
return results
def reduce_reduceat(A, I, op="avg"):
sidx = I.argsort()
sI = I[sidx]
sortedA = A[sidx]
idx = np.r_[ 0, np.flatnonzero(sI[1:] != sI[:-1])+1 ]
if op == "max":
return np.maximum.reduceat(sortedA, idx, axis=0)
sums = np.add.reduceat(sortedA, idx, axis=0)
if op == "sum":
return sums
if op == "avg":
count = np.r_[idx[1:] - idx[:-1], A.shape[0] - idx[-1]]
return sums/count.astype(float)[:,None]
def reduce_bincount(A, I, op="avg"):
ids = (I[:,None] + (I.max()+1)*np.arange(A.shape[1])).ravel()
sums = np.bincount(ids, A.ravel()).reshape(A.shape[1],-1).T
if op == "sum":
return sums
if op == "avg":
return sums/np.bincount(ids).reshape(A.shape[1],-1).T
def reduce_pandas(A, I, op="avg"):
group = pd.concat([pd.DataFrame(A), pd.DataFrame(I, columns=("i",))
], axis=1
).groupby('i')
if op == "sum":
return group.sum().values
if op == "avg":
return group.mean().values
if op == "max":
return group.max().values
def reduce_hybrid(A, I, op="avg"):
sidx = I.argsort()
sI = I[sidx]
sortedA = A[sidx]
idx = np.r_[ 0, np.flatnonzero(sI[1:] != sI[:-1])+1 ]
unq_sI = sI[idx]
m = I.max()+1
N = A.shape[1]
target_dtype = (np.float if op=="avg" else A.dtype)
out = np.zeros((m,N),dtype=target_dtype)
ss_idx = np.r_[idx,A.shape[0]]
npop = {"avg": np.mean, "sum": np.sum, "max": np.max}.get(op)
for i in range(len(idx)):
out[unq_sI[i]] = npop(sortedA[ss_idx[i]:ss_idx[i+1]], axis=0)
return out
for op in ("sum", "avg", "max"):
for name, method in (("naive ", reduce_naive),
("reduceat", reduce_reduceat),
("pandas ", reduce_pandas),
("bincount", reduce_bincount),
("hybrid ", reduce_hybrid)
("numba ", reduce_numba)
):
if op == "max" and name == "bincount":
continue
# if name is not "naive":
# assert np.allclose(method(A, I, op), reduce_naive(A, I, op))
times = []
for tries in range(3):
time0 = time(); method(A, I, op)
times.append(time() - time0);
print(name, op, "{:.2f}".format(np.min(times)))
print()
Run Code Online (Sandbox Code Playgroud)
时间:
naive sum 1.10
reduceat sum 4.62
pandas sum 5.29
bincount sum 1.54
hybrid sum 0.62
numba sum 0.31
naive avg 1.12
reduceat avg 4.45
pandas avg 5.23
bincount avg 2.43
hybrid avg 0.61
numba avg 0.33
naive max 1.19
reduceat max 3.18
pandas max 5.24
hybrid max 0.72
numba max 0.34
Run Code Online (Sandbox Code Playgroud)
(我选择d
并n
作为我的用例的典型值 - 我在我的答案中添加了numba版本的代码).
方法#1:使用NumPy ufunc reduceat
我们有ufuncs
三个减少操作,幸运的是我们还必须 ufunc.reduceat
沿轴线以特定间隔执行限制.所以,使用这些,我们将这三个操作计算如此 -
# Gives us sorted array based on input indices I and indices at which the
# sorted array should be interval-limited for reduceat operations to be
# applied later on using those results
def sorted_array_intervals(A, I):
# Compute sort indices for I. To be later used for sorting A based on it.
sidx = I.argsort()
sI = I[sidx]
sortedA = A[sidx]
# Get indices at which intervals change. Also, get count in each interval
idx = np.r_[ 0, np.flatnonzero(sI[1:] != sI[:-1])+1 ]
return sortedA, idx
# Groupby sum reduction using the interval indices
# to perform interval-limited ufunc reductions
def groupby_sum(A, I):
sortedA, idx = sorted_array_intervals(A,I)
return np.add.reduceat(sortedA, idx, axis=0)
# Groupby mean reduction
def groupby_mean(A, I):
sortedA, idx = sorted_array_intervals(A,I)
sums = np.add.reduceat(sortedA, idx, axis=0)
count = np.r_[idx[1:] - idx[:-1], A.shape[0] - idx[-1]]
return sums/count.astype(float)[:,None]
# Groupby max reduction
def groupby_max(A, I):
sortedA, idx = sorted_array_intervals(A,I)
return np.maximum.reduceat(sortedA, idx, axis=0)
Run Code Online (Sandbox Code Playgroud)
因此,如果我们需要所有这些操作,我们可以重用一个实例sorted_array_intervals
,如此 -
def groupby_sum_mean_max(A, I):
sortedA, idx = sorted_array_intervals(A,I)
sums = np.add.reduceat(sortedA, idx, axis=0)
count = np.r_[idx[1:] - idx[:-1], A.shape[0] - idx[-1]]
avgs = sums/count.astype(float)[:,None]
maxs = np.maximum.reduceat(sortedA, idx, axis=0)
return sums, avgs, maxs
Run Code Online (Sandbox Code Playgroud)
方法#1-B:混合版本(排序+切片+减少)
这是一个混合版本,它确实需要帮助sorted_array_intervals
来获取排序的数组和间隔变为下一组的索引,但是在最后一个阶段使用切片来对每个间隔求和并对每个组迭代地执行此操作.在我们正在使用时,切片有助于此views
.
实现看起来像这样 -
def reduce_hybrid(A, I, op="avg"):
sidx = I.argsort()
sI = I[sidx]
sortedA = A[sidx]
# Get indices at which intervals change. Also, get count in each interval
idx = np.r_[ 0, np.flatnonzero(sI[1:] != sI[:-1])+1 ]
unq_sI = sI[idx]
m = I.max()+1
N = A.shape[1]
target_dtype = (np.float if op=="avg" else A.dtype)
out = np.zeros((m,N),dtype=target_dtype)
ss_idx = np.r_[idx,A.shape[0]]
npop = {"avg": np.mean, "sum": np.sum, "max": np.max}.get(op)
for i in range(len(idx)):
out[unq_sI[i]] = npop(sortedA[ss_idx[i]:ss_idx[i+1]], axis=0)
return out
Run Code Online (Sandbox Code Playgroud)
运行时测试(使用问题中发布的基准测试中的设置) -
In [432]: d = 500000
...: f = 500
...: n = 500
...: I = np.hstack((np.arange(n), np.random.randint(n, size=(d - n,))))
...: np.random.shuffle(I)
...: A = np.random.rand(d, f)
...:
In [433]: %timeit reduce_naive(A, I, op="sum")
...: %timeit reduce_hybrid(A, I, op="sum")
...:
1 loops, best of 3: 1.03 s per loop
1 loops, best of 3: 549 ms per loop
In [434]: %timeit reduce_naive(A, I, op="avg")
...: %timeit reduce_hybrid(A, I, op="avg")
...:
1 loops, best of 3: 1.04 s per loop
1 loops, best of 3: 550 ms per loop
In [435]: %timeit reduce_naive(A, I, op="max")
...: %timeit reduce_hybrid(A, I, op="max")
...:
1 loops, best of 3: 1.14 s per loop
1 loops, best of 3: 631 ms per loop
Run Code Online (Sandbox Code Playgroud)
方法#2:使用NumPy bincount
这是另一种使用np.bincount
基于bin的求和的方法.因此,有了它,我们可以计算总和和平均值,并避免在过程中进行排序,如此 -
ids = (I[:,None] + (I.max()+1)*np.arange(A.shape[1])).ravel()
sums = np.bincount(ids, A.ravel()).reshape(A.shape[1],-1).T
avgs = sums/np.bincount(ids).reshape(A.shape[1],-1).T
Run Code Online (Sandbox Code Playgroud)