Nik*_*Nik 5 python random numpy python-3.x
我想采样~10?来自约 10 人的次数?没有替换和权重的整数,每次选择 10 个元素。每次采样后,我都会改变权重。我在以下脚本中为两种方法(python3 和 numpy)计时。这两种方法对我来说似乎都非常缓慢,你有没有办法加快速度?
import numpy as np
import random
@profile
def test_choices():
population = list(range(10**7))
weights = np.random.uniform(size=10**7)
np_weights = np.array(weights)
def numpy_choice():
np_w = np_weights / sum(np_weights)
c = np.random.choice(population, size=10, replace=False, p=np_w)
def python_choice():
c = []
while len(c) < 10:
c += random.choices(population=population, weights=weights, k=10 - len(c))
c = list(set(c))
for i in range(10**1):
numpy_choice()
python_choice()
add_weight = np.random.uniform()
random_element = random.randint(0, 10**7)
weights[random_element] += add_weight
np_weights[random_element] += add_weight
test_choices()
Run Code Online (Sandbox Code Playgroud)
随着计时器结果:
Line # Hits Time Per Hit % Time Line Contents
==============================================================
24 10 20720062.0 2072006.2 56.6 numpy_choice()
25 10 15593925.0 1559392.5 42.6 python_choice()
Run Code Online (Sandbox Code Playgroud)
这只是对 jdhesa 答案的评论。问题是考虑仅增加一个权重的情况是否有用 -> 是的!
\n例子
\n@nb.njit(parallel=True)\ndef numba_choice_opt(population, weights, k,wc,b_full_wc_calc,ind,value):\n # Get cumulative weights\n if b_full_wc_calc:\n acc=0\n for i in range(weights.shape[0]):\n acc+=weights[i]\n wc[i]=acc\n #Increase only one weight (faster than recalculating the cumulative weight)\n else:\n weights[ind]+=value\n for i in nb.prange(ind,wc.shape[0]):\n wc[i]+=value\n\n # Total of weights\n m = wc[-1]\n # Arrays of sample and sampled indices\n sample = np.empty(k, population.dtype)\n sample_idx = np.full(k, -1, np.int32)\n # Sampling loop\n i = 0\n while i < k:\n # Pick random weight value\n r = m * np.random.rand()\n # Get corresponding index\n idx = np.searchsorted(wc, r, side='right')\n # Check index was not selected before\n # If not using Numba you can just do `np.isin(idx, sample_idx)`\n for j in range(i):\n if sample_idx[j] == idx:\n continue\n # Save sampled value and index\n sample[i] = population[idx]\n sample_idx[i] = population[idx]\n i += 1\n return sample\nRun Code Online (Sandbox Code Playgroud)\n例子
\nnp.random.seed(0)\npopulation = np.random.randint(100, size=1_000_000)\nweights = np.random.rand(len(population))\nk = 10\nwc = np.empty_like(weights)\n\n#Initial calculation \n%timeit numba_choice_opt(population, weights, k,wc,True,0,0)\n#1.41 ms \xc2\xb1 9.21 \xc2\xb5s per loop (mean \xc2\xb1 std. dev. of 7 runs, 1 loop each)\n\n#Increase weight[100] by 3 and calculate\n%timeit numba_choice_opt(population, weights, k,wc,False,100,3)\n#213 \xc2\xb5s \xc2\xb1 6.06 \xc2\xb5s per loop (mean \xc2\xb1 std. dev. of 7 runs, 1000 loops each)\n\n#For comparison\n#Please note that it is the memory allcocation of wc which makes\n#it so much slower than the initial calculation from above\n%timeit numba_choice(population, weights, k)\n#4.23 ms \xc2\xb1 64.9 \xc2\xb5s per loop (mean \xc2\xb1 std. dev. of 7 runs, 1 loop each)\nRun Code Online (Sandbox Code Playgroud)\n
| 归档时间: |
|
| 查看次数: |
336 次 |
| 最近记录: |