如何最有效地使用collections.deque(popleft 与appendleft)

mys*_*ysl 6 python queue performance deque data-structures

我在学习Python数据结构时一直在学习队列,并想问一个有关其使用的问题。

我想有两种方法从队列中追加/弹出。第一种方法是使用deque.append()deque.popleft()。另一种方法是使用deque.appendleft()deque.pop()。两者之间有性能差异吗?如果没有,根据您的经验,哪一种更常用?您是否出于其他原因推荐其中之一?

从我的角度来看,它们本质上做同样的事情,因为它们都实现先进先出。我们将非常感谢您的意见!

kay*_*ya3 7

根据文档

双端队列支持线程安全、内存高效的从双端队列的任意一侧追加和弹出,在任一方向上具有大致相同的 O(1) 性能。

所以两个选项之间不存在渐近差异;无论哪种方式,您的“入队”和“轮询”操作都是在恒定时间内完成的。这是可以预料的,因为双端队列(双端队列)的全部要点是在两侧都有高效的添加和删除操作。

使用timeit证实的经验结果基本上没有差异:

from collections import deque

def append_popleft():
    d = deque()
    for i in range(10000):
        d.append(i)
    for j in range(10000):
        d.popleft()

def appendleft_pop():
    d = deque()
    for i in range(10000):
        d.appendleft(i)
    for j in range(10000):
        d.pop()

import timeit
t = timeit.timeit(append_popleft, number=10000)
print('append / popleft:', t)
t = timeit.timeit(appendleft_pop, number=10000)
print('appendleft / pop:', t)
Run Code Online (Sandbox Code Playgroud)

输出:

append / popleft: 12.000681700999849
appendleft / pop: 11.937629571999423
Run Code Online (Sandbox Code Playgroud)


ggo*_*len 5

使用哪一端并不重要。两者都同样是最佳的。与列表不同,列表仅在列表的右侧/顶部提供 O(1) 追加和弹出操作,保证追加和弹出操作在双端队列的两端如果不是,它的存在就没有意义,我们不妨使用一个列表。

查看CPython 3.12 的源代码,方法poppopleft几乎相同:

static PyObject *
deque_pop_impl(dequeobject *deque)
{
    PyObject *item;
    block *prevblock;

    if (Py_SIZE(deque) == 0) {
        PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
        return NULL;
    }
    item = deque->rightblock->data[deque->rightindex];
    deque->rightindex--;
    Py_SET_SIZE(deque, Py_SIZE(deque) - 1);
    deque->state++;

    if (deque->rightindex < 0) {
        if (Py_SIZE(deque)) {
            prevblock = deque->rightblock->leftlink;
            assert(deque->leftblock != deque->rightblock);
            freeblock(deque, deque->rightblock);
            CHECK_NOT_END(prevblock);
            MARK_END(prevblock->rightlink);
            deque->rightblock = prevblock;
            deque->rightindex = BLOCKLEN - 1;
        } else {
            assert(deque->leftblock == deque->rightblock);
            assert(deque->leftindex == deque->rightindex+1);
            /* re-center instead of freeing a block */
            deque->leftindex = CENTER + 1;
            deque->rightindex = CENTER;
        }
    }
    return item;
}

static PyObject *
deque_popleft_impl(dequeobject *deque)
{
    PyObject *item;
    block *prevblock;

    if (Py_SIZE(deque) == 0) {
        PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
        return NULL;
    }
    assert(deque->leftblock != NULL);
    item = deque->leftblock->data[deque->leftindex];
    deque->leftindex++;
    Py_SET_SIZE(deque, Py_SIZE(deque) - 1);
    deque->state++;

    if (deque->leftindex == BLOCKLEN) {
        if (Py_SIZE(deque)) {
            assert(deque->leftblock != deque->rightblock);
            prevblock = deque->leftblock->rightlink;
            freeblock(deque, deque->leftblock);
            CHECK_NOT_END(prevblock);
            MARK_END(prevblock->leftlink);
            deque->leftblock = prevblock;
            deque->leftindex = 0;
        } else {
            assert(deque->leftblock == deque->rightblock);
            assert(deque->leftindex == deque->rightindex+1);
            /* re-center instead of freeing a block */
            deque->leftindex = CENTER + 1;
            deque->rightindex = CENTER;
        }
    }
    return item;
}
Run Code Online (Sandbox Code Playgroud)

对于appends来说也是如此:

static inline int
deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
    if (deque->rightindex == BLOCKLEN - 1) {
        block *b = newblock(deque);
        if (b == NULL)
            return -1;
        b->leftlink = deque->rightblock;
        CHECK_END(deque->rightblock->rightlink);
        deque->rightblock->rightlink = b;
        deque->rightblock = b;
        MARK_END(b->rightlink);
        deque->rightindex = -1;
    }
    Py_SET_SIZE(deque, Py_SIZE(deque) + 1);
    deque->rightindex++;
    deque->rightblock->data[deque->rightindex] = item;
    if (NEEDS_TRIM(deque, maxlen)) {
        PyObject *olditem = deque_popleft_impl(deque);
        Py_DECREF(olditem);
    } else {
        deque->state++;
    }
    return 0;
}

static inline int
deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
    if (deque->leftindex == 0) {
        block *b = newblock(deque);
        if (b == NULL)
            return -1;
        b->rightlink = deque->leftblock;
        CHECK_END(deque->leftblock->leftlink);
        deque->leftblock->leftlink = b;
        deque->leftblock = b;
        MARK_END(b->leftlink);
        deque->leftindex = BLOCKLEN;
    }
    Py_SET_SIZE(deque, Py_SIZE(deque) + 1);
    deque->leftindex--;
    deque->leftblock->data[deque->leftindex] = item;
    if (NEEDS_TRIM(deque, deque->maxlen)) {
        PyObject *olditem = deque_pop_impl(deque);
        Py_DECREF(olditem);
    } else {
        deque->state++;
    }
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

CPython 中的双端队列以块的形式分配和释放内存,以最大限度地减少堆访问并提高缓存局部性(数组中的连续元素可能属于同一缓存块)。这些方法都根据需要分配或释放内存,并增加/减少一些索引。都是恒定时间。