C++ large deque - 程序需要很长时间才能退出?

5 c++ performance stl deque

考虑以下 C++ 程序:

#include <deque>
#include <iostream>

using namespace std;

int main()
{
    deque<double> d(30000000);
    cout << "Done\n";
}
Run Code Online (Sandbox Code Playgroud)

第一行中的内存分配只需要一秒钟,但在打印之后Done,需要 33 秒 (!) 退出回终端。将元素数量减少到 20000000 将该时间减少到 22 秒,因此显然它与元素数量呈线性关系。

我在 Windows 10 上编译,同样的事情发生在 GCC 10.2.0 和 Visual Studio 2019 上。

这里发生了什么?我是否以deque不应该使用的方式使用它?

编辑:

#include <deque>
#include <iostream>

using namespace std;

void test_deque()
{
    deque<double> d(30000000);
    cout << "Function done\n";
}

int main()
{
    test_deque();
    cout << "Main done\n";
}
Run Code Online (Sandbox Code Playgroud)

现在它会打印Function done,然后有 33 秒的延迟。所以我认为这与函数退出时执行的析构函数有关。但是为什么销毁 240 MB 的内存需要这么长时间?

编辑 2:在 Ubuntu 上用 GCC 尝试过它(第二个版本),运行只需要几分之一秒!与一些在线 C++ 编译器相同。这是 Windows 特有的问题吗?

编辑 3:运行vector它也需要几分之一秒。但是,使用list(和forward_list) 我得到了类似的极长延迟。

编辑 4:在发布(而不是调试)配置中使用 MSVC 进行编译也需要几分之一秒。我不确定 GCC 相当于什么,但是使用-O3(最大优化)执行时间仍然是 33 秒。

rus*_*tyx 0

MSVC 有一个内置的分析器。我们可以运行它(按 Alt-F2)来查看大部分 CPU 时间都花在构造函数和析构函数中,它们分别调用deque::resize()deque::_Tidy()运行。

msvc 分析器

如果我们进一步深入,我们会发现这deque::emplace_back()会产生相当多的代码

#define _PUSH_BACK_BEGIN                                                                                \
    if ((_Myoff() + _Mysize()) % _DEQUESIZ == 0 && _Mapsize() <= (_Mysize() + _DEQUESIZ) / _DEQUESIZ) { \
        _Growmap(1);                                                                                    \
    }                                                                                                   \
    _Myoff() &= _Mapsize() * _DEQUESIZ - 1;                                                             \
    size_type _Newoff = _Myoff() + _Mysize();                                                           \
    size_type _Block  = _Getblock(_Newoff);                                                             \
    if (_Map()[_Block] == nullptr) {                                                                    \
        _Map()[_Block] = _Getal().allocate(_DEQUESIZ);                                                  \
    }

#define _PUSH_BACK_END ++_Mysize()

    template <class... _Valty>
    decltype(auto) emplace_back(_Valty&&... _Val) {
        _Orphan_all();
        _PUSH_BACK_BEGIN;
        _Alty_traits::construct(
            _Getal(), _Unfancy(_Map()[_Block] + _Newoff % _DEQUESIZ), _STD forward<_Valty>(_Val)...);
        _PUSH_BACK_END;

#if _HAS_CXX17
        return back();
#endif // _HAS_CXX17
    }
Run Code Online (Sandbox Code Playgroud)

拆解图:

    template <class... _Valty>
    decltype(auto) emplace_back(_Valty&&... _Val) {
00007FF674A238E0  mov         qword ptr [rsp+8],rcx  
00007FF674A238E5  push        rbp  
00007FF674A238E6  push        rdi  
00007FF674A238E7  sub         rsp,138h  
00007FF674A238EE  lea         rbp,[rsp+20h]  
00007FF674A238F3  mov         rdi,rsp  
00007FF674A238F6  mov         ecx,4Eh  
00007FF674A238FB  mov         eax,0CCCCCCCCh  
00007FF674A23900  rep stos    dword ptr [rdi]  
00007FF674A23902  mov         rcx,qword ptr [rsp+158h]  
00007FF674A2390A  lea         rcx,[__0657B1E2_deque (07FF674A3E02Fh)]  
00007FF674A23911  call        __CheckForDebuggerJustMyCode (07FF674A21159h)  
        _Orphan_all();
00007FF674A23916  mov         rcx,qword ptr [this]  
00007FF674A2391D  call        std::deque<double,std::allocator<double> >::_Orphan_all (07FF674A217FDh)  
        _PUSH_BACK_BEGIN;
00007FF674A23922  mov         rcx,qword ptr [this]  
00007FF674A23929  call        std::deque<double,std::allocator<double> >::_Myoff (07FF674A2139Dh)  
00007FF674A2392E  mov         qword ptr [rbp+0F8h],rax  
00007FF674A23935  mov         rcx,qword ptr [this]  
00007FF674A2393C  call        std::deque<double,std::allocator<double> >::_Mysize (07FF674A211B8h)  
00007FF674A23941  mov         rcx,qword ptr [rbp+0F8h]  
00007FF674A23948  mov         rcx,qword ptr [rcx]  
00007FF674A2394B  add         rcx,qword ptr [rax]  
00007FF674A2394E  mov         rax,rcx  
00007FF674A23951  xor         edx,edx  
00007FF674A23953  mov         ecx,2  
00007FF674A23958  div         rax,rcx  
00007FF674A2395B  mov         rax,rdx  
00007FF674A2395E  test        rax,rax  
00007FF674A23961  jne         std::deque<double,std::allocator<double> >::emplace_back<>+0D0h (07FF674A239B0h)  
00007FF674A23963  mov         rcx,qword ptr [this]  
00007FF674A2396A  call        std::deque<double,std::allocator<double> >::_Mapsize (07FF674A214BFh)  
00007FF674A2396F  mov         qword ptr [rbp+0F8h],rax  
00007FF674A23976  mov         rcx,qword ptr [this]  
00007FF674A2397D  call        std::deque<double,std::allocator<double> >::_Mysize (07FF674A211B8h)  
00007FF674A23982  mov         rax,qword ptr [rax]  
00007FF674A23985  add         rax,2  
00007FF674A23989  xor         edx,edx  
00007FF674A2398B  mov         ecx,2  
00007FF674A23990  div         rax,rcx  
00007FF674A23993  mov         rcx,qword ptr [rbp+0F8h]  
00007FF674A2399A  cmp         qword ptr [rcx],rax  
00007FF674A2399D  ja          std::deque<double,std::allocator<double> >::emplace_back<>+0D0h (07FF674A239B0h)  
00007FF674A2399F  mov         edx,1  
00007FF674A239A4  mov         rcx,qword ptr [this]  
00007FF674A239AB  call        std::deque<double,std::allocator<double> >::_Growmap (07FF674A21640h)  
00007FF674A239B0  mov         rcx,qword ptr [this]  
00007FF674A239B7  call        std::deque<double,std::allocator<double> >::_Mapsize (07FF674A214BFh)  
00007FF674A239BC  mov         rax,qword ptr [rax]  
00007FF674A239BF  lea         rax,[rax+rax-1]  
00007FF674A239C4  mov         qword ptr [rbp+0F8h],rax  
00007FF674A239CB  mov         rcx,qword ptr [this]  
00007FF674A239D2  call        std::deque<double,std::allocator<double> >::_Myoff (07FF674A2139Dh)  
00007FF674A239D7  mov         qword ptr [rbp+100h],rax  
00007FF674A239DE  mov         rax,qword ptr [rbp+100h]  
00007FF674A239E5  mov         rax,qword ptr [rax]  
00007FF674A239E8  mov         qword ptr [rbp+108h],rax  
00007FF674A239EF  mov         rax,qword ptr [rbp+0F8h]  
00007FF674A239F6  mov         rcx,qword ptr [rbp+108h]  
00007FF674A239FD  and         rcx,rax  
00007FF674A23A00  mov         rax,rcx  
00007FF674A23A03  mov         rcx,qword ptr [rbp+100h]  
00007FF674A23A0A  mov         qword ptr [rcx],rax  
00007FF674A23A0D  mov         rcx,qword ptr [this]  
00007FF674A23A14  call        std::deque<double,std::allocator<double> >::_Myoff (07FF674A2139Dh)  
00007FF674A23A19  mov         qword ptr [rbp+0F8h],rax  
00007FF674A23A20  mov         rcx,qword ptr [this]  
00007FF674A23A27  call        std::deque<double,std::allocator<double> >::_Mysize (07FF674A211B8h)  
00007FF674A23A2C  mov         rcx,qword ptr [rbp+0F8h]  
00007FF674A23A33  mov         rcx,qword ptr [rcx]  
00007FF674A23A36  add         rcx,qword ptr [rax]  
00007FF674A23A39  mov         rax,rcx  
00007FF674A23A3C  mov         qword ptr [_Newoff],rax  
00007FF674A23A40  mov         rdx,qword ptr [_Newoff]  
00007FF674A23A44  mov         rcx,qword ptr [this]  
00007FF674A23A4B  call        std::deque<double,std::allocator<double> >::_Getblock (07FF674A21334h)  
00007FF674A23A50  mov         qword ptr [_Block],rax  
00007FF674A23A54  mov         rcx,qword ptr [this]  
00007FF674A23A5B  call        std::deque<double,std::allocator<double> >::_Map (07FF674A21753h)  
00007FF674A23A60  mov         rax,qword ptr [rax]  
00007FF674A23A63  mov         rcx,qword ptr [_Block]  
00007FF674A23A67  cmp         qword ptr [rax+rcx*8],0  
00007FF674A23A6C  jne         std::deque<double,std::allocator<double> >::emplace_back<>+1D7h (07FF674A23AB7h)  
00007FF674A23A6E  mov         rcx,qword ptr [this]  
00007FF674A23A75  call        std::deque<double,std::allocator<double> >::_Getal (07FF674A216CCh)  
00007FF674A23A7A  mov         qword ptr [rbp+0F8h],rax  
00007FF674A23A81  mov         edx,2  
00007FF674A23A86  mov         rcx,qword ptr [rbp+0F8h]  
00007FF674A23A8D  call        std::allocator<double>::allocate (07FF674A216C7h)  
00007FF674A23A92  mov         qword ptr [rbp+100h],rax  
00007FF674A23A99  mov         rcx,qword ptr [this]  
00007FF674A23AA0  call        std::deque<double,std::allocator<double> >::_Map (07FF674A21753h)  
00007FF674A23AA5  mov         rax,qword ptr [rax]  
00007FF674A23AA8  mov         rcx,qword ptr [_Block]  
00007FF674A23AAC  mov         rdx,qword ptr [rbp+100h]  
00007FF674A23AB3  mov         qword ptr [rax+rcx*8],rdx  
        _Alty_traits::construct(
00007FF674A23AB7  mov         rcx,qword ptr [this]  
00007FF674A23ABE  call        std::deque<double,std::allocator<double> >::_Map (07FF674A21753h)  
00007FF674A23AC3  mov         rax,qword ptr [rax]  
00007FF674A23AC6  mov         qword ptr [rbp+0F8h],rax  
00007FF674A23ACD  xor         edx,edx  
00007FF674A23ACF  mov         rax,qword ptr [_Newoff]  
00007FF674A23AD3  mov         ecx,2  
00007FF674A23AD8  div         rax,rcx  
00007FF674A23ADB  mov         rax,rdx  
00007FF674A23ADE  mov         rcx,qword ptr [_Block]  
00007FF674A23AE2  mov         rdx,qword ptr [rbp+0F8h]  
00007FF674A23AE9  mov         rcx,qword ptr [rdx+rcx*8]  
00007FF674A23AED  lea         rax,[rcx+rax*8]  
00007FF674A23AF1  mov         rcx,rax  
00007FF674A23AF4  call        std::_Unfancy<double> (07FF674A214A6h)  
00007FF674A23AF9  mov         qword ptr [rbp+100h],rax  
00007FF674A23B00  mov         rcx,qword ptr [this]  
00007FF674A23B07  call        std::deque<double,std::allocator<double> >::_Getal (07FF674A216CCh)  
00007FF674A23B0C  mov         qword ptr [rbp+108h],rax  
00007FF674A23B13  mov         rdx,qword ptr [rbp+100h]  
00007FF674A23B1A  mov         rcx,qword ptr [rbp+108h]  
00007FF674A23B21  call        std::_Default_allocator_traits<std::allocator<double> >::construct<double> (07FF674A211E5h)  
            _Getal(), _Unfancy(_Map()[_Block] + _Newoff % _DEQUESIZ), _STD forward<_Valty>(_Val)...);
        _PUSH_BACK_END;
00007FF674A23B26  mov         rcx,qword ptr [this]  
00007FF674A23B2D  call        std::deque<double,std::allocator<double> >::_Mysize (07FF674A211B8h)  
00007FF674A23B32  mov         qword ptr [rbp+0F8h],rax  
00007FF674A23B39  mov         rax,qword ptr [rbp+0F8h]  
00007FF674A23B40  mov         rax,qword ptr [rax]  
00007FF674A23B43  inc         rax  
00007FF674A23B46  mov         rcx,qword ptr [rbp+0F8h]  
00007FF674A23B4D  mov         qword ptr [rcx],rax  

#if _HAS_CXX17
        return back();
00007FF674A23B50  mov         rcx,qword ptr [this]  
00007FF674A23B57  call        std::deque<double,std::allocator<double> >::back (07FF674A2127Bh)  
#endif // _HAS_CXX17
    }
00007FF674A23B5C  lea         rsp,[rbp+118h]  
00007FF674A23B63  pop         rdi  
00007FF674A23B64  pop         rbp  
00007FF674A23B65  ret
Run Code Online (Sandbox Code Playgroud)

显然std::deque没有预先分配元素,而是使用循环将它们一一添加。所以难怪它很慢。

/Ob1您可以通过启用一些优化(例如)和减少运行时检查(例如删除)来加速调试构建/RTC1

但实际上,std::deque从性能的角度来看,这只是一个可怕的结构(微小向量的向量 - 根本不适合缓存)。