C++线程应用程序比非线程运行得慢

IDD*_*DQD 5 c++ multithreading c++11

我目前正在用C++编写素数生成器.我先制作了一个单线程版本,后来又制作了一个多线程版本.

我发现如果我的程序生成的值小于100'000,则单线程版本比多线程版本更快.显然我做错了什么.

我的代码如下:

#include <algorithm>
#include <fstream>
#include <iostream>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <string>
#include <thread>
#include <vector>

using namespace std;

// Primes found so far, in ascending order (std::set keeps its keys sorted).
// main() seeds it with 7; 2, 3 and 5 are never stored here and are emitted
// separately by printPrimes()/writeToFile().
set<unsigned long long> primeContainer;
// Guards primeContainer in checkPrimeLock(): taken shared while iterating,
// exclusive while inserting. The single-threaded checkPrime() does not use it.
shared_mutex m;

/// Single-threaded primality check by trial division.
///
/// Divides `p` by the primes already stored in `primeContainer` (ascending)
/// and inserts `p` into the container when no divisor is found.
///
/// The container never holds 3, so multiples of 3 are rejected explicitly.
/// Divisibility by 2 and 5 is not tested here; the visible caller (runLoop)
/// only passes numbers ending in 1, 3, 7 or 9.
///
/// @param p candidate to test; the result is only meaningful when every prime
///          in [7, sqrt(p)] is already in `primeContainer`.
void checkPrime(const unsigned long long p)
{
    if (p % 3 == 0)
        return;

    for (const unsigned long long divisor : primeContainer)
    {
        if (p % divisor == 0)
            return; // composite: nothing to record

        // Stop once divisor > sqrt(p). Expressed as a division because
        // divisor * divisor silently wraps around for divisors >= 2^32,
        // which would make the bound check wrong for very large p.
        if (divisor > p / divisor)
            break;
    }

    primeContainer.insert(p);
}

/// Thread-safe variant of the trial-division check.
///
/// Scans `primeContainer` under a shared lock on `m` and, when no divisor is
/// found, inserts `p` under an exclusive lock on `m`.
///
/// Any std::exception raised while locking, scanning or inserting is reported
/// on stdout and the candidate is dropped. In particular, a failure during the
/// scan no longer falls through to the insertion, so an unverified number can
/// never be stored as a prime.
///
/// @param p candidate to test; multiples of 3 are rejected up front because 3
///          is never stored in the container.
void checkPrimeLock(const unsigned long long p)
{
    if (p % 3 == 0)
        return;

    try
    {
        {
            // Shared lock: several threads may scan the container at once.
            std::shared_lock<std::shared_mutex> readLock(m);
            for (const unsigned long long divisor : primeContainer)
            {
                if (p % divisor == 0)
                    return; // composite

                // divisor > sqrt(p), written without the overflow-prone
                // divisor * divisor.
                if (divisor > p / divisor)
                    break;
            }
        } // the shared lock is released here, before the exclusive lock is taken

        std::unique_lock<std::shared_mutex> writeLock(m);
        primeContainer.insert(p);
    }
    catch (const std::exception& e)
    {
        std::cout << e.what() << std::endl;
        system("pause");
    }
}

void runLoopThread(const unsigned long long& l)
{
    for (unsigned long long i = 10; i < l; i += 10)
    {
        thread t1(checkPrimeLock, i + 1);
        thread t2(checkPrimeLock, i + 3);
        thread t3(checkPrimeLock, i + 7);
        thread t4(checkPrimeLock, i + 9);
        t1.join();
        t2.join();
        t3.join();
        t4.join();
    }
}

/// Single-threaded driver: walks the decades 10, 20, 30, ... below `l` and
/// hands the four numbers of each decade that end in 1, 3, 7 or 9 to
/// checkPrime(), in ascending order.
///
/// @param l exclusive upper bound for the decade start values.
void runLoop(const unsigned long long& l)
{
    static const unsigned long long lastDigits[] = {1, 3, 7, 9};

    unsigned long long decade = 10;
    while (decade < l)
    {
        for (const unsigned long long digit : lastDigits)
            checkPrime(decade + digit);
        decade += 10;
    }
}

/// Prints every prime not greater than `l` to stdout, separated by spaces and
/// followed by a newline.
///
/// 2, 3 and 5 are not stored in `primeContainer`, so they are emitted
/// explicitly before the container is walked. 1 is intentionally not printed:
/// it is not a prime number.
///
/// @param l inclusive upper bound for the printed values.
void printPrimes(const unsigned long long& l)
{
    for (const unsigned long long small : {2ULL, 3ULL, 5ULL})
    {
        if (small <= l)
            std::cout << small << " ";
    }

    for (const unsigned long long prime : primeContainer)
    {
        // The set is ordered ascending, so nothing after this point can qualify.
        if (prime > l)
            break;
        std::cout << prime << " ";
    }
    std::cout << std::endl;
}

/// Writes every prime not greater than `l` to the file "primes_<l>.txt",
/// separated by spaces.
///
/// 2, 3 and 5 are not stored in `primeContainer`, so they are written
/// explicitly before the container is walked. 1 is intentionally not written:
/// it is not a prime number.
///
/// If the file cannot be opened, an error is reported on stdout and nothing
/// is written.
///
/// @param l inclusive upper bound for the written values; also part of the
///          output file name.
void writeToFile(const unsigned long long& l)
{
    const std::string name = "primes_" + std::to_string(l) + ".txt";
    std::ofstream f(name);

    if (!f.is_open())
    {
        std::cout << "Error opening file." << std::endl;
        system("pause");
        return;
    }

    for (const unsigned long long small : {2ULL, 3ULL, 5ULL})
    {
        if (small <= l)
            f << small << " ";
    }

    for (const unsigned long long prime : primeContainer)
    {
        // The set is ordered ascending, so nothing after this point can qualify.
        if (prime > l)
            break;
        f << prime << " ";
    }
}

/// Entry point: reports the hardware thread count, reads the generation limit
/// from stdin, runs the prime search and prints the primes up to the limit.
///
/// Returns 1 without searching when the limit cannot be parsed as an unsigned
/// integer, 0 otherwise.
int main()
{
    const unsigned int n = std::thread::hardware_concurrency();
    std::cout << n << " concurrent threads are supported." << std::endl;

    unsigned long long limit = 0;
    std::cout << "Please enter the limit of prime generation: ";
    if (!(std::cin >> limit))
    {
        // Without this check a failed read silently continues with limit == 0.
        std::cout << "Invalid limit." << std::endl;
        system("pause");
        return 1;
    }

    // Seed for trial division: the first prime that is not 2, 3 or 5.
    primeContainer.insert(7);

    if (10 < limit)
    {
        //runLoop(limit); //single-threaded
        runLoopThread(limit); //multi-threaded
    }

    printPrimes(limit);
    //writeToFile(limit);
    system("pause");
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

在该main函数中,您将找到关于哪个函数是单线程和多线程的注释.

它们之间的主要区别在于锁的使用:遍历容器时使用共享锁,插入时使用独占锁.如果重要的话,我的CPU有4个核心.

为什么单线程版本更快?

Dav*_*rtz 10

你有几个问题.

首先,你不必要地创建和销毁线程.让每个线程循环工作,直到没有更多的工作要做.

其次,你的锁粒度太细,因此获取锁的次数过于频繁.让每个线程每次领取一个包含100个数字的块来进行测试,而不是一次只处理一个数字,并把在每个块中找到的素数一次性全部插入.

  • 它并不像"线程让事情变得更快"那么简单.使用线程可以更快地完成很多事情,但要做到足以获得显着的性能提升并不简单.具有讽刺意味的是,在像这样的"玩具"代码中做到这一点,往往比在实际代码中更难. (2认同)