惯用地分割 string_view

use*_*413 5 c++ string-view c++17

我阅读了《迭代字符串中单词的最优雅方式》,很喜欢其答案的简洁性。现在我想对 string_view 做同样的事情。问题是,stringstream 不能接受 string_view:

#include <iostream>
#include <string>
#include <sstream>
#include <algorithm>
#include <iterator>

// Question's example: attempts to split a string_view into words using stream
// iterators. It is shown to illustrate the problem; the author marks the
// istringstream construction below as a compile error.
int main() {
    using namespace std;
    string_view sentence = "And I feel fine...";
    istringstream iss(sentence); // <== error: per the author, istringstream cannot take a string_view
    // Intended effect: read items from iss and write each one to cout followed by "\n".
    copy(istream_iterator<string_view>(iss),
         istream_iterator<string_view>(),
         ostream_iterator<string_view>(cout, "\n"));
}
Run Code Online (Sandbox Code Playgroud)

那么有没有办法做到这一点呢?如果没有,那么为什么这样的做法不算惯用写法呢?

Con*_*ngo 6

按分隔符分割并返回 vector<string_view>。

专为快速分割 .csv 文件中的行而设计。

已在 MSVC 2017 v15.9.6 和 Intel Compiler v19.0 上以 C++17 编译并测试(string_view 需要 C++17)。

#include <string_view>
#include <vector>

/// @brief Splits `str` into columns separated by `delim`.
///
/// Every delimiter produces a boundary, so empty columns are preserved:
/// leading, trailing and consecutive delimiters each yield an empty view, and
/// an empty input yields a single empty column.
///
/// @param str   Text to split. The returned views refer to this character
///              sequence, so it must outlive the result.
/// @param delim Single-character delimiter (defaults to ',').
/// @return One string_view per column, in order; always at least one element.
std::vector<std::string_view> Split(const std::string_view str, const char delim = ',')
{
    using size_type = std::string_view::size_type;

    std::vector<std::string_view> result;

    // Unsigned size_type offsets avoid truncating very large inputs to int and
    // never form a pointer before the start of the buffer.
    size_type columnStart = 0;
    for (size_type delimPos = str.find(delim);
         delimPos != std::string_view::npos;
         delimPos = str.find(delim, columnStart))
    {
        result.push_back(str.substr(columnStart, delimPos - columnStart));
        columnStart = delimPos + 1;
    }

    // Final column: everything after the last delimiter (possibly empty).
    // columnStart <= str.size() always holds here, so substr cannot throw.
    result.push_back(str.substr(columnStart));
    return result;
}
Run Code Online (Sandbox Code Playgroud)

注意生命周期:string_view 的生命周期永远不应超过它所指向(作为其窗口)的父 string。如果父 string 超出作用域,string_view 指向的内容就会失效。在这种特定情况下,该 API 的设计很难被误用,因为输入和输出都是指向同一父字符串的 string_view 窗口。最终,这在内存复制和 CPU 使用方面都相当高效。

请注意,使用 string_view 的唯一缺点是失去了隐式的空终止符。因此,请使用支持 string_view 的函数,例如 Boost 中用于将字符串转换为数字的 lexical_cast。

我用它来快速解析 .csv 文件。为了获取 .csv 文件中的每一行,我使用了 istringstream 和 getLine(),速度非常快(约 2GB/秒,或在单核上每秒 1,200,000 行)。

单元测试。使用Google Test进行测试(我使用vcpkg安装)。

// Google Test integrates into VS2017 if ReSharper is installed. 
#include "gtest/gtest.h" // Can install using vcpkg
// In main(), call:   
// ::testing::InitGoogleTest(&argc, argv);return RUN_ALL_TESTS();

// Unit test for Split(): checks column count and contents for ordinary input
// and for the edge cases of empty input, no delimiter, and leading, trailing
// and consecutive delimiters.
//
// The size check uses ASSERT_EQ (fatal) so that a wrong column count stops the
// test before tokens[i] could index out of bounds. The element checks use
// EXPECT_EQ so that a failure reports both the actual and the expected value.
TEST(Strings, Split)
{
    {
        const std::string str = "A,B,C";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 3u);
        EXPECT_EQ(tokens[0], "A");
        EXPECT_EQ(tokens[1], "B");
        EXPECT_EQ(tokens[2], "C");
    }
    {
        // Leading delimiter yields an empty first column.
        const std::string str = ",B,C";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 3u);
        EXPECT_EQ(tokens[0], "");
        EXPECT_EQ(tokens[1], "B");
        EXPECT_EQ(tokens[2], "C");
    }
    {
        // Trailing delimiter yields an empty last column.
        const std::string str = "A,B,";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 3u);
        EXPECT_EQ(tokens[0], "A");
        EXPECT_EQ(tokens[1], "B");
        EXPECT_EQ(tokens[2], "");
    }
    {
        // Empty input yields a single empty column.
        const std::string str = "";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 1u);
        EXPECT_EQ(tokens[0], "");
    }
    {
        // No delimiter: the whole input is the only column.
        const std::string str =  "A";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 1u);
        EXPECT_EQ(tokens[0], "A");
    }
    {
        const std::string str =  ",";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 2u);
        EXPECT_EQ(tokens[0], "");
        EXPECT_EQ(tokens[1], "");
    }
    {
        // Consecutive delimiters yield empty columns between them.
        const std::string str =  ",,";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 3u);
        EXPECT_EQ(tokens[0], "");
        EXPECT_EQ(tokens[1], "");
        EXPECT_EQ(tokens[2], "");
    }
    {
        const std::string str = "A,";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 2u);
        EXPECT_EQ(tokens[0], "A");
        EXPECT_EQ(tokens[1], "");
    }
    {
        const std::string str = ",B";
        auto tokens = Split(str, ',');
        ASSERT_EQ(tokens.size(), 2u);
        EXPECT_EQ(tokens[0], "");
        EXPECT_EQ(tokens[1], "B");
    }
}
Run Code Online (Sandbox Code Playgroud)


Bar*_*rry 1

如果您想使用该特定方法,只需将 string_view 显式转换为 string:

// Fragment: assumes `sentence` (a string_view) and `using namespace std;` are
// in scope, as in the question's snippet.
// Construct the stream from a std::string built explicitly from the string_view.
istringstream iss{string(sentence)}; // N.B. braces to avoid most vexing parse
// Read std::string items from the stream and write each to cout followed by "\n".
copy(istream_iterator<string>(iss),
     istream_iterator<string>(),
     ostream_iterator<string_view>(cout, "\n"));
Run Code Online (Sandbox Code Playgroud)

C++ 标准库没有良好的字符串操作功能。您可能想看看 Boost、Abseil 等库中提供的功能。它们中的任何一个都比这更好。