tej*_*jas 4 c++ string-literals c++17 c++20
有没有办法让原始字符串字面量(raw string literal)感知缩进,即自动去除各行共有的缩进?
例如
{
std::string_view str(
R"(
Hello
World
)");
std::cout << "ref\n" << str;
}
Run Code Online (Sandbox Code Playgroud)
输出
ref
Hello
World
Run Code Online (Sandbox Code Playgroud)
但我想要
ref
Hello
World
Run Code Online (Sandbox Code Playgroud)
我看到有一个答案解决了这个问题,但它是在运行时处理的。我认为使用 C23 的 #embed 也可以解决这个问题。
但是有没有办法在编译期做到这一点?最好使用 C++17,C++20 也可以。
std::cout << R"(\n Hello\n World\n)"_M << std::endl;\nRun Code Online (Sandbox Code Playgroud)\nconstexpr auto str = unindent(R"(\n Hello\n World!\n)"); // type of str would be std::array<char, N>\nstd::cout << str.data() << std::endl;\nRun Code Online (Sandbox Code Playgroud)\n在这种情况下,C++17 的最大问题是我们需要修改字符串(删除其中的空格)——因此我们需要将修改后的字符串存储在某处。
\n在 C++20 中,我们可以利用字符串字面量运算符模板(string literal operator template)把字符串作为模板参数来获取,例如:
\ntemplate<class _char_type, std::size_t size>\nstruct string_wrapper {\n using char_type = _char_type;\n\n consteval string_wrapper(const char_type (&arr)[size]) {\n std::ranges::copy(arr, str);\n }\n\n char_type str[size];\n};\n\ntemplate<string_wrapper str>\nconsteval decltype(auto) operator"" _M() {\n return do_unindent<str>();\n}\n\n// R"(foo)"_M\n// would be interpreted as:\n// operator"" _M<string_wrapper{R"(foo)"}>() \nRun Code Online (Sandbox Code Playgroud)\n这允许我们将未缩进的字符串存储在模板参数中,因此它的生命周期永远不会成为问题;我们可以直接返回对模板参数中存储的数组的引用。
\n另一方面,在 C++17 中,类类型不能作为非类型模板参数,字符串字面量运算符模板也不可用。因此,没有一种巧妙的方法来全局存储修改后的字符串(常量表达式中不允许使用 static 变量,因此这不是一个选项)。
因此,在 C++17 中实现这一点的唯一方法是返回一个 std::array(或类似的类型):
template<std::size_t size>\nconstexpr auto unindent(const char (&str)[size]) {\n return std::array<char, size>{ /* ... */ };\n}\nRun Code Online (Sandbox Code Playgroud)\n然而,这有几个主要缺点:
\n需要调用 .data() 才能将 std::array 转换为 const char*:\nstd::cout << unindent(R"(foo)").data() << std::endl;\nRun Code Online (Sandbox Code Playgroud)\n必须保证返回的 std::array 在使用期间一直存活:\n// BAD: Woops: dangling pointer!\nconst char* str = unindent(R"(foo)").data();\n\n// GOOD: keep array alive\nauto arr = unindent(R"(foo)");\nconst char* str = arr.data();\nRun Code Online (Sandbox Code Playgroud)\n返回数组的大小只能与原始字符串相同(即 std::array<char, size>)。由于 str 是函数的参数,它不是常量表达式,因此我们无法用它来确定去除缩进后字符串的实际大小(我们只有原始字符串的长度可供使用):\nconstexpr std::array<char, 8> arr = unindent(" a\\n b");\n// arr would be {\'a\', \'\\n\', \'b\', \'\\0\', \'\\0\', \'\\0\', \'\\0\', \'\\0\'};\nRun Code Online (Sandbox Code Playgroud)\n因此,除了容易造成混淆(arr.size() 不是字符串的真实长度)之外,这还很可能导致最终的二进制文件中出现大量多余的 \'\\0\'。\n另一种思路是使用用户自定义字面量运算符:\nconstexpr /*...*/ operator"" _M(const char* str, std::size_t len) {\n return std::array<char, 1000> { /* ... */ };\n}\nRun Code Online (Sandbox Code Playgroud)\n……但此时 str 及其长度都不再是常量表达式,所以两者都不能用来指定 std::array 的大小。因此唯一可行的选择是使用固定大小的数组——这并不是很有用。由于这些原因,我将仅提供 C++20 实现。
\n这是完整的 C++20 实现:
\n\n#include <algorithm>\n#include <string_view>\n#include <vector>\n#include <ranges>\n\nnamespace multiline_raw_string {\n template<class char_type>\n using string_view = std::basic_string_view<char_type>;\n\n // characters that are considered space\n // we need this because std::isspace is not constexpr\n template<class char_type>\n constexpr string_view<char_type> space_chars = std::declval<string_view<char_type>>();\n template<>\n constexpr string_view<char> space_chars<char> = " \\f\\n\\r\\t\\v";\n template<>\n constexpr string_view<wchar_t> space_chars<wchar_t> = L" \\f\\n\\r\\t\\v";\n template<>\n constexpr string_view<char8_t> space_chars<char8_t> = u8" \\f\\n\\r\\t\\v";\n template<>\n constexpr string_view<char16_t> space_chars<char16_t> = u" \\f\\n\\r\\t\\v";\n template<>\n constexpr string_view<char32_t> space_chars<char32_t> = U" \\f\\n\\r\\t\\v";\n \n \n // list of all potential line endings that could be encountered\n template<class char_type>\n constexpr string_view<char_type> potential_line_endings[] = std::declval<string_view<char_type>[]>();\n template<>\n constexpr string_view<char> potential_line_endings<char>[] = {\n "\\r\\n",\n "\\r",\n "\\n"\n };\n template<>\n constexpr string_view<wchar_t> potential_line_endings<wchar_t>[] = {\n L"\\r\\n",\n L"\\r",\n L"\\n"\n };\n template<>\n constexpr string_view<char8_t> potential_line_endings<char8_t>[] = {\n u8"\\r\\n",\n u8"\\r",\n u8"\\n"\n };\n template<>\n constexpr string_view<char16_t> potential_line_endings<char16_t>[] = {\n u"\\r\\n",\n u"\\r",\n u"\\n"\n };\n template<>\n constexpr string_view<char32_t> potential_line_endings<char32_t>[] = {\n U"\\r\\n",\n U"\\r",\n U"\\n"\n };\n\n // null-terminator for the different character types\n template<class char_type>\n constexpr char_type null_char = std::declval<char_type>();\n template<>\n constexpr char null_char<char> = \'\\0\';\n template<>\n constexpr wchar_t null_char<wchar_t> = L\'\\0\';\n template<>\n constexpr char8_t null_char<char8_t> 
= u8\'\\0\';\n template<>\n constexpr char16_t null_char<char16_t> = u\'\\0\';\n template<>\n constexpr char32_t null_char<char32_t> = U\'\\0\';\n\n // detects the line ending used within a string.\n // e.g. detect_line_ending("foo\\nbar\\nbaz") -> "\\n"\n template<class char_type>\n consteval string_view<char_type> detect_line_ending(string_view<char_type> str) {\n return *std::ranges::max_element(\n potential_line_endings<char_type>,\n {},\n [str](string_view<char_type> line_ending) {\n // count the number of lines we would get with line_ending\n auto view = std::views::split(str, line_ending);\n return std::ranges::distance(view);\n }\n );\n }\n\n // returns a view to the leading sequence of space characters within a string\n // e.g. get_leading_space_sequence(" \\t foo") -> " \\t "\n template<class char_type>\n consteval string_view<char_type> get_leading_space_sequence(string_view<char_type> line) {\n return line.substr(0, line.find_first_not_of(space_chars<char_type>));\n }\n\n // checks if a line consists purely out of space characters\n // e.g. is_line_empty(" \\t") -> true\n // is_line_empty(" foo") -> false\n template<class char_type>\n consteval bool is_line_empty(string_view<char_type> line) {\n return get_leading_space_sequence(line).size() == line.size();\n }\n\n // splits a string into individual lines\n // and removes the first & last line if they are empty\n // e.g. 
split_lines("\\na\\nb\\nc\\n", "\\n") -> {"a", "b", "c"}\n template<class char_type>\n consteval std::vector<string_view<char_type>> split_lines(\n string_view<char_type> str,\n string_view<char_type> line_ending\n ) {\n std::vector<string_view<char_type>> lines;\n\n for (auto line : std::views::split(str, line_ending)) {\n lines.emplace_back(line.begin(), line.end());\n }\n\n // remove first/last lines in case they are completely empty\n if(lines.size() > 1 && is_line_empty(lines[0])) {\n lines.erase(lines.begin());\n }\n if(lines.size() > 1 && is_line_empty(lines[lines.size()-1])) {\n lines.erase(lines.end()-1);\n }\n\n return lines;\n }\n\n // determines the longest possible sequence of space characters\n // that we can remove from each line.\n // e.g. determine_common_space_prefix_sequence({" \\ta", " foo", " \\t\\\xc5\xa7bar"}) -> " "\n template<class char_type>\n consteval string_view<char_type> determine_common_space_prefix_sequence(\n std::vector<string_view<char_type>> const& lines\n ) {\n std::vector<string_view<char_type>> space_sequences = {\n string_view<char_type>{} // empty string\n };\n\n for(string_view<char_type> line : lines) {\n string_view<char_type> spaces = get_leading_space_sequence(line);\n for(std::size_t len = 1; len <= spaces.size(); len++) {\n space_sequences.emplace_back(spaces.substr(0, len));\n }\n \n // remove duplicates\n std::ranges::sort(space_sequences);\n auto [first, last] = std::ranges::unique(space_sequences);\n space_sequences.erase(first, last);\n }\n\n // only consider space prefix sequences that apply to all lines\n auto shared_prefixes = std::views::filter(\n space_sequences,\n [&lines](string_view<char_type> prefix) {\n return std::ranges::all_of(\n lines,\n [&prefix](string_view<char_type> line) {\n return line.starts_with(prefix);\n }\n );\n }\n );\n\n // select the longest possible space prefix sequence\n return *std::ranges::max_element(\n shared_prefixes,\n {},\n &string_view<char_type>::size\n );\n }\n\n // 
unindents the individual lines of a raw string literal\n // e.g. unindent_string(" \\n a\\n b\\n c\\n") -> "a\\nb\\nc"\n template<class char_type>\n consteval std::vector<char_type> unindent_string(string_view<char_type> str) {\n string_view<char_type> line_ending = detect_line_ending(str);\n std::vector<string_view<char_type>> lines = split_lines(str, line_ending);\n string_view<char_type> common_space_sequence = determine_common_space_prefix_sequence(lines);\n\n std::vector<char_type> new_string;\n bool is_first = true;\n for(auto line : lines) {\n // append newline\n if(is_first) {\n is_first = false;\n } else {\n new_string.insert(new_string.end(), line_ending.begin(), line_ending.end());\n }\n\n // append unindented line\n auto unindented = line.substr(common_space_sequence.size());\n new_string.insert(new_string.end(), unindented.begin(), unindented.end());\n }\n\n // add null terminator\n new_string.push_back(null_char<char_type>);\n\n return new_string;\n }\n\n // returns the size required for the unindented string\n template<class char_type>\n consteval std::size_t unindent_string_size(string_view<char_type> str) {\n return unindent_string(str).size();\n }\n\n // simple type that stores a raw string\n // we need this to get around the limitation that string literals\n // are not considered valid non-type template arguments.\n template<class _char_type, std::size_t size>\n struct string_wrapper {\n using char_type = _char_type;\n\n consteval string_wrapper(const char_type (&arr)[size]) {\n std::ranges::copy(arr, str);\n }\n\n char_type str[size];\n };\n\n // used for sneakily creating and storing\n // the unindented string in a template parameter.\n template<string_wrapper sw>\n struct unindented_string_wrapper {\n using char_type = typename decltype(sw)::char_type;\n static constexpr std::size_t buffer_size = unindent_string_size<char_type>(sw.str);\n using array_ref = const char_type (&)[buffer_size];\n\n consteval unindented_string_wrapper(int) {\n auto 
newstr = unindent_string<char_type>(sw.str);\n std::ranges::copy(newstr, buffer);\n }\n\n consteval array_ref get() const {\n return buffer;\n }\n\n char_type buffer[buffer_size];\n };\n\n // uses a defaulted template argument that depends on the str\n // to initialize the unindented string within a template parameter.\n // this enables us to return a reference to the unindented string.\n template<string_wrapper str, unindented_string_wrapper<str> unindented = 0>\n consteval decltype(auto) do_unindent() {\n return unindented.get();\n }\n\n // the actual user-defined string literal operator\n template<string_wrapper str>\n consteval decltype(auto) operator"" _M() {\n return do_unindent<str>();\n }\n}\n\nusing multiline_raw_string::operator"" _M;\nRun Code Online (Sandbox Code Playgroud)\n用法示例:神螺栓
\nstd::cout << R"(\n a\n b\n c\n d\n)"_M << std::endl;\n/* Will print the following:\n a\nb\n c\nd\n*/\n\n// The type of R"(...)"_M is still const char (&)[N],\n// so it can be used like a normal string literal:\nstd::cout << std::size(R"(asdf)"_M) << std::endl;\n// (will print 5)\n\n// Lifetime is not a problem; can be stored in a std::string_view:\nconstexpr std::string_view str = R"(\n foo\n bar\n)"_M;\n\n// also works with wchar_t, char8_t, char16_t and char32_t literals:\nstd::wcout << LR"(foo)"_M << std::endl;\nRun Code Online (Sandbox Code Playgroud)\nstd::isspace()不是 constexpr,所以我们必须自己推出:(您可以添加其他字符,您希望将其视为“空格”以取消缩进):\ntemplate<>\nconstexpr string_view<char> space_chars<char> = " \\f\\n\\r\\t\\v";\nRun Code Online (Sandbox Code Playgroud)\n\\r\\n,\\n并且\\r作为潜在的行结尾,这些应该涵盖大多数用例:\ntemplate<>\nconstexpr string_view<char> potential_line_endings<char>[] = {\n "\\r\\n",\n "\\r",\n "\\n"\n};\nRun Code Online (Sandbox Code Playgroud)\n// mixed indentation (indentation will remain)\nconstexpr std::string_view str = R"(\n<tab>foo\n bar\n)"_M;\nRun Code Online (Sandbox Code Playgroud)\n但这将会:\n// all lines have the same indentation pattern\nconstexpr std::string_view str = R"(\n<tab> foo\n<tab> bar\n)"_M;\nRun Code Online (Sandbox Code Playgroud)\n| 归档时间: |
|
| 查看次数: |
570 次 |
| 最近记录: |