Ste*_*ing 5 c++ unicode boost utf-8 boost-spirit
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#define BOOST_SPIRIT_UNICODE // We'll use unicode (UTF8) all throughout
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/support_standard_wide.hpp>
/// Splits a hard-coded wide-character input on the three-character delimiter
/// `","` with Boost.Spirit.Qi and prints every resulting field to std::wcout,
/// one field per line. A diagnostic is written to std::wcerr when the parser
/// fails or stops before the end of the input.
void parse_simple_string()
{
    namespace qi = boost::spirit::qi;
    namespace encoding = boost::spirit::unicode;
    using iterator_type = std::wstring::const_iterator;

    // The raw literal keeps every backslash and quote verbatim. The final
    // field (U+4F60 U+597D) is appended as universal character names so its
    // value does not depend on the encoding the source file was saved in.
    std::wstring const input =
        LR"(12,3","ab,cd","G,G\"GG","kkk","10,\"0","99987","PPP",")"
        L"\u4F60\u597D";

    // A field is one or more characters, stopping in front of the next `","`.
    qi::rule<iterator_type, std::wstring()> key =
        +(qi::unicode::char_ - qi::lit(L"\",\""));

    std::vector<std::wstring> result;

    // phrase_parse advances the iterator it is given, so keep it in a named
    // variable to be able to detect unconsumed input afterwards.
    iterator_type first = input.begin();
    bool const matched = qi::phrase_parse(first, input.end(),
                                          key % qi::lit(L"\",\""),
                                          encoding::space,
                                          result);
    if (!matched || first != input.end())
    {
        std::wcerr << L"parse_simple_string: input was not fully parsed\n";
    }

    for (auto const &data : result)
        std::wcout << data << L'\n';
}
Run Code Online (Sandbox Code Playgroud)
我研究了《如何使用 Boost Spirit 解析中文(Unicode UTF-16)?》这篇文章,并按照其中的指南操作,但仍然无法解析"你好"。
预期的结果应该是
12,3 ab,cd G,G\"GG kkk 10,\"0 99987 PPP 你好
但实际结果是 12,3 ab,cd G,G\"GG kkk 10,\"0 99987 PPP
无法解析中文单词"你好"
操作系统是win7 64位,我的编辑器保存为UTF-8
如果您的输入是 UTF-8,可以尝试使用 Boost.Regex 提供的 Unicode 迭代器。
例如,使用 boost::u8_to_u32_iterator:
一个双向迭代器适配器,它使底层的 UTF-8 字符序列看起来像一个(只读的)UTF-32 字符序列。
#include <boost/regex/pending/unicode_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/range.hpp>
#include <iterator>
#include <iostream>
#include <ostream>
#include <cstdint>
#include <vector>
int main()
{
using namespace boost;
using namespace spirit::qi;
using namespace std;
auto &&utf8_text=u8"??????";
u8_to_u32_iterator<const char*>
tbegin(begin(utf8_text)), tend(end(utf8_text));
vector<uint32_t> result;
parse(tbegin, tend, *standard_wide::char_, result);
for(auto &&code_point : result)
cout << "&#" << code_point << ";";
cout << endl;
}
Run Code Online (Sandbox Code Playgroud)
输出是:
&#20320;&#22909;&#65292;&#19990;&#30028;&#65281;&#0;
Run Code Online (Sandbox Code Playgroud)