我有这个csv线
std::string s = R"(1997,Ford,E350,"ac, abs, moon","some "rusty" parts",3000.00)";
Run Code Online (Sandbox Code Playgroud)
我可以用boost::tokenizer以下方法解析它:
typedef boost::tokenizer< boost::escaped_list_separator<char> , std::string::const_iterator, std::string> Tokenizer;
boost::escaped_list_separator<char> seps('\\', ',', '\"');
Tokenizer tok(s, seps);
for (auto i : tok)
{
std::cout << i << std::endl;
}
Run Code Online (Sandbox Code Playgroud)
它是正确的,除了令牌"生锈"应该有双引号被剥离.
这是我尝试使用boost :: spirit
boost::spirit::classic::rule<> list_csv_item = !(boost::spirit::classic::confix_p('\"', *boost::spirit::classic::c_escape_ch_p, '\"') | boost::spirit::classic::longest_d[boost::spirit::classic::real_p | boost::spirit::classic::int_p]);
std::vector<std::string> vec_item;
std::vector<std::string> vec_list;
boost::spirit::classic::rule<> list_csv = boost::spirit::classic::list_p(list_csv_item[boost::spirit::classic::push_back_a(vec_item)],',')[boost::spirit::classic::push_back_a(vec_list)];
boost::spirit::classic::parse_info<> result = parse(s.c_str(), list_csv);
if (result.hit)
{
for (auto i : vec_item)
{
cout << i << endl;
} …Run Code Online (Sandbox Code Playgroud) 我正在攻击的一个简单的骨架实用程序的一部分我有一个语法来触发文本中的替换.我认为这是一种很好的方式来适应Boost.Spirit,但模板错误是一种独特的喜悦.
以下是完整的代码:
#include <iostream>
#include <iterator>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace bsq = boost::spirit::qi;
namespace {
template<typename Iterator>
struct skel_grammar : public bsq::grammar<Iterator> {
skel_grammar();
private:
bsq::rule<Iterator> macro_b;
bsq::rule<Iterator> macro_e;
bsq::rule<Iterator, bsq::ascii::space_type> id;
bsq::rule<Iterator> macro;
bsq::rule<Iterator> text;
bsq::rule<Iterator> start;
};
template<typename Iterator>
skel_grammar<Iterator>::skel_grammar() : skel_grammar::base_type(start)
{
text = bsq::no_skip[+(bsq::char_ - macro_b)[bsq::_val += bsq::_1]];
macro_b = bsq::lit("<<");
macro_e = bsq::lit(">>");
macro %= macro_b >> id >> macro_e;
id %= -(bsq::ascii::alpha | bsq::char_('_'))
>> +(bsq::ascii::alnum | bsq::char_('_'));
start = *(text | macro); …Run Code Online (Sandbox Code Playgroud) 我想解析一个句子,其中一些字符串可能不加引号,"引用"或"引用".下面的代码几乎可以工作 - 但它无法匹配收尾报价.我猜这是因为qq参考.修改在代码中被注释,修改引用"引用"或"引用"也解析并帮助显示原始问题与结束引用.该代码还描述了确切的语法.
要完全清楚:不带引号的字符串解析.引用的字符串'hello'将解析打开的引号',所有字符 hello,但然后无法解析最终引用'.
我做了另一次尝试,类似于boost教程中的开始/结束标记匹配,但没有成功.
template <typename Iterator>
struct test_parser : qi::grammar<Iterator, dectest::Test(), ascii::space_type>
{
test_parser()
:
test_parser::base_type(test, "test")
{
using qi::fail;
using qi::on_error;
using qi::lit;
using qi::lexeme;
using ascii::char_;
using qi::repeat;
using namespace qi::labels;
using boost::phoenix::construct;
using boost::phoenix::at_c;
using boost::phoenix::push_back;
using boost::phoenix::val;
using boost::phoenix::ref;
using qi::space;
char qq;
arrow = lit("->");
open_quote = (char_('\'') | char_('"')) [ref(qq) = _1]; // Remember what the opening quote was
close_quote = lit(val(qq)); …Run Code Online (Sandbox Code Playgroud) 我用来boost::iostreams::mapped_file_source从特定位置读取文本文件到特定位置并操作每一行(使用编译g++ -Wall -O3 -lboost_iostreams -o test main.cpp):
#include <iostream>
#include <string>
#include <boost/iostreams/device/mapped_file.hpp>
int main() {
boost::iostreams::mapped_file_source f_read;
f_read.open("in.txt");
long long int alignment_offset(0);
// set the start point
const char* pt_current(f_read.data() + alignment_offset);
// set the end point
const char* pt_last(f_read.data() + f_read.size());
const char* pt_current_line_start(pt_current);
std::string buffer;
while (pt_current && (pt_current != pt_last)) {
if ((pt_current = static_cast<const char*>(memchr(pt_current, '\n', pt_last - pt_current)))) {
buffer.assign(pt_current_line_start, pt_current - pt_current_line_start + 1);
// do something with buffer …Run Code Online (Sandbox Code Playgroud)