Has*_*yed 5 c++ parsing boost-spirit lexer
编辑:我已经删除了词法分析器,因为它没有与Qi完全集成,只是混淆了语法(见这里).
`on_success` 没有很好的文档记录,我正在尝试把它接入我的解析器。现有的 `on_success` 示例处理的都是仅基于 `qi` 构建的解析器,即没有使用 `lex`。
这就是我试图介绍构造的方式:
using namespace qi::labels;
qi::on_success(event_entry_,std::cout << _val << _1);
Run Code Online (Sandbox Code Playgroud)
但它无法编译。我担心问题出在 `lex` 上。有人可以告诉我哪里做错了,并说明有哪些占位符可用、它们的类型以及各自代表什么吗(因为这些都没有文档记录)?
完整文件如下:
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/home/phoenix/bind/bind_member_variable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/none.hpp>
#include <boost/cstdint.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <exception>
#include <iostream>
#include <ostream>
#include <string>
#include <vector>
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// Lexer definition for the event-description language.
///
/// Every token is registered in the default lexer state; whitespace lives in
/// the separate "WS" state so that a parser can use it as a skipper via
/// `qi::in_state("WS")[lexer.self]`.
///
/// Keywords use `(?i:...)` so that they match regardless of case (the sample
/// input spells them in upper case). Punctuation and keyword tokens carry no
/// attribute (`lex::omit`); `ordinal` exposes a `boost::uint32_t` and
/// `identifier` exposes the default token attribute.
///
/// The semantic actions attached to some tokens only print a debug trace to
/// `std::cout` when the token is matched.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        : left_curly("\"{\""),
          right_curly("\"}\""),
          left_paren("\"(\""),
          right_paren("\")\""),
          colon(":"),
          scolon(";"),
          namespace_("(?i:namespace)"),
          event("(?i:event)"),
          optional("(?i:optional)"),
          required("(?i:required)"),
          repeated("(?i:repeated)"),
          t_int_4("(?i:int4)"),
          t_int_8("(?i:int8)"),
          t_string("(?i:string)"),
          ordinal("\\d+"),
          identifier("\\w+")
    {
        using boost::spirit::lex::_val;

        // Registration order is significant: keywords and `ordinal` are
        // listed before `identifier` because `\w+` would also match them.
        this->self
            = left_curly  [ std::cout << px::val("lpar") << std::endl ]
            | right_curly [ std::cout << px::val("rpar") << std::endl ]
            | left_paren
            | right_paren
            | colon       [ std::cout << px::val("colon") << std::endl ]
            | scolon
            | namespace_  [ std::cout << px::val("kw namespace") << std::endl ]
            | event       [ std::cout << px::val("kw event") << std::endl ]
            | optional    [ std::cout << px::val("optional ") << "-->" << _val << "<--" << std::endl ]
            | required    [ std::cout << px::val("required") << std::endl ]
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal     [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl ]
            | identifier  [ std::cout << px::val("val identifier(") << _val << ")" << std::endl ];

        // Whitespace is kept out of the default state; `\r` is included so
        // that input with CRLF line endings is skipped as well.
        this->self("WS") = lex::token_def<>("[ \\t\\r\\n]+");
    }

    // Members are declared in the same order as the constructor's
    // initializer list, so initialization order matches what is written.
    lex::token_def<lex::omit> left_curly, right_curly, left_paren, right_paren, colon, scolon;
    lex::token_def<lex::omit> namespace_, event, optional, required, repeated, t_int_4, t_int_8, t_string;
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<> identifier;
};
/// Qualifier keyword that introduces an entry inside an event body
/// (`optional`, `required` or `repeated`).
enum event_entry_qualifier
{
    ENTRY_OPTIONAL = 0,
    ENTRY_REQUIRED = 1,
    ENTRY_REPEATED = 2
};
/// Kind of value an event entry holds: one of the atomic types
/// (`int4`, `int8`, `string`) or a reference to another event.
enum entry_type
{
    RBL_INT4   = 0,
    RBL_INT8   = 1,
    RBL_STRING = 2,
    RBL_EVENT  = 3
};
// Numbered identifier written as `<ordinal>:<name>` in the input.
// The grammar's `oid_` rule produces it from
// `tok.ordinal >> tok.colon >> tok.identifier`.
struct oid
{
// Numeric part, taken from the `ordinal` token.
boost::uint32_t ordinal;
// Textual part, taken from the `identifier` token.
std::string name;
};
// Adapt `oid` as a Fusion sequence so a Spirit rule can use it as its
// attribute; the members are listed in the order the parser yields them.
BOOST_FUSION_ADAPT_STRUCT
(
oid,
(boost::uint32_t, ordinal)
(std::string, name)
)
// Type of a single event entry, as produced by the grammar's
// `type_descriptor_` rule.
struct type_descriptor
{
// Which kind of type the entry has (atomic type or event reference).
entry_type type_id;
// Identifier given in parentheses after `event`; the grammar supplies an
// empty string (`qi::attr("")`) for the atomic types.
std::string referenced_event;
};
// Adapt `type_descriptor` as a Fusion sequence so a Spirit rule can use it
// as its attribute; the members are listed in the order the parser yields them.
BOOST_FUSION_ADAPT_STRUCT
(
type_descriptor,
(entry_type, type_id)
(std::string, referenced_event)
)
// One entry inside an event body. The grammar's `event_entry_` rule parses
// it as `event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon`.
struct event_entry
{
// optional / required / repeated.
event_entry_qualifier qualifier;
// `<ordinal>:<name>` of the entry.
oid identifier;
// Declared type of the entry.
type_descriptor descriptor;
};
// Adapt `event_entry` as a Fusion sequence so a Spirit rule can use it as
// its attribute; the members are listed in the order the parser yields them.
BOOST_FUSION_ADAPT_STRUCT
(
event_entry,
(event_entry_qualifier, qualifier)
(oid, identifier)
(type_descriptor, descriptor)
)
// A complete event definition; this is the attribute of the grammar's start
// rule: `tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly`.
struct event_descriptor
{
// `<ordinal>:<name>` of the event itself.
oid identifier;
// Entries listed between the curly braces, in input order.
std::vector<event_entry> event_entries;
};
// Adapt `event_descriptor` as a Fusion sequence so a Spirit rule can use it
// as its attribute; the members are listed in the order the parser yields them.
BOOST_FUSION_ADAPT_STRUCT
(
event_descriptor,
(oid, identifier)
(std::vector<event_entry>, event_entries)
)
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator,event_descriptor(), qi::in_state_skipper<Lexer> >
{
template <typename TokenDef>
grammar(TokenDef const& tok)
: grammar::base_type(event_descriptor_)
{
using qi::_val;
//start = event;
event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly;
event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;
event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL]
| tok.required [ _val = ENTRY_REQUIRED]
| tok.repeated [ _val = ENTRY_REPEATED];
oid_ = tok.ordinal
>> tok.colon
>> tok.identifier;
type_descriptor_
= (( atomic_type >> qi::attr(""))
| ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));
atomic_type = tok.t_int_4 [ _val = RBL_INT4]
| tok.t_int_8 [ _val = RBL_INT8]
| tok.t_string [ _val = RBL_STRING];
event_type = tok.event [_val = RBL_EVENT];
using namespace qi::labels;
qi::on_success(event_entry_,std::cout << _val << _1);
}
qi::rule<Iterator> start;
qi::rule<Iterator, event_descriptor(), qi::in_state_skipper<Lexer> > event_descriptor_;
qi::rule<Iterator, event_entry(), qi::in_state_skipper<Lexer> > event_entry_;
qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
qi::rule<Iterator, entry_type()> atomic_type;
qi::rule<Iterator, entry_type()> event_type;
qi::rule<Iterator, type_descriptor(),qi::in_state_skipper<Lexer> > type_descriptor_;
qi::rule<Iterator, oid()> oid_;
};
std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 123:hassan int4; } ";
int main()
{
typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef tokens<lexer_type>::iterator_type iterator_type;
tokens<lexer_type> token_lexer;
grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);
std::string::iterator it = test.begin();
iterator_type first = token_lexer.begin(it, test.end());
iterator_type last = token_lexer.end();
bool r;
r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);
if(r)
;
else
{
std::cout << "parsing failed" << std::endl;
}
}
Run Code Online (Sandbox Code Playgroud)
查看头文件我认为占位符的含义是:
_1 = Iterator position when the rule was tried.
_2 = Iterator to the end of the input.
_3 = Iterator position right after the rule has been successfully matched.
Run Code Online (Sandbox Code Playgroud)

(因为我不确定上面几行是否容易理解,这里用您的输入举一个小例子)

                                rule being tried
                        _________________________________
                       ´                                 `
[EVENT][1][:][sihan][{][OPTIONAL][123][:][hassan][int4][;][OPTIONAL][321][:][hassan2][int4][;][}]
                       _1                                 _3                                     _2
Run Code Online (Sandbox Code Playgroud)

正如 GManNickG 在评论中提到的那样,这些是词法分析器迭代器,您无法使用它们轻松访问原始字符串。conjure2 example 结合了词法分析器和 on_error/on_success 的使用。为了实现这一点,它使用一种特殊的令牌:position_token。该令牌始终可以访问与其自身关联的原始字符串的迭代器对(当您使用 lex::omit 时,普通令牌会丢失此信息)。position_token 有几个有趣的方法:matched() 返回一个 iterator_range<OriginalIterator>,begin() 和 end() 返回相应的迭代器。
在下面的代码中,我选择创建一个 phoenix::function,它接受两个词法分析器迭代器(使用 _1 和 _3 调用),并返回一个覆盖它们之间范围的字符串(使用 std::string(begin_iter->begin(), end_iter->begin()))。
我发现的一个问题是,空白处于不同的词法分析器状态,这会导致 position_token 返回的迭代器无效。我的解决办法是把所有内容都放在同一个状态中,然后对空白简单地使用 lex::_pass = lex::pass_flags::pass_ignore。
最后一个(小)问题是,如果您想使用 std::cout << _val,则需要为您感兴趣的类型定义 operator<<。
PS:我总是使用 BOOST_SPIRIT_USE_PHOENIX_V3,这要求每个 Spirit/phoenix 头文件都从 boost/spirit/include/... 包含。如果出于任何原因,您需要或想要使用 V2,则需要修改 phoenix::function。另外,我在 event_descriptor 的 operator<< 中使用了 C++11 的基于范围的 for 循环,而不是旧式 for 循环,因此如果您不能使用 C++11,就必须修改该运算符的定义。
#define BOOST_SPIRIT_USE_PHOENIX_V3\n// #define BOOST_SPIRIT_DEBUG\n#include <boost/spirit/include/qi.hpp>\n#include <boost/spirit/include/phoenix_core.hpp>\n#include <boost/spirit/include/phoenix_bind.hpp> //CHANGED\n#include <boost/spirit/include/lex_lexertl.hpp>\n#include <boost/spirit/include/lex_lexertl_position_token.hpp> //ADDED\n#include <boost/none.hpp>\n#include <boost/cstdint.hpp>\n#include <boost/fusion/include/adapt_struct.hpp>\n#include <string>\n#include <exception>\n#include <vector>\n\nnamespace lex = boost::spirit::lex;\nnamespace px = boost::phoenix;\nnamespace qi = boost::spirit::qi;\nnamespace ascii = boost::spirit::ascii;\n\n\ntemplate <typename Lexer>\nstruct tokens : lex::lexer<Lexer>\n{\n tokens()\n : left_curly("\\"{\\""),\n right_curly("\\"}\\""),\n left_paren("\\"(\\""),\n right_paren("\\")\\""),\n colon(":"),\n scolon(";"),\n namespace_("(?i:namespace)"),\n event("(?i:event)"),\n optional("(?i:optional)"),\n required("(?i:required)"),\n repeated("(?i:repeated)"),\n t_int_4("(?i:int4)"),\n t_int_8("(?i:int8)"),\n t_string("(?i:string)"),\n ordinal("\\\\d+"),\n identifier("\\\\w+")\n\n {\n using boost::spirit::lex::_val;\n\n this->self\n = \n left_curly //[ std::cout << px::val("lpar") << std::endl]\n | right_curly //[ std::cout << px::val("rpar") << std::endl]\n | left_paren\n | right_paren\n | colon //[ std::cout << px::val("colon") << std::endl]\n | scolon\n | namespace_ // [ std::cout << px::val("kw namesapce") << std::endl]\n | event // [ std::cout << px::val("kw event") << std::endl]\n | optional //[ std::cout << px::val("optional ") << "-->" << _val << "<--" << std::endl]\n | required //[ std::cout << px::val("required") << std::endl]\n | repeated\n | t_int_4\n | t_int_8\n | t_string\n | ordinal //[ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]\n | identifier //[std::cout << px::val("val identifier(") << _val << ")" << std::endl]\n | lex::token_def<>("[ \\\\t\\\\n]+") [lex::_pass = lex::pass_flags::pass_ignore] 
//CHANGED\n ;\n }\n\n\n lex::token_def<lex::omit> left_curly, right_curly, left_paren, right_paren, colon, scolon;\n lex::token_def<lex::omit> namespace_, event, optional, required, repeated, t_int_4, t_int_8, t_string;\n lex::token_def<boost::uint32_t> ordinal;\n lex::token_def<> identifier;\n};\n\nenum event_entry_qualifier\n{\n ENTRY_OPTIONAL,\n ENTRY_REQUIRED,\n ENTRY_REPEATED\n};\n\nenum entry_type\n{\n RBL_INT4,\n RBL_INT8,\n RBL_STRING,\n RBL_EVENT\n};\n\nstruct oid\n{\n boost::uint32_t ordinal;\n std::string name;\n};\n\nBOOST_FUSION_ADAPT_STRUCT\n(\n oid,\n (boost::uint32_t, ordinal)\n (std::string, name)\n)\n\nstd::ostream& operator<<(std::ostream& os, const oid& val) //ADDED\n{\n return os << val.ordinal << "-" << val.name;\n}\n\nstruct type_descriptor\n{\n entry_type type_id;\n std::string referenced_event;\n};\n\nBOOST_FUSION_ADAPT_STRUCT\n(\n type_descriptor,\n (entry_type, type_id)\n (std::string, referenced_event)\n)\n\nstd::ostream& operator<<(std::ostream& os, const type_descriptor& val) //ADDED\n{\n return os << val.type_id << "-" << val.referenced_event;\n}\n\nstruct event_entry\n{\n event_entry_qualifier qualifier;\n oid identifier;\n type_descriptor descriptor;\n};\n\n\nBOOST_FUSION_ADAPT_STRUCT\n(\n event_entry,\n (event_entry_qualifier, qualifier)\n (oid, identifier)\n (type_descriptor, descriptor)\n)\n\nstd::ostream& operator<<(std::ostream& os, const event_entry& val) //ADDED\n{\n return os << val.qualifier << "-" << val.identifier << "-" << val.descriptor;\n}\n\nstruct event_descriptor\n{\n oid identifier;\n std::vector<event_entry> event_entries;\n};\n\n\n\nBOOST_FUSION_ADAPT_STRUCT\n(\n event_descriptor,\n (oid, identifier)\n (std::vector<event_entry>, event_entries)\n)\n\nstd::ostream& operator<<(std::ostream& os, const event_descriptor& val) //ADDED\n{\n os << val.identifier << "[";\n for(const auto& entry: val.event_entries) //C++11\n os << entry;\n os << "]";\n return os;\n}\n\nstruct build_string_impl //ADDED\n{\n template 
<typename Sig>\n struct result;\n template <typename This, typename Iter1, typename Iter2>\n struct result<This(Iter1,Iter2)>\n {\n typedef std::string type;\n };\n\n template <typename Iter1, typename Iter2>\n std::string operator()(Iter1 begin, Iter2 end) const\n {\n return std::string(begin->begin(),end->begin());\n }\n};\n\npx::function<build_string_impl> build_string;\n\ntemplate <typename Iterator, typename Lexer>\nstruct grammar : qi::grammar<Iterator,event_descriptor() >\n{\n template <typename TokenDef>\n grammar(TokenDef const& tok)\n : grammar::base_type(event_descriptor_)\n {\n using qi::_val;\n //start = event;\n event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly;\n\n event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;\n\n event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL] \n | tok.required [ _val = ENTRY_REQUIRED]\n | tok.repeated [ _val = ENTRY_REPEATED];\n\n oid_ = tok.ordinal \n >> tok.colon \n >> tok.identifier;\n\n type_descriptor_ \n = (( atomic_type >> qi::attr("")) \n | ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));\n\n atomic_type = tok.t_int_4 [ _val = RBL_INT4]\n | tok.t_int_8 [ _val = RBL_INT8]\n | tok.t_string [ _val = RBL_STRING];\n\n event_type = tok.event [_val = RBL_EVENT];\n\n using namespace qi::labels;\n qi::on_success(event_entry_,std::cout << _val << " " << build_string(_1,_3) << std::endl); //CHANGED\n // BOOST_SPIRIT_DEBUG_NODES( (event_descriptor_)(event_entry_)(event_qualifier)(oid_)(type_descriptor_)(atomic_type)(event_type) );\n\n }\n\n qi::rule<Iterator> start;\n qi::rule<Iterator, event_descriptor()> event_descriptor_; \n qi::rule<Iterator, event_entry()> event_entry_;\n qi::rule<Iterator, event_entry_qualifier()> event_qualifier;\n qi::rule<Iterator, entry_type()> atomic_type;\n qi::rule<Iterator, entry_type()> event_type;\n qi::rule<Iterator, type_descriptor()> type_descriptor_;\n qi::rule<Iterator, oid()> 
oid_;\n\n\n};\n\nstd::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 321:hassan2 int4; } ";\n\nint main()\n{\n typedef lex::lexertl::position_token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type; //CHANGED\n typedef lex::lexertl::actor_lexer<token_type> lexer_type;\n typedef tokens<lexer_type>::iterator_type iterator_type;\n\n tokens<lexer_type> token_lexer;\n grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);\n\n std::string::iterator it = test.begin();\n iterator_type first = token_lexer.begin(it, test.end());\n iterator_type last = token_lexer.end();\n\n bool r; \n\n r = qi::parse(first, last, grammar); //CHANGED\n\n if(r)\n ;\n else\n {\n std::cout << "parsing failed" << std::endl;\n }\n}\n
Run Code Online (Sandbox Code Playgroud)\n