Vla*_*ylo 2 c++ boost abstract-syntax-tree boost-spirit
使用 Boost.Spirit 将某些表达式转换为 AST 的正确方法是什么?
我试图构建它,但我认为它很混乱并且可以简化很多。
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace ast {
struct unary_operator;
struct binary_operator;
struct expression {
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>,
boost::recursive_wrapper<expression>
> type;
expression() {
}
template<typename Expr>
expression(const Expr &expr)
: expr(expr) {
}
expression &operator+=(expression rhs);
expression &operator-=(expression rhs);
expression &operator*=(expression rhs);
expression &operator/=(expression rhs);
expression &and_(expression rhs);
expression &or_(expression rhs);
expression &equals(expression rhs);
expression ¬_equals(expression rhs);
expression &less_than(expression rhs);
expression &less_equals(expression rhs);
expression &greater_than(expression rhs);
expression &greater_equals(expression rhs);
expression &factor(expression rhs);
expression &dot(expression rhs);
type expr;
};
struct unary_operator {
std::string op;
expression rhs;
unary_operator() {}
unary_operator(std::string op, expression rhs)
: op(std::move(op)), rhs(std::move(rhs)) {
}
};
struct binary_operator {
std::string op;
expression lhs;
expression rhs;
binary_operator() {}
binary_operator(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {
}
};
expression &expression::operator+=(expression rhs) {
expr = binary_operator("+", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator-=(expression rhs) {
expr = binary_operator("-", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator*=(expression rhs) {
expr = binary_operator("*", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator/=(expression rhs) {
expr = binary_operator("/", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::and_(expression rhs) {
expr = binary_operator("&&", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::or_(expression rhs) {
expr = binary_operator("||", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::equals(expression rhs) {
expr = binary_operator("==", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::not_equals(expression rhs) {
expr = binary_operator("!=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_than(expression rhs) {
expr = binary_operator("<", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_equals(expression rhs) {
expr = binary_operator("<=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_than(expression rhs) {
expr = binary_operator(">", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_equals(expression rhs) {
expr = binary_operator(">=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::factor(expression rhs) {
expr = binary_operator("**", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::dot(expression rhs) {
expr = binary_operator(".", std::move(expr), std::move(rhs));
return *this;
}
struct printer {
void operator()(const double n) const {
std::cout << n;
}
void operator()(const std::string &s) const {
std::cout << s;
}
void operator()(const expression &ast) const {
boost::apply_visitor(*this, ast.expr);
}
void operator()(const binary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.lhs.expr);
std::cout << ", ";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
void operator()(const unary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
};
struct operators {
struct and_ {
};
struct or_ {
};
struct equals {
};
struct not_equals {
};
struct less_than {
};
struct less_equals {
};
struct greater_than {
};
struct greater_equals {
};
struct factor {
};
struct dot {
};
expression &operator()(expression &lhs, expression rhs, and_) const {
return lhs.and_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, or_) const {
return lhs.or_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, equals) const {
return lhs.equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, not_equals) const {
return lhs.not_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_than) const {
return lhs.less_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_equals) const {
return lhs.less_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_than) const {
return lhs.greater_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_equals) const {
return lhs.greater_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, factor) const {
return lhs.factor(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, dot) const {
return lhs.dot(std::move(rhs));
}
};
}
namespace qi = boost::spirit::qi;
struct expectation_handler {
template<typename Iterator>
void operator()(Iterator first, Iterator last, const boost::spirit::info &info) const {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
}
};
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression(), qi::ascii::space_type> {
grammar()
: grammar::base_type(expression) {
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
expression = logical.alias() > qi::eoi;
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
equality = relational[qi::_val = qi::_1]
>> *(
((qi::lit("==") > relational[op(qi::_val, qi::_1, ast::operators::equals{})]) |
(qi::lit("!=") > relational[op(qi::_val, qi::_1, ast::operators::not_equals{})]))
);
relational = additive[qi::_val = qi::_1]
>> *(
((qi::lit("<") > relational[op(qi::_val, qi::_1, ast::operators::less_than{})]) |
(qi::lit("<=") > relational[op(qi::_val, qi::_1, ast::operators::less_equals{})]) |
(qi::lit(">") > relational[op(qi::_val, qi::_1, ast::operators::greater_than{})]) |
(qi::lit(">=") > relational[op(qi::_val, qi::_1, ast::operators::greater_equals{})]))
);
additive = multiplicative[qi::_val = qi::_1]
>> *(
((qi::lit("+") > multiplicative[qi::_val += qi::_1]) |
(qi::lit("-") > multiplicative[qi::_val -= qi::_1]))
);
multiplicative = factor[qi::_val = qi::_1]
>> *(
((qi::lit("*") > factor[qi::_val *= qi::_1]) |
(qi::lit("/") > factor[qi::_val /= qi::_1]))
);
factor = primary[qi::_val = qi::_1]
>> *((qi::lit("**")) > primary[op(qi::_val, qi::_1, ast::operators::factor{})]);
primary =
qi::double_[qi::_val = qi::_1]
| ('(' > expression[qi::_val = qi::_1] > ')')
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})])
| variable[qi::_val = qi::_1]
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})]);
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(boost::phoenix::ref(err_handler), qi::_3, qi::_2, qi::_4));
}
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, logical, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable;
boost::phoenix::function<ast::operators> op;
expectation_handler err_handler;
};
int main(int argc, const char *argv[]) {
std::string input("2 + 5 + t.a");
auto it_begin(input.begin()), it_end(input.end());
grammar<decltype(it_begin)> parser;
ast::expression expression;
qi::phrase_parse(it_begin, it_end, parser, qi::ascii::space, expression);
ast::printer printer;
printer(expression);
return 0;
}
Run Code Online (Sandbox Code Playgroud)
印刷
op:+(op:+(2, 5), op:.(t, a))
Run Code Online (Sandbox Code Playgroud)
我会按照我“发现”你的代码的顺序来叙述这个。然后我将介绍一些我认为最后最重要的调整。
我喜欢你所做的很多事情。
可以(应该?)改进一些名称。例如,ast::operators没有任何暗示其目的。它是二元运算符表达式的惰性工厂。
所以,命名它make_binary或类似的。
与phoenix::function<>包装它的包装器相同。op在语义动作中并没有很好地表达那里发生的事情。
与其让op(别名make_binary)actor 对 _val 参数产生副作用,不如考虑让它返回一个不同的值。然后一切都可以变得不可变,语义动作更好地表达意图:
rule = expr [ _val = foo(_val, _1, _2, _3) ];
Run Code Online (Sandbox Code Playgroud)
表示 _val 已更新为根据给定参数创建的内容。
在语法层面,事情看起来并不“整洁”。很多都可以通过简单地改进using namespace qi::labels,并摆脱多余的qi::lit()包装器,这会发生变化,例如
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
Run Code Online (Sandbox Code Playgroud)
进入
using ast::operators;
using namespace qi::labels;
logical = equality[_val = _1]
>> *(
(("&&" > equality[op(_val, _1, operators::and_{})]) |
("||" > equality[op(_val, _1, operators::or_{})]))
);
Run Code Online (Sandbox Code Playgroud)
你检查eoi你的语法(对你有好处!)。然而,它被放在一个递归规则中:
expression = logical.alias() > qi::eoi;
Run Code Online (Sandbox Code Playgroud)
这意味着(a+b)*3永远不会解析,因为)在eoi需要的地方找到。通过放在eoi顶层来修复它。
您在语法级别有一个船长,这意味着人们必须传递正确的船长。如果他们不这样做,他们可能会破坏语法。相反,将船长设置为内部,以便您控制它,并且界面更易于使用(正确):
start = qi::skip(qi::ascii::space) [ expression ];
Run Code Online (Sandbox Code Playgroud)
用法:
if (qi::parse(it_begin, it_end, parser, expression)) {
Run Code Online (Sandbox Code Playgroud)
也许:
if (qi::parse(it_begin, it_end, parser > qi::eoi, expression)) {
Run Code Online (Sandbox Code Playgroud)
我意识到驱动程序代码 ( main) 可能超出了您的审查范围,但我将向您展示缺少的错误处理,因为它可能是非常微妙的部分解析:
int main() {
ast::printer printer;
grammar<std::string::const_iterator> parser;
for (std::string const input : {
"2 + 5 + t.a",
"(2 + 5) + t.a", // note the removed eoi constraint
"2 + 5 * t.a",
"2 * 5 - t.a",
"partial match",
"uhoh *",
})
try {
std::cout << "----- " << std::quoted(input) << " ---- \n";
auto it_begin(input.begin()), it_end(input.end());
ast::expression expression;
if (qi::parse(it_begin, it_end, parser, expression)) {
printer(expression);
std::cout << std::endl;
} else {
std::cout << "Not matched\n";
}
if (it_begin != it_end) {
std::string tail(it_begin, it_end);
std::cout << "Remaining unparsed input: " << std::quoted(tail) << "\n";
}
} catch(std::exception const& e) {
std::cout << "Exception: " << std::quoted(e.what()) << "\n";
}
}
Run Code Online (Sandbox Code Playgroud)
请注意,除非您命名规则,否则期望不会提供有用的消息。
Exception: Expected <unnamed-rule> at ""
Run Code Online (Sandbox Code Playgroud)
命名它们的惯用方法是使用 DEBUG 宏:
BOOST_SPIRIT_DEBUG_NODES(
(start)
(expression)(logical)(equality)
(relational)(additive)(multiplicative)
(factor)(unary)(binary)(primary)
(variable)
)
Run Code Online (Sandbox Code Playgroud)
现在:
Exception: Expected <factor> at ""
Run Code Online (Sandbox Code Playgroud)
中场休息:这里的表面变化:Live On Coliru
在打印机中有很多重复 ( apply_visitor(*this...) 并且由于operator(). 我的偏好是中继到 acall或apply函数
同样在打印机中,不要对输出流进行硬编码。有一天(TM)你会想要格式化为一个字符串。或std::cerr, 或文件
在打印机上结合这些注意事项:Live On Coliru
Run Code Online (Sandbox Code Playgroud)struct printer { std::ostream& _os; template <typename T> std::ostream& operator()(T const& v) const { return call(v); } private: std::ostream& call(expression const& ast) const { return boost::apply_visitor(*this, ast.expr); } std::ostream& call(binary_operator const& expr) const { _os << "op:" << expr.op << "("; call(expr.lhs) << ", "; return call(expr.rhs) << ')'; } std::ostream& call(unary_operator const& expr) const { _os << "op:" << expr.op << "("; return call(expr.rhs) << ')'; } template <typename Lit> std::ostream& call(Lit const& v) const { return _os << v; } };
其逻辑扩展是使其成为实际的输出操纵器:
std::cout << "Parsed: " << fmt_expr{expression} << std::endl;
Run Code Online (Sandbox Code Playgroud)
同样,Live On Coliru,也
printer再次简化了访问者:Run Code Online (Sandbox Code Playgroud)std::ostream& call(binary_operator const& expr) const { return _os << "op:" << expr.op << "(" << fmt_expr{expr.lhs} << ", " << fmt_expr{expr.rhs} << ')'; }
在 AST 中,您将实际操作符动态存储为字符串。在我看来,仅针对所有 ast 构建重载(ast::operator::operator()以及 的所有委托成员ast::expr)对运算符进行静态编码并没有太多价值。相反,每次只传递一个字符串?
现在 builder 命名空间可以消失,不对称的工厂成员,并且整个 phoenix 函数是语法本地的:
struct make_binary_f {
ast::binary_operator operator()(ast::expression lhs, ast::expression rhs, std::string op) const {
return { op, lhs, rhs };
}
};
boost::phoenix::function<make_binary_f> make;
Run Code Online (Sandbox Code Playgroud)
另一个中间站Live On Coliru
成就解锁
编写 113 行代码(现在是 218 行而不是 331 行代码)
随机地点:
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
Run Code Online (Sandbox Code Playgroud)
'_'等价于qi::lit('_'), 不是qi::char_('_')这样,这将删除所有下划线。要么使用 char_,要么使用raw[]直接从源迭代器构造参数。
现在我们进入细节:[_val=_1]我们可以使用自动属性传播来代替(参见Boost Spirit: “Semantic actions are evil”?和operator %=rule init)。
分解出更常见的子表达式。与之前的项目符号一起:
primary
= qi::double_[_val = _1]
| ('(' > expression[_val = _1] > ')')
>> *("." > variable[_val = make(_val, _1, ".")])
| variable[_val = _1]
>> *("." > variable[_val = make(_val, _1, ".")]);
Run Code Online (Sandbox Code Playgroud)
变成:
primary %= qi::double_
| (('(' > expression > ')') | variable)
>> *("." > variable[_val = make(_val, _1, ".")])
;
Run Code Online (Sandbox Code Playgroud)
将变体类型提升到外部,expression以便您可以在expression. 另外,请考虑expression从变体 ( LSK )派生。在您的情况下,实际上不需要嵌套表达式,因为一元/二元节点已经强加了顺序。所以你的整个 AST 可以是:
struct unary_operator;
struct binary_operator;
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>
> expr_variant;
struct expression : expr_variant {
using expr_variant::expr_variant;
using expr_variant::operator=;
};
struct unary_operator { expression rhs; std::string op; } ;
struct binary_operator { expression lhs; expression rhs; std::string op; } ;
Run Code Online (Sandbox Code Playgroud)
移动expectation_handler语法类(这是没有用的别的)内,并选择将其与凤凰::功能的现代化?无论如何,由于函子是无状态的,所以不需要ref(当然也不是ref代替cref):
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(expectation_handler{}, _3, _2, _4));
Run Code Online (Sandbox Code Playgroud)
其实只要做好
auto handler = [](Iterator first, Iterator last, const boost::spirit::info &info) {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
};
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(handler, _3, _2, _4));
Run Code Online (Sandbox Code Playgroud)
小问题:使用std::quoted而不是“假”引用:)
后期脑电波,你可以提取大部分语义动作:
auto make_bin =
_val = px::bind(make_<ast::binary_expr>{}, _val, _2, _1);
Run Code Online (Sandbox Code Playgroud)
只要所有的肢体都是无状态的/按值,这不是问题(尽管与将解析器分配给自动变量相反!)。现在只需让运算符公开属性:
expression %= equality
>> *(
(qi::string("&&") > equality)[make_bin] |
(qi::string("||") > equality)[make_bin]
);
equality %= relational
>> *(
(qi::string("==") > relational)[make_bin] |
(qi::string("!=") > relational)[make_bin]
);
relational %= additive
>> *(
(qi::string("<") > relational)[make_bin] |
(qi::string("<=") > relational)[make_bin] |
(qi::string(">") > relational)[make_bin] |
(qi::string(">=") > relational)[make_bin]
);
additive %= multiplicative
>> *(
(qi::string("+") > multiplicative)[make_bin] |
(qi::string("-") > multiplicative)[make_bin]
);
multiplicative %= factor
>> *(
(qi::string("*") > factor)[make_bin] |
(qi::string("/") > factor)[make_bin]
);
factor %= primary
>> *(
(qi::string("**") > primary)[make_bin]
);
primary %= qi::double_
| (('(' > expression > ')') | variable)
>> *(qi::string(".") > variable)[make_bin]
;
Run Code Online (Sandbox Code Playgroud)
实际上,刚刚检查并phoenix::construct可以进行聚合:
auto make_bin =
_val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
Run Code Online (Sandbox Code Playgroud)
还删除了未使用的unary_*机器,将 IO 操纵器移入io命名空间(而不是ast)并重新引入eoi检查main驱动程序...
哎呀,使用一些 c++17,您可以组合每个产品的分支:
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); };
expression %= equality >> *(op("&&","||") > equality)[make_bin];
equality %= relational >> *(op("==","!=") > relational)[make_bin];
relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin];
additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin];
multiplicative %= factor >> *(op("*","/") > factor)[make_bin];
factor %= primary >> *(op("**") > primary)[make_bin];
Run Code Online (Sandbox Code Playgroud)
只是没有设法将其降低到 100 LoC 以下,但我在此过程中添加了更多测试用例。
Coliru 上的现场演示(我发现phoenix::construct<>聚合需要 GCC 或最近的提升或两者兼而有之,因此添加了一个构造函数)
rule = expr [ _val = foo(_val, _1, _2, _3) ];
Run Code Online (Sandbox Code Playgroud)
印刷
"2 + 5 + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"(2 + 5) + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"2 + 5 * t.a" -> op:+(2, op:*(5, op:.(t, a)))
"2 * 5 - t.a" -> op:-(op:*(2, 5), op:.(t, a))
Exception: Expected <eoi> at " match"
Exception: Expected <factor> at ""
"under_scores" -> under_scores
Run Code Online (Sandbox Code Playgroud)
我将考虑的超出范围的事情与您的语法/ast 语义有关。
运算符优先级有点嘈杂。您想要的是一些元数据,允许您“组合”二进制操作数并显示正确的优先级,如下所示:
expression %= primary
>> *(
(binop > expression) [_val = make_bin(_1, _val, _2)]
);
Run Code Online (Sandbox Code Playgroud)
我已经在这个答案的扩展聊天中应用了这个策略,结果代码在 github 上:https : //github.com/sehe/qi-extended-parser-evaluator
如果您有 C++14 支持,请考虑使用 X3。编译时间会少很多。
| 归档时间: |
|
| 查看次数: |
113 次 |
| 最近记录: |