以提升精神实现运营商优先

Bor*_*der 2 c++ parsing boost

我试图复制这个例子来实现像 C++ 这样的运算符优先规则(我从一个子集开始,但我最终计划添加其他的)。

尽我所能,我无法获得解析单个二进制操作的语法。它会解析文字 (44, 3.42, "stackoverflow") 就好了,但会失败像3 + 4.

我没有看这个问题,而这一次,试图让我的解决方案的工作,但得到了同样的结果。

(为了保持简短,我将仅在此处发布相关部分,完整代码在此处

AST 的相关数据结构:

enum class BinaryOperator
{
    ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};

typedef boost::variant<double, int, std::string> Litteral;

struct Identifier { std::string name; };

typedef boost::variant<
    Litteral, 
    Identifier,
    boost::recursive_wrapper<UnaryOperation>,
    boost::recursive_wrapper<BinaryOperation>,
    boost::recursive_wrapper<FunctionCall>
> Expression;

struct BinaryOperation
{
    Expression rhs, lhs;
    BinaryOperator op;

    BinaryOperation() {}
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
Run Code Online (Sandbox Code Playgroud)

语法:

template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
    BoltGrammar() : BoltGrammar::base_type(start, "start")
    {
        equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
        equal %= (lowerGreater >> equalOp >> lowerGreater);
        equal.name("equal");

        lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
        lowerGreater %= (shift >> lowerGreaterOp >> shift);
        lowerGreater.name("lower or greater");

        shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
        shift %= (addSub >> shiftOp >> addSub);
        shift.name("shift");

        addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
        addSub %= (multDivMod >> addSubOp >> multDivMod);
        addSub.name("add or sub");

        multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
        multDivMod %= (value >> multDivModOp >> value);
        multDivMod.name("mult, div, or mod");

        value %= identifier | litteral | ('(' > expression > ')');
        value.name("value");

        start %= qi::eps >> *(value >> qi::lit(';'));
        start.name("start");

        expression %= identifier | litteral | equal;
        expression.name("expression");

        identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
        identifier.name("identifier");

        litteral %= qi::double_ | qi::int_ | quotedString;
        litteral.name("litteral");

        quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
        quotedString.name("quoted string");

        namespace phx = boost::phoenix;
        using namespace qi::labels;
        qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}

qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;

qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
Run Code Online (Sandbox Code Playgroud)

seh*_*ehe 5

主要问题(确实)似乎在您链接到的第二个答案中得到解决。

让我谈谈几点:

  1. 主要问题是复合:

我想你会想要更像

    expression   = '(' >> expression >> ')' | equal | value;
    value        = identifier | literal;

    // and then
    start        = qi::eps >> -expression % ';';
Run Code Online (Sandbox Code Playgroud)
  1. 您的BinaryOperation作品允许有操作员在场的情况;这打破了为运算符优先级嵌套规则的方式: amultDivOp永远不会被接受为匹配项,除非它碰巧后跟 an addSubOp

    addSub %= (multDivMod >> addSubOp >> multDivMod);
    multDivMod %= (value >> multDivModOp >> value);
    
    Run Code Online (Sandbox Code Playgroud)

这可以最好地修复,如链接的答案所示:

    addSub     = multDivMod >> -(addSubOp     >> multDivMod);
    multDivMod = value      >> -(multDivModOp >> value);
Run Code Online (Sandbox Code Playgroud)

您可以在其中使用语义操作“动态”构建 AST 节点:

    addSub     = multDivMod >> -(addSubOp     >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    multDivMod = value      >> -(multDivModOp >> value)      [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
Run Code Online (Sandbox Code Playgroud)

这击败了“乏味”的声明式方法(这会导致很多回溯,参见例如使用替代解析器提升精神表现不佳

  1. 文字规则会将整数解析为双精度数,因为它并不严格:

    literal   %= qi::double_ | qi::int_ | quotedString;
    
    Run Code Online (Sandbox Code Playgroud)

    你可以解决这个问题:

    qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
    
    literal      = quotedString | strict_double | qi::int_;
    
    Run Code Online (Sandbox Code Playgroud)
  2. FunctionCall应该适应functionName作为Identifier(不是std::string

    BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
    
    Run Code Online (Sandbox Code Playgroud)
  3. Expression operator<<可以(应该)是一个boost::static_visitor这样你

    • 消除魔术类型开关号
    • 让编译器检查开关的完整性
    • 可以利用重载解析来打开变体成员类型

使用 c++11,代码仍然可以在 one 函数中:

    std::ostream& operator<<(std::ostream& os, const Expression& expr)
    {
        os << "Expression ";
        struct v : boost::static_visitor<> {
            v(std::ostream& os) : os(os) {}
            std::ostream& os;

            void operator()(Literal         const& e) const { os << "(literal: "    << e                           << ")"; }
            void operator()(Identifier      const& e) const { os << "(identifier: " << e.name                      << ")"; }
            void operator()(UnaryOperation  const& e) const { os << "(unary op: "   << boost::fusion::as_vector(e) << ")"; }
            void operator()(BinaryOperation const& e) const { os << "(binary op: "  << boost::fusion::as_vector(e) << ")"; }
            void operator()(FunctionCall    const& e) const {
                os << "(function call: " << e.functionName << "("; 
                if (e.args.size() > 0) os << e.args.front();
                for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
                os << ")";
            }
        };
        boost::apply_visitor(v(os), expr);
        return os;
    }
Run Code Online (Sandbox Code Playgroud)
  1. 您可以使用BOOST_SPIRIT_DEBUG_NODES宏来命名规则

    BOOST_SPIRIT_DEBUG_NODES(
        (start)(expression)(identifier)(literal)(quotedString)
        (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
    )
    
    Run Code Online (Sandbox Code Playgroud)
  2. 您应该从spirit/include/目录中包含,然后转发到spirit/home/phoenix/include/而不是直接包含它们。

这是一个完整的示例,它还改进了Live On Coliru可读性的语法规则:

//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3

#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;
namespace ascii = boost::spirit::ascii;

enum class UnaryOperator
{
    NOT,
    PLUS,
    MINUS,
};

std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
{
    switch (op)
    {
        case UnaryOperator::NOT:   return os << "!";
        case UnaryOperator::PLUS:  return os << "+";
        case UnaryOperator::MINUS: return os << "-";
    }

    assert(false);
}

enum class BinaryOperator
{
    ADD,        SUBTRACT,      MULTIPLY, DIVIDE,
    MODULO,
    LEFT_SHIFT, RIGHT_SHIFT,
    EQUAL,      NOT_EQUAL,
    LOWER,      LOWER_EQUAL,
    GREATER,    GREATER_EQUAL,
};

std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
{
    switch (op)
    {
        case BinaryOperator::ADD:           return os << "+";
        case BinaryOperator::SUBTRACT:      return os << "-";
        case BinaryOperator::MULTIPLY:      return os << "*";
        case BinaryOperator::DIVIDE:        return os << "/";
        case BinaryOperator::MODULO:        return os << "%";
        case BinaryOperator::LEFT_SHIFT:    return os << "<<";
        case BinaryOperator::RIGHT_SHIFT:   return os << ">>";
        case BinaryOperator::EQUAL:         return os << "==";
        case BinaryOperator::NOT_EQUAL:     return os << "!=";
        case BinaryOperator::LOWER:         return os << "<";
        case BinaryOperator::LOWER_EQUAL:   return os << "<=";
        case BinaryOperator::GREATER:       return os << ">";
        case BinaryOperator::GREATER_EQUAL: return os << ">=";
    }

    assert(false);
}

typedef boost::variant<
    double, 
    int, 
    std::string
> Literal;

struct Identifier
{
    std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))

struct UnaryOperation;
struct BinaryOperation;
struct FunctionCall;

typedef boost::variant<
    Literal, 
    Identifier,
    boost::recursive_wrapper<UnaryOperation>,
    boost::recursive_wrapper<BinaryOperation>,
    boost::recursive_wrapper<FunctionCall>
> Expression;

struct UnaryOperation
{
    Expression rhs;
    UnaryOperator op;
};
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))

struct BinaryOperation
{
    Expression rhs;
    BinaryOperator op;
    Expression lhs;

    BinaryOperation() {}
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))

struct FunctionCall
{
    Identifier functionName;
    std::vector<Expression> args;
};
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))

struct Program
{
    std::vector<Expression> statements;
};
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))

std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
    os << "Expression ";
    struct v : boost::static_visitor<> {
        v(std::ostream& os) : os(os) {}
        std::ostream& os;

        void operator()(Literal         const& e) const { os << "(literal: "    << e                           << ")"; }
        void operator()(Identifier      const& e) const { os << "(identifier: " << e.name                      << ")"; }
        void operator()(UnaryOperation  const& e) const { os << "(unary op: "   << boost::fusion::as_vector(e) << ")"; }
        void operator()(BinaryOperation const& e) const { os << "(binary op: "  << boost::fusion::as_vector(e) << ")"; }
        void operator()(FunctionCall    const& e) const {
            os << "(function call: " << e.functionName << "("; 
            if (e.args.size() > 0) os << e.args.front();
            for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
            os << ")";
        }
    };
    boost::apply_visitor(v(os), expr);
    return os;
}

std::ostream& operator<<(std::ostream& os, const Program& prog)
{
    os << "Program" << std::endl << "{" << std::endl;
    for (const Expression& expr : prog.statements) 
    { 
        std::cout << "\t" << expr << std::endl; 
    }
    os << "}" << std::endl;

    return os;
}

template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
    BoltGrammar() : BoltGrammar::base_type(start, "start")
    {
        using namespace qi::labels;

        equalOp.add
            ("==", BinaryOperator::EQUAL)
            ("!=", BinaryOperator::NOT_EQUAL);
        lowerGreaterOp.add
            ("<", BinaryOperator::LOWER)
            ("<=", BinaryOperator::LOWER_EQUAL)
            (">", BinaryOperator::GREATER)
            (">=", BinaryOperator::GREATER_EQUAL);
        shiftOp.add
            ("<<", BinaryOperator::LEFT_SHIFT)
            (">>", BinaryOperator::RIGHT_SHIFT);
        addSubOp.add
            ("+", BinaryOperator::ADD)
            ("-", BinaryOperator::SUBTRACT);
        multDivModOp.add
            ("*", BinaryOperator::MULTIPLY)
            ("/", BinaryOperator::DIVIDE)
            ("%", BinaryOperator::MODULO);

        equal        = lowerGreater [ _val=_1 ] >> -(equalOp        >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        lowerGreater = shift        [ _val=_1 ] >> -(lowerGreaterOp >> shift)        [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        shift        = addSub       [ _val=_1 ] >> -(shiftOp        >> addSub)       [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        addSub       = multDivMod   [ _val=_1 ] >> -(addSubOp       >> multDivMod)   [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        multDivMod   = value        [ _val=_1 ] >> -(multDivModOp   >> value)        [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];

        start        = qi::eps >> -expression % ';';
        expression   = '(' >> expression >> ')' | equal | value;
        value        = identifier | literal;
        identifier   = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];

        qi::real_parser<double, qi::strict_real_policies<double> > strict_double;

        literal      = quotedString | strict_double | qi::int_;
        quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];

        qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);

        BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
                (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
                )
    }

    qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
    qi::rule<Iterator, Skipper, Expression()>  equal,  lowerGreater,  shift,  addSub,  multDivMod;
    qi::rule<Iterator, Skipper, Expression()>  value;

    qi::rule<Iterator, Skipper, Program()>     start;
    qi::rule<Iterator, Skipper, Expression()>  expression;
    qi::rule<Iterator, Skipper, Identifier()>  identifier;
    qi::rule<Iterator, Skipper, Literal()>     literal;
    qi::rule<Iterator, Skipper, std::string()> quotedString;
};

typedef std::string::iterator Iterator;
typedef boost::spirit::ascii::space_type Skipper;

int main()
{
    BoltGrammar<Iterator, Skipper> grammar;

    std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");

    Program prog;
    Iterator iter = str.begin(), last = str.end();
    bool r = phrase_parse(iter, last, grammar, ascii::space, prog);

    if (r && iter == last)
    {
        std::cout << "Parsing succeeded: " << prog << std::endl;
    }
    else
    {
        std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
    }
    
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

印刷:

Parsing succeeded: Program
{
    Expression (literal: 3)
    Expression (literal: 4.2)
    Expression (literal: lounge <c++>)
    Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
}
Run Code Online (Sandbox Code Playgroud)