Boost Spirit 的 istream_iterator 从流中消耗了过多的数据

Mar*_*ark 5 c++ boost boost-spirit boost-spirit-qi

考虑从更复杂的代码中提取的以下示例:

#include <cstddef>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <utility>

#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>

namespace qi  = boost::spirit::qi;
namespace phx = boost::phoenix;

// Plain data holder for one XML tag: its indentation, its name and a
// variable number of attributes.
struct Tag
{
    // Type used for an XML name.
    typedef std::string name_type;

    // Type used for an XML attribute value.
    typedef std::string value_type;

    // Type of a single XML attribute (name/value pair).
    typedef std::pair<
        name_type,
        value_type
    > attribute_type;

    // Type of the attribute list.
    // Note: a std::map is used to simplify the attribute search.
    typedef std::map<
        name_type,
        value_type
    > list_type;

    // Reset the instance to its freshly constructed state. The indentation is
    // reset as well, so a reused instance never carries over a stale value.
    void clear( )
    {
        m_indent = 0;
        m_name.clear( );
        m_attribute.clear( );
    }

    // Zero-initialised so the member has a defined value even if nothing
    // assigns it after construction.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type   m_name;       // Name of the tag.
    list_type   m_attribute;  // List of tag attributes.
};

// Adapt Tag as a boost::fusion sequence.
// The members are listed in the order m_indent, m_name, m_attribute, which is
// the same order in which the tag grammar below produces its attributes
// (optional integer, name, attribute list).
BOOST_FUSION_ADAPT_STRUCT( Tag,
                         ( std::size_t   , m_indent    )
                         ( Tag::name_type, m_name      )
                         ( Tag::list_type, m_attribute ) )

// Skipper grammar used while decoding: it matches a single character that is
// either a "space" or a "cntrl" character, so that such characters are
// ignored between the tokens of the main grammar.
//
// Template parameter:
//   Iterator - iterator type of the parsed input.
//
// Note: the parameter is deliberately not spelled with a leading underscore
// followed by an uppercase letter; such identifiers are reserved for the
// implementation.
template < typename Iterator >
struct skipper
    : qi::grammar< Iterator >
{
    skipper( ) : skipper::base_type( m_skipper )
    {
        // The rule defines the default skipper grammar:
        m_skipper = ( qi::space )  // Skip all "spaces".
                    |
                    ( qi::cntrl ); // Skip all "cntrl".
    }

    // Start rule of this grammar:
    qi::rule< Iterator > m_skipper;
};

// Grammar used to parse an XML "begin tag" into a Tag.
// The expected format is as follows: <name a="xyz" b="xyz" ... N="xyz">
// An optional integer may directly follow the '<'; it is the first attribute
// of the start rule and therefore maps to the first adapted member of Tag.
//
// Template parameters:
//   Iterator - iterator type of the parsed input.
//   Skipper  - skipper type applied between the tokens.
//
// Note: the parameters are deliberately not spelled with a leading underscore
// followed by an uppercase letter; such identifiers are reserved for the
// implementation.
template < typename Iterator, typename Skipper >
struct tag_begin : qi::grammar< Iterator, Tag( ), Skipper >
{
    tag_begin( ) : tag_begin::base_type( m_tag )
    {
        // A name is a run of characters from the set [a-zA-Z0-9_.:]; it ends
        // at the first character outside that set (e.g. ' ', '=' or '>').
        // lexeme[] disables the skipper inside the name.
        // Note: '*' also accepts an empty name.
        m_string = qi::lexeme[ *( qi::char_( "a-zA-Z0-9_.:" ) ) ];

        // An attribute has the format 'name="value"'.
        // NOTE(review): the value is parsed with the same rule as the name,
        // so values containing characters outside [a-zA-Z0-9_.:] (for
        // example spaces) will not match.
        m_attribute =    m_string
                      >> "=\""
                      >> m_string
                      >> '"';

        // The attribute list is a sequence of zero or more attributes; the
        // separating whitespace is consumed by the skipper.
        m_list = *( m_attribute - '>' );

        // Finally the resulting XML tag has the following format: <name a="xyz" b="xyz" ... N="xyz">
        m_tag =     '<'
                 >> -qi::int_
                 >> m_string
                 >> m_list
                 >> '>';

        // Enable debug support for the used rules. To activate the debug output define macro BOOST_SPIRIT_DEBUG:
        BOOST_SPIRIT_DEBUG_NODES( ( m_string )( m_attribute )( m_list ) )
    }

    // The following variables define the rules used within this grammar:
    qi::rule< Iterator, Tag::name_type( )     , Skipper > m_string;
    qi::rule< Iterator, Tag::attribute_type( ), Skipper > m_attribute;
    qi::rule< Iterator, Tag::list_type( )     , Skipper > m_list;
    qi::rule< Iterator, Tag( )                , Skipper > m_tag;
};

bool beginTag( std::istream& stream, Tag& tag )
{
    // Ensure that no whitespace characters are skipped:
    stream.unsetf( std::ios::skipws );

    // Create begin and end iterator for given stream:
    boost::spirit::istream_iterator begin( stream );
    boost::spirit::istream_iterator end;

    // Define the grammar skipper type:
    typedef skipper<
        boost::spirit::istream_iterator
    > skipper_type;

    // Create an instance of the used skipper:
    skipper_type sk;

    // Create an instance of the used grammar:
    tag_begin<
        boost::spirit::istream_iterator,
        skipper_type
    > gr;

    // Try to parse the data stored within the stream according the grammar and store the result in the tag variable:
    bool r = boost::spirit::qi::phrase_parse( begin,
                                              end,
                                              gr,
                                              sk,
                                              tag );

    char nextSym = 0;
    stream >> nextSym;

    for( auto i = tag.m_attribute.begin( ); i != tag.m_attribute.end( ); ++i )
    {
        std::cout << i->first << " : " << i->second << std::endl;
    }
    std::cout << "Next symbol: " << nextSym << std::endl;

    return r;
}

int main( )
{
    std::stringstream s;
    s << "<object cName=\"bool\" cVersion=\"1\" vName=\"bool\">       <value>0</value></object>";

    Tag t;
    beginTag( s, t );

    return 0;
}
Run Code Online (Sandbox Code Playgroud)

我使用语法来提取xml标记内容.原则上这可以按预期工作,结果如下:

cName : bool
cVersion : 1
vName : bool
Next symbol: v
Run Code Online (Sandbox Code Playgroud)

问题在于解析器从流中消耗了过多的数据。我期望解析器在第一个标签的结束符 '>' 处停止,但它似乎还消耗了后面的空格和 '<' 符号,因此从流中读取到的下一个符号是 'v'。我想避免这种情况,因为后续的解析器调用期望以 '<' 符号开头。有什么想法吗?

seh*_*ehe 4

没有可靠的方法来实现这一目标。

\n\n

问题在于您没有在多次解析调用之间重复使用 istream_iterator。boost::spirit::istream_iterator 的全部目的,就是在 InputIterator¹ 之上提供一个具备 multi_pass 能力的迭代器。

\n\n

由于 Spirit 允许任意语法和任意回溯,因此您无法阻止消耗超过实际成功解析的输入。

\n\n

这里显而易见的解决方案是将所有后续步骤集成到相同的语法中和/或重用迭代器(因此迭代器存储的回溯缓冲区仍然包含您需要的字符)。

\n\n
\n\n

演示/概念证明

\n\n

这是一个在循环中解析开始标签(open tag)的版本:

\n\n
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {\n    std::cout << "============\\nParsed open tag \'" << tag.m_name << "\'\\n";\n    for (auto const& p: tag.m_attribute)\n        std::cout << p.first << ": " << p.second << "\\n";\n\n    count += 1;\n    tag.clear();\n};\n\nstd::cout << "Next symbol: ";\nstd::copy(begin, end, std::ostream_iterator<char>(std::cout));\n
Run Code Online (Sandbox Code Playgroud)\n\n

它打印:

\n\n
============
Parsed open tag 'object'
cName: bool
cVersion: 1
vName: bool
============
Parsed open tag 'value'
Next symbol: 0</value>
        </object>
Run Code Online (Sandbox Code Playgroud)\n\n

Live On Coliru

\n\n
//#define BOOST_SPIRIT_DEBUG\n#include <boost/fusion/adapted.hpp>\n#include <boost/spirit/include/qi.hpp>\n#include <map>\n\nnamespace qi = boost::spirit::qi;\n\n// The class implements a XML tag storing the name and a variable number of\n// attributes:\nstruct Tag {\n    typedef std::string name_type;\n    typedef std::string value_type;\n\n    typedef std::pair<name_type, value_type> attribute_type;\n    typedef std::map<name_type, value_type>  list_type;\n\n    // Clear all information stored within the instance:\n    void clear() {\n        m_name.clear();\n        m_attribute.clear();\n    }\n\n    std::size_t m_indent;  // The tag shall be / is indented by m_indent number of tabs.\n    name_type m_name;      // Name of the tag.\n    list_type m_attribute; // List of tag attributes.\n};\n\nBOOST_FUSION_ADAPT_STRUCT(Tag, m_indent, m_name, m_attribute)\n\n// This class implements the grammar used to parse a "XML" begin tag.\n// The expected format is as follows: <name a="xyz" b="xyz" ... 
N="xyz">\ntemplate <typename Iterator, typename Skipper> struct tag_begin : qi::grammar<Iterator, Tag(), Skipper> {\n    tag_begin() : tag_begin::base_type(m_tag) {\n        m_string     = *qi::char_("a-zA-Z0-9_.:");\n        m_attribute  = m_string >> \'=\' >> qi::lexeme[\'"\' >> m_string >> \'"\'];\n        m_attributes = *m_attribute;\n        m_tag        = \'<\' >> -qi::int_ >> m_string >> m_attributes >> \'>\';\n\n        BOOST_SPIRIT_DEBUG_NODES((m_string)(m_attribute)(m_attributes))\n    }\n  private:\n\n    // The following variables define the rules used within this grammar:\n    qi::rule<Iterator, Tag::attribute_type(), Skipper> m_attribute;\n    qi::rule<Iterator, Tag::list_type(), Skipper> m_attributes;\n    qi::rule<Iterator, Tag(), Skipper> m_tag;\n    // lexemes\n    qi::rule<Iterator, Tag::name_type()> m_string;\n};\n\nbool beginTag(std::istream &stream, Tag &tag) {\n    // Ensure that no whitespace characters are skipped:\n    stream.unsetf(std::ios::skipws);\n\n    typedef boost::spirit::istream_iterator It; \n    typedef qi::rule<It> skipper_type;\n\n    skipper_type sk = qi::space | qi::cntrl;\n    tag_begin<boost::spirit::istream_iterator, skipper_type> gr;\n\n    It begin(stream), end;\n\n    int count = 0;\n    while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {\n        std::cout << "============\\nParsed open tag \'" << tag.m_name << "\'\\n";\n        for (auto const& p: tag.m_attribute)\n            std::cout << p.first << ": " << p.second << "\\n";\n\n        count += 1;\n        tag.clear();\n    };\n\n    std::cout << "Next symbol: ";\n    std::copy(begin, end, std::ostream_iterator<char>(std::cout));\n\n    return count > 0;\n}\n\nint main() {\n    std::stringstream s;\n    s << R"(\n        <object cName="bool" cVersion="1" vName="bool">\n            <value>0</value>\n        </object>\n    )";\n\n    Tag t;\n    beginTag(s, t);\n}\n
Run Code Online (Sandbox Code Playgroud)\n\n

¹(严格只能向前推进,且不能重复解引用)

\n