Rel*_*lla 3 c++ string parsing boost split
我们有一个简单的 split 函数:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>
using namespace std;
// Splits `s` on every occurrence of the (multi-character) separator `delim`.
//
//  s          - text to tokenise
//  delim      - separator sequence; when empty, `s` is returned as the only token
//  keep_empty - when false, zero-length tokens are dropped from the result
//
// Returns the tokens in the order they appear in `s`.
vector<string> split(const string& s, const string& delim, const bool keep_empty = true) {
    vector<string> tokens;

    // Nothing to split on: hand the input back untouched.
    if (delim.empty()) {
        tokens.push_back(s);
        return tokens;
    }

    string::size_type from = 0;
    for (;;) {
        const string::size_type hit = s.find(delim, from);
        const bool is_last = (hit == string::npos);

        // The final token runs to the end of the input; every other token
        // stops right before the separator that was just located.
        const string piece = is_last ? s.substr(from) : s.substr(from, hit - from);
        if (keep_empty || !piece.empty()) {
            tokens.push_back(piece);
        }

        if (is_last) {
            return tokens;
        }
        from = hit + delim.size();  // resume scanning just past the separator
    }
}
Run Code Online (Sandbox Code Playgroud)
或者使用 boost::split。我们有一个简单的 main 函数,例如:
// Demo: tokenise a sample sentence on single spaces and print one token per line.
int main() {
    const vector<string> words = split("close no \"\n matter\" how \n far", " ");
    for (vector<string>::const_iterator word = words.begin(); word != words.end(); ++word) {
        cout << *word << "\n";
    }
}
Run Code Online (Sandbox Code Playgroud)
如何让它输出类似下面的内容:
close
no
"\n matter"
how
end symbol found.
Run Code Online (Sandbox Code Playgroud)
我们想在 split 中引入一些不会被拆分的结构(例如带引号的字符串),以及用于结束解析过程的字符。这样的功能该如何实现?
seh*_*ehe 11
更新:作为对授予赏金的"感谢",我实现了最初因为"你不会需要它"(YAGNI)而跳过的 4 个功能。
现在支持部分引用的列
这就是您报告的问题:例如,使用分隔符 , 时,
只有 test,"one,two",three 是有效的,而 test,one","two","three 则无效。现在两者都能被接受。
现在支持自定义分隔符表达式
以前只能指定单个字符作为分隔符。现在可以将任意 Spirit Qi 解析器表达式指定为分隔符规则,例如:
splitInto(input, output, ' ');                              // single space
splitInto(input, output, +qi::lit(' '));                    // one or more spaces
splitInto(input, output, +qi::char_(" \t"));                // one or more spaces or tabs
splitInto(input, output, (qi::double_ >> !qi::lit('#')));   // -- any parse expression
Run Code Online (Sandbox Code Playgroud)
请注意,这会更改默认重载的行为
旧版本默认将重复的空格视为单个分隔符。现在如果需要这种行为,必须显式指定(见上面第 2 个示例)。
现在支持带引号的值内部的引号("")(而不是让它们直接消失)
请参阅代码示例。实现当然很简单。请注意,
出现在带引号的结构之外的 "" 序列仍然表示空字符串(这是为了兼容现有的 CSV 输出格式,例如那些给空字符串加上多余引号的格式)
除了容器之外,还支持将 Boost 范围(Boost.Range,例如 char[])作为输入
好吧,你不会需要它(但能够直接写出下面这样的调用,对我来说非常方便:
splitInto("a char array", ...) :)
正如我多少预料到的,你确实需要部分引用的字段(见你的评论¹)。好了,现在给你(难点在于让它在不同版本的 Boost 上行为一致)。
随机笔记和读者观察:
splitInto 模板函数愉快地支持你抛出的任何东西:
vector<string>(所有行扁平化为一个序列)
vector<vector<string>>(每行一组标记)
list<list<string>>(如果你更喜欢的话)
set<set<string>>(每行去重后的标记集合)
注意:输出中的 \n 显示为 ?,以便于理解(见 safechars)
通过传入 +qi::lit(' ') 而不是默认的 (' '),将跳过空字段(即重复的分隔符)
这是使用以下环境编译的
它适用于(测试)
注意:输出容器的扁平化似乎仅适用于 Spirit V2.5(Boost 1.47.0)。
(原因可能很简单:旧版本需要额外的 #include?)
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 80
// YAGNI #4 - support boost ranges in addition to containers as input (e.g. char[])
#define SUPPORT_BOOST_RANGE // our own define for splitInto
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp> // for pre 1.47.0 boost only
#include <boost/spirit/version.hpp>
#include <sstream>
// File-local helpers: namespace aliases for the Boost.Spirit/Phoenix libraries
// and the Qi grammar that splitInto() instantiates.
namespace /*anon*/
{
namespace phx=boost::phoenix;
namespace qi =boost::spirit::qi;
namespace karma=boost::spirit::karma;
// Qi grammar for delimiter-separated fields with optional double-quoted parts.
//   Iterator - iterator type over the input characters
//   Output   - attribute type synthesized by the start rule (the result
//              container handed to qi::parse by the caller)
template <typename Iterator, typename Output>
struct my_grammar : qi::grammar<Iterator, Output()>
{
// Type of the delimiter parser: a rule without an attribute.
typedef qi::rule<Iterator> delim_t;
//my_grammar(delim_t const& _delim) : delim(_delim),
// Takes the delimiter by value and copies it into the `delim` member; the
// start rule is `rule`, registered under the name "quoted_delimited".
// NOTE: the base class is initialized before `delim`, regardless of the order
// in which the two initializers are written below.
my_grammar(delim_t _delim) : delim(_delim),
my_grammar::base_type(rule, "quoted_delimited")
{
using namespace qi;
// any single character except a double quote
noquote = char_ - '"';
// one or more characters that are neither a quote nor an end-of-line, each
// taken only where the delimiter does not match (!delim is a look-ahead)
plain = +((!delim) >> (noquote - eol));
// a double-quoted run: a doubled quote ("") inside contributes one '"';
// '>' is Qi's expectation operator, so once the opening quote has matched, a
// missing closing quote raises qi::expectation_failure
quoted = lit('"') > *(noquote | '"' >> char_('"')) > '"';
#if SPIRIT_VERSION >= 0x2050 // boost 1.47.0
// a field: any mix of quoted and plain pieces collected into one string
mixed = *(quoted|plain);
#else
// manual folding
// each piece is streamed into the local ostringstream (_a), whose contents
// are then assigned to the rule's attribute (_val)
mixed = *( (quoted|plain) [_a << _1]) [_val=_a.str()];
#endif
// you gotta love simple truths:
// fields separated by `delim` form a line; lines are separated by eol
rule = mixed % delim % eol;
// debug hooks; per the commented-out define near the top of the file these
// are presumably only active when BOOST_SPIRIT_DEBUG is defined
BOOST_SPIRIT_DEBUG_NODE(rule);
BOOST_SPIRIT_DEBUG_NODE(plain);
BOOST_SPIRIT_DEBUG_NODE(quoted);
BOOST_SPIRIT_DEBUG_NODE(noquote);
BOOST_SPIRIT_DEBUG_NODE(delim);
}
private:
// caller-supplied delimiter parser (no attribute)
qi::rule<Iterator> delim;
// yields a single character
qi::rule<Iterator, char()> noquote;
#if SPIRIT_VERSION >= 0x2050 // boost 1.47.0
qi::rule<Iterator, std::string()> plain, quoted, mixed;
#else
qi::rule<Iterator, std::string()> plain, quoted;
// `mixed` carries a local ostringstream used to concatenate its pieces by hand
qi::rule<Iterator, std::string(), qi::locals<std::ostringstream> > mixed;
#endif
// start rule; synthesizes the whole Output container
qi::rule<Iterator, Output()> rule;
};
}
// Parses `input` with my_grammar, using `delim` as the field delimiter, and
// stores the parsed tokens in `result`.
//
//  input  - source text; a Boost range when SUPPORT_BOOST_RANGE is defined,
//           otherwise any type exposing const_iterator / begin() / end()
//  result - output container that receives the parse attribute
//  delim  - delimiter, passed on to the my_grammar constructor
//
// Returns true only when the parse succeeded AND consumed the whole input.
// On failure a diagnostic is written to std::cerr and false is returned; a
// qi::expectation_failure thrown by the parser is caught and reported the
// same way.
template <typename Input, typename Container, typename Delim>
bool splitInto(const Input& input, Container& result, Delim delim)
{
#ifdef SUPPORT_BOOST_RANGE
    typedef typename boost::range_const_iterator<Input>::type It;
    It first(boost::begin(input));
    It last(boost::end(input));
#else
    typedef typename Input::const_iterator It;
    It first(input.begin());
    It last(input.end());
#endif
    try
    {
        my_grammar<It, Container> parser(delim);
        const bool matched = qi::parse(first, last, parser, result);

        // Success requires a match with no unparsed remainder.
        if (matched && first == last)
            return true;

        std::cerr << "parsing failed at: \"" << std::string(first, last) << "\"\n";
        return false;
    }
    catch (const qi::expectation_failure<It>& e)
    {
        std::cerr << "FIXME: expected " << e.what_ << ", got '"
                  << std::string(e.first, e.last) << "'" << std::endl;
        return false;
    }
}
// Convenience overload: splits `input` into `result` using a single space
// character as the delimiter. Returns whatever the three-argument overload
// returns.
template <typename Input, typename Container>
bool splitInto(const Input& input, Container& result)
{
    const char default_delimiter = ' '; // default space delimited
    return splitInto(input, result, default_delimiter);
}
/********************************************************************
 * Overwrites a line-break character ('\r' or '\n') with '?' in     *
 * place; every other character is left untouched. Used by the demo *
 * output so that a line break that was parsed as data can be told  *
 * apart from one that was printed deliberately.                    *
 ********************************************************************/
void safechars(char& ch)
{
    if (ch == '\r' || ch == '\n')
        ch = '?';
}
// Demo driver: feeds several sample inputs through splitInto() and prints each
// result to std::cout with a Karma generator. Every output character passes
// through safechars, so '\r' / '\n' inside a token are printed as '?'.
int main()
{
using namespace karma; // demo output generators only :)
std::string input;
#if SPIRIT_VERSION >= 0x2050 // boost 1.47.0
// sample invocation: simple vector of elements in order - flattened across lines
std::vector<std::string> flattened;
input = "actually on\ntwo lines";
// on success, print all tokens on one line separated by '|'
if (splitInto(input, flattened))
std::cout << format(*char_[safechars] % '|', flattened) << std::endl;
#endif
// one std::set of tokens per parsed line; `custom` is reused by the examples below
std::list<std::set<std::string> > linewise, custom;
// YAGNI #1 - now supports partially quoted columns
input = "partially q\"oute\"d columns";
// each set is printed as set['tok', 'tok', ...], one set per output line
if (splitInto(input, linewise))
std::cout << format(( "set[" << ("'" << *char_[safechars] << "'") % ", " << "]") % '\n', linewise) << std::endl;
// YAGNI #2 - now supports custom delimiter expressions
input="custom delimiters: 1997-03-14 10:13am";
// the same input is split twice with different delimiter expressions into the
// same container; the sample output shows two sets, i.e. each call adds to `custom`
if (splitInto(input, custom, +qi::char_("- 0-9:"))
&& splitInto(input, custom, +(qi::char_ - qi::char_("0-9"))))
std::cout << format(( "set[" << ("'" << *char_[safechars] << "'") % ", " << "]") % '\n', custom) << std::endl;
// YAGNI #3 - now supports quotes ("") inside quoted values (instead of just making them disappear)
input = "would like ne\"\"sted \"quotes like \"\"\n\"\" that\"";
// drop the results of the previous example before reusing the container
custom.clear();
// delimiter here is any single character out of '(', ')' and ' '
if (splitInto(input, custom, qi::char_("() ")))
std::cout << format(( "set[" << ("'" << *char_[safechars] << "'") % ", " << "]") % '\n', custom) << std::endl;
return 0;
}
Run Code Online (Sandbox Code Playgroud)
如下所示的样本输出:
actually|on|two|lines
set['columns', 'partially', 'qouted']
set['am', 'custom', 'delimiters']
set['', '03', '10', '13', '14', '1997']
set['like', 'nested', 'quotes like "?" that', 'would']
Run Code Online (Sandbox Code Playgroud)
更新以前失败的测试用例的输出:
--server=127.0.0.1:4774/|--username=robota|--userdescr=robot A ? I am cool robot ||--robot|>|echo.txt
Run Code Online (Sandbox Code Playgroud)
¹ 我必须承认,读到"它崩溃了"[原文如此]时,我笑得很开心。这听起来很像我的最终用户。准确地说:崩溃是一种无法恢复的应用程序故障。你遇到的是一个已被处理的错误,从你的角度来看只不过是"意外的行为"。无论如何,这已经修好了 :)