Sta*_*tan 5 c++ boost tokenize
有没有办法让 Boost 的 tokenizer 分割下面这个字符串，同时不拆分其中用引号括起来的部分？
// Sample input: the escaped double quotes enclose a part that must remain a single token.
string s = "1st 2nd \"3rd with some comment\" 4th";
Expected output:
1st
2nd
3rd with some comment
4th
Run Code Online (Sandbox Code Playgroud)
尝试这个代码,这样你就可以避免使用 Boost.Tokenizer 和 Boost.Spirit 库
#include <vector>
#include <string>
#include <iostream>
// Characters that delimit tokens: a space and a horizontal tab (ASCII 9).
const char Separators[] = { ' ', '\t' };

// Returns true when Ch matches one of the entries of Separators, false otherwise.
bool Str_IsSeparator( const char Ch )
{
	const char* const End = Separators + sizeof( Separators );

	for ( const char* Candidate = Separators; Candidate != End; ++Candidate )
	{
		if ( *Candidate == Ch ) { return true; }
	}

	return false;
}
// Splits Str into tokens and stores a numbered range of them in Components.
//
// Tokens are separated by the characters accepted by Str_IsSeparator(). A token
// that begins with a double quote runs up to the next double quote (or the end
// of the string) and may contain separators; the quotes themselves are not
// stored. Empty tokens (an empty pair of quotes) are skipped and not counted.
//
// Tokens are numbered from 1. Every token whose number is >= FromToken is
// stored at Components[ TokenNum - FromToken + 1 ], so the first stored token
// lands in slot 1 and slot 0 is left untouched. Scanning stops right after
// storing a token whose number is >= ToToken.
//
// Components is grown when the target slot does not exist yet; it is never
// shrunk and elements that receive no token are left as they were.
//
// NOTE: FromToken == 0 gets no special treatment; the unsigned subtraction
// wraps around and every slot index is shifted up by one.
void SplitLine( size_t FromToken, size_t ToToken, const std::string& Str, std::vector<std::string>& Components /*, bool ShouldTrimSpaces*/ )
{
	size_t TokenNum = 0;

	// slot index of token N is N - Offset
	const size_t Offset = FromToken - 1;

	const char* CStr = Str.c_str();

	while ( *CStr )
	{
		// bypass spaces & delimiting chars
		while ( *CStr && Str_IsSeparator( *CStr ) ) { CStr++; }

		if ( !*CStr ) { return; }

		const bool InsideQuotes = ( *CStr == '\"' );

		// step over the opening quote so it does not become part of the token
		if ( InsideQuotes ) { CStr++; }

		// locate the end of the token: closing quote for quoted tokens, next separator otherwise
		const char* TokenEnd = CStr;

		if ( InsideQuotes )
		{
			while ( *TokenEnd && *TokenEnd != '\"' ) { TokenEnd++; }
		}
		else
		{
			while ( *TokenEnd && !Str_IsSeparator( *TokenEnd ) ) { TokenEnd++; }
		}

		// extract token
		if ( CStr != TokenEnd )
		{
			TokenNum++;

			// store each token found
			if ( TokenNum >= FromToken )
			{
				const size_t Slot = TokenNum - Offset;

				// grow the output instead of writing past its end
				if ( Slot >= Components.size() ) { Components.resize( Slot + 1 ); }

				Components[ Slot ].assign( CStr, TokenEnd );

				// if ( ShouldTrimSpaces ) { Str_TrimSpaces( &Components[ Slot ] ); }

				// last requested token has been stored
				if ( TokenNum >= ToToken ) { return; }
			}
		}

		// Always move past the terminator (closing quote or separator), even for an
		// empty quoted token; otherwise its closing quote would be taken for the
		// opening quote of the next token. Stay put at the end of the string.
		CStr = TokenEnd;

		if ( *CStr ) { CStr++; }
	}
}
// Demo: tokenizes a sample line with SplitLine and prints one token per line.
int main()
{
	std::string test = "1st 2nd \"3rd with some comment\" 4th";

	// SplitLine( 1, 4, ... ) writes token N into slot N and leaves slot 0 unused,
	// so four tokens need five slots.
	std::vector<std::string> Out;
	Out.resize(5);

	SplitLine(1, 4, test, Out);

	// start at 1: slot 0 is never filled and would only print an empty line
	for(size_t j = 1 ; j < Out.size() ; j++) { std::cout << Out[j] << '\n'; }

	return 0;
}
Run Code Online (Sandbox Code Playgroud)
它使用预先分配好的字符串数组来存放结果(数组下标不是从零开始的,但这很容易修改),实现也非常简单。
| 归档时间: |
|
| 查看次数: |
1168 次 |
| 最近记录: |