我很好奇,这段代码能否简化……
/// <summary>
/// Lazily splits <paramref name="str"/> on <paramref name="sep"/>, skipping separators that
/// are inside single or double quotes, inside <c>[ ]</c> brackets, or directly preceded by an
/// unescaped backslash.
/// </summary>
/// <param name="str">The text to split.</param>
/// <param name="sep">The separator character; defaults to a comma.</param>
/// <returns>
/// The pieces between accepted separators, in order. Pieces are not trimmed and keep their
/// quotes, brackets and backslashes. At least one piece (possibly empty) is always produced.
/// </returns>
/// <remarks>
/// Brackets do not nest: the first unquoted, unescaped <c>]</c> ends the bracketed section.
/// A quoted section is only closed by the same quote character that opened it.
/// </remarks>
internal static IEnumerable<string> Split(string str, char sep = ',')
{
    int segmentStart = 0;
    bool inQuotes = false;
    bool inBrackets = false;
    bool afterBackslash = false;
    char openingQuote = '\0';

    for (int pos = 0; pos < str.Length; pos++)
    {
        char current = str[pos];

        // Every character other than a backslash cancels a pending escape, so remember
        // whether this character is escaped and clear the flag once, up front.
        bool wasEscaped = afterBackslash;
        afterBackslash = false;

        switch (current)
        {
            case '[':
            case ']':
                if (!inQuotes && !wasEscaped)
                {
                    inBrackets = current == '[';
                }
                break;

            case '\\':
                // A backslash escapes the next character unless it is itself escaped.
                afterBackslash = !wasEscaped;
                break;

            case '"':
            case '\'':
                if (wasEscaped)
                {
                    break;
                }

                if (!inQuotes)
                {
                    inQuotes = true;
                    openingQuote = current;
                }
                else if (openingQuote == current)
                {
                    inQuotes = false;
                }
                break;

            default:
                if (current == sep && !inQuotes && !wasEscaped && !inBrackets)
                {
                    yield return str.Substring(segmentStart, pos - segmentStart);
                    segmentStart = pos + 1;
                }
                break;
        }
    }

    // Whatever follows the last accepted separator (possibly an empty string).
    yield return str.Substring(segmentStart);
}
Run Code Online (Sandbox Code Playgroud)
我写了这个方法,用来按逗号分割字符串,但只在逗号不位于 [] 内、不在引号内、且没有被转义时才进行分割。这个问题本身就很棘手,还是我采用了笨办法?
输入:
// Sample input mixing quoted commas, bracketed commas, quotes inside brackets,
// and escaped quotes inside a quoted section; each resulting piece is printed on its own line.
const string sampleInput = "\"comma, in quotes\", comma[in,brackets], comma[in \"quotes, and brackets\"], \"woah, 'nelly,' \\\"now you,re [talking, crazy\\\"\"";
foreach (string piece in SharpQuery.SplitCommas(sampleInput))
{
    Console.WriteLine(piece);
}
Run Code Online (Sandbox Code Playgroud)
预期输出:
"comma, in quotes"
comma[in,brackets]
comma[in "quotes, and brackets"]
"woah, 'nelly,' \"now you,re [talking, crazy\""
Run Code Online (Sandbox Code Playgroud)
你保存自动机状态的方式有点别扭。这种情况下,我会使用单个变量或一个栈,这样当前状态始终就是 stateStack.Peek():既易于阅读,也易于处理嵌套状态。
编辑:下面是一个简单的示例。相信你可以对它进行扩展,加入错误处理和规则的细节。
/// <summary>
/// States of the splitting automaton. The parser keeps these on a stack; the state on top
/// of the stack is the current one.
/// </summary>
enum ParserState
{
    /// <summary>Plain text; separators found here split the input.</summary>
    Text,

    /// <summary>Inside a <c>[ ... ]</c> section; separators are ignored.</summary>
    Bracketed,

    /// <summary>Inside a quoted section; separators and brackets are ignored.</summary>
    Quoted,

    /// <summary>The previous character was a backslash; the next character is taken literally.</summary>
    EscapChar,
}

/// <summary>
/// Lazily splits <paramref name="str"/> on <paramref name="sep"/>, skipping separators that
/// are inside single or double quotes, inside (possibly nested) <c>[ ]</c> brackets, or
/// directly preceded by a backslash.
/// </summary>
/// <param name="str">The text to split.</param>
/// <param name="sep">The separator character.</param>
/// <returns>
/// The pieces between accepted separators, in order. Pieces are not trimmed and keep their
/// quotes, brackets and backslashes. At least one piece (possibly empty) is always produced.
/// </returns>
/// <remarks>
/// Inside a quoted section only a backslash escape and the matching closing quote are
/// significant; in particular a <c>[</c> inside quotes does not open a bracketed section.
/// </remarks>
internal static IEnumerable<string> Split(string str, char sep)
{
    int lastIdx = 0;

    // Quote character that opened the current quoted section. A single variable is enough
    // because quoted sections cannot nest: nothing inside quotes can push another Quoted state.
    char openQuote = '\0';

    Stack<ParserState> state = new Stack<ParserState>();
    state.Push(ParserState.Text);

    for (int i = 0; i < str.Length; i++)
    {
        char c = str[i];
        ParserState s = state.Peek();

        if (s == ParserState.EscapChar)
        {
            // The escaped character is consumed literally, whatever it is.
            state.Pop();
        }
        else if (c == '\\')
        {
            state.Push(ParserState.EscapChar);
        }
        else if (s == ParserState.Quoted)
        {
            // Only the quote character that opened the section can close it;
            // brackets, separators and the other quote character are plain content.
            if (c == openQuote)
            {
                state.Pop();
            }
        }
        else if (c == '"' || c == '\'')
        {
            openQuote = c;
            state.Push(ParserState.Quoted);
        }
        else if (c == '[')
        {
            // Pushing (rather than setting a flag) lets brackets nest.
            state.Push(ParserState.Bracketed);
        }
        else if (s == ParserState.Bracketed && c == ']')
        {
            state.Pop();
        }
        else if (s == ParserState.Text && c == sep)
        {
            yield return str.Substring(lastIdx, i - lastIdx);
            lastIdx = i + 1;
        }
    }

    // Whatever follows the last accepted separator (possibly an empty string).
    yield return str.Substring(lastIdx);
}
Run Code Online (Sandbox Code Playgroud)