Ank*_*esh 3 c# regex string-split
我想按空白字符分割一个字符串,但双引号("text")或单引号('text')内的文本不应被拆分.
我正在使用这个函数:
/// <summary>
/// Splits a keyword expression into tokens separated by spaces, keeping a
/// token that starts with a double or single quote together up to its
/// closing delimiter.
/// </summary>
/// <remarks>
/// Behaviour visible in this method:
/// <list type="bullet">
/// <item>Leading/trailing double quotes are trimmed from every collected token;
/// single quotes are left in place.</item>
/// <item>An input that contains no space at all is returned unchanged as a
/// single element (any quotes included).</item>
/// <item>A closing delimiter that is immediately preceded by a backslash is
/// skipped and the search continues with the next one.</item>
/// <item>When no closing delimiter is found, the rest of the input is taken
/// as one token instead of failing with an index error.</item>
/// </list>
/// NOTE(review): BACKSLASH, BACKSHASH_QUOTE and SPACE are declared outside this
/// block. From the way they are used they presumably hold the double-quote
/// delimiter, the single-quote delimiter and a space respectively - confirm
/// against their declarations.
/// </remarks>
/// <param name="keywordExpressionValue">
/// Expression to split. <c>null</c>, empty or whitespace-only input yields an
/// empty array.
/// </param>
/// <param name="isUniqueKeywordReq">
/// When <c>true</c>, a token that has already been collected is not added again.
/// </param>
/// <returns>The extracted tokens in order of appearance.</returns>
public static string[] ParseKeywordExpression(string keywordExpressionValue, bool isUniqueKeywordReq)
{
    // The null check has to come before Trim(); otherwise a null argument
    // throws before it can be handled.
    if (keywordExpressionValue == null)
    {
        return new string[0];
    }

    keywordExpressionValue = keywordExpressionValue.Trim();
    if (keywordExpressionValue.Length == 0)
    {
        return new string[0];
    }

    // No space at all: the whole (trimmed) input is the only token.
    int idx = keywordExpressionValue.IndexOf(" ");
    if (idx == -1)
    {
        return new string[] { keywordExpressionValue };
    }

    int count = keywordExpressionValue.Length;
    ArrayList extractedList = new ArrayList();
    while (count > 0)
    {
        // At this point idx is the position of the first space in the remaining
        // text. For a quoted token it is moved to just past the closing
        // delimiter so that spaces inside the quotes do not split the token.
        if (keywordExpressionValue[0] == '"')
        {
            int temp = keywordExpressionValue.IndexOf(BACKSLASH, 1, keywordExpressionValue.Length - 1);
            // Skip delimiters escaped with a backslash; stop when none is left.
            while (temp != -1 && keywordExpressionValue[temp - 1] == '\\')
            {
                temp = keywordExpressionValue.IndexOf(BACKSLASH, temp + 1, keywordExpressionValue.Length - temp - 1);
            }
            // Unterminated quote: consume the rest of the input as one token.
            idx = temp == -1 ? keywordExpressionValue.Length : temp + 1;
        }
        else if (keywordExpressionValue[0] == '\'')
        {
            int temp = keywordExpressionValue.IndexOf(BACKSHASH_QUOTE, 1, keywordExpressionValue.Length - 1);
            // Skip delimiters escaped with a backslash; stop when none is left.
            while (temp != -1 && keywordExpressionValue[temp - 1] == '\\')
            {
                temp = keywordExpressionValue.IndexOf(BACKSHASH_QUOTE, temp + 1, keywordExpressionValue.Length - temp - 1);
            }
            // Unterminated quote: consume the rest of the input as one token.
            idx = temp == -1 ? keywordExpressionValue.Length : temp + 1;
        }

        // Cut the current token off the front and keep the trimmed remainder.
        string token = keywordExpressionValue.Substring(0, idx).Trim('"');
        keywordExpressionValue = keywordExpressionValue.Substring(idx, count - idx).Trim();

        if (!isUniqueKeywordReq || !extractedList.Contains(token))
        {
            extractedList.Add(token);
        }

        count = keywordExpressionValue.Length;
        idx = keywordExpressionValue.IndexOf(SPACE);
        if (idx == -1)
        {
            // No separator left: whatever remains is the final token.
            string last = keywordExpressionValue.Trim('"', ' ');
            if (last.Length > 0 && (!isUniqueKeywordReq || !extractedList.Contains(last)))
            {
                extractedList.Add(last);
            }
            break;
        }
    }

    return (string[])extractedList.ToArray(typeof(string));
}
Run Code Online (Sandbox Code Playgroud)
有没有其他方法可以做到这一点,或者这个函数还能优化吗?
例如,我希望拆分字符串
%ABC %% aasdf%aalasdjjfas"c:\ Document and Setting\Program Files\abc.exe"
至
%ABC %%
aasdf%
aalasdjjfas
"c:\ Document and Setting\Program Files\abc.exe"
最简单的正则表达式,处理单引号和双引号:
("((\\")|([^"]))*")|('((\\')|([^']))*')|(\S+)
// Tokenizer pattern: a double-quoted token, or a single-quoted token, or a run
// of non-whitespace characters (alternatives are tried in that order).
var tokenPattern = new Regex(@"(""((\\"")|([^""]))*"")|('((\\')|([^']))*')|(\S+)");

// Collect the full text of every match, in the order the matches occur.
foreach (Match token in tokenPattern.Matches(inputstring))
{
    extractedList.Add(token.Value);
}
Run Code Online (Sandbox Code Playgroud)
所以基本上四到五行代码就足够了.
表达式说明:
Main structure:
("((\\")|([^"]))*") Double-quoted token
| , or
('((\\')|([^']))*') single-quoted token
| , or
(\S+) any group of non-space characters
Double-quoted token:
( Group starts
" Initial double-quote
( Inner group starts
(\\") Either a backslash followed by a double-quote
| , or
([^"]) any non-double-quote character
)* The inner group repeats any number of times (or zero)
" Ending double-quote
)
Single-quoted token:
( Group starts
' Initial single-quote
( Inner group starts
(\\') Either a backslash followed by a single-quote
| , or
([^']) any non-single-quote character
)* The inner group repeats any number of times (or zero)
' Ending single-quote
)
Non-space characters:
( Group starts
\S Non-white-space character
+ , repeated at least once
) Group ends
Run Code Online (Sandbox Code Playgroud)