在C#中,如何使用Regex类解析以逗号分隔的值,其中某些值可能是带引号且自身包含逗号的字符串?
using System ;
using System.Text.RegularExpressions ;
/// <summary>
/// Sample program that runs a regular expression over a comma-separated string
/// containing quoted values and prints every match.
/// </summary>
class Example
{
    /// <summary>
    /// Prints the sample string, then each regex match on its own line,
    /// prefixed with a zero-based counter and a tab.
    /// </summary>
    public static void Main()
    {
        string myString = "cat,dog,\"0 = OFF, 1 = ON\",lion,tiger,'R = red, G = green, B = blue',bear";
        Console.WriteLine("\nmyString is ...\n\t" + myString + "\n");

        // Alternatives, tried in order at each position:
        //   1. text between ",<quote>" and "<quote>,"
        //   2. text from the start of the string up to the first comma
        //   3. text between two commas
        //   4. text from a comma to the end of the string
        Regex regex = new Regex("(?<=,(\"|\')).*?(?=(\"|\'),)|(^.*?(?=,))|((?<=,).*?(?=,))|((?<=,).*?$)");

        int index = 0;
        for (Match current = regex.Match(myString); current.Success; current = current.NextMatch())
        {
            Console.WriteLine(index + " \t" + current);
            index++;
        }
    }
}
Run Code Online (Sandbox Code Playgroud)
输出(部分)如下所示:
0 cat
1 dog
2 "0 = OFF
3 1 = ON"
4 lion
5 tiger
6 'R = red
7 G = green
8 B = blue'
9 bear
Run Code Online (Sandbox Code Playgroud)
但是,所需的输出是:
0 cat
1 dog
2 0 = OFF, 1 = ON
3 lion
4 tiger
5 R = red, G = green, B = blue
6 bear
Run Code Online (Sandbox Code Playgroud)
CMS*_*CMS 23
试试这个正则表达式:
"[^"\r\n]*"|'[^'\r\n]*'|[^,\r\n]*
Run Code Online (Sandbox Code Playgroud)
// Each match is one of: a double-quoted run, a single-quoted run, or a run of
// characters that stops at the next comma or line break.
Regex fieldPattern = new Regex(@"""[^""\r\n]*""|'[^'\r\n]*'|[^,\r\n]*");
for (Match field = fieldPattern.Match(input); field.Success; field = field.NextMatch())
{
    Console.WriteLine(field.Value);
}
Run Code Online (Sandbox Code Playgroud)
输出:
注意:此正则表达式解决方案适用于您的情况,但我建议您使用像FileHelpers这样的专用库.
Jud*_*ngo 21
为什么不听从专家的建议:不要自己编写CSV解析器.
你的第一个想法是,"我需要在引号内处理逗号."
你的下一个想法是,"哦,糟糕,我还需要处理引号内的引号.转义引号.双引号.单引号......"
这是一条通向疯狂的道路.不要自己写.找一个具有广泛单元测试覆盖率的库,它涵盖了所有棘手的情况,并且已经替你经历了这些磨难.对于.NET,请使用免费且开源的FileHelpers库.
它不是正则表达式,但我使用Microsoft.VisualBasic.FileIO.TextFieldParser来处理csv文件.是的,在C#应用程序中添加对Microsoft.VisualBasic的引用可能会有点奇怪,甚至可能有点不干净,但是,嘿,它管用.
啊,RegEx.现在你有两个问题.;)
我使用了一个标记器/解析器,因为它非常简单,更重要的是,它更易于阅读以便以后进行维护.
这有效,例如:
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
/// <summary>
/// Sample program that splits a comma-separated string with <c>CsvParser</c>
/// and prints each field with its zero-based position.
/// </summary>
class Program
{
    /// <summary>
    /// Prints the sample string, then every parsed field as "index: value",
    /// and finally waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string myString = "cat,dog,\"0 = OFF, 1 = ON\",lion,tiger,'R = red, G = green, B = blue',bear";
        Console.WriteLine("\nmyString is ...\n\t" + myString + "\n");
        CsvParser parser = new CsvParser(myString);
        Int32 lineNumber = 0;
        foreach (string s in parser)
        {
            Console.WriteLine(lineNumber + ": " + s);
            // Advance the counter; without this every field was labelled "0".
            lineNumber++;
        }
        Console.ReadKey();
    }
}
/// <summary>Kinds of token produced while scanning comma-separated text.</summary>
internal enum TokenType
{
    /// <summary>A comma character.</summary>
    Comma = 0,

    /// <summary>A single-quote or double-quote character.</summary>
    Quote = 1,

    /// <summary>A run of characters containing neither commas nor quotes.</summary>
    Value = 2
}
/// <summary>
/// Pairs a token kind with the text that token was read from.
/// Both parts are fixed at construction time.
/// </summary>
internal class Token
{
    private readonly TokenType _type;
    private readonly String _value;

    /// <summary>Creates a token of the given kind carrying the given text.</summary>
    /// <param name="type">The kind of token.</param>
    /// <param name="value">The text of the token.</param>
    public Token(TokenType type, string value)
    {
        _type = type;
        _value = value;
    }

    /// <summary>The text of the token.</summary>
    public String Value
    {
        get { return _value; }
    }

    /// <summary>The kind of token.</summary>
    public TokenType Type
    {
        get { return _type; }
    }
}
/// <summary>
/// Splits text read from a <see cref="TextReader"/> into comma, quote and value tokens.
/// </summary>
/// <remarks>
/// Input is read one line at a time with <see cref="TextReader.ReadLine"/>, so line
/// terminators never appear inside a token and no token marks a line boundary.
/// Tokens are produced lazily and consume the underlying reader as they go.
/// </remarks>
internal class StreamTokenizer : IEnumerable<Token>
{
    private readonly TextReader _reader;

    /// <summary>Creates a tokenizer over the given reader.</summary>
    /// <param name="reader">The source of the text to tokenize.</param>
    public StreamTokenizer(TextReader reader)
    {
        _reader = reader;
    }

    /// <summary>
    /// Reads the remaining lines of the reader and yields their tokens in order:
    /// a <see cref="TokenType.Quote"/> for each single or double quote, a
    /// <see cref="TokenType.Comma"/> for each comma, and a <see cref="TokenType.Value"/>
    /// for each non-empty run of other characters.
    /// </summary>
    /// <returns>An enumerator over the tokens.</returns>
    public IEnumerator<Token> GetEnumerator()
    {
        String line;
        StringBuilder value = new StringBuilder();
        while ((line = _reader.ReadLine()) != null)
        {
            foreach (Char c in line)
            {
                switch (c)
                {
                    case '\'':
                    case '"':
                        // A delimiter ends the pending value, if any.
                        if (value.Length > 0)
                        {
                            yield return new Token(TokenType.Value, value.ToString());
                            value.Length = 0;
                        }
                        yield return new Token(TokenType.Quote, c.ToString());
                        break;
                    case ',':
                        if (value.Length > 0)
                        {
                            yield return new Token(TokenType.Value, value.ToString());
                            value.Length = 0;
                        }
                        yield return new Token(TokenType.Comma, c.ToString());
                        break;
                    default:
                        value.Append(c);
                        break;
                }
            }
            // Flush the text that trails the last delimiter on the line (thanks, dpan).
            if (value.Length > 0)
            {
                yield return new Token(TokenType.Value, value.ToString());
                // Clear the buffer so this text is not emitted again as a prefix
                // of the first value on the next line.
                value.Length = 0;
            }
        }
    }

    /// <summary>Non-generic counterpart of <see cref="GetEnumerator"/>.</summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
/// <summary>
/// Enumerates the fields of comma-separated text. A field may be wrapped in single
/// or double quotes, in which case commas inside the quotes belong to the field and
/// the wrapping quotes are removed.
/// </summary>
/// <remarks>
/// Fields are produced lazily from a <see cref="StreamTokenizer"/>, which consumes the
/// underlying reader. Doubled quotes are not treated as an escape sequence, and an
/// unterminated quote simply extends the field to the end of the input.
/// </remarks>
internal class CsvParser : IEnumerable<String>
{
    private readonly StreamTokenizer _tokenizer;

    /// <summary>Creates a parser that reads text from a stream via a <see cref="StreamReader"/>.</summary>
    /// <param name="data">The stream containing the comma-separated text.</param>
    public CsvParser(Stream data)
    {
        _tokenizer = new StreamTokenizer(new StreamReader(data));
    }

    /// <summary>Creates a parser over an in-memory string.</summary>
    /// <param name="data">The comma-separated text.</param>
    public CsvParser(String data)
    {
        _tokenizer = new StreamTokenizer(new StringReader(data));
    }

    /// <summary>
    /// Yields each field in order. Empty fields are yielded as empty strings,
    /// including a trailing one; input that produces no tokens yields nothing.
    /// </summary>
    /// <returns>An enumerator over the field values.</returns>
    /// <exception cref="InvalidOperationException">
    /// The tokenizer produced a token whose type is not handled here.
    /// </exception>
    public IEnumerator<string> GetEnumerator()
    {
        // Text of the quote character that opened the current quoted section,
        // or null while outside quotes. Remembering which character opened the
        // section lets the other kind of quote appear inside it as plain text
        // (for example an apostrophe inside a double-quoted field).
        String openQuote = null;
        // True once any token has been read: n unquoted commas delimit n + 1
        // fields, so the final field must be emitted even when it is empty.
        Boolean sawToken = false;
        StringBuilder result = new StringBuilder();
        foreach (Token token in _tokenizer)
        {
            sawToken = true;
            switch (token.Type)
            {
                case TokenType.Comma:
                    if (openQuote != null)
                    {
                        // A comma inside quotes is part of the field.
                        result.Append(token.Value);
                    }
                    else
                    {
                        yield return result.ToString();
                        result.Length = 0;
                    }
                    break;
                case TokenType.Quote:
                    if (openQuote == null)
                    {
                        openQuote = token.Value;
                    }
                    else if (openQuote == token.Value)
                    {
                        openQuote = null;
                    }
                    else
                    {
                        // The other kind of quote inside a quoted section is literal text.
                        result.Append(token.Value);
                    }
                    break;
                case TokenType.Value:
                    result.Append(token.Value);
                    break;
                default:
                    throw new InvalidOperationException("Unknown token type: " + token.Type);
            }
        }
        if (sawToken)
        {
            yield return result.ToString();
        }
    }

    /// <summary>Non-generic counterpart of <see cref="GetEnumerator"/>.</summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
Run Code Online (Sandbox Code Playgroud)
补充一下我今天早上写出来的解决方案.
// A field starts at the beginning of the line or right after a comma, and is
// either a double-quoted run (where "" stands for an embedded quote) or a
// possibly empty run of non-comma characters.
Regex regex = new Regex("(?<=^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)");
MatchCollection fields = regex.Matches("<-- input line -->");
for (int i = 0; i < fields.Count; i++)
{
    string s = fields[i].Value;
}
Run Code Online (Sandbox Code Playgroud)
如您所见,您需要对每一行调用regex.Matches().它会返回一个MatchCollection,其中的项数与列数相同.显然,每个匹配项的Value属性就是解析出的值.
这仍在完善之中,但它可以顺利解析如下的CSV字符串:
2,3.03,"Hello, my name is ""Joshua""",A,B,C,,,D
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
42958 次 |
| 最近记录: |