Tags: .net, c#, memory-management, csvhelper
When I try to write a very large amount of data (a list with 300,000 rows and more) to a memory stream using CsvHelper, it throws the exception "System.IO.IOException: Stream was too long."
The data class is rather big, with about 30 properties, so each record in the file has about 30 columns.
This is the actual writing code where the exception is thrown (by the way, this code is based on an answer from the author of the CsvHelper library):
using (var memoryStream = new MemoryStream())
{
    using (var streamWriter = new StreamWriter(memoryStream, encoding ?? Encoding.ASCII))
    {
        var csvWriter = new CsvWriter(streamWriter, GetConfiguration(delimiter, mappingClassType, mappingActions));
        csvWriter.WriteRecords(data); // data is IEnumerable<T> and has more than 300k records
        streamWriter.Flush();
        return memoryStream.ToArray();
    }
}
Then I save the resulting byte array to a file:
File.WriteAllBytes(filePath, resultedBytesArray);
Note that the same code works fine when I write 100,000 records to the file (in that case the file is about 1GB in size). By the way, my goal is to write 600,000 records.
Here is the relevant part of the stack trace for this problem:
System.IO.IOException: Stream was too long.
   at System.IO.MemoryStream.Write(Byte[] buffer, Int32 offset, Int32 count)
   at System.IO.StreamWriter.Flush(Boolean flushStream, Boolean flushEncoder)
   at System.IO.StreamWriter.Write(Char[] buffer, Int32 index, Int32 count)
   at CsvHelper.CsvWriter.NextRecord() in C:\Users\Josh\Projects\CsvHelper\src\CsvHelper\CsvWriter.cs:line 290
   at CsvHelper.CsvWriter.WriteRecords(IEnumerable records) in C:\Users\Josh\Projects\CsvHelper\src\CsvHelper\CsvWriter.cs:line 490
   at FileExport.Csv.CsvDocument.Create[T](IEnumerable`1 data, String delimiter, Encoding encoding, Type mappingClassType, IDictionary`2 mappingActions) in d:\Dev\DrugDevExport\FileExport\Csv\CsvDocument.cs:line 33
As far as I can see, the basic way to reach my goal and avoid this problem is to split the list of data into chunks and then concatenate the results, but is there a reasonably obvious and easy solution that doesn't require significant code refactoring (like increasing the default stream/buffer size, etc.)? A minimal sketch of the chunking workaround I mean is shown below.
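This sketch reuses the same GetConfiguration helper and the encoding/delimiter/mappingClassType/mappingActions arguments from my Create method; the chunk size and the AppendChunk helper are purely illustrative:

private void WriteInChunks<T>(IEnumerable<T> data, string filePath, Encoding encoding)
{
    const int chunkSize = 100_000; // each chunk stays well below the 2GB MemoryStream cap
    var chunk = new List<T>(chunkSize);
    bool firstChunk = true;

    foreach (var record in data)
    {
        chunk.Add(record);
        if (chunk.Count < chunkSize)
            continue;
        AppendChunk(chunk, filePath, encoding, firstChunk);
        chunk.Clear();
        firstChunk = false;
    }
    if (chunk.Count > 0)
        AppendChunk(chunk, filePath, encoding, firstChunk);
}

private void AppendChunk<T>(List<T> records, string filePath, Encoding encoding, bool firstChunk)
{
    using (var memoryStream = new MemoryStream())
    {
        // leaveOpen: true so the MemoryStream is still readable after the writer is disposed
        using (var streamWriter = new StreamWriter(memoryStream, encoding ?? Encoding.ASCII, 1024, true))
        {
            var csvWriter = new CsvWriter(streamWriter, GetConfiguration(delimiter, mappingClassType, mappingActions));
            // note: every chunk after the first would need the header suppressed,
            // e.g. HasHeaderRecord = false in the configuration
            csvWriter.WriteRecords(records);
        }
        // append this chunk's bytes to the output file
        using (var fileStream = new FileStream(filePath, firstChunk ? FileMode.Create : FileMode.Append))
            memoryStream.WriteTo(fileStream);
    }
}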
Also keep in mind that I have already applied two possible solutions in order to prevent an "Out of Memory" exception on the objects.
Thanks in advance.
Many thanks to Spender: as he mentioned in a comment below the question, it was fixed by replacing the MemoryStream with a FileStream and writing the data directly to the file.
In my case, writing the data to a MemoryStream and then copying it to a file again for no reason was absolutely useless. Thanks again to him for opening my eyes to that fact.
My fixed code is below.
using (var fileStream = File.Create(path))
{
    using (var streamWriter = new StreamWriter(fileStream, encoding ?? Encoding.ASCII))
    {
        var csvWriter = new CsvWriter(streamWriter, GetConfiguration(delimiter, mappingClassType, mappingActions));
        csvWriter.WriteRecords(data);
    }
}
Now it works with any amount of input data.
You can work around the 2GB limitation by writing your own MemoryStream. The class below stores its data in fixed-size pages held in a jagged array, so no single allocation ever approaches the 2GB cap that MemoryStream hits by backing everything with one byte array:
// A seekable in-memory stream that keeps its data in PAGE_SIZE pages,
// so no single array ever exceeds the 2GB single-object limit.
class HugeMemoryStream : Stream
{
    #region Fields

    private const int PAGE_SIZE = 1024000;
    private const int ALLOC_STEP = 1024;

    private byte[][] _streamBuffers;

    private int _pageCount = 0;
    private long _allocatedBytes = 0;

    private long _position = 0;
    private long _length = 0;

    #endregion Fields

    #region Internals

    private int GetPageCount(long length)
    {
        int pageCount = (int)(length / PAGE_SIZE) + 1;

        if ((length % PAGE_SIZE) == 0)
            pageCount--;

        return pageCount;
    }

    private void ExtendPages()
    {
        if (_streamBuffers == null)
        {
            _streamBuffers = new byte[ALLOC_STEP][];
        }
        else
        {
            // grow the page table by ALLOC_STEP slots at a time
            byte[][] streamBuffers = new byte[_streamBuffers.Length + ALLOC_STEP][];
            Array.Copy(_streamBuffers, streamBuffers, _streamBuffers.Length);
            _streamBuffers = streamBuffers;
        }

        _pageCount = _streamBuffers.Length;
    }

    private void AllocSpaceIfNeeded(long value)
    {
        if (value < 0)
            throw new InvalidOperationException("AllocSpaceIfNeeded < 0");

        if (value == 0)
            return;

        int currentPageCount = GetPageCount(_allocatedBytes);
        int neededPageCount = GetPageCount(value);

        while (currentPageCount < neededPageCount)
        {
            if (currentPageCount == _pageCount)
                ExtendPages();

            _streamBuffers[currentPageCount++] = new byte[PAGE_SIZE];
        }

        _allocatedBytes = (long)currentPageCount * PAGE_SIZE;

        value = Math.Max(value, _length);

        if (_position > (_length = value))
            _position = _length;
    }

    #endregion Internals

    #region Stream

    public override bool CanRead => true;
    public override bool CanSeek => true;
    public override bool CanWrite => true;

    public override long Length => _length;

    public override long Position
    {
        get { return _position; }
        set
        {
            if (value > _length)
                throw new InvalidOperationException("Position > Length");
            else if (value < 0)
                throw new InvalidOperationException("Position < 0");
            else
                _position = value;
        }
    }

    public override void Flush() { }

    public override int Read(byte[] buffer, int offset, int count)
    {
        int currentPage = (int)(_position / PAGE_SIZE);
        int currentOffset = (int)(_position % PAGE_SIZE);
        int currentLength = PAGE_SIZE - currentOffset;

        long startPosition = _position;

        // clamp the read to the data actually in the stream
        if (startPosition + count > _length)
            count = (int)(_length - startPosition);

        while (count != 0 && _position < _length)
        {
            if (currentLength > count)
                currentLength = count;

            Array.Copy(_streamBuffers[currentPage++], currentOffset, buffer, offset, currentLength);

            offset += currentLength;
            _position += currentLength;
            count -= currentLength;

            currentOffset = 0;
            currentLength = PAGE_SIZE;
        }

        return (int)(_position - startPosition);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        switch (origin)
        {
            case SeekOrigin.Begin:
                break;
            case SeekOrigin.Current:
                offset += _position;
                break;
            case SeekOrigin.End:
                // per the Stream contract the offset is relative to the end
                // (normally negative), so it is added to the length
                offset += _length;
                break;
            default:
                throw new ArgumentOutOfRangeException("origin");
        }

        return Position = offset;
    }

    public override void SetLength(long value)
    {
        if (value < 0)
            throw new InvalidOperationException("SetLength < 0");

        if (value == 0)
        {
            _streamBuffers = null;
            _allocatedBytes = _position = _length = 0;
            _pageCount = 0;
            return;
        }

        int currentPageCount = GetPageCount(_allocatedBytes);
        int neededPageCount = GetPageCount(value);

        // Removes unused buffers if decreasing stream length
        while (currentPageCount > neededPageCount)
            _streamBuffers[--currentPageCount] = null;

        // keep the allocation bookkeeping in sync with the pages just released,
        // otherwise a later grow would skip re-allocating them
        _allocatedBytes = (long)currentPageCount * PAGE_SIZE;

        AllocSpaceIfNeeded(value);

        if (_position > (_length = value))
            _position = _length;
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        int currentPage = (int)(_position / PAGE_SIZE);
        int currentOffset = (int)(_position % PAGE_SIZE);
        int currentLength = PAGE_SIZE - currentOffset;

        AllocSpaceIfNeeded(_position + count);

        while (count != 0)
        {
            if (currentLength > count)
                currentLength = count;

            Array.Copy(buffer, offset, _streamBuffers[currentPage++], currentOffset, currentLength);

            offset += currentLength;
            _position += currentLength;
            count -= currentLength;

            currentOffset = 0;
            currentLength = PAGE_SIZE;
        }
    }

    #endregion Stream
}
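And a quick test that round-trips a gzip file through the stream and then compares the contents line by line against the source: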
using ICSharpCode.SharpZipLib.GZip;
using System;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

// HugeMemoryStream test
string filename = @"gzip-filename.gz";

HugeMemoryStream ms = new HugeMemoryStream();

// decompress the gzip file line by line into the HugeMemoryStream
using (StreamWriter sw = new StreamWriter(ms, Encoding.UTF8, 16384, true))
using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
using (GZipInputStream gzipStream = new GZipInputStream(fs))
using (StreamReader sr = new StreamReader(gzipStream, Encoding.UTF8, false, 16384, true))
{
    for (string line = sr.ReadLine(); line != null; line = sr.ReadLine())
        sw.WriteLine(line);
}

ms.Seek(0, SeekOrigin.Begin);

// re-read the gzip file and verify the stream contents match line for line
using (StreamReader srm = new StreamReader(ms, Encoding.UTF8, false, 16384, true))
using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
using (GZipInputStream gzipStream = new GZipInputStream(fs))
using (StreamReader sr = new StreamReader(gzipStream, Encoding.UTF8, false, 16384, true))
{
    for (string line1 = sr.ReadLine(), line2 = srm.ReadLine(); line1 != null; line1 = sr.ReadLine(), line2 = srm.ReadLine())
    {
        if (line1 != line2)
            throw new InvalidDataException();
    }
}
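Tying it back to the question, here is a sketch of how the class could slot into the original code; it assumes the same GetConfiguration helper and arguments from the question's Create method:

using (var memoryStream = new HugeMemoryStream())
{
    // leaveOpen: true so the stream survives the writer's disposal
    using (var streamWriter = new StreamWriter(memoryStream, encoding ?? Encoding.ASCII, 1024, true))
    {
        var csvWriter = new CsvWriter(streamWriter, GetConfiguration(delimiter, mappingClassType, mappingActions));
        csvWriter.WriteRecords(data);
    }

    // copy the stream straight to the file; ToArray() is not an option here,
    // because the resulting byte[] itself could not exceed 2GB
    memoryStream.Seek(0, SeekOrigin.Begin);
    using (var fileStream = File.Create(filePath))
        memoryStream.CopyTo(fileStream);
}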